From 3c2dd39c150d48c1427eb332ef810b24f834aaf3 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 15 Mar 2026 19:58:42 +0800
Subject: [PATCH 01/44] feat(sql): migrate to DataFusion-based streaming SQL
 planner

---
 Cargo.lock                                    | 2422 ++++++++++++++++-
 Cargo.toml                                    |   28 +
 src/coordinator/analyze/analyzer.rs           |   11 +-
 src/coordinator/coordinator.rs                |    4 +-
 src/coordinator/execution/executor.rs         |   14 +
 src/coordinator/mod.rs                        |    2 +-
 src/coordinator/plan/logical_plan_visitor.rs  |   35 +-
 src/coordinator/plan/mod.rs                   |    2 +
 .../plan/streaming_sql_plan.rs}               |   30 +-
 src/coordinator/plan/visitor.rs               |    8 +-
 src/coordinator/statement/mod.rs              |    2 +
 src/coordinator/statement/streaming_sql.rs    |   39 +
 src/coordinator/statement/visitor.rs          |    7 +
 src/datastream/logical.rs                     |  317 +++
 src/datastream/mod.rs                         |    2 +
 src/datastream/optimizers.rs                  |  100 +
 src/lib.rs                                    |    1 +
 src/server/handler.rs                         |   10 +-
 src/sql/grammar.pest                          |  134 -
 src/sql/mod.rs                                |    7 +-
 src/sql/parser/sql_parser.rs                  |  249 --
 src/sql/planner/extension/aggregate.rs        |  348 +++
 src/sql/planner/extension/join.rs             |   61 +
 src/sql/planner/extension/key_calculation.rs  |  138 +
 src/sql/planner/extension/mod.rs              |  153 ++
 src/sql/planner/extension/projection.rs       |   91 +
 src/sql/planner/extension/remote_table.rs     |   71 +
 src/sql/planner/extension/watermark_node.rs   |  110 +
 src/sql/planner/extension/window_fn.rs        |   62 +
 src/sql/planner/mod.rs                        |  355 +++
 src/sql/planner/parse.rs                      |  183 ++
 src/sql/planner/plan/aggregate.rs             |  275 ++
 src/sql/planner/plan/join.rs                  |  242 ++
 src/sql/planner/plan/mod.rs                   |  449 +++
 src/sql/planner/plan/window_fn.rs             |  178 ++
 src/sql/planner/schemas.rs                    |   59 +
 src/sql/planner/sql_to_plan.rs                |   22 +
 src/sql/planner/types.rs                      |  513 ++++
 38 files changed, 6199 insertions(+), 535 deletions(-)
 rename src/{sql/parser/mod.rs => coordinator/plan/streaming_sql_plan.rs} (52%)
 create mode 100644 src/coordinator/statement/streaming_sql.rs
 create mode 100644 src/datastream/logical.rs
 create mode 100644 src/datastream/mod.rs
 create mode 100644 src/datastream/optimizers.rs
 delete mode 100644 src/sql/grammar.pest
 delete mode 100644 src/sql/parser/sql_parser.rs
 create mode 100644 src/sql/planner/extension/aggregate.rs
 create mode 100644 src/sql/planner/extension/join.rs
 create mode 100644 src/sql/planner/extension/key_calculation.rs
 create mode 100644 src/sql/planner/extension/mod.rs
 create mode 100644 src/sql/planner/extension/projection.rs
 create mode 100644 src/sql/planner/extension/remote_table.rs
 create mode 100644 src/sql/planner/extension/watermark_node.rs
 create mode 100644 src/sql/planner/extension/window_fn.rs
 create mode 100644 src/sql/planner/mod.rs
 create mode 100644 src/sql/planner/parse.rs
 create mode 100644 src/sql/planner/plan/aggregate.rs
 create mode 100644 src/sql/planner/plan/join.rs
 create mode 100644 src/sql/planner/plan/mod.rs
 create mode 100644 src/sql/planner/plan/window_fn.rs
 create mode 100644 src/sql/planner/schemas.rs
 create mode 100644 src/sql/planner/sql_to_plan.rs
 create mode 100644 src/sql/planner/types.rs

diff --git a/Cargo.lock b/Cargo.lock
index 26f07400..6cf6182a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11,6 +11,12 @@ dependencies = [
  "gimli",
 ]
 
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
 [[package]]
 name = "ahash"
 version = "0.8.12"
@@ -34,6 +40,21 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
 [[package]]
 name = "allocator-api2"
 version = "0.2.21"
@@ -111,12 +132,68 @@ version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
 
+[[package]]
+name = "ar_archive_writer"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b"
+dependencies = [
+ "object",
+]
+
 [[package]]
 name = "arbitrary"
 version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
 
+[[package]]
+name = "arrayref"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
+
+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
+[[package]]
+name = "arrow"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994"
+dependencies = [
+ "arrow-arith",
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-cast 55.2.0",
+ "arrow-csv",
+ "arrow-data 55.2.0",
+ "arrow-ipc 55.2.0",
+ "arrow-json 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "arrow-ord",
+ "arrow-row",
+ "arrow-schema 55.2.0",
+ "arrow-select 55.2.0",
+ "arrow-string",
+]
+
+[[package]]
+name = "arrow-arith"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "chrono",
+ "num",
+]
+
 [[package]]
 name = "arrow-array"
 version = "52.2.0"
@@ -124,15 +201,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c"
 dependencies = [
  "ahash",
- "arrow-buffer",
- "arrow-data",
- "arrow-schema",
+ "arrow-buffer 52.2.0",
+ "arrow-data 52.2.0",
+ "arrow-schema 52.2.0",
  "chrono",
  "half",
  "hashbrown 0.14.5",
  "num",
 ]
 
+[[package]]
+name = "arrow-array"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8"
+dependencies = [
+ "ahash",
+ "arrow-buffer 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "chrono",
+ "chrono-tz",
+ "half",
+ "hashbrown 0.15.5",
+ "num",
+]
+
 [[package]]
 name = "arrow-buffer"
 version = "52.2.0"
@@ -144,34 +238,93 @@ dependencies = [
  "num",
 ]
 
+[[package]]
+name = "arrow-buffer"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d"
+dependencies = [
+ "bytes",
+ "half",
+ "num",
+]
+
 [[package]]
 name = "arrow-cast"
 version = "52.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e"
 dependencies = [
- "arrow-array",
- "arrow-buffer",
- "arrow-data",
- "arrow-schema",
- "arrow-select",
+ "arrow-array 52.2.0",
+ "arrow-buffer 52.2.0",
+ "arrow-data 52.2.0",
+ "arrow-schema 52.2.0",
+ "arrow-select 52.2.0",
+ "atoi",
+ "base64",
+ "chrono",
+ "half",
+ "lexical-core 0.8.5",
+ "num",
+ "ryu",
+]
+
+[[package]]
+name = "arrow-cast"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "arrow-select 55.2.0",
  "atoi",
  "base64",
  "chrono",
+ "comfy-table",
  "half",
- "lexical-core",
+ "lexical-core 1.0.6",
  "num",
  "ryu",
 ]
 
+[[package]]
+name = "arrow-csv"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-cast 55.2.0",
+ "arrow-schema 55.2.0",
+ "chrono",
+ "csv",
+ "csv-core",
+ "regex",
+]
+
 [[package]]
 name = "arrow-data"
 version = "52.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5"
 dependencies = [
- "arrow-buffer",
- "arrow-schema",
+ "arrow-buffer 52.2.0",
+ "arrow-schema 52.2.0",
+ "half",
+ "num",
+]
+
+[[package]]
+name = "arrow-data"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2"
+dependencies = [
+ "arrow-buffer 55.2.0",
+ "arrow-schema 55.2.0",
  "half",
  "num",
 ]
@@ -182,12 +335,96 @@ version = "52.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f"
 dependencies = [
- "arrow-array",
- "arrow-buffer",
- "arrow-cast",
- "arrow-data",
- "arrow-schema",
- "flatbuffers",
+ "arrow-array 52.2.0",
+ "arrow-buffer 52.2.0",
+ "arrow-cast 52.2.0",
+ "arrow-data 52.2.0",
+ "arrow-schema 52.2.0",
+ "flatbuffers 24.12.23",
+]
+
+[[package]]
+name = "arrow-ipc"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "flatbuffers 25.12.19",
+ "lz4_flex",
+]
+
+[[package]]
+name = "arrow-json"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-cast 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "chrono",
+ "half",
+ "indexmap 2.12.1",
+ "lexical-core 1.0.6",
+ "memchr",
+ "num",
+ "serde",
+ "serde_json",
+ "simdutf8",
+]
+
+[[package]]
+name = "arrow-json"
+version = "55.2.0"
+source = "git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson#d31f8d8f97c6e1394b52927cd8c23c14fec6ba16"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-cast 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "base64",
+ "chrono",
+ "half",
+ "indexmap 2.12.1",
+ "lexical-core 1.0.6",
+ "memchr",
+ "num",
+ "serde",
+ "serde_json",
+ "simdutf8",
+]
+
+[[package]]
+name = "arrow-ord"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "arrow-select 55.2.0",
+]
+
+[[package]]
+name = "arrow-row"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "half",
 ]
 
 [[package]]
@@ -196,6 +433,16 @@ version = "52.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8"
 
+[[package]]
+name = "arrow-schema"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "arrow-select"
 version = "52.2.0"
@@ -203,11 +450,59 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3"
 dependencies = [
  "ahash",
- "arrow-array",
- "arrow-buffer",
- "arrow-data",
- "arrow-schema",
+ "arrow-array 52.2.0",
+ "arrow-buffer 52.2.0",
+ "arrow-data 52.2.0",
+ "arrow-schema 52.2.0",
+ "num",
+]
+
+[[package]]
+name = "arrow-select"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5"
+dependencies = [
+ "ahash",
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "num",
+]
+
+[[package]]
+name = "arrow-string"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40"
+dependencies = [
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-schema 55.2.0",
+ "arrow-select 55.2.0",
+ "memchr",
  "num",
+ "regex",
+ "regex-syntax",
+]
+
+[[package]]
+name = "async-compression"
+version = "0.4.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c"
+dependencies = [
+ "bzip2",
+ "flate2",
+ "futures-core",
+ "memchr",
+ "pin-project-lite",
+ "tokio",
+ "xz2",
+ "zstd",
+ "zstd-safe",
 ]
 
 [[package]]
@@ -311,12 +606,49 @@ dependencies = [
  "tower-service",
 ]
 
+[[package]]
+name = "backtrace"
+version = "0.3.76"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
+dependencies = [
+ "addr2line",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+ "windows-link",
+]
+
+[[package]]
+name = "backtrace-ext"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50"
+dependencies = [
+ "backtrace",
+]
+
 [[package]]
 name = "base64"
 version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
+[[package]]
+name = "bigdecimal"
+version = "0.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695"
+dependencies = [
+ "autocfg",
+ "libm",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
 [[package]]
 name = "bincode"
 version = "1.3.3"
@@ -386,6 +718,41 @@ dependencies = [
  "typenum",
 ]
 
+[[package]]
+name = "bitvec"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
+dependencies = [
+ "funty",
+ "radium",
+ "tap",
+ "wyz",
+]
+
+[[package]]
+name = "blake2"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe"
+dependencies = [
+ "digest",
+]
+
+[[package]]
+name = "blake3"
+version = "1.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq",
+ "cpufeatures",
+]
+
 [[package]]
 name = "block-buffer"
 version = "0.10.4"
@@ -395,6 +762,27 @@ dependencies = [
  "generic-array",
 ]
 
+[[package]]
+name = "brotli"
+version = "8.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "5.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.19.1"
@@ -404,12 +792,27 @@ dependencies = [
  "allocator-api2",
 ]
 
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
 [[package]]
 name = "bytes"
 version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
 
+[[package]]
+name = "bzip2"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
+dependencies = [
+ "bzip2-sys",
+]
+
 [[package]]
 name = "bzip2-sys"
 version = "0.1.13+1.0.8"
@@ -429,7 +832,7 @@ dependencies = [
  "cap-primitives",
  "cap-std",
  "io-lifetimes",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -458,7 +861,7 @@ dependencies = [
  "maybe-owned",
  "rustix 1.1.3",
  "rustix-linux-procfs",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
  "winx",
 ]
 
@@ -469,7 +872,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d8144c22e24bbcf26ade86cb6501a0916c46b7e4787abdb0045a467eb1645a1d"
 dependencies = [
  "ambient-authority",
- "rand",
+ "rand 0.8.5",
 ]
 
 [[package]]
@@ -542,6 +945,26 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "chrono-tz"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3"
+dependencies = [
+ "chrono",
+ "phf 0.12.1",
+]
+
+[[package]]
+name = "chumsky"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
+dependencies = [
+ "hashbrown 0.14.5",
+ "stacker",
+]
+
 [[package]]
 name = "clang-sys"
 version = "1.8.1"
@@ -581,7 +1004,7 @@ version = "4.5.49"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
 dependencies = [
- "heck",
+ "heck 0.5.0",
  "proc-macro2",
  "quote",
  "syn",
@@ -620,6 +1043,15 @@ dependencies = [
  "thiserror 2.0.17",
 ]
 
+[[package]]
+name = "codegen_template"
+version = "0.1.0"
+source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209"
+dependencies = [
+ "unicode-xid",
+ "unscanny",
+]
+
 [[package]]
 name = "colorchoice"
 version = "1.0.4"
@@ -657,12 +1089,59 @@ dependencies = [
  "tiny-keccak",
 ]
 
+[[package]]
+name = "constant_time_eq"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
+
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "cornucopia"
+version = "0.9.0"
+source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209"
+dependencies = [
+ "chumsky",
+ "clap",
+ "codegen_template",
+ "heck 0.4.1",
+ "indexmap 2.12.1",
+ "miette",
+ "postgres",
+ "postgres-types",
+ "prettyplease",
+ "rusqlite",
+ "syn",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "cornucopia_async"
+version = "0.6.0"
+source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209"
+dependencies = [
+ "async-trait",
+ "cornucopia_client_core",
+ "deadpool-postgres",
+ "rusqlite",
+ "tokio-postgres",
+]
+
+[[package]]
+name = "cornucopia_client_core"
+version = "0.4.0"
+source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209"
+dependencies = [
+ "fallible-iterator 0.2.0",
+ "postgres-protocol",
+ "postgres-types",
+]
+
 [[package]]
 name = "cpp_demangle"
 version = "0.4.5"
@@ -757,7 +1236,7 @@ dependencies = [
  "cranelift-assembler-x64-meta",
  "cranelift-codegen-shared",
  "cranelift-srcgen",
- "heck",
+ "heck 0.5.0",
  "pulley-interpreter",
 ]
 
@@ -823,85 +1302,750 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a4e378a54e7168a689486d67ee1f818b7e5356e54ae51a1d7a53f4f13f7f8b7a"
 
 [[package]]
-name = "crc32fast"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crossterm"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
+dependencies = [
+ "bitflags 2.10.0",
+ "crossterm_winapi",
+ "document-features",
+ "parking_lot",
+ "rustix 1.1.3",
+ "winapi",
+]
+
+[[package]]
+name = "crossterm_winapi"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
+[[package]]
+name = "crypto-common"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "csv"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938"
+dependencies = [
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde_core",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "datafusion"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "arrow-ipc 55.2.0",
+ "arrow-schema 55.2.0",
+ "async-trait",
+ "bytes",
+ "bzip2",
+ "chrono",
+ "datafusion-catalog",
+ "datafusion-catalog-listing",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-datasource-csv",
+ "datafusion-datasource-json",
+ "datafusion-datasource-parquet",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-expr-common",
+ "datafusion-functions",
+ "datafusion-functions-aggregate",
+ "datafusion-functions-nested",
+ "datafusion-functions-table",
+ "datafusion-functions-window",
+ "datafusion-optimizer",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-optimizer",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "datafusion-sql",
+ "flate2",
+ "futures",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parking_lot",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.9.2",
+ "regex",
+ "sqlparser",
+ "tempfile",
+ "tokio",
+ "url",
+ "uuid",
+ "xz2",
+ "zstd",
+]
+
+[[package]]
+name = "datafusion-catalog"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "dashmap",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "datafusion-sql",
+ "futures",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parking_lot",
+ "tokio",
+]
+
+[[package]]
+name = "datafusion-catalog-listing"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "futures",
+ "log",
+ "object_store",
+ "tokio",
+]
+
+[[package]]
+name = "datafusion-common"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-ipc 55.2.0",
+ "base64",
+ "half",
+ "hashbrown 0.14.5",
+ "indexmap 2.12.1",
+ "libc",
+ "log",
+ "object_store",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "paste",
+ "recursive",
+ "sqlparser",
+ "tokio",
+ "web-time",
+]
+
+[[package]]
+name = "datafusion-common-runtime"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "futures",
+ "log",
+ "tokio",
+]
+
+[[package]]
+name = "datafusion-datasource"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "async-compression",
+ "async-trait",
+ "bytes",
+ "bzip2",
+ "chrono",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "flate2",
+ "futures",
+ "glob",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.9.2",
+ "tempfile",
+ "tokio",
+ "tokio-util",
+ "url",
+ "xz2",
+ "zstd",
+]
+
+[[package]]
+name = "datafusion-datasource-csv"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "bytes",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "futures",
+ "object_store",
+ "regex",
+ "tokio",
+]
+
+[[package]]
+name = "datafusion-datasource-json"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "bytes",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "futures",
+ "object_store",
+ "serde_json",
+ "tokio",
+]
+
+[[package]]
+name = "datafusion-datasource-parquet"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "bytes",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions-aggregate",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-optimizer",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "futures",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parking_lot",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.9.2",
+ "tokio",
+]
+
+[[package]]
+name = "datafusion-doc"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+
+[[package]]
+name = "datafusion-execution"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "dashmap",
+ "datafusion-common",
+ "datafusion-expr",
+ "futures",
+ "log",
+ "object_store",
+ "parking_lot",
+ "rand 0.9.2",
+ "tempfile",
+ "url",
+]
+
+[[package]]
+name = "datafusion-expr"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "chrono",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-expr-common",
+ "datafusion-functions-aggregate-common",
+ "datafusion-functions-window-common",
+ "datafusion-physical-expr-common",
+ "indexmap 2.12.1",
+ "paste",
+ "recursive",
+ "serde_json",
+ "sqlparser",
+]
+
+[[package]]
+name = "datafusion-expr-common"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "datafusion-common",
+ "indexmap 2.12.1",
+ "itertools 0.14.0",
+ "paste",
+]
+
+[[package]]
+name = "datafusion-functions"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "arrow-buffer 55.2.0",
+ "base64",
+ "blake2",
+ "blake3",
+ "chrono",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-expr-common",
+ "datafusion-macros",
+ "hex",
+ "itertools 0.14.0",
+ "log",
+ "md-5",
+ "rand 0.9.2",
+ "regex",
+ "sha2",
+ "unicode-segmentation",
+ "uuid",
+]
+
+[[package]]
+name = "datafusion-functions-aggregate"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "ahash",
+ "arrow",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions-aggregate-common",
+ "datafusion-macros",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "half",
+ "log",
+ "paste",
+]
+
+[[package]]
+name = "datafusion-functions-aggregate-common"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "ahash",
+ "arrow",
+ "datafusion-common",
+ "datafusion-expr-common",
+ "datafusion-physical-expr-common",
+]
+
+[[package]]
+name = "datafusion-functions-nested"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "arrow-ord",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions",
+ "datafusion-functions-aggregate",
+ "datafusion-macros",
+ "datafusion-physical-expr-common",
+ "itertools 0.14.0",
+ "log",
+ "paste",
+]
+
+[[package]]
+name = "datafusion-functions-table"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-physical-plan",
+ "parking_lot",
+ "paste",
+]
+
+[[package]]
+name = "datafusion-functions-window"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-expr",
+ "datafusion-functions-window-common",
+ "datafusion-macros",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "log",
+ "paste",
+]
+
+[[package]]
+name = "datafusion-functions-window-common"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "datafusion-common",
+ "datafusion-physical-expr-common",
+]
+
+[[package]]
+name = "datafusion-macros"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "datafusion-expr",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "datafusion-optimizer"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "chrono",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "indexmap 2.12.1",
+ "itertools 0.14.0",
+ "log",
+ "recursive",
+ "regex",
+ "regex-syntax",
+]
+
+[[package]]
+name = "datafusion-physical-expr"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "ahash",
+ "arrow",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-expr-common",
+ "datafusion-functions-aggregate-common",
+ "datafusion-physical-expr-common",
+ "half",
+ "hashbrown 0.14.5",
+ "indexmap 2.12.1",
+ "itertools 0.14.0",
+ "log",
+ "paste",
+ "petgraph 0.8.3",
+]
+
+[[package]]
+name = "datafusion-physical-expr-common"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "ahash",
+ "arrow",
+ "datafusion-common",
+ "datafusion-expr-common",
+ "hashbrown 0.14.5",
+ "itertools 0.14.0",
+]
+
+[[package]]
+name = "datafusion-physical-optimizer"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "cfg-if",
+ "arrow",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-expr-common",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "itertools 0.14.0",
+ "log",
+ "recursive",
 ]
 
 [[package]]
-name = "crossbeam-channel"
-version = "0.5.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
+name = "datafusion-physical-plan"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "crossbeam-utils",
+ "ahash",
+ "arrow",
+ "arrow-ord",
+ "arrow-schema 55.2.0",
+ "async-trait",
+ "chrono",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions-window-common",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "futures",
+ "half",
+ "hashbrown 0.14.5",
+ "indexmap 2.12.1",
+ "itertools 0.14.0",
+ "log",
+ "parking_lot",
+ "pin-project-lite",
+ "tokio",
 ]
 
 [[package]]
-name = "crossbeam-deque"
-version = "0.8.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+name = "datafusion-proto"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "crossbeam-epoch",
- "crossbeam-utils",
+ "arrow",
+ "chrono",
+ "datafusion",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-proto-common",
+ "object_store",
+ "prost",
 ]
 
 [[package]]
-name = "crossbeam-epoch"
-version = "0.9.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+name = "datafusion-proto-common"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "crossbeam-utils",
+ "arrow",
+ "datafusion-common",
+ "prost",
 ]
 
 [[package]]
-name = "crossbeam-utils"
-version = "0.8.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+name = "datafusion-session"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "dashmap",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-plan",
+ "datafusion-sql",
+ "futures",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parking_lot",
+ "tokio",
+]
 
 [[package]]
-name = "crossterm"
-version = "0.29.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
+name = "datafusion-sql"
+version = "48.0.1"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "bitflags 2.10.0",
- "crossterm_winapi",
- "document-features",
- "parking_lot",
- "rustix 1.1.3",
- "winapi",
+ "arrow",
+ "bigdecimal",
+ "datafusion-common",
+ "datafusion-expr",
+ "indexmap 2.12.1",
+ "log",
+ "recursive",
+ "regex",
+ "sqlparser",
 ]
 
 [[package]]
-name = "crossterm_winapi"
-version = "0.9.1"
+name = "deadpool"
+version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
+checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b"
 dependencies = [
- "winapi",
+ "deadpool-runtime",
+ "lazy_static",
+ "num_cpus",
+ "tokio",
 ]
 
 [[package]]
-name = "crunchy"
-version = "0.2.4"
+name = "deadpool-postgres"
+version = "0.14.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+checksum = "3d697d376cbfa018c23eb4caab1fd1883dd9c906a8c034e8d9a3cb06a7e0bef9"
+dependencies = [
+ "async-trait",
+ "deadpool",
+ "getrandom 0.2.16",
+ "tokio",
+ "tokio-postgres",
+ "tracing",
+]
 
 [[package]]
-name = "crypto-common"
-version = "0.1.7"
+name = "deadpool-runtime"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
+checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b"
 dependencies = [
- "generic-array",
- "typenum",
+ "tokio",
 ]
 
 [[package]]
@@ -930,6 +2074,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
 dependencies = [
  "block-buffer",
  "crypto-common",
+ "subtle",
 ]
 
 [[package]]
@@ -985,6 +2130,12 @@ dependencies = [
  "shared_child",
 ]
 
+[[package]]
+name = "dyn-clone"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
+
 [[package]]
 name = "either"
 version = "1.15.0"
@@ -1053,12 +2204,24 @@ version = "3.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59"
 
+[[package]]
+name = "fallible-iterator"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
+
 [[package]]
 name = "fallible-iterator"
 version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
 
+[[package]]
+name = "fallible-streaming-iterator"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
+
 [[package]]
 name = "fastrand"
 version = "2.3.0"
@@ -1073,7 +2236,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
 dependencies = [
  "cfg-if",
  "rustix 1.1.3",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -1104,6 +2267,27 @@ dependencies = [
  "rustc_version",
 ]
 
+[[package]]
+name = "flatbuffers"
+version = "25.12.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3"
+dependencies = [
+ "bitflags 2.10.0",
+ "rustc_version",
+]
+
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+ "zlib-rs",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -1133,7 +2317,7 @@ checksum = "94e7099f6313ecacbe1256e8ff9d617b75d1bcb16a6fddef94866d225a01a14a"
 dependencies = [
  "io-lifetimes",
  "rustix 1.1.3",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -1141,20 +2325,37 @@ name = "function-stream"
 version = "0.6.0"
 dependencies = [
  "anyhow",
- "arrow-array",
- "arrow-ipc",
- "arrow-schema",
+ "arrow-array 52.2.0",
+ "arrow-ipc 52.2.0",
+ "arrow-json 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson)",
+ "arrow-schema 52.2.0",
  "async-trait",
  "base64",
  "bincode",
  "clap",
+ "cornucopia",
+ "cornucopia_async",
  "crossbeam-channel",
+ "datafusion",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions",
+ "datafusion-functions-aggregate",
+ "datafusion-functions-window",
+ "datafusion-physical-expr",
+ "datafusion-physical-plan",
+ "datafusion-proto",
+ "itertools 0.14.0",
+ "jiter",
  "log",
  "lru",
  "num_cpus",
  "parking_lot",
+ "parquet 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fparquet)",
  "pest",
  "pest_derive",
+ "petgraph 0.7.1",
  "proctitle",
  "protocol",
  "rdkafka",
@@ -1162,6 +2363,8 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_yaml",
+ "sqlparser",
+ "strum",
  "thiserror 2.0.17",
  "tokio",
  "tokio-stream",
@@ -1169,6 +2372,8 @@ dependencies = [
  "tracing",
  "tracing-appender",
  "tracing-subscriber",
+ "typify",
+ "unicase",
  "uuid",
  "wasmtime",
  "wasmtime-wasi",
@@ -1178,9 +2383,9 @@ dependencies = [
 name = "function-stream-cli"
 version = "0.1.0"
 dependencies = [
- "arrow-array",
- "arrow-ipc",
- "arrow-schema",
+ "arrow-array 52.2.0",
+ "arrow-ipc 52.2.0",
+ "arrow-schema 52.2.0",
  "clap",
  "comfy-table",
  "function-stream",
@@ -1191,6 +2396,12 @@ dependencies = [
  "tonic",
 ]
 
+[[package]]
+name = "funty"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+
 [[package]]
 name = "futures"
 version = "0.3.31"
@@ -1199,6 +2410,7 @@ checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
 dependencies = [
  "futures-channel",
  "futures-core",
+ "futures-executor",
  "futures-io",
  "futures-sink",
  "futures-task",
@@ -1221,12 +2433,34 @@ version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
 
+[[package]]
+name = "futures-executor"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
 [[package]]
 name = "futures-io"
 version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
 
+[[package]]
+name = "futures-macro"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "futures-sink"
 version = "0.3.31"
@@ -1248,6 +2482,7 @@ dependencies = [
  "futures-channel",
  "futures-core",
  "futures-io",
+ "futures-macro",
  "futures-sink",
  "futures-task",
  "memchr",
@@ -1287,8 +2522,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
 dependencies = [
  "cfg-if",
+ "js-sys",
  "libc",
- "wasi",
+ "wasi 0.11.1+wasi-snapshot-preview1",
+ "wasm-bindgen",
 ]
 
 [[package]]
@@ -1309,7 +2546,7 @@ version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
 dependencies = [
- "fallible-iterator",
+ "fallible-iterator 0.3.0",
  "indexmap 2.12.1",
  "stable_deref_trait",
 ]
@@ -1357,11 +2594,24 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 
+[[package]]
+name = "hashbrown"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
+dependencies = [
+ "ahash",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+]
 
 [[package]]
 name = "hashbrown"
@@ -1381,6 +2631,21 @@ version = "0.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
 
+[[package]]
+name = "hashlink"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af"
+dependencies = [
+ "hashbrown 0.14.5",
+]
+
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -1393,6 +2658,21 @@ version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
 
+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
+[[package]]
+name = "hmac"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
+dependencies = [
+ "digest",
+]
+
 [[package]]
 name = "home"
 version = "0.5.12"
@@ -1649,7 +2929,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "af1955a75fa080c677d3972822ec4bad316169ab1cfc6c257a942c2265dbe5fe"
 dependencies = [
  "bitmaps",
- "rand_core",
+ "rand_core 0.6.4",
  "rand_xoshiro",
  "sized-chunks",
  "typenum",
@@ -1678,6 +2958,12 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "integer-encoding"
+version = "3.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
+
 [[package]]
 name = "io-extras"
 version = "0.18.4"
@@ -1685,7 +2971,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2285ddfe3054097ef4b2fe909ef8c3bcd1ea52a8f0d274416caebeef39f04a65"
 dependencies = [
  "io-lifetimes",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -1711,6 +2997,12 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "is_ci"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45"
+
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.2"
@@ -1761,6 +3053,19 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "jiter"
+version = "0.10.0"
+source = "git+https://github.com/ArroyoSystems/jiter?branch=disable_python#e5a90990780433a5972031a62eff87555d98884d"
+dependencies = [
+ "ahash",
+ "bitvec",
+ "lexical-parse-float 1.0.6",
+ "num-bigint",
+ "num-traits",
+ "smallvec",
+]
+
 [[package]]
 name = "jobserver"
 version = "0.1.34"
@@ -1811,11 +3116,24 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
 dependencies = [
- "lexical-parse-float",
- "lexical-parse-integer",
- "lexical-util",
- "lexical-write-float",
- "lexical-write-integer",
+ "lexical-parse-float 0.8.5",
+ "lexical-parse-integer 0.8.6",
+ "lexical-util 0.8.5",
+ "lexical-write-float 0.8.5",
+ "lexical-write-integer 0.8.5",
+]
+
+[[package]]
+name = "lexical-core"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594"
+dependencies = [
+ "lexical-parse-float 1.0.6",
+ "lexical-parse-integer 1.0.6",
+ "lexical-util 1.0.7",
+ "lexical-write-float 1.0.6",
+ "lexical-write-integer 1.0.6",
 ]
 
 [[package]]
@@ -1824,21 +3142,40 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
 dependencies = [
- "lexical-parse-integer",
- "lexical-util",
+ "lexical-parse-integer 0.8.6",
+ "lexical-util 0.8.5",
  "static_assertions",
 ]
 
+[[package]]
+name = "lexical-parse-float"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56"
+dependencies = [
+ "lexical-parse-integer 1.0.6",
+ "lexical-util 1.0.7",
+]
+
 [[package]]
 name = "lexical-parse-integer"
 version = "0.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
 dependencies = [
- "lexical-util",
+ "lexical-util 0.8.5",
  "static_assertions",
 ]
 
+[[package]]
+name = "lexical-parse-integer"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34"
+dependencies = [
+ "lexical-util 1.0.7",
+]
+
 [[package]]
 name = "lexical-util"
 version = "0.8.5"
@@ -1848,27 +3185,52 @@ dependencies = [
  "static_assertions",
 ]
 
+[[package]]
+name = "lexical-util"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17"
+
 [[package]]
 name = "lexical-write-float"
 version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
 dependencies = [
- "lexical-util",
- "lexical-write-integer",
+ "lexical-util 0.8.5",
+ "lexical-write-integer 0.8.5",
  "static_assertions",
 ]
 
+[[package]]
+name = "lexical-write-float"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361"
+dependencies = [
+ "lexical-util 1.0.7",
+ "lexical-write-integer 1.0.6",
+]
+
 [[package]]
 name = "lexical-write-integer"
 version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
 dependencies = [
- "lexical-util",
+ "lexical-util 0.8.5",
  "static_assertions",
 ]
 
+[[package]]
+name = "lexical-write-integer"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df"
+dependencies = [
+ "lexical-util 1.0.7",
+]
+
 [[package]]
 name = "libc"
 version = "0.2.179"
@@ -1917,6 +3279,17 @@ dependencies = [
  "zstd-sys",
 ]
 
+[[package]]
+name = "libsqlite3-sys"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f"
+dependencies = [
+ "cc",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "libz-sys"
 version = "1.1.23"
@@ -1972,19 +3345,39 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
 name = "lru"
 version = "0.12.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38"
+checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38"
+dependencies = [
+ "hashbrown 0.15.5",
+]
+
+[[package]]
+name = "lz4-sys"
+version = "1.11.1+lz4-1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6"
+dependencies = [
+ "cc",
+ "libc",
+]
+
+[[package]]
+name = "lz4_flex"
+version = "0.11.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a"
 dependencies = [
- "hashbrown 0.15.5",
+ "twox-hash",
 ]
 
 [[package]]
-name = "lz4-sys"
-version = "1.11.1+lz4-1.10.0"
+name = "lzma-sys"
+version = "0.1.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6"
+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
 dependencies = [
  "cc",
  "libc",
+ "pkg-config",
 ]
 
 [[package]]
@@ -2017,6 +3410,16 @@ version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4"
 
+[[package]]
+name = "md-5"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
+dependencies = [
+ "cfg-if",
+ "digest",
+]
+
 [[package]]
 name = "memchr"
 version = "2.7.6"
@@ -2032,6 +3435,38 @@ dependencies = [
  "rustix 1.1.3",
 ]
 
+[[package]]
+name = "miette"
+version = "5.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59bb584eaeeab6bd0226ccf3509a69d7936d148cf3d036ad350abe35e8c6856e"
+dependencies = [
+ "backtrace",
+ "backtrace-ext",
+ "is-terminal",
+ "miette-derive",
+ "once_cell",
+ "owo-colors",
+ "supports-color",
+ "supports-hyperlinks",
+ "supports-unicode",
+ "terminal_size",
+ "textwrap",
+ "thiserror 1.0.69",
+ "unicode-width 0.1.14",
+]
+
+[[package]]
+name = "miette-derive"
+version = "5.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "mime"
 version = "0.3.17"
@@ -2044,6 +3479,16 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
 
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
 [[package]]
 name = "mio"
 version = "1.1.1"
@@ -2051,7 +3496,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
 dependencies = [
  "libc",
- "wasi",
+ "wasi 0.11.1+wasi-snapshot-preview1",
  "windows-sys 0.61.2",
 ]
 
@@ -2225,6 +3670,30 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "object_store"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "chrono",
+ "futures",
+ "http",
+ "humantime",
+ "itertools 0.14.0",
+ "parking_lot",
+ "percent-encoding",
+ "thiserror 2.0.17",
+ "tokio",
+ "tracing",
+ "url",
+ "walkdir",
+ "wasm-bindgen-futures",
+ "web-time",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.21.3"
@@ -2249,6 +3718,15 @@ dependencies = [
  "vcpkg",
 ]
 
+[[package]]
+name = "ordered-float"
+version = "2.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "os_pipe"
 version = "1.2.3"
@@ -2259,6 +3737,12 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "owo-colors"
+version = "3.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
+
 [[package]]
 name = "parking_lot"
 version = "0.12.5"
@@ -2282,6 +3766,80 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "parquet"
+version = "55.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa"
+dependencies = [
+ "ahash",
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-cast 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-ipc 55.2.0",
+ "arrow-schema 55.2.0",
+ "arrow-select 55.2.0",
+ "base64",
+ "brotli",
+ "bytes",
+ "chrono",
+ "flate2",
+ "futures",
+ "half",
+ "hashbrown 0.15.5",
+ "lz4_flex",
+ "num",
+ "num-bigint",
+ "object_store",
+ "paste",
+ "seq-macro",
+ "simdutf8",
+ "snap",
+ "thrift",
+ "tokio",
+ "twox-hash",
+ "zstd",
+]
+
+[[package]]
+name = "parquet"
+version = "55.2.0"
+source = "git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fparquet#d1d2dd8edf673cddc79ba6403dc6508263a2ddda"
+dependencies = [
+ "ahash",
+ "arrow-array 55.2.0",
+ "arrow-buffer 55.2.0",
+ "arrow-cast 55.2.0",
+ "arrow-data 55.2.0",
+ "arrow-ipc 55.2.0",
+ "arrow-schema 55.2.0",
+ "arrow-select 55.2.0",
+ "base64",
+ "brotli",
+ "bytes",
+ "chrono",
+ "flate2",
+ "half",
+ "hashbrown 0.15.5",
+ "lz4_flex",
+ "num",
+ "num-bigint",
+ "paste",
+ "seq-macro",
+ "simdutf8",
+ "snap",
+ "thrift",
+ "twox-hash",
+ "zstd",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
 [[package]]
 name = "peeking_take_while"
 version = "0.1.2"
@@ -2357,6 +3915,55 @@ dependencies = [
  "indexmap 2.12.1",
 ]
 
+[[package]]
+name = "petgraph"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
+dependencies = [
+ "fixedbitset 0.5.7",
+ "hashbrown 0.15.5",
+ "indexmap 2.12.1",
+ "serde",
+]
+
+[[package]]
+name = "phf"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
+dependencies = [
+ "phf_shared 0.12.1",
+]
+
+[[package]]
+name = "phf"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
+dependencies = [
+ "phf_shared 0.13.1",
+ "serde",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981"
+dependencies = [
+ "siphasher",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
+dependencies = [
+ "siphasher",
+]
+
 [[package]]
 name = "pin-project"
 version = "1.1.10"
@@ -2407,6 +4014,49 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "postgres"
+version = "0.19.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7c48ece1c6cda0db61b058c1721378da76855140e9214339fa1317decacb176"
+dependencies = [
+ "bytes",
+ "fallible-iterator 0.2.0",
+ "futures-util",
+ "log",
+ "tokio",
+ "tokio-postgres",
+]
+
+[[package]]
+name = "postgres-protocol"
+version = "0.6.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491"
+dependencies = [
+ "base64",
+ "byteorder",
+ "bytes",
+ "fallible-iterator 0.2.0",
+ "hmac",
+ "md-5",
+ "memchr",
+ "rand 0.9.2",
+ "sha2",
+ "stringprep",
+]
+
+[[package]]
+name = "postgres-types"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54b858f82211e84682fecd373f68e1ceae642d8d751a1ebd13f33de6257b3e20"
+dependencies = [
+ "bytes",
+ "fallible-iterator 0.2.0",
+ "postgres-protocol",
+]
+
 [[package]]
 name = "potential_utf"
 version = "0.1.4"
@@ -2486,7 +4136,7 @@ version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
 dependencies = [
- "heck",
+ "heck 0.5.0",
  "itertools 0.14.0",
  "log",
  "multimap",
@@ -2533,6 +4183,16 @@ dependencies = [
  "tonic-build",
 ]
 
+[[package]]
+name = "psm"
+version = "0.1.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8"
+dependencies = [
+ "ar_archive_writer",
+ "cc",
+]
+
 [[package]]
 name = "pulley-interpreter"
 version = "41.0.3"
@@ -2571,6 +4231,12 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
 
+[[package]]
+name = "radium"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
+
 [[package]]
 name = "radix_trie"
 version = "0.2.1"
@@ -2588,8 +4254,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
  "libc",
- "rand_chacha",
- "rand_core",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+dependencies = [
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.5",
 ]
 
 [[package]]
@@ -2599,7 +4275,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
 dependencies = [
  "ppv-lite86",
- "rand_core",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.9.5",
 ]
 
 [[package]]
@@ -2611,13 +4297,22 @@ dependencies = [
  "getrandom 0.2.16",
 ]
 
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
 [[package]]
 name = "rand_xoshiro"
 version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa"
 dependencies = [
- "rand_core",
+ "rand_core 0.6.4",
 ]
 
 [[package]]
@@ -2673,6 +4368,26 @@ dependencies = [
  "sasl2-sys",
 ]
 
+[[package]]
+name = "recursive"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e"
+dependencies = [
+ "recursive-proc-macro-impl",
+ "stacker",
+]
+
+[[package]]
+name = "recursive-proc-macro-impl"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
+dependencies = [
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "redox_syscall"
 version = "0.5.18"
@@ -2737,6 +4452,16 @@ version = "0.8.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
 
+[[package]]
+name = "regress"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb"
+dependencies = [
+ "hashbrown 0.13.2",
+ "memchr",
+]
+
 [[package]]
 name = "rocksdb"
 version = "0.21.0"
@@ -2747,6 +4472,21 @@ dependencies = [
  "librocksdb-sys",
 ]
 
+[[package]]
+name = "rusqlite"
+version = "0.31.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae"
+dependencies = [
+ "bitflags 2.10.0",
+ "fallible-iterator 0.3.0",
+ "fallible-streaming-iterator",
+ "hashlink",
+ "libsqlite3-sys",
+ "serde_json",
+ "smallvec",
+]
+
 [[package]]
 name = "rustc-demangle"
 version = "0.1.26"
@@ -2784,7 +4524,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys 0.4.15",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -2844,6 +4584,15 @@ version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984"
 
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
 [[package]]
 name = "sasl2-sys"
 version = "0.1.22+2.1.28"
@@ -2856,6 +4605,30 @@ dependencies = [
  "pkg-config",
 ]
 
+[[package]]
+name = "schemars"
+version = "0.8.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
+dependencies = [
+ "dyn-clone",
+ "schemars_derive",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "schemars_derive"
+version = "0.8.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "serde_derive_internals",
+ "syn",
+]
+
 [[package]]
 name = "scopeguard"
 version = "1.2.0"
@@ -2872,6 +4645,12 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "seq-macro"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
+
 [[package]]
 name = "serde"
 version = "1.0.228"
@@ -2902,6 +4681,17 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "serde_derive_internals"
+version = "0.29.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "serde_json"
 version = "1.0.148"
@@ -2924,6 +4714,18 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "serde_tokenstream"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7c49585c52c01f13c5c2ebb333f14f6885d76daa768d8a037d28017ec538c69"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "serde",
+ "syn",
+]
+
 [[package]]
 name = "serde_yaml"
 version = "0.9.34+deprecated"
@@ -3005,6 +4807,24 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "simd-adler32"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
+
+[[package]]
+name = "simdutf8"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
+
+[[package]]
+name = "siphasher"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
+
 [[package]]
 name = "sized-chunks"
 version = "0.6.5"
@@ -3030,6 +4850,18 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "smawk"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
+
+[[package]]
+name = "snap"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
+
 [[package]]
 name = "socket2"
 version = "0.5.10"
@@ -3050,24 +4882,125 @@ dependencies = [
  "windows-sys 0.60.2",
 ]
 
+[[package]]
+name = "sqlparser"
+version = "0.55.0"
+source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#7e7cfb6145a426a26a7db12ae5874fed8b9c6b95"
+dependencies = [
+ "log",
+ "recursive",
+ "sqlparser_derive",
+]
+
+[[package]]
+name = "sqlparser_derive"
+version = "0.3.0"
+source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#7e7cfb6145a426a26a7db12ae5874fed8b9c6b95"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "stable_deref_trait"
 version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
 
+[[package]]
+name = "stacker"
+version = "0.1.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "libc",
+ "psm",
+ "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "static_assertions"
 version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
 
+[[package]]
+name = "stringprep"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1"
+dependencies = [
+ "unicode-bidi",
+ "unicode-normalization",
+ "unicode-properties",
+]
+
 [[package]]
 name = "strsim"
 version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
+[[package]]
+name = "strum"
+version = "0.26.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
+dependencies = [
+ "strum_macros",
+]
+
+[[package]]
+name = "strum_macros"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
+dependencies = [
+ "heck 0.5.0",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn",
+]
+
+[[package]]
+name = "subtle"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+
+[[package]]
+name = "supports-color"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89"
+dependencies = [
+ "is-terminal",
+ "is_ci",
+]
+
+[[package]]
+name = "supports-hyperlinks"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f84231692eb0d4d41e4cdd0cabfdd2e6cd9e255e65f80c9aa7c98dd502b4233d"
+dependencies = [
+ "is-terminal",
+]
+
+[[package]]
+name = "supports-unicode"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f850c19edd184a205e883199a261ed44471c81e39bd95b1357f5febbef00e77a"
+dependencies = [
+ "is-terminal",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.113"
@@ -3108,10 +5041,16 @@ dependencies = [
  "fd-lock",
  "io-lifetimes",
  "rustix 0.38.44",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
  "winx",
 ]
 
+[[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
+
 [[package]]
 name = "target-lexicon"
 version = "0.13.4"
@@ -3140,6 +5079,27 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "terminal_size"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d"
+dependencies = [
+ "smawk",
+ "unicode-linebreak",
+ "unicode-width 0.1.14",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.69"
@@ -3189,6 +5149,17 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "thrift"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
+dependencies = [
+ "byteorder",
+ "integer-encoding",
+ "ordered-float",
+]
+
 [[package]]
 name = "time"
 version = "0.3.44"
@@ -3239,6 +5210,21 @@ dependencies = [
  "zerovec",
 ]
 
+[[package]]
+name = "tinyvec"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
 [[package]]
 name = "tokio"
 version = "1.49.0"
@@ -3267,6 +5253,32 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "tokio-postgres"
+version = "0.7.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcea47c8f71744367793f16c2db1f11cb859d28f436bdb4ca9193eb1f787ee42"
+dependencies = [
+ "async-trait",
+ "byteorder",
+ "bytes",
+ "fallible-iterator 0.2.0",
+ "futures-channel",
+ "futures-util",
+ "log",
+ "parking_lot",
+ "percent-encoding",
+ "phf 0.13.1",
+ "pin-project-lite",
+ "postgres-protocol",
+ "postgres-types",
+ "rand 0.9.2",
+ "socket2 0.6.1",
+ "tokio",
+ "tokio-util",
+ "whoami",
+]
+
 [[package]]
 name = "tokio-stream"
 version = "0.1.18"
@@ -3397,7 +5409,7 @@ dependencies = [
  "indexmap 1.9.3",
  "pin-project",
  "pin-project-lite",
- "rand",
+ "rand 0.8.5",
  "slab",
  "tokio",
  "tokio-util",
@@ -3525,24 +5537,104 @@ version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
 
+[[package]]
+name = "twox-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
+
 [[package]]
 name = "typenum"
 version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
 
+[[package]]
+name = "typify"
+version = "0.0.13"
+source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc016bf9d63618d8b43b4d74a648980737b"
+dependencies = [
+ "typify-impl",
+ "typify-macro",
+]
+
+[[package]]
+name = "typify-impl"
+version = "0.0.13"
+source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc016bf9d63618d8b43b4d74a648980737b"
+dependencies = [
+ "heck 0.4.1",
+ "log",
+ "proc-macro2",
+ "quote",
+ "regress",
+ "schemars",
+ "serde_json",
+ "syn",
+ "thiserror 1.0.69",
+ "unicode-ident",
+]
+
+[[package]]
+name = "typify-macro"
+version = "0.0.13"
+source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc016bf9d63618d8b43b4d74a648980737b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "schemars",
+ "serde",
+ "serde_json",
+ "serde_tokenstream",
+ "syn",
+ "typify-impl",
+]
+
 [[package]]
 name = "ucd-trie"
 version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
 
+[[package]]
+name = "unicase"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
+
+[[package]]
+name = "unicode-bidi"
+version = "0.3.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
 
+[[package]]
+name = "unicode-linebreak"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f"
+
+[[package]]
+name = "unicode-normalization"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "unicode-properties"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
+
 [[package]]
 name = "unicode-segmentation"
 version = "1.12.0"
@@ -3573,6 +5665,12 @@ version = "0.2.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
 
+[[package]]
+name = "unscanny"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47"
+
 [[package]]
 name = "url"
 version = "2.5.7"
@@ -3626,6 +5724,16 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
 [[package]]
 name = "want"
 version = "0.3.1"
@@ -3641,6 +5749,15 @@ version = "0.11.1+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
+[[package]]
+name = "wasi"
+version = "0.14.7+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
+dependencies = [
+ "wasip2",
+]
+
 [[package]]
 name = "wasip2"
 version = "1.0.1+wasi-0.2.4"
@@ -3650,6 +5767,15 @@ dependencies = [
  "wit-bindgen",
 ]
 
+[[package]]
+name = "wasite"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42"
+dependencies = [
+ "wasi 0.14.7+wasi-0.2.4",
+]
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.106"
@@ -3663,6 +5789,19 @@ dependencies = [
  "wasm-bindgen-shared",
 ]
 
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.56"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "once_cell",
+ "wasm-bindgen",
+ "web-sys",
+]
+
 [[package]]
 name = "wasm-bindgen-macro"
 version = "0.2.106"
@@ -3702,7 +5841,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "af801b6f36459023eaec63fdbaedad2fd5a4ab7dc74ecc110a8b5d375c5775e4"
 dependencies = [
  "anyhow",
- "heck",
+ "heck 0.5.0",
  "im-rc",
  "indexmap 2.12.1",
  "log",
@@ -4005,7 +6144,7 @@ checksum = "87acbd416227cdd279565ba49e57cf7f08d112657c3b3f39b70250acdfd094fe"
 dependencies = [
  "anyhow",
  "bitflags 2.10.0",
- "heck",
+ "heck 0.5.0",
  "indexmap 2.12.1",
  "wit-parser",
 ]
@@ -4085,6 +6224,37 @@ dependencies = [
  "wast 243.0.0",
 ]
 
+[[package]]
+name = "web-sys"
+version = "0.3.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "web-time"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "whoami"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fae98cf96deed1b7572272dfc777713c249ae40aa1cf8862e091e8b745f5361"
+dependencies = [
+ "libredox",
+ "wasite",
+ "web-sys",
+]
+
 [[package]]
 name = "wiggle"
 version = "41.0.3"
@@ -4106,7 +6276,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57f773d51c1696bd7d028aa35c884d9fc58f48d79a1176dfbad6c908de314235"
 dependencies = [
  "anyhow",
- "heck",
+ "heck 0.5.0",
  "proc-macro2",
  "quote",
  "syn",
@@ -4416,7 +6586,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f3fd376f71958b862e7afb20cfe5a22830e1963462f3a17f49d82a6c1d1f42d"
 dependencies = [
  "bitflags 2.10.0",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -4461,6 +6631,24 @@ version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
 
+[[package]]
+name = "wyz"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
+dependencies = [
+ "tap",
+]
+
+[[package]]
+name = "xz2"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
+dependencies = [
+ "lzma-sys",
+]
+
 [[package]]
 name = "yoke"
 version = "0.8.1"
@@ -4558,6 +6746,12 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "zlib-rs"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513"
+
 [[package]]
 name = "zmij"
 version = "1.0.10"
diff --git a/Cargo.toml b/Cargo.toml
index 4b855aa9..0d906ca6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -52,8 +52,36 @@ arrow-array = "52"
 arrow-ipc = "52"
 arrow-schema = "52"
 proctitle = "0.1"
+unicase = "2.7"
+petgraph = "0.7"
+itertools = "0.14"
+strum = { version = "0.26", features = ["derive"] }
+datafusion-functions-aggregate = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+
+typify = { git = 'https://github.com/ArroyoSystems/typify.git', branch = 'arroyo' }
+parquet = {git = 'https://github.com/ArroyoSystems/arrow-rs', branch = '55.2.0/parquet'}
+arrow-json = {git = 'https://github.com/ArroyoSystems/arrow-rs', branch = '55.2.0/json'}
+datafusion = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+datafusion-common = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+datafusion-execution = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+datafusion-expr = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+datafusion-physical-expr = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+datafusion-physical-plan = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+datafusion-proto = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+datafusion-functions = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+datafusion-functions-window = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
+
+sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.6.0/function-sql-parser" }
+
+cornucopia_async = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" }
+cornucopia = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" }
+jiter = {git = "https://github.com/ArroyoSystems/jiter", branch = "disable_python" }
+
 
 [features]
 default = ["incremental-cache", "python"]
 incremental-cache = ["wasmtime/incremental-cache"]
 python = []
+
+[patch."https://github.com/ArroyoSystems/sqlparser-rs"]
+sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.6.0/function-sql-parser" }
diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs
index 30552191..58056b67 100644
--- a/src/coordinator/analyze/analyzer.rs
+++ b/src/coordinator/analyze/analyzer.rs
@@ -14,7 +14,7 @@ use super::Analysis;
 use crate::coordinator::execution_context::ExecutionContext;
 use crate::coordinator::statement::{
     CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, Statement,
-    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction,
+    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingSql,
 };
 use std::fmt;
 
@@ -115,4 +115,13 @@ impl StatementVisitor for Analyzer<'_> {
     ) -> StatementVisitorResult {
         StatementVisitorResult::Analyze(Box::new(stmt.clone()))
     }
+
+    fn visit_streaming_sql(
+        &self,
+        stmt: &StreamingSql,
+        _context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        // Pass-through (clone + re-wrap). TODO: semantic analysis (schema validation, etc.)
+        StatementVisitorResult::Analyze(Box::new(StreamingSql::new(stmt.statement.clone())))
+    }
 }
diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs
index 4ad766d5..378c670b 100644
--- a/src/coordinator/coordinator.rs
+++ b/src/coordinator/coordinator.rs
@@ -20,6 +20,7 @@ use crate::coordinator::execution::Executor;
 use crate::coordinator::plan::{LogicalPlanVisitor, LogicalPlanner, PlanNode};
 use crate::coordinator::statement::Statement;
 use crate::runtime::taskexecutor::TaskManager;
+use crate::sql::planner::StreamSchemaProvider;
 
 use super::execution_context::ExecutionContext;
 
@@ -90,7 +91,8 @@ impl Coordinator {
     }
 
     fn step_build_logical_plan(&self, analysis: &Analysis) -> Result<Box<dyn PlanNode>> {
-        let visitor = LogicalPlanVisitor::new();
+        let schema_provider = StreamSchemaProvider::new(); // NOTE(review): new provider per call
+        let visitor = LogicalPlanVisitor::new(schema_provider);
         let plan = visitor.visit(analysis);
         Ok(plan)
     }
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 7e44217e..5d96bf45 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -14,6 +14,7 @@ use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_reco
 use crate::coordinator::plan::{
     CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, PlanNode, PlanVisitor,
     PlanVisitorContext, PlanVisitorResult, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
+    StreamingSqlPlan,
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::taskexecutor::TaskManager;
@@ -200,4 +201,17 @@ impl PlanVisitor for Executor {
 
         PlanVisitorResult::Execute(result)
     }
+
+    fn visit_streaming_sql_plan(
+        &self,
+        plan: &StreamingSqlPlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        // Stub (always Err). TODO: apply rewrite_plan for streaming transformations, then execute
+        let result = Err(ExecuteError::Internal(format!(
+            "Streaming SQL execution not yet implemented. LogicalPlan:\n{}",
+            plan.logical_plan.display_indent()
+        )));
+        PlanVisitorResult::Execute(result)
+    }
 }
diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs
index 0b94d4bf..26627a8b 100644
--- a/src/coordinator/mod.rs
+++ b/src/coordinator/mod.rs
@@ -23,5 +23,5 @@ pub use coordinator::Coordinator;
 pub use dataset::{DataSet, ShowFunctionsResult};
 pub use statement::{
     CreateFunction, CreatePythonFunction, DropFunction, PythonModule, ShowFunctions, StartFunction,
-    Statement, StopFunction,
+    Statement, StopFunction, StreamingSql,
 };
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 536fec37..3462d033 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -10,22 +10,26 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use tracing::debug;
+
 use crate::coordinator::analyze::analysis::Analysis;
 use crate::coordinator::plan::{
     CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, PlanNode, ShowFunctionsPlan,
-    StartFunctionPlan, StopFunctionPlan,
+    StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan,
 };
 use crate::coordinator::statement::{
     CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction,
-    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction,
+    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingSql,
 };
+use crate::sql::planner::StreamSchemaProvider;
 
-#[derive(Debug, Default)]
-pub struct LogicalPlanVisitor;
+pub struct LogicalPlanVisitor {
+    schema_provider: StreamSchemaProvider, // borrowed by SqlToRel during SQL planning
+}
 
 impl LogicalPlanVisitor {
-    pub fn new() -> Self {
-        Self
+    pub fn new(schema_provider: StreamSchemaProvider) -> Self {
+        Self { schema_provider }
     }
 
     pub fn visit(&self, analysis: &Analysis) -> Box<dyn PlanNode> {
@@ -51,7 +55,6 @@ impl StatementVisitor for LogicalPlanVisitor {
         let config_source = stmt.get_config_source().cloned();
         let extra_props = stmt.get_extra_properties().clone();
 
-        // Name will be read from config file during execution
         StatementVisitorResult::Plan(Box::new(CreateFunctionPlan::new(
             function_source,
             config_source,
@@ -106,4 +109,22 @@ impl StatementVisitor for LogicalPlanVisitor {
             config_content,
         )))
     }
+
+    fn visit_streaming_sql(
+        &self,
+        stmt: &StreamingSql,
+        _context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider);
+        // NOTE(review): planning errors panic below rather than propagate; confirm intended.
+        match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) {
+            Ok(plan) => {
+                debug!("Logical plan:\n{}", plan.display_graphviz());
+                StatementVisitorResult::Plan(Box::new(StreamingSqlPlan::new(plan)))
+            }
+            Err(e) => {
+                panic!("Failed to convert SQL statement to logical plan: {e}");
+            }
+        }
+    }
 }
diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs
index 9aa403b5..744410e1 100644
--- a/src/coordinator/plan/mod.rs
+++ b/src/coordinator/plan/mod.rs
@@ -18,6 +18,7 @@ mod optimizer;
 mod show_functions_plan;
 mod start_function_plan;
 mod stop_function_plan;
+mod streaming_sql_plan;
 mod visitor;
 
 pub use create_function_plan::CreateFunctionPlan;
@@ -28,6 +29,7 @@ pub use optimizer::LogicalPlanner;
 pub use show_functions_plan::ShowFunctionsPlan;
 pub use start_function_plan::StartFunctionPlan;
 pub use stop_function_plan::StopFunctionPlan;
+pub use streaming_sql_plan::StreamingSqlPlan;
 pub use visitor::{PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
 use std::fmt;
diff --git a/src/sql/parser/mod.rs b/src/coordinator/plan/streaming_sql_plan.rs
similarity index 52%
rename from src/sql/parser/mod.rs
rename to src/coordinator/plan/streaming_sql_plan.rs
index 11f4b18e..607420a8 100644
--- a/src/sql/parser/mod.rs
+++ b/src/coordinator/plan/streaming_sql_plan.rs
@@ -10,33 +10,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-mod sql_parser;
+use datafusion::logical_expr::LogicalPlan;
 
-pub use sql_parser::SqlParser;
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
 #[derive(Debug)]
-pub struct ParseError {
-    pub message: String,
+pub struct StreamingSqlPlan {
+    pub logical_plan: LogicalPlan,
 }
 
-impl std::fmt::Display for ParseError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Parse error: {}", self.message)
+impl StreamingSqlPlan {
+    pub fn new(logical_plan: LogicalPlan) -> Self {
+        Self { logical_plan }
     }
 }
 
-impl std::error::Error for ParseError {}
-
-impl From<String> for ParseError {
-    fn from(message: String) -> Self {
-        ParseError { message }
-    }
-}
-
-impl ParseError {
-    pub fn new(message: impl Into<String>) -> Self {
-        Self {
-            message: message.into(),
-        }
+impl PlanNode for StreamingSqlPlan {
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_streaming_sql_plan(self, context)
     }
 }
diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs
index 44059c67..e8bd0ffc 100644
--- a/src/coordinator/plan/visitor.rs
+++ b/src/coordinator/plan/visitor.rs
@@ -12,7 +12,7 @@
 
 use super::{
     CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, ShowFunctionsPlan,
-    StartFunctionPlan, StopFunctionPlan,
+    StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan,
 };
 
 /// Context passed to PlanVisitor methods
@@ -84,4 +84,10 @@ pub trait PlanVisitor {
         plan: &CreatePythonFunctionPlan,
         context: &PlanVisitorContext,
     ) -> PlanVisitorResult;
+    /// Visits a `StreamingSqlPlan` node.
+    fn visit_streaming_sql_plan(
+        &self,
+        plan: &StreamingSqlPlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
 }
diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs
index f887209c..a115af91 100644
--- a/src/coordinator/statement/mod.rs
+++ b/src/coordinator/statement/mod.rs
@@ -16,6 +16,7 @@ mod drop_function;
 mod show_functions;
 mod start_function;
 mod stop_function;
+mod streaming_sql;
 mod visitor;
 
 pub use create_function::{ConfigSource, CreateFunction, FunctionSource};
@@ -24,6 +25,7 @@ pub use drop_function::DropFunction;
 pub use show_functions::ShowFunctions;
 pub use start_function::StartFunction;
 pub use stop_function::StopFunction;
+pub use streaming_sql::StreamingSql;
 pub use visitor::{StatementVisitor, StatementVisitorContext, StatementVisitorResult};
 
 use std::fmt;
diff --git a/src/coordinator/statement/streaming_sql.rs b/src/coordinator/statement/streaming_sql.rs
new file mode 100644
index 00000000..1aa49205
--- /dev/null
+++ b/src/coordinator/statement/streaming_sql.rs
@@ -0,0 +1,39 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::sql::sqlparser::ast::Statement as DFStatement;
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// Wraps a DataFusion SQL statement (SELECT, INSERT, CREATE TABLE, etc.)
+/// so it can flow through the same Statement → StatementVisitor pipeline
+/// as FunctionStream DDL commands.
+#[derive(Debug)]
+pub struct StreamingSql {
+    pub statement: DFStatement, // `DFStatement` is `sqlparser::ast::Statement`
+}
+
+impl StreamingSql {
+    pub fn new(statement: DFStatement) -> Self {
+        Self { statement } // stored as-is; nothing is validated here
+    }
+}
+
+impl Statement for StreamingSql {
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_streaming_sql(self, context) // hand over to the visitor's handler
+    }
+}
diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs
index 13ce2cfc..c9a63831 100644
--- a/src/coordinator/statement/visitor.rs
+++ b/src/coordinator/statement/visitor.rs
@@ -12,6 +12,7 @@
 
 use super::{
     CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, StopFunction,
+    StreamingSql,
 };
 use crate::coordinator::plan::PlanNode;
 use crate::coordinator::statement::Statement;
@@ -87,4 +88,10 @@ pub trait StatementVisitor {
         stmt: &CreatePythonFunction,
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
+    /// Visits a `StreamingSql` statement.
+    fn visit_streaming_sql(
+        &self,
+        stmt: &StreamingSql,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
 }
diff --git a/src/datastream/logical.rs b/src/datastream/logical.rs
new file mode 100644
index 00000000..60101bdd
--- /dev/null
+++ b/src/datastream/logical.rs
@@ -0,0 +1,317 @@
+use itertools::Itertools;
+
+use crate::datastream::optimizers::Optimizer;
+use crate::sql::planner::types::StreamSchema;
+use datafusion::arrow::datatypes::DataType;
+use petgraph::Direction;
+use petgraph::dot::Dot;
+use petgraph::graph::DiGraph;
+use std::collections::{HashMap, HashSet};
+use std::fmt::{Debug, Display, Formatter};
+use std::sync::Arc;
+use strum::{Display, EnumString};
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
+pub enum OperatorName { // kind of a `ChainedLogicalOperator`; strum derives string conversions
+    ExpressionWatermark,
+    ArrowValue,
+    ArrowKey,
+    Projection,
+    AsyncUdf,
+    Join,
+    InstantJoin,
+    LookupJoin,
+    WindowFunction,
+    TumblingWindowAggregate,
+    SlidingWindowAggregate,
+    SessionWindowAggregate,
+    UpdatingAggregate,
+    ConnectorSource,
+    ConnectorSink,
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
+pub enum LogicalEdgeType { // edge kind carried by every `LogicalEdge`
+    Forward, // the only kind `ChainingOptimizer` will fuse across
+    Shuffle,
+    LeftJoin,
+    RightJoin,
+}
+
+impl Display for LogicalEdgeType {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self { // one short arrow glyph per edge kind
+            Self::Forward => "→",
+            Self::Shuffle => "⤨",
+            Self::LeftJoin => "-[left]⤨",
+            Self::RightJoin => "-[right]⤨",
+        })
+    }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct LogicalEdge { // edge weight type of `LogicalGraph`
+    pub edge_type: LogicalEdgeType,
+    pub schema: Arc<StreamSchema>, // ref-counted, so cloning an edge does not copy the schema
+}
+
+impl LogicalEdge {
+    /// Creates an edge of `edge_type`, storing `schema` behind an `Arc`.
+    pub fn new(edge_type: LogicalEdgeType, schema: StreamSchema) -> Self {
+        LogicalEdge {
+            edge_type,
+            schema: Arc::new(schema),
+        }
+    }
+
+    /// Builds exactly the same edge as [`LogicalEdge::new`]; it delegates so
+    /// the two constructors cannot drift apart.
+    pub fn project_all(edge_type: LogicalEdgeType, schema: StreamSchema) -> Self {
+        Self::new(edge_type, schema)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct ChainedLogicalOperator { // one operator inside an `OperatorChain`
+    pub operator_id: String,
+    pub operator_name: OperatorName,
+    pub operator_config: Vec<u8>, // raw bytes; presumably a serialized config — TODO confirm
+}
+
+#[derive(Clone, Debug)]
+pub struct OperatorChain { // operators chained into a single `LogicalNode`
+    pub(crate) operators: Vec<ChainedLogicalOperator>,
+    pub(crate) edges: Vec<Arc<StreamSchema>>, // paired by index with `operators` in `iter`
+}
+
+impl OperatorChain {
+    pub fn new(operator: ChainedLogicalOperator) -> Self {
+        Self {
+            operators: vec![operator],
+            edges: vec![], // a single operator has no internal edge yet
+        }
+    }
+    /// Pairs each operator with the `edges` entry at the same index (`None` once those run out).
+    pub fn iter(
+        &self,
+    ) -> impl Iterator<Item = (&ChainedLogicalOperator, Option<&Arc<StreamSchema>>)> {
+        self.operators
+            .iter()
+            .zip_longest(self.edges.iter())
+            .map(|e| e.left_and_right())
+            .map(|(l, r)| (l.unwrap(), r)) // panics if `edges` is longer than `operators`
+    }
+    /// Same pairing as [`OperatorChain::iter`], with mutable access to the operators.
+    pub fn iter_mut(
+        &mut self,
+    ) -> impl Iterator<Item = (&mut ChainedLogicalOperator, Option<&Arc<StreamSchema>>)> {
+        self.operators
+            .iter_mut()
+            .zip_longest(self.edges.iter())
+            .map(|e| e.left_and_right())
+            .map(|(l, r)| (l.unwrap(), r))
+    }
+    /// Returns the first operator; panics if the chain is empty.
+    pub fn first(&self) -> &ChainedLogicalOperator {
+        &self.operators[0]
+    }
+    /// Number of operators in the chain.
+    pub fn len(&self) -> usize {
+        self.operators.len()
+    }
+    /// Whether the chain holds no operators.
+    pub fn is_empty(&self) -> bool {
+        self.operators.is_empty()
+    }
+    /// Whether the first operator is a `ConnectorSource`; panics if the chain is empty.
+    pub fn is_source(&self) -> bool {
+        self.operators[0].operator_name == OperatorName::ConnectorSource
+    }
+    /// Whether the first operator is a `ConnectorSink`; panics if the chain is empty.
+    pub fn is_sink(&self) -> bool {
+        self.operators[0].operator_name == OperatorName::ConnectorSink
+    }
+}
+
+#[derive(Clone)]
+pub struct LogicalNode { // node weight type of `LogicalGraph`
+    pub node_id: u32,
+    pub description: String, // the text printed by the `Display` impl
+    pub operator_chain: OperatorChain, // the operators chained into this node
+    pub parallelism: usize,
+}
+
+impl LogicalNode {
+    /// Creates a node whose chain holds exactly one operator (built from
+    /// `operator_id`, `name` and `config`) and no internal edges.
+    pub fn single(
+        id: u32,
+        operator_id: String,
+        name: OperatorName,
+        config: Vec<u8>,
+        description: String,
+        parallelism: usize,
+    ) -> Self {
+        let operator = ChainedLogicalOperator {
+            operator_id,
+            operator_name: name,
+            operator_config: config,
+        };
+        Self {
+            node_id: id,
+            description,
+            operator_chain: OperatorChain::new(operator),
+            parallelism,
+        }
+    }
+}
+
+impl Display for LogicalNode {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.description) // the display form is just the description
+    }
+}
+
+impl Debug for LogicalNode {
+    /// Formats the node as its chained operator ids joined by `" -> "`,
+    /// followed by the node's parallelism in square brackets.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // Borrow the ids rather than cloning every `String`.
+        let operator_ids: Vec<&str> = self
+            .operator_chain
+            .operators
+            .iter()
+            .map(|op| op.operator_id.as_str())
+            .collect();
+
+        write!(f, "{}[{}]", operator_ids.join(" -> "), self.parallelism)
+    }
+}
+
+pub type LogicalGraph = DiGraph<LogicalNode, LogicalEdge>; // petgraph directed graph
+
+#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)]
+pub struct DylibUdfConfig { // value type of `ProgramConfig::udf_dylibs`
+    pub dylib_path: String, // presumably the location of the compiled library — TODO confirm
+    pub arg_types: Vec<DataType>,
+    pub return_type: DataType,
+    pub aggregate: bool,
+    pub is_async: bool,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+pub struct PythonUdfConfig { // value type of `ProgramConfig::python_udfs`
+    pub arg_types: Vec<DataType>,
+    pub return_type: DataType,
+    pub name: Arc<String>,
+    pub definition: Arc<String>, // presumably the Python source text — TODO confirm
+}
+
+#[derive(Clone, Debug, Default)]
+pub struct ProgramConfig { // UDF configuration carried alongside the graph in `LogicalProgram`
+    pub udf_dylibs: HashMap<String, DylibUdfConfig>, // key presumably the UDF name — TODO confirm
+    pub python_udfs: HashMap<String, PythonUdfConfig>,
+}
+
+#[derive(Clone, Debug, Default)]
+pub struct LogicalProgram { // a logical graph plus its program-level configuration
+    pub graph: LogicalGraph,
+    pub program_config: ProgramConfig,
+}
+
+impl LogicalProgram {
+    /// Bundles a logical graph with its program-level UDF configuration.
+    pub fn new(graph: LogicalGraph, program_config: ProgramConfig) -> Self {
+        Self {
+            graph,
+            program_config,
+        }
+    }
+
+    /// Runs `optimizer` over the graph, rewriting it in place.
+    pub fn optimize(&mut self, optimizer: &dyn Optimizer) {
+        optimizer.optimize(&mut self.graph);
+    }
+
+    /// Applies per-node parallelism overrides, keyed by `node_id`; other nodes are untouched.
+    pub fn update_parallelism(&mut self, overrides: &HashMap<u32, usize>) {
+        for node in self.graph.node_weights_mut() {
+            if let Some(&parallelism) = overrides.get(&node.node_id) {
+                node.parallelism = parallelism;
+            }
+        }
+    }
+
+    /// Renders the graph in Graphviz DOT syntax using the `Debug` form of nodes and edges.
+    pub fn dot(&self) -> String {
+        format!("{:?}", Dot::with_config(&self.graph, &[]))
+    }
+
+    /// Sums the parallelism of every node in the graph.
+    pub fn task_count(&self) -> usize {
+        self.graph.node_weights().map(|nw| nw.parallelism).sum()
+    }
+
+    /// Returns the ids of all nodes that have no incoming edge.
+    pub fn sources(&self) -> HashSet<u32> {
+        self.graph
+            .externals(Direction::Incoming)
+            .map(|idx| self.graph[idx].node_id)
+            .collect()
+    }
+
+    /// Maps every operator id to the parallelism of the node that contains it.
+    pub fn tasks_per_operator(&self) -> HashMap<String, usize> {
+        self.graph
+            .node_weights()
+            .flat_map(|n| n.operator_chain.operators.iter().map(move |op| (op, n)))
+            .map(|(op, n)| (op.operator_id.clone(), n.parallelism))
+            .collect()
+    }
+
+    /// Maps every operator id to the string form of its `OperatorName`.
+    pub fn operator_names_by_id(&self) -> HashMap<String, String> {
+        self.graph
+            .node_weights()
+            .flat_map(|node| node.operator_chain.operators.iter())
+            .map(|op| (op.operator_id.clone(), op.operator_name.to_string()))
+            .collect()
+    }
+
+    /// Maps every node id to that node's parallelism.
+    pub fn tasks_per_node(&self) -> HashMap<u32, usize> {
+        self.graph
+            .node_weights()
+            .map(|node| (node.node_id, node.parallelism))
+            .collect()
+    }
+
+    /// Collects the feature names of all operators in the program; watermark,
+    /// key/value and projection operators contribute none.
+    pub fn features(&self) -> HashSet<String> {
+        self.graph
+            .node_weights()
+            .flat_map(|node| node.operator_chain.operators.iter())
+            .filter_map(|op| {
+                let feature = match op.operator_name {
+                    OperatorName::ExpressionWatermark
+                    | OperatorName::ArrowValue
+                    | OperatorName::ArrowKey
+                    | OperatorName::Projection => return None,
+                    OperatorName::AsyncUdf => "async-udf",
+                    OperatorName::Join => "join-with-expiration",
+                    OperatorName::InstantJoin => "windowed-join",
+                    OperatorName::LookupJoin => "lookup-join",
+                    OperatorName::WindowFunction => "sql-window-function",
+                    OperatorName::TumblingWindowAggregate => "sql-tumbling-window-aggregate",
+                    OperatorName::SlidingWindowAggregate => "sql-sliding-window-aggregate",
+                    OperatorName::SessionWindowAggregate => "sql-session-window-aggregate",
+                    OperatorName::UpdatingAggregate => "sql-updating-aggregate",
+                    OperatorName::ConnectorSource => "connector-source",
+                    OperatorName::ConnectorSink => "connector-sink",
+                };
+                Some(feature.to_string())
+            })
+            .collect()
+    }
+}
diff --git a/src/datastream/mod.rs b/src/datastream/mod.rs
new file mode 100644
index 00000000..994a96b4
--- /dev/null
+++ b/src/datastream/mod.rs
@@ -0,0 +1,2 @@
+pub mod logical;
+pub mod optimizers;
diff --git a/src/datastream/optimizers.rs b/src/datastream/optimizers.rs
new file mode 100644
index 00000000..2d258aff
--- /dev/null
+++ b/src/datastream/optimizers.rs
@@ -0,0 +1,100 @@
+use crate::datastream::logical::{LogicalEdgeType, LogicalGraph};
+use petgraph::prelude::*;
+use petgraph::visit::NodeRef;
+use std::mem;
+
+/// A rewrite pass over a [`LogicalGraph`] that is applied until it reaches a fixed point.
+pub trait Optimizer {
+    /// Attempts one rewrite of `plan`; returns `true` if the graph was changed.
+    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool;
+
+    /// Calls [`Optimizer::optimize_once`] repeatedly until it returns `false`.
+    fn optimize(&self, plan: &mut LogicalGraph) {
+        // The loop body is empty on purpose: all the work happens in the condition.
+        while self.optimize_once(plan) {}
+    }
+}
+
+pub struct ChainingOptimizer {} // fuses eligible neighbouring nodes into one operator chain
+
+/// Removes `node`, re-attaching its outgoing edges to the source of its first incoming edge.
+fn remove_in_place<N, E>(graph: &mut DiGraph<N, E>, node: NodeIndex) {
+    let incoming = graph.edges_directed(node, Incoming).next().unwrap(); // panics without a parent
+    let parent = incoming.source().id();
+    let incoming = incoming.id();
+    graph.remove_edge(incoming);
+    let mut outgoing: Vec<_> = graph
+        .edges_directed(node, Outgoing)
+        .map(|e| (e.id(), e.target().id()))
+        .collect();
+    // `Graph::remove_edge` moves the last edge into the freed index, so remove from the
+    // highest index down; otherwise the ids still queued in `outgoing` can go stale.
+    outgoing.sort_unstable_by(|a, b| b.0.cmp(&a.0));
+    for (edge, target) in outgoing {
+        let weight = graph.remove_edge(edge).unwrap();
+        graph.add_edge(parent, target, weight);
+    }
+    graph.remove_node(node);
+}
+
+impl Optimizer for ChainingOptimizer {
+    /// Fuses one node with its single `Forward` successor; returns `true` if a pair was fused.
+    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool {
+        let node_indices: Vec<NodeIndex> = plan.node_indices().collect();
+        for &node_idx in &node_indices {
+            let cur = plan.node_weight(node_idx).unwrap();
+            // A source is never used as the head of a chain.
+            if cur.operator_chain.is_source() {
+                continue;
+            }
+            // Only a node with exactly one outgoing edge can absorb its successor.
+            let mut successors = plan.edges_directed(node_idx, Outgoing).collect::<Vec<_>>();
+            if successors.len() != 1 {
+                continue;
+            }
+
+            let edge = successors.remove(0);
+            let edge_type = edge.weight().edge_type;
+            // Anything other than a `Forward` edge is left alone.
+            if edge_type != LogicalEdgeType::Forward {
+                continue;
+            }
+
+            let successor_idx = edge.target();
+            let successor_node = plan.node_weight(successor_idx).unwrap();
+            // Both nodes need the same parallelism, and a sink is never absorbed.
+            if cur.parallelism != successor_node.parallelism
+                || successor_node.operator_chain.is_sink()
+            {
+                continue;
+            }
+            // The successor must have no other incoming edge.
+            if plan.edges_directed(successor_idx, Incoming).count() > 1 {
+                continue;
+            }
+
+            let mut new_cur = cur.clone();
+            new_cur.description = format!("{} -> {}", cur.description, successor_node.description);
+            new_cur
+                .operator_chain
+                .operators
+                .extend(successor_node.operator_chain.operators.clone());
+            // One schema per operator boundary: the connecting edge first, then the
+            // successor's own internal edges in case it is already a chain.
+            new_cur
+                .operator_chain
+                .edges
+                .push(edge.weight().schema.clone());
+            new_cur
+                .operator_chain
+                .edges
+                .extend(successor_node.operator_chain.edges.iter().cloned());
+            mem::swap(&mut new_cur, plan.node_weight_mut(node_idx).unwrap());
+            // Stop after one fusion: `node_indices` was collected before the graph changed.
+            remove_in_place(plan, successor_idx);
+            return true;
+        }
+
+        false
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index a6bb4d28..e8596864 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -16,6 +16,7 @@
 
 pub mod config;
 pub mod coordinator;
+pub mod datastream;
 pub mod logging;
 pub mod runtime;
 pub mod server;
diff --git a/src/server/handler.rs b/src/server/handler.rs
index 4721a5a1..45b0cd07 100644
--- a/src/server/handler.rs
+++ b/src/server/handler.rs
@@ -29,7 +29,7 @@ use crate::coordinator::{
     CreateFunction, CreatePythonFunction, DataSet, DropFunction, ShowFunctions,
     ShowFunctionsResult, StartFunction, Statement, StopFunction,
 };
-use crate::sql::SqlParser;
+use crate::sql::planner::parse::parse_sql;
 
 pub struct FunctionStreamServiceImpl {
     coordinator: Arc<Coordinator>,
@@ -70,10 +70,10 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
         let req = request.into_inner();
 
         let parse_start = Instant::now();
-        let stmt = match SqlParser::parse(&req.sql) {
-            Ok(stmt) => {
+        let parsed = match parse_sql(&req.sql) {
+            Ok(parsed) => {
                 log::debug!("SQL parsed in {}ms", parse_start.elapsed().as_millis());
-                stmt
+                parsed
             }
             Err(e) => {
                 return Ok(TonicResponse::new(Self::build_response(
@@ -85,7 +85,7 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
         };
 
         let exec_start = Instant::now();
-        let result = self.coordinator.execute(stmt.as_ref());
+        let result = self.coordinator.execute(parsed.as_ref());
         log::debug!(
             "Coordinator execution finished in {}ms",
             exec_start.elapsed().as_millis()
diff --git a/src/sql/grammar.pest b/src/sql/grammar.pest
deleted file mode 100644
index 15f70dd7..00000000
--- a/src/sql/grammar.pest
+++ /dev/null
@@ -1,134 +0,0 @@
-// =============================================================================
-// FUNCTION SQL Grammar
-// 
-// Using pest PEG syntax, referencing ANTLR style
-// =============================================================================
-
-// =============================================================================
-// 1. Whitespace (automatically skipped)
-// =============================================================================
-
-WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
-
-// =============================================================================
-// 2. Keywords (case-insensitive)
-// =============================================================================
-
-kw_create    = _{ C ~ R ~ E ~ A ~ T ~ E }
-kw_drop      = _{ D ~ R ~ O ~ P }
-kw_start     = _{ S ~ T ~ A ~ R ~ T }
-kw_stop      = _{ S ~ T ~ O ~ P }
-kw_show      = _{ S ~ H ~ O ~ W }
-kw_with      = _{ W ~ I ~ T ~ H }
-kw_function  = _{ F ~ U ~ N ~ C ~ T ~ I ~ O ~ N }
-kw_functions = _{ F ~ U ~ N ~ C ~ T ~ I ~ O ~ N ~ S }
-
-// =============================================================================
-// 3. Operators & Symbols
-// =============================================================================
-
-LPAREN  = _{ "(" }
-RPAREN  = _{ ")" }
-COMMA   = _{ "," }
-EQ      = _{ "=" }
-SQUOTE  = _{ "'" }
-DQUOTE  = _{ "\"" }
-
-// =============================================================================
-// 4. Literals
-// =============================================================================
-
-// String literal (single or double quotes)
-string_literal = @{ 
-    SQUOTE ~ string_inner_single ~ SQUOTE | 
-    DQUOTE ~ string_inner_double ~ DQUOTE 
-}
-
-string_inner_single = @{ (!(SQUOTE | "\\") ~ ANY | escape_seq)* }
-string_inner_double = @{ (!(DQUOTE | "\\") ~ ANY | escape_seq)* }
-escape_seq = @{ "\\" ~ ANY }
-
-// =============================================================================
-// 5. Identifiers
-// =============================================================================
-
-// Task name identifier
-identifier = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | "-")* }
-
-// =============================================================================
-// 6. Statements
-// =============================================================================
-
-// Entry rule
-statement = _{ 
-    SOI ~ (
-        create_stmt | 
-        drop_stmt | 
-        start_stmt | 
-        stop_stmt | 
-        show_stmt
-    ) ~ EOI 
-}
-
-// CREATE FUNCTION WITH (...)
-// Note: name is read from config file, not from SQL statement
-create_stmt = { kw_create ~ kw_function ~ kw_with ~ properties }
-
-// DROP FUNCTION name
-drop_stmt = { kw_drop ~ kw_function ~ identifier }
-
-// START FUNCTION name
-start_stmt = { kw_start ~ kw_function ~ identifier }
-
-// STOP FUNCTION name
-stop_stmt = { kw_stop ~ kw_function ~ identifier }
-
-// SHOW FUNCTIONS
-show_stmt = { kw_show ~ kw_functions }
-
-// =============================================================================
-// 7. Properties
-// =============================================================================
-
-// Property list ('key'='value', ...)
-properties = { LPAREN ~ property ~ (COMMA ~ property)* ~ RPAREN }
-
-// Single property 'key'='value'
-property = { property_key ~ EQ ~ property_value }
-
-// Property key (string)
-property_key = { string_literal }
-
-// Property value (string)
-property_value = { string_literal }
-
-// =============================================================================
-// 8. Character Fragments (for case-insensitive matching)
-// =============================================================================
-
-A = _{ "A" | "a" }
-B = _{ "B" | "b" }
-C = _{ "C" | "c" }
-D = _{ "D" | "d" }
-E = _{ "E" | "e" }
-F = _{ "F" | "f" }
-G = _{ "G" | "g" }
-H = _{ "H" | "h" }
-I = _{ "I" | "i" }
-J = _{ "J" | "j" }
-K = _{ "K" | "k" }
-L = _{ "L" | "l" }
-M = _{ "M" | "m" }
-N = _{ "N" | "n" }
-O = _{ "O" | "o" }
-P = _{ "P" | "p" }
-Q = _{ "Q" | "q" }
-R = _{ "R" | "r" }
-S = _{ "S" | "s" }
-T = _{ "T" | "t" }
-U = _{ "U" | "u" }
-V = _{ "V" | "v" }
-W = _{ "W" | "w" }
-X = _{ "X" | "x" }
-Y = _{ "Y" | "y" }
-Z = _{ "Z" | "z" }
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index ed3c2e30..31b5c4b9 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -10,6 +10,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-pub mod parser;
+pub mod planner;
 
-pub use parser::SqlParser;
+pub use planner::StreamSchemaProvider;
+pub use planner::parse::parse_sql;
+pub use planner::plan::rewrite_plan;
+pub use planner::sql_to_plan::statement_to_plan;
diff --git a/src/sql/parser/sql_parser.rs b/src/sql/parser/sql_parser.rs
deleted file mode 100644
index dc110745..00000000
--- a/src/sql/parser/sql_parser.rs
+++ /dev/null
@@ -1,249 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use pest::Parser;
-use pest_derive::Parser;
-
-use super::ParseError;
-use crate::coordinator::{
-    CreateFunction, DropFunction, ShowFunctions, StartFunction, Statement, StopFunction,
-};
-use std::collections::HashMap;
-
-#[derive(Parser)]
-#[grammar = "src/sql/grammar.pest"]
-struct Grammar;
-
-#[derive(Debug, Default)]
-pub struct SqlParser;
-
-impl SqlParser {
-    pub fn parse(sql: &str) -> Result<Box<dyn Statement>, ParseError> {
-        let pairs = Grammar::parse(Rule::statement, sql)
-            .map_err(|e| ParseError::new(format!("Parse error: {}", e)))?;
-
-        for pair in pairs {
-            return match pair.as_rule() {
-                Rule::create_stmt => {
-                    handle_create_stmt(pair).map(|stmt| stmt as Box<dyn Statement>)
-                }
-                Rule::drop_stmt => handle_drop_stmt(pair).map(|stmt| stmt as Box<dyn Statement>),
-                Rule::start_stmt => handle_start_stmt(pair).map(|stmt| stmt as Box<dyn Statement>),
-                Rule::stop_stmt => handle_stop_stmt(pair).map(|stmt| stmt as Box<dyn Statement>),
-                Rule::show_stmt => handle_show_stmt(pair).map(|stmt| stmt as Box<dyn Statement>),
-                _ => continue,
-            };
-        }
-
-        Err(ParseError::new("Unknown statement type"))
-    }
-}
-
-fn handle_create_stmt(
-    pair: pest::iterators::Pair<Rule>,
-) -> Result<Box<CreateFunction>, ParseError> {
-    let mut inner = pair.into_inner();
-    // Note: name is read from config file, not from SQL statement
-    // Pass empty string here, name will be read from config file later
-    let properties = inner
-        .next()
-        .map(parse_properties)
-        .ok_or_else(|| ParseError::new("Missing WITH clause"))?;
-
-    Ok(Box::new(
-        CreateFunction::from_properties(properties).map_err(ParseError::from)?,
-    ))
-}
-
-fn handle_drop_stmt(pair: pest::iterators::Pair<Rule>) -> Result<Box<DropFunction>, ParseError> {
-    let mut inner = pair.into_inner();
-    let name = inner.next().map(extract_string).unwrap_or_default();
-    Ok(Box::new(DropFunction::new(name)))
-}
-
-fn handle_start_stmt(pair: pest::iterators::Pair<Rule>) -> Result<Box<StartFunction>, ParseError> {
-    let mut inner = pair.into_inner();
-    let name = inner.next().map(extract_string).unwrap_or_default();
-    Ok(Box::new(StartFunction::new(name)))
-}
-
-fn handle_stop_stmt(pair: pest::iterators::Pair<Rule>) -> Result<Box<StopFunction>, ParseError> {
-    let mut inner = pair.into_inner();
-    let name = inner.next().map(extract_string).unwrap_or_default();
-    Ok(Box::new(StopFunction::new(name)))
-}
-
-fn handle_show_stmt(_pair: pest::iterators::Pair<Rule>) -> Result<Box<ShowFunctions>, ParseError> {
-    Ok(Box::new(ShowFunctions::new()))
-}
-
-fn extract_string(pair: pest::iterators::Pair<Rule>) -> String {
-    match pair.as_rule() {
-        Rule::string_literal => {
-            let s = pair.as_str();
-            if (s.starts_with('\'') && s.ends_with('\''))
-                || (s.starts_with('"') && s.ends_with('"'))
-            {
-                unescape_string(&s[1..s.len() - 1])
-            } else {
-                unescape_string(s)
-            }
-        }
-        Rule::identifier => pair.as_str().to_string(),
-        _ => pair.as_str().to_string(),
-    }
-}
-
-fn unescape_string(s: &str) -> String {
-    let mut result = String::with_capacity(s.len());
-    let mut chars = s.chars().peekable();
-
-    while let Some(ch) = chars.next() {
-        if ch == '\\' {
-            if let Some(&next) = chars.peek() {
-                chars.next();
-                match next {
-                    'n' => result.push('\n'),
-                    't' => result.push('\t'),
-                    'r' => result.push('\r'),
-                    '\\' => result.push('\\'),
-                    '\'' => result.push('\''),
-                    '"' => result.push('"'),
-                    _ => {
-                        result.push('\\');
-                        result.push(next);
-                    }
-                }
-            } else {
-                result.push(ch);
-            }
-        } else {
-            result.push(ch);
-        }
-    }
-
-    result
-}
-
-fn parse_properties(pair: pest::iterators::Pair<Rule>) -> HashMap<String, String> {
-    let mut properties = HashMap::new();
-
-    for prop in pair.into_inner() {
-        if prop.as_rule() == Rule::property {
-            let mut inner = prop.into_inner();
-            if let (Some(key_pair), Some(val_pair)) = (inner.next(), inner.next()) {
-                let key = key_pair
-                    .into_inner()
-                    .next()
-                    .map(extract_string)
-                    .unwrap_or_default();
-                let value = val_pair
-                    .into_inner()
-                    .next()
-                    .map(extract_string)
-                    .unwrap_or_default();
-                properties.insert(key, value);
-            }
-        }
-    }
-
-    properties
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_create_function() {
-        let sql =
-            "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')";
-        let _stmt = SqlParser::parse(sql).unwrap();
-    }
-
-    #[test]
-    fn test_create_function_minimal() {
-        let sql = "CREATE FUNCTION WITH ('function_path'='./processor.wasm')";
-        let _stmt = SqlParser::parse(sql).unwrap();
-    }
-
-    // Note: SQL only supports Path mode, not Bytes mode
-    // Bytes mode is only for gRPC requests
-
-    #[test]
-    fn test_drop_function() {
-        let sql = "DROP FUNCTION my_task";
-        let _stmt = SqlParser::parse(sql).unwrap();
-    }
-
-    #[test]
-    fn test_start_function() {
-        let sql = "START FUNCTION my_task";
-        let _stmt = SqlParser::parse(sql).unwrap();
-    }
-
-    #[test]
-    fn test_stop_function() {
-        let sql = "STOP FUNCTION my_task";
-        let _stmt = SqlParser::parse(sql).unwrap();
-    }
-
-    #[test]
-    fn test_show_functions() {
-        let sql = "SHOW FUNCTIONS";
-        let _stmt = SqlParser::parse(sql).unwrap();
-    }
-
-    #[test]
-    fn test_case_insensitive_keywords() {
-        let sql1 = "create function with ('function_path'='./test.wasm')";
-        let _stmt1 = SqlParser::parse(sql1).unwrap();
-
-        let sql2 = "Create Function With ('Function_Path'='./test.wasm')";
-        let _stmt2 = SqlParser::parse(sql2).unwrap();
-
-        let sql3 = "show functions";
-        let _stmt3 = SqlParser::parse(sql3).unwrap();
-
-        let sql4 = "start function my_task";
-        let _stmt4 = SqlParser::parse(sql4).unwrap();
-    }
-
-    #[test]
-    fn test_case_insensitive_property_keys() {
-        let sql1 =
-            "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')";
-        let _stmt1 = SqlParser::parse(sql1).unwrap();
-
-        let sql2 =
-            "CREATE FUNCTION WITH ('Function_Path'='./test.wasm', 'Config_Path'='./config.yml')";
-        let _stmt2 = SqlParser::parse(sql2).unwrap();
-
-        let sql3 =
-            "CREATE FUNCTION WITH ('FUNCTION_PATH'='./test.wasm', 'CONFIG_PATH'='./config.yml')";
-        let _stmt3 = SqlParser::parse(sql3).unwrap();
-
-        // Note: SQL only supports Path mode (function_path, config_path)
-        // Bytes mode (function, config) is only for gRPC requests
-    }
-
-    #[test]
-    fn test_with_extra_properties() {
-        let sql = r#"CREATE FUNCTION WITH (
-            'function_path'='./test.wasm',
-            'config_path'='./config.yml',
-            'parallelism'='4',
-            'memory-limit'='256mb'
-        )"#;
-        let _stmt = SqlParser::parse(sql).unwrap();
-    }
-}
diff --git a/src/sql/planner/extension/aggregate.rs b/src/sql/planner/extension/aggregate.rs
new file mode 100644
index 00000000..911e595f
--- /dev/null
+++ b/src/sql/planner/extension/aggregate.rs
@@ -0,0 +1,348 @@
+use std::fmt::Formatter;
+use std::sync::Arc;
+use std::time::Duration;
+
+use datafusion::arrow::datatypes::DataType;
+use datafusion::common::{Column, DFSchemaRef, Result, ScalarValue, internal_err};
+use datafusion::logical_expr;
+use datafusion::logical_expr::{
+    BinaryExpr, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, expr::ScalarFunction,
+};
+
+use crate::multifield_partial_ord;
+use crate::sql::planner::extension::{NamedNode, StreamExtension, TimestampAppendExtension};
+use crate::sql::planner::types::{
+    DFField, StreamSchema, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers,
+    schema_from_df_fields, schema_from_df_fields_with_metadata,
+};
+
+pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension";
+
+/// Logical extension node wrapping a windowed aggregate in a streaming SQL plan.
+/// `final_projection` handles tumbling, sliding, session, and instant windows.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct AggregateExtension {
+    pub(crate) window_behavior: WindowBehavior, // selects the final projection that is built
+    pub(crate) aggregate: LogicalPlan, // the single input plan returned by `inputs()`
+    pub(crate) schema: DFSchemaRef, // cloned from `final_calculation` in `new`
+    pub(crate) key_fields: Vec<usize>, // presumably key column indices — TODO confirm
+    pub(crate) final_calculation: LogicalPlan, // plan produced by `final_projection`
+}
+
+multifield_partial_ord!(AggregateExtension, aggregate, key_fields, final_calculation);
+
+impl AggregateExtension {
+    /// Builds the node; `schema` is taken from the derived final projection (panics on error).
+    pub fn new(
+        window_behavior: WindowBehavior,
+        aggregate: LogicalPlan,
+        key_fields: Vec<usize>,
+    ) -> Self {
+        let projection = Self::final_projection(&aggregate, window_behavior.clone()).unwrap();
+        Self {
+            schema: projection.schema().clone(),
+            final_calculation: projection,
+            window_behavior,
+            aggregate,
+            key_fields,
+        }
+    }
+
+    /// Builds the plan placed on top of the aggregate: the timestamp column is appended and,
+    /// for tumbling/sliding windows, the window struct and a bin-end output timestamp are added.
+    pub fn final_projection(
+        aggregate_plan: &LogicalPlan,
+        window_behavior: WindowBehavior,
+    ) -> Result<LogicalPlan> {
+        let timestamp_field: DFField = aggregate_plan.inputs()[0]
+            .schema()
+            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)?
+            .into();
+        let timestamp_append = LogicalPlan::Extension(Extension {
+            node: Arc::new(TimestampAppendExtension::new(
+                aggregate_plan.clone(),
+                timestamp_field.qualifier().cloned(),
+            )),
+        });
+        let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema());
+        let mut aggregate_expressions: Vec<_> = aggregate_fields
+            .iter()
+            .map(|field| Expr::Column(field.qualified_column()))
+            .collect();
+        // Only tumbling and sliding windows fall through; every other case returns early.
+        let (window_field, window_index, width, is_nested) = match window_behavior {
+            WindowBehavior::InData => return Ok(timestamp_append),
+            WindowBehavior::FromOperator {
+                window,
+                window_field,
+                window_index,
+                is_nested,
+            } => match window {
+                WindowType::Tumbling { width, .. } | WindowType::Sliding { width, .. } => {
+                    (window_field, window_index, width, is_nested)
+                }
+                WindowType::Session { .. } => {
+                    return Ok(LogicalPlan::Extension(Extension {
+                        node: Arc::new(WindowAppendExtension::new(
+                            timestamp_append,
+                            window_field,
+                            window_index,
+                        )),
+                    }));
+                }
+                WindowType::Instant => return Ok(timestamp_append),
+            },
+        };
+        // `is_nested` delegates to `nested_final_projection` on the timestamp-appended plan.
+        if is_nested {
+            return Self::nested_final_projection(
+                timestamp_append,
+                window_field,
+                window_index,
+                width,
+            );
+        }
+
+        let timestamp_column =
+            Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name());
+        aggregate_fields.insert(window_index, window_field.clone());
+        // Insert the aliased window struct expression at the same index as its schema field.
+        let window_expression = Self::build_window_struct_expr(&timestamp_column, width);
+        aggregate_expressions.insert(
+            window_index,
+            window_expression
+                .alias_qualified(window_field.qualifier().cloned(), window_field.name()),
+        );
+        aggregate_fields.push(timestamp_field);
+        // Output timestamp = timestamp + (width - 1ns); NOTE(review): a zero `width` underflows.
+        let bin_end_calculation = Expr::BinaryExpr(BinaryExpr {
+            left: Box::new(Expr::Column(timestamp_column.clone())),
+            op: logical_expr::Operator::Plus,
+            right: Box::new(Expr::Literal(
+                ScalarValue::IntervalMonthDayNano(Some(
+                    datafusion::arrow::datatypes::IntervalMonthDayNanoType::make_value(
+                        0,
+                        0,
+                        (width.as_nanos() - 1) as i64,
+                    ),
+                )),
+                None,
+            )),
+        });
+        aggregate_expressions.push(bin_end_calculation);
+
+        Ok(LogicalPlan::Projection(
+            logical_expr::Projection::try_new_with_schema(
+                aggregate_expressions,
+                Arc::new(timestamp_append),
+                Arc::new(schema_from_df_fields(&aggregate_fields)?),
+            )?,
+        ))
+    }
+
+    /// Builds the call `window(start, end)` to the placeholder `WindowStructUdf`.
+    /// `start` is `timestamp_column`; `end` is `timestamp_column + width`, with `width`
+    /// converted to nanoseconds via an `as i64` cast.
+    fn build_window_struct_expr(timestamp_column: &Column, width: Duration) -> Expr {
+        use datafusion::arrow::datatypes::IntervalMonthDayNanoType;
+        use datafusion::logical_expr::ScalarUDF;
+
+        let window_start = Expr::Column(timestamp_column.clone());
+        // `width` as a month/day/nanosecond interval with only the nanosecond part set.
+        let interval = IntervalMonthDayNanoType::make_value(0, 0, width.as_nanos() as i64);
+        let width_literal = Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(interval)), None);
+        let window_end = Expr::BinaryExpr(BinaryExpr {
+            left: Box::new(window_start.clone()),
+            op: logical_expr::Operator::Plus,
+            right: Box::new(width_literal),
+        });
+
+        // A fresh UDF instance is created for every call.
+        let window_udf = ScalarUDF::new_from_impl(WindowStructUdf {});
+        Expr::ScalarFunction(ScalarFunction {
+            func: Arc::new(window_udf),
+            args: vec![window_start, window_end],
+        })
+    }
+
+    /// Adds only the window struct column at `window_index` on top of `aggregate_plan`.
+    /// Schema-lookup and projection errors are returned to the caller instead of panicking.
+    fn nested_final_projection(
+        aggregate_plan: LogicalPlan,
+        window_field: DFField,
+        window_index: usize,
+        width: Duration,
+    ) -> Result<LogicalPlan> {
+        let timestamp_field: DFField = aggregate_plan
+            .schema()
+            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)?
+            .into();
+        let timestamp_column =
+            Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name());
+
+        let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema());
+        let mut aggregate_expressions: Vec<_> = aggregate_fields
+            .iter()
+            .map(|field| Expr::Column(field.qualified_column()))
+            .collect();
+        aggregate_fields.insert(window_index, window_field.clone());
+
+        let window_expression = Self::build_window_struct_expr(&timestamp_column, width);
+        aggregate_expressions.insert(
+            window_index,
+            window_expression
+                .alias_qualified(window_field.qualifier().cloned(), window_field.name()),
+        );
+
+        Ok(LogicalPlan::Projection(
+            logical_expr::Projection::try_new_with_schema(
+                aggregate_expressions,
+                Arc::new(aggregate_plan),
+                Arc::new(schema_from_df_fields(&aggregate_fields)?),
+            )?,
+        ))
+    }
+}
+
+impl UserDefinedLogicalNodeCore for AggregateExtension {
+    fn name(&self) -> &str {
+        AGGREGATE_EXTENSION_NAME
+    }
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.aggregate]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        let behavior = match &self.window_behavior {
+            WindowBehavior::InData => "InData".to_string(),
+            WindowBehavior::FromOperator { window, .. } => format!("FromOperator({window:?})"),
+        };
+        write!(f, "AggregateExtension: {} | ", self.schema())?;
+        write!(f, "window_behavior: {behavior:?}")
+    }
+
+    /// Rebuilds the node for a new input, returning projection errors instead of panicking.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [aggregate] = inputs.as_slice() else {
+            return internal_err!("input size inconsistent");
+        };
+        let final_calculation = Self::final_projection(aggregate, self.window_behavior.clone())?;
+        Ok(Self {
+            window_behavior: self.window_behavior.clone(),
+            aggregate: aggregate.clone(),
+            schema: final_calculation.schema().clone(),
+            key_fields: self.key_fields.clone(),
+            final_calculation,
+        })
+    }
+}
+
+impl StreamExtension for AggregateExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+    /// Arrow form of `schema`, wrapped with an empty key list.
+    fn output_schema(&self) -> StreamSchema {
+        let arrow_schema = Arc::new(self.schema.as_ref().clone().into());
+        StreamSchema::from_schema_keys(arrow_schema, Vec::new()).unwrap()
+    }
+}
+
+/// Extension node that adds a window struct (start, end) field to its input's schema.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct WindowAppendExtension {
+    pub(crate) input: LogicalPlan, // the single input plan
+    pub(crate) window_field: DFField, // field inserted into the output schema
+    pub(crate) window_index: usize, // position at which `window_field` is inserted
+    pub(crate) schema: DFSchemaRef, // input fields plus `window_field`, input metadata kept
+}
+
+multifield_partial_ord!(WindowAppendExtension, input, window_index);
+
+impl WindowAppendExtension {
+    fn new(input: LogicalPlan, window_field: DFField, window_index: usize) -> Self {
+        let metadata = input.schema().metadata().clone();
+        let mut fields = fields_with_qualifiers(input.schema());
+        fields.insert(window_index, window_field.clone()); // splice in the window field
+        Self {
+            schema: Arc::new(schema_from_df_fields_with_metadata(&fields, metadata).unwrap()),
+            input,
+            window_field,
+            window_index,
+        }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for WindowAppendExtension {
+    fn name(&self) -> &str {
+        "WindowAppendExtension"
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "WindowAppendExtension: field {:?} at {}",
+            self.window_field, self.window_index
+        )
+    }
+
+    /// Rebuilds the node around exactly one input; any other input count is an internal error.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [input] = inputs.as_slice() else {
+            return internal_err!("input size inconsistent");
+        };
+        Ok(Self::new(input.clone(), self.window_field.clone(), self.window_index))
+    }
+}
+
+/// Placeholder UDF named `window`, used to build the window struct expression at plan time.
+#[derive(Debug)]
+struct WindowStructUdf;
+
+impl datafusion::logical_expr::ScalarUDFImpl for WindowStructUdf {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "window"
+    }
+
+    fn signature(&self) -> &datafusion::logical_expr::Signature {
+        &datafusion::logical_expr::Signature {
+            type_signature: datafusion::logical_expr::TypeSignature::Any(2),
+            volatility: datafusion::logical_expr::Volatility::Immutable,
+        }
+    }
+
+    fn return_type(&self, _args: &[DataType]) -> Result<DataType> {
+        Ok(crate::sql::planner::schemas::window_arrow_struct())
+    }
+    /// Plan-time only: if ever invoked, reports a not-implemented error instead of panicking.
+    fn invoke_with_args(
+        &self,
+        _args: datafusion::logical_expr::ScalarFunctionArgs,
+    ) -> Result<datafusion::logical_expr::ColumnarValue> {
+        datafusion::common::not_impl_err!("WindowStructUdf is a plan-time-only function")
+    }
+}
diff --git a/src/sql/planner/extension/join.rs b/src/sql/planner/extension/join.rs
new file mode 100644
index 00000000..0b4fa13a
--- /dev/null
+++ b/src/sql/planner/extension/join.rs
@@ -0,0 +1,61 @@
+use std::time::Duration;
+
+use datafusion::common::{DFSchemaRef, Result};
+use datafusion::logical_expr::expr::Expr;
+use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use crate::sql::planner::types::StreamSchema;
+
+use std::sync::Arc;
+
+pub(crate) const JOIN_NODE_NAME: &str = "JoinNode";
+
+/// Extension node for streaming joins.
+/// Supports instant joins (windowed, no state) and updating joins (with TTL-based state).
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub struct JoinExtension {
+    pub(crate) rewritten_join: LogicalPlan, // sole input; also supplies this node's schema
+    pub(crate) is_instant: bool, // presumably selects the instant (stateless) variant — confirm
+    pub(crate) ttl: Option<Duration>, // presumably the state TTL for updating joins — confirm
+}
+
+impl StreamExtension for JoinExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+    fn output_schema(&self) -> StreamSchema {
+        let arrow_schema = Arc::new(self.rewritten_join.schema().as_ref().into());
+        StreamSchema::from_schema_unkeyed(arrow_schema).unwrap()
+    }
+}
+
+impl UserDefinedLogicalNodeCore for JoinExtension {
+    fn name(&self) -> &str {
+        JOIN_NODE_NAME
+    }
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.rewritten_join]
+    }
+    fn schema(&self) -> &DFSchemaRef {
+        self.rewritten_join.schema()
+    }
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "JoinExtension: {}", self.schema())
+    }
+
+    /// Replaces the wrapped join plan; anything but exactly one input is an internal error.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [rewritten_join] = inputs.as_slice() else {
+            return datafusion::common::internal_err!("input size inconsistent");
+        };
+        Ok(Self {
+            rewritten_join: rewritten_join.clone(),
+            is_instant: self.is_instant,
+            ttl: self.ttl,
+        })
+    }
+}
diff --git a/src/sql/planner/extension/key_calculation.rs b/src/sql/planner/extension/key_calculation.rs
new file mode 100644
index 00000000..f60c4d32
--- /dev/null
+++ b/src/sql/planner/extension/key_calculation.rs
@@ -0,0 +1,138 @@
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{Field, Schema};
+use datafusion::common::{DFSchemaRef, Result, internal_err};
+use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::multifield_partial_ord;
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use crate::sql::planner::types::{
+    StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata,
+};
+
+pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension";
+
+/// Keys are given either as column indices or as expressions to evaluate.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub enum KeysOrExprs {
+    Keys(Vec<usize>), // indices into the input schema
+    Exprs(Vec<Expr>), // expressions typed against the input schema
+}
+
+/// Calculation for computing keyed data, used for shuffling data to the correct nodes.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct KeyCalculationExtension {
+    pub(crate) name: Option<String>, // `Some` via `new_named_and_trimmed`, `None` via `new`
+    pub(crate) input: LogicalPlan, // the single input plan
+    pub(crate) keys: KeysOrExprs, // key columns, by index or by expression
+    pub(crate) schema: DFSchemaRef, // input schema; key columns removed when "trimmed"
+}
+
+multifield_partial_ord!(KeyCalculationExtension, name, input, keys);
+
+impl KeyCalculationExtension {
+    /// Creates a named node whose output schema omits the columns at the `keys` indices.
+    /// The input schema's metadata is carried over to the trimmed schema.
+    /// Panics if the trimmed schema cannot be constructed.
+    pub fn new_named_and_trimmed(input: LogicalPlan, keys: Vec<usize>, name: String) -> Self {
+        let input_schema = input.schema();
+        // Keep every field whose position is not one of the key indices.
+        let non_key_fields: Vec<_> = fields_with_qualifiers(input_schema)
+            .into_iter()
+            .enumerate()
+            .filter(|(position, _)| !keys.contains(position))
+            .map(|(_, field)| field)
+            .collect();
+
+        let metadata = input_schema.metadata().clone();
+        let trimmed = schema_from_df_fields_with_metadata(&non_key_fields, metadata).unwrap();
+
+        Self {
+            name: Some(name),
+            schema: Arc::new(trimmed),
+            input,
+            keys: KeysOrExprs::Keys(keys),
+        }
+    }
+
+    /// Unnamed variant: the node's schema is a clone of the input plan's schema.
+    pub fn new(input: LogicalPlan, keys: KeysOrExprs) -> Self {
+        Self {
+            schema: input.schema().clone(),
+            name: None,
+            input,
+            keys,
+        }
+    }
+}
+
+impl StreamExtension for KeyCalculationExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Stream schema with key columns marked: for index keys the input schema is used as is;
+    /// for expression keys one `__key_{i}` column per expression is prepended to it.
+    /// Panics if an expression's type cannot be resolved or the schema cannot be built.
+    fn output_schema(&self) -> StreamSchema {
+        let input_schema = self.input.schema().as_ref();
+        match &self.keys {
+            KeysOrExprs::Keys(keys) => {
+                StreamSchema::from_schema_keys(Arc::new(input_schema.into()), keys.clone()).unwrap()
+            }
+            KeysOrExprs::Exprs(exprs) => {
+                let mut fields = vec![];
+                for (i, e) in exprs.iter().enumerate() {
+                    let (dt, nullable) = e.data_type_and_nullable(input_schema).unwrap();
+                    fields.push(Field::new(format!("__key_{i}"), dt, nullable).into());
+                }
+                fields.extend(input_schema.fields().iter().cloned());
+                // NOTE(review): the key columns occupy 0..exprs.len(), yet 1..=exprs.len() is
+                // passed as the key indices — confirm this offset is intended.
+                let key_indices = (1..=exprs.len()).collect();
+                StreamSchema::from_schema_keys(Arc::new(Schema::new(fields)), key_indices).unwrap()
+            }
+        }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for KeyCalculationExtension {
+    fn name(&self) -> &str {
+        KEY_CALCULATION_NAME
+    }
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    /// Exposes the key expressions so plan rewrites hand them back to `with_exprs_and_inputs`.
+    fn expressions(&self) -> Vec<Expr> {
+        match &self.keys {
+            KeysOrExprs::Keys(_) => vec![],
+            KeysOrExprs::Exprs(exprs) => exprs.clone(),
+        }
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "KeyCalculationExtension: {}", self.schema())
+    }
+
+    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if inputs.len() != 1 {
+            return internal_err!("input size inconsistent");
+        }
+        let keys = match &self.keys {
+            KeysOrExprs::Keys(k) => KeysOrExprs::Keys(k.clone()),
+            KeysOrExprs::Exprs(_) => KeysOrExprs::Exprs(exprs),
+        };
+        Ok(Self {
+            name: self.name.clone(),
+            input: inputs[0].clone(),
+            keys,
+            schema: self.schema.clone(),
+        })
+    }
+}
diff --git a/src/sql/planner/extension/mod.rs b/src/sql/planner/extension/mod.rs
new file mode 100644
index 00000000..96ac5f32
--- /dev/null
+++ b/src/sql/planner/extension/mod.rs
@@ -0,0 +1,153 @@
+use std::fmt::Debug;
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, DataFusionError, Result, TableReference};
+use datafusion::logical_expr::{
+    Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore,
+};
+
+use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
+use crate::sql::planner::types::StreamSchema;
+
+pub(crate) mod aggregate;
+pub(crate) mod join;
+pub(crate) mod key_calculation;
+pub(crate) mod projection;
+pub(crate) mod remote_table;
+pub(crate) mod watermark_node;
+pub(crate) mod window_fn;
+
+pub(crate) trait StreamExtension: Debug {
+    fn node_name(&self) -> Option<NamedNode>; // `None` for nodes that carry no name
+    fn output_schema(&self) -> StreamSchema; // stream-level schema of this node's output
+    fn transparent(&self) -> bool {
+        false // default for every implementor that does not override it
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum NamedNode {
+    Source(TableReference),
+    Watermark(TableReference), // returned by `WatermarkNode::node_name`
+    RemoteTable(TableReference), // returned by a materialized `RemoteTableExtension`
+    Sink(TableReference),
+}
+
+/// Downcasts `node` to `T`, exposing it as a `StreamExtension`; `Err(())` on a type mismatch.
+fn try_from_t<T: StreamExtension + 'static>(
+    node: &dyn UserDefinedLogicalNode,
+) -> std::result::Result<&dyn StreamExtension, ()> {
+    let concrete = node.as_any().downcast_ref::<T>().ok_or(())?;
+    // `&T` unsizes to `&dyn StreamExtension` at the return site.
+    Ok(concrete)
+}
+
+impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension {
+    type Error = DataFusionError;
+    /// Fails with a `Plan` error naming the node when it is none of the types probed below.
+    fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result<Self, Self::Error> {
+        use aggregate::AggregateExtension;
+        use join::JoinExtension;
+        use key_calculation::KeyCalculationExtension;
+        use projection::ProjectionExtension;
+        use remote_table::RemoteTableExtension;
+        use watermark_node::WatermarkNode;
+        use window_fn::WindowFunctionExtension;
+        // Probe each known extension type in order; the first matching downcast wins.
+        try_from_t::<WatermarkNode>(node)
+            .or_else(|_| try_from_t::<KeyCalculationExtension>(node))
+            .or_else(|_| try_from_t::<AggregateExtension>(node))
+            .or_else(|_| try_from_t::<RemoteTableExtension>(node))
+            .or_else(|_| try_from_t::<JoinExtension>(node))
+            .or_else(|_| try_from_t::<WindowFunctionExtension>(node))
+            .or_else(|_| try_from_t::<ProjectionExtension>(node))
+            .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name())))
+    }
+}
+
+impl<'a> TryFrom<&'a Arc<dyn UserDefinedLogicalNode>> for &'a dyn StreamExtension {
+    type Error = DataFusionError;
+    /// Delegates to the conversion from `&dyn UserDefinedLogicalNode`.
+    fn try_from(node: &'a Arc<dyn UserDefinedLogicalNode>) -> Result<Self, Self::Error> {
+        <Self as TryFrom<&'a dyn UserDefinedLogicalNode>>::try_from(node.as_ref())
+    }
+}
+
+#[macro_export]
+macro_rules! multifield_partial_ord {
+    ($ty:ty, $($field:tt), *) => {
+        impl PartialOrd for $ty {
+            fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+                $(
+                    let cmp = self.$field.partial_cmp(&other.$field)?; // incomparable => None
+                    if cmp != std::cmp::Ordering::Equal {
+                        return Some(cmp); // the first differing listed field decides
+                    }
+                )*
+                Some(std::cmp::Ordering::Equal) // equal on every listed field; others ignored
+            }
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct TimestampAppendExtension {
+    pub(crate) input: LogicalPlan, // `new` requires that its schema has no timestamp field
+    pub(crate) qualifier: Option<TableReference>, // passed on to `add_timestamp_field`
+    pub(crate) schema: DFSchemaRef, // result of `add_timestamp_field` on the input schema
+}
+
+impl TimestampAppendExtension {
+    pub(crate) fn new(input: LogicalPlan, qualifier: Option<TableReference>) -> Self {
+        if has_timestamp_field(input.schema()) {
+            unreachable!(
+                "shouldn't be adding timestamp to a plan that already has it: {:?}",
+                input.schema()
+            );
+        } // past this point the input schema is known to lack a timestamp field
+        let schema = add_timestamp_field(input.schema().clone(), qualifier.clone()).unwrap();
+        Self {
+            input,
+            qualifier,
+            schema, // input schema as returned by `add_timestamp_field`
+        }
+    }
+}
+
+multifield_partial_ord!(TimestampAppendExtension, input, qualifier);
+
+impl UserDefinedLogicalNodeCore for TimestampAppendExtension {
+    fn name(&self) -> &str {
+        "TimestampAppendExtension"
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    /// Prints the qualifier followed by the comma-separated output field names.
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let cols = self.schema.fields();
+        let names: Vec<_> = cols.iter().map(|field| field.name().to_string()).collect();
+        write!(f, "TimestampAppendExtension({:?}): ", self.qualifier)?;
+        write!(f, "{}", names.join(", "))
+    }
+
+    /// Rebuilds the node around exactly one input.
+    /// Any other input count yields an internal error instead of an out-of-bounds panic.
+    /// `Self::new` still panics if the new input already has a timestamp field.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [input] = inputs.as_slice() else {
+            return datafusion::common::internal_err!("input size inconsistent");
+        };
+        Ok(Self::new(input.clone(), self.qualifier.clone()))
+    }
+}
diff --git a/src/sql/planner/extension/projection.rs b/src/sql/planner/extension/projection.rs
new file mode 100644
index 00000000..f7ecb6ed
--- /dev/null
+++ b/src/sql/planner/extension/projection.rs
@@ -0,0 +1,91 @@
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, Result};
+use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::multifield_partial_ord;
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use crate::sql::planner::types::{DFField, StreamSchema, schema_from_df_fields};
+
+pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension";
+
+/// Projection operations for streaming SQL plans.
+/// Handles column projections, shuffles for key-based operations, etc.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct ProjectionExtension {
+    pub(crate) inputs: Vec<LogicalPlan>, // `new` derives the schema from the first input only
+    pub(crate) name: Option<String>,
+    pub(crate) exprs: Vec<Expr>, // projected expressions; one output field per expression
+    pub(crate) schema: DFSchemaRef, // built from `exprs` in `new`
+    pub(crate) shuffle: bool, // `false` from `new`; set by `shuffled()`
+}
+
+multifield_partial_ord!(ProjectionExtension, name, exprs);
+
+impl ProjectionExtension {
+    /// Derives the output schema by resolving each of `exprs` against the first input's schema.
+    /// Panics if `inputs` is empty or an expression cannot be turned into a field.
+    pub(crate) fn new(inputs: Vec<LogicalPlan>, name: Option<String>, exprs: Vec<Expr>) -> Self {
+        let input_schema = inputs.first().unwrap().schema();
+        let mut fields: Vec<DFField> = Vec::with_capacity(exprs.len());
+        for expr in &exprs {
+            fields.push(expr.to_field(input_schema).unwrap().into());
+        }
+        let schema = Arc::new(schema_from_df_fields(&fields).unwrap());
+        Self {
+            inputs,
+            name,
+            exprs,
+            schema,
+            shuffle: false,
+        }
+    }
+
+    pub(crate) fn shuffled(self) -> Self {
+        let shuffle = true; // every other field is carried over unchanged
+        Self { shuffle, ..self }
+    }
+}
+
+impl StreamExtension for ProjectionExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+    fn output_schema(&self) -> StreamSchema {
+        let arrow_schema = self.schema.as_arrow().clone(); // passed on without key columns
+        StreamSchema::from_schema_unkeyed(Arc::new(arrow_schema)).unwrap()
+    }
+}
+
+impl UserDefinedLogicalNodeCore for ProjectionExtension {
+    fn name(&self) -> &str {
+        PROJECTION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        self.inputs.iter().collect()
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+    /// Exposes the projected expressions so rewrites hand them back to `with_exprs_and_inputs`.
+    fn expressions(&self) -> Vec<Expr> {
+        self.exprs.clone()
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "ProjectionExtension: {}", self.schema())
+    }
+
+    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self {
+            name: self.name.clone(),
+            inputs,
+            exprs,
+            schema: self.schema.clone(),
+            shuffle: self.shuffle,
+        })
+    }
+}
diff --git a/src/sql/planner/extension/remote_table.rs b/src/sql/planner/extension/remote_table.rs
new file mode 100644
index 00000000..4935efd9
--- /dev/null
+++ b/src/sql/planner/extension/remote_table.rs
@@ -0,0 +1,71 @@
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::multifield_partial_ord;
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use crate::sql::planner::types::StreamSchema;
+
+pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension";
+
+/// Lightweight extension that segments the execution graph and enables merging
+/// nodes with the same name. Allows materializing intermediate results.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct RemoteTableExtension {
+    pub(crate) input: LogicalPlan,
+    pub(crate) name: TableReference, // reported via `node_name` only when `materialize` is set
+    pub(crate) schema: DFSchemaRef,
+    pub(crate) materialize: bool, // when `false`, `node_name` returns `None`
+}
+
+multifield_partial_ord!(RemoteTableExtension, input, name, materialize);
+
+impl StreamExtension for RemoteTableExtension {
+    /// Named (as a `RemoteTable`) only when `materialize` is set; `None` otherwise.
+    fn node_name(&self) -> Option<NamedNode> {
+        self.materialize
+            .then(|| NamedNode::RemoteTable(self.name.to_owned()))
+    }
+
+    /// Arrow form of `schema`, wrapped with an empty key list.
+    fn output_schema(&self) -> StreamSchema {
+        let arrow_schema = Arc::new(self.schema.as_ref().into());
+        StreamSchema::from_schema_keys(arrow_schema, Vec::new()).unwrap()
+    }
+}
+
+impl UserDefinedLogicalNodeCore for RemoteTableExtension {
+    fn name(&self) -> &str {
+        REMOTE_TABLE_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        Vec::new()
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "RemoteTableExtension: {}", self.schema)
+    }
+
+    /// Swaps in exactly one new input; name, schema and `materialize` are carried over.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [input] = inputs.as_slice() else {
+            return internal_err!("input size inconsistent");
+        };
+        Ok(Self {
+            input: input.clone(),
+            name: self.name.clone(),
+            schema: self.schema.clone(),
+            materialize: self.materialize,
+        })
+    }
+}
diff --git a/src/sql/planner/extension/watermark_node.rs b/src/sql/planner/extension/watermark_node.rs
new file mode 100644
index 00000000..eb776ff2
--- /dev/null
+++ b/src/sql/planner/extension/watermark_node.rs
@@ -0,0 +1,110 @@
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err};
+use datafusion::error::DataFusionError;
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::multifield_partial_ord;
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use crate::sql::planner::schemas::add_timestamp_field;
+use crate::sql::planner::types::{StreamSchema, TIMESTAMP_FIELD};
+
+pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode";
+
+/// Represents a watermark node in the streaming query plan.
+/// Watermarks track event-time progress and enable time-based operations.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct WatermarkNode {
+    pub input: LogicalPlan,
+    pub qualifier: TableReference, // passed to `add_timestamp_field`; also names the node
+    pub watermark_expression: Expr, // the node's only expression (see `expressions()`)
+    pub schema: DFSchemaRef, // result of `add_timestamp_field` on the input schema
+    timestamp_index: usize, // index of `TIMESTAMP_FIELD` within `schema`
+}
+
+multifield_partial_ord!(
+    WatermarkNode,
+    input,
+    qualifier,
+    watermark_expression,
+    timestamp_index
+);
+
+impl UserDefinedLogicalNodeCore for WatermarkNode {
+    fn name(&self) -> &str {
+        WATERMARK_NODE_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![self.watermark_expression.clone()]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "WatermarkNode({}): {}", self.qualifier, self.schema)
+    }
+
+    /// Swaps in one new input and one new watermark expression; qualifier and schema are kept.
+    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [input] = inputs.as_slice() else {
+            return internal_err!("input size inconsistent");
+        };
+        let watermark_expression = match <[Expr; 1]>::try_from(exprs) {
+            Ok([only]) => only,
+            Err(all) => return internal_err!("expected one expression; found {}", all.len()),
+        };
+        let timestamp_index = self
+            .schema
+            .index_of_column_by_name(Some(&self.qualifier), TIMESTAMP_FIELD)
+            .ok_or_else(|| DataFusionError::Plan("missing timestamp column".to_string()))?;
+        Ok(Self {
+            input: input.clone(),
+            qualifier: self.qualifier.clone(),
+            watermark_expression,
+            schema: self.schema.clone(),
+            timestamp_index,
+        })
+    }
+}
+
+impl StreamExtension for WatermarkNode {
+    fn node_name(&self) -> Option<NamedNode> {
+        let qualifier = self.qualifier.clone();
+        Some(NamedNode::Watermark(qualifier))
+    }
+    fn output_schema(&self) -> StreamSchema {
+        self.stream_schema() // unkeyed schema carrying `timestamp_index`
+    }
+}
+
+impl WatermarkNode {
+    /// Extends the input schema via `add_timestamp_field` and records the timestamp's index.
+    pub(crate) fn new(
+        input: LogicalPlan,
+        qualifier: TableReference,
+        watermark_expression: Expr,
+    ) -> Result<Self> {
+        let schema = add_timestamp_field(input.schema().clone(), Some(qualifier.clone()))?;
+        let Some(timestamp_index) = schema.index_of_column_by_name(None, TIMESTAMP_FIELD) else {
+            return Err(DataFusionError::Plan("missing _timestamp column".to_string()));
+        };
+        Ok(Self {
+            input,
+            qualifier,
+            watermark_expression,
+            schema,
+            timestamp_index,
+        })
+    }
+    pub(crate) fn stream_schema(&self) -> StreamSchema {
+        StreamSchema::new_unkeyed(Arc::new(self.schema.as_ref().into()), self.timestamp_index)
+    }
+}
diff --git a/src/sql/planner/extension/window_fn.rs b/src/sql/planner/extension/window_fn.rs
new file mode 100644
index 00000000..6e6e1c36
--- /dev/null
+++ b/src/sql/planner/extension/window_fn.rs
@@ -0,0 +1,82 @@
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, Result, internal_err};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use crate::sql::planner::types::StreamSchema;
+
+/// Name reported by `WindowFunctionExtension` through `UserDefinedLogicalNodeCore::name`.
+pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension";
+
+/// Extension for window functions (e.g., ROW_NUMBER, RANK) over windowed input.
+/// Window functions require already-windowed input and are evaluated per-window.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub(crate) struct WindowFunctionExtension {
+    /// The wrapped plan: this node's only input and the source of its schema.
+    pub(crate) window_plan: LogicalPlan,
+    /// Key field indices, copied unchanged whenever the node is rebuilt.
+    pub(crate) key_fields: Vec<usize>,
+}
+
+impl WindowFunctionExtension {
+    /// Wraps `window_plan` together with its key field indices.
+    pub fn new(window_plan: LogicalPlan, key_fields: Vec<usize>) -> Self {
+        Self {
+            window_plan,
+            key_fields,
+        }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for WindowFunctionExtension {
+    fn name(&self) -> &str {
+        WINDOW_FUNCTION_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.window_plan]
+    }
+
+    /// The node is schema-transparent: it exposes the wrapped plan's schema.
+    fn schema(&self) -> &DFSchemaRef {
+        self.window_plan.schema()
+    }
+
+    /// The node owns no expressions of its own.
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "WindowFunction: {}", self.schema())
+    }
+
+    /// Rebuilds the node around the first replacement input, keeping `key_fields`.
+    ///
+    /// # Errors
+    /// Returns an internal error when `inputs` is empty instead of panicking on an
+    /// out-of-bounds index.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        // Moving the plan out of the vector also avoids cloning it.
+        let Some(window_plan) = inputs.into_iter().next() else {
+            return internal_err!("input size inconsistent");
+        };
+        Ok(Self::new(window_plan, self.key_fields.clone()))
+    }
+}
+
+impl StreamExtension for WindowFunctionExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Unkeyed stream schema derived from the wrapped plan's schema.
+    ///
+    /// # Panics
+    /// Panics if `StreamSchema::from_schema_unkeyed` rejects the schema.
+    fn output_schema(&self) -> StreamSchema {
+        // Convert from a borrowed `DFSchema`; no intermediate clone of the schema is needed.
+        StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap()
+    }
+}
diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs
new file mode 100644
index 00000000..ead5e212
--- /dev/null
+++ b/src/sql/planner/mod.rs
@@ -0,0 +1,355 @@
+#![allow(clippy::new_without_default)]
+
+pub(crate) mod extension;
+pub mod parse;
+pub mod plan;
+pub mod schemas;
+pub mod sql_to_plan;
+pub mod types;
+
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema};
+use datafusion::common::{Result, plan_err};
+use datafusion::datasource::DefaultTableSource;
+use datafusion::error::DataFusionError;
+use datafusion::execution::{FunctionRegistry, SessionStateDefaults};
+use datafusion::logical_expr::expr_rewriter::FunctionRewrite;
+use datafusion::logical_expr::planner::ExprPlanner;
+use datafusion::logical_expr::{
+    AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF,
+};
+use datafusion::optimizer::Analyzer;
+use datafusion::sql::TableReference;
+use datafusion::sql::planner::ContextProvider;
+use unicase::UniCase;
+
+use crate::sql::planner::schemas::window_arrow_struct;
+use crate::sql::planner::types::{PlaceholderUdf, PlanningOptions};
+
+/// Catalog provider for streaming SQL queries.
+/// Holds the registered tables, UDF/UDAF/UDWF maps, expression planners and config options.
+#[derive(Clone, Default)]
+pub struct StreamSchemaProvider {
+    pub source_defs: HashMap<String, String>,
+    tables: HashMap<UniCase<String>, StreamTable>, // keyed case-insensitively by table name
+    pub functions: HashMap<String, Arc<ScalarUDF>>, // scalar UDFs keyed by `udf.name()`
+    pub aggregate_functions: HashMap<String, Arc<AggregateUDF>>, // UDAFs keyed by name
+    pub window_functions: HashMap<String, Arc<WindowUDF>>, // UDWFs keyed by name
+    config_options: datafusion::config::ConfigOptions, // returned by `ContextProvider::options`
+    pub expr_planners: Vec<Arc<dyn ExprPlanner>>, // kept in registration order
+    pub planning_options: PlanningOptions,
+    pub analyzer: Analyzer, // receives function rewrites registered on this provider
+}
+
+/// A table registered in the streaming SQL context: a source, a sink, or an in-memory table.
+#[derive(Clone, Debug)]
+pub enum StreamTable {
+    Source {
+        name: String,
+        schema: Arc<Schema>,
+        event_time_field: Option<String>, // optional; not interpreted in this module
+        watermark_field: Option<String>, // optional; not interpreted in this module
+    },
+    Sink {
+        name: String,
+        schema: Arc<Schema>,
+    },
+    Memory {
+        name: String,
+        logical_plan: Option<LogicalPlan>, // no schema stored; `get_fields` returns an empty list
+    },
+}
+
+impl StreamTable {
+    pub fn name(&self) -> &str { // every variant carries its registered name
+        match self {
+            StreamTable::Source { name, .. }
+            | StreamTable::Sink { name, .. }
+            | StreamTable::Memory { name, .. } => name,
+        }
+    }
+
+    pub fn get_fields(&self) -> Vec<Arc<Field>> { // cloned field handles; none for `Memory`
+        match self {
+            StreamTable::Memory { .. } => Vec::new(),
+            StreamTable::Source { schema, .. } => schema.fields().iter().cloned().collect(),
+            StreamTable::Sink { schema, .. } => schema.fields().iter().cloned().collect(),
+        }
+    }
+}
+
+#[derive(Debug)]
+struct LogicalBatchInput {
+    table_name: String, // stored at construction; not read anywhere in this module
+    schema: Arc<Schema>, // returned as-is by `TableProvider::schema`
+}
+
+impl datafusion::datasource::TableProvider for LogicalBatchInput {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn schema(&self) -> Arc<Schema> {
+        self.schema.clone()
+    }
+
+    fn table_type(&self) -> datafusion::datasource::TableType {
+        datafusion::datasource::TableType::Base
+    }
+    // `scan` is spelled out as a boxed future; it panics when called (planning-only provider).
+    fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>(
+        &'life0 self,
+        _state: &'life1 dyn datafusion::catalog::Session,
+        _projection: Option<&'life2 Vec<usize>>,
+        _filters: &'life3 [Expr],
+        _limit: Option<usize>,
+    ) -> std::pin::Pin<
+        Box<
+            dyn std::future::Future<
+                    Output = Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>>,
+                > + Send
+                + 'async_trait,
+        >,
+    >
+    where
+        'life0: 'async_trait,
+        'life1: 'async_trait,
+        'life2: 'async_trait,
+        'life3: 'async_trait,
+        Self: 'async_trait,
+    {
+        unimplemented!("LogicalBatchInput is for planning only")
+    }
+}
+
+fn create_table(table_name: String, schema: Arc<Schema>) -> Arc<dyn TableSource> {
+    // Wrap the planning-only provider in DataFusion's default `TableSource` adapter so the
+    // SQL planner can resolve the table's schema.
+    let planning_input = Arc::new(LogicalBatchInput { table_name, schema });
+    Arc::new(DefaultTableSource::new(planning_input))
+}
+
+impl StreamSchemaProvider {
+    pub fn new() -> Self {
+        let mut registry = Self {
+            ..Default::default()
+        };
+        // Register `PlaceholderUdf` entries for `hop`, `tumble`, `session`, `unnest`, `row_time`.
+        registry
+            .register_udf(PlaceholderUdf::with_return(
+                "hop",
+                vec![
+                    DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
+                    DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
+                ],
+                window_arrow_struct(),
+            ))
+            .unwrap();
+
+        registry
+            .register_udf(PlaceholderUdf::with_return(
+                "tumble",
+                vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
+                window_arrow_struct(),
+            ))
+            .unwrap();
+
+        registry
+            .register_udf(PlaceholderUdf::with_return(
+                "session",
+                vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
+                window_arrow_struct(),
+            ))
+            .unwrap();
+
+        registry
+            .register_udf(PlaceholderUdf::with_return(
+                "unnest",
+                vec![DataType::List(Arc::new(Field::new(
+                    "field",
+                    DataType::Utf8,
+                    true,
+                )))],
+                DataType::Utf8,
+            ))
+            .unwrap();
+
+        registry
+            .register_udf(PlaceholderUdf::with_return(
+                "row_time",
+                vec![],
+                DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None),
+            ))
+            .unwrap();
+        // DataFusion defaults come last; registration is a map insert, so a same-named one wins.
+        for p in SessionStateDefaults::default_scalar_functions() {
+            registry.register_udf(p).unwrap();
+        }
+        for p in SessionStateDefaults::default_aggregate_functions() {
+            registry.register_udaf(p).unwrap();
+        }
+        for p in SessionStateDefaults::default_window_functions() {
+            registry.register_udwf(p).unwrap();
+        }
+        for p in SessionStateDefaults::default_expr_planners() {
+            registry.register_expr_planner(p).unwrap();
+        }
+
+        registry
+    }
+
+    pub fn add_source_table(
+        &mut self,
+        name: String,
+        schema: Arc<Schema>,
+        event_time_field: Option<String>,
+        watermark_field: Option<String>,
+    ) {
+        self.tables.insert(
+            UniCase::new(name.clone()), // case-insensitive key; replaces any same-named table
+            StreamTable::Source {
+                name,
+                schema,
+                event_time_field,
+                watermark_field,
+            },
+        );
+    }
+
+    pub fn add_sink_table(&mut self, name: String, schema: Arc<Schema>) {
+        self.tables.insert(
+            UniCase::new(name.clone()), // same keying as `add_source_table`
+            StreamTable::Sink { name, schema },
+        );
+    }
+
+    fn insert_table(&mut self, table: StreamTable) { // keyed by the table's own `name()`
+        self.tables
+            .insert(UniCase::new(table.name().to_string()), table);
+    }
+
+    pub fn get_table(&self, table_name: impl Into<String>) -> Option<&StreamTable> {
+        self.tables.get(&UniCase::new(table_name.into())) // case-insensitive lookup
+    }
+
+    pub fn get_table_mut(&mut self, table_name: impl Into<String>) -> Option<&mut StreamTable> {
+        self.tables.get_mut(&UniCase::new(table_name.into())) // case-insensitive lookup
+    }
+}
+
+impl ContextProvider for StreamSchemaProvider {
+    fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
+        let table = self
+            .get_table(name.to_string()) // keyed by the whole reference rendered as text
+            .ok_or_else(|| DataFusionError::Plan(format!("Table {name} not found")))?;
+        // Rebuild a metadata-free Arrow schema from the table's fields (none for `Memory`).
+        let fields = table.get_fields();
+        let schema = Arc::new(Schema::new_with_metadata(
+            fields
+                .iter()
+                .map(|f| f.as_ref().clone())
+                .collect::<Vec<Field>>(),
+            HashMap::new(),
+        ));
+        Ok(create_table(name.to_string(), schema))
+    }
+
+    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
+        self.functions.get(name).cloned() // exact-match lookup on the registered name
+    }
+
+    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
+        self.aggregate_functions.get(name).cloned()
+    }
+
+    fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
+        None // no variable is ever resolved by this provider
+    }
+
+    fn options(&self) -> &datafusion::config::ConfigOptions {
+        &self.config_options
+    }
+
+    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>> {
+        self.window_functions.get(name).cloned()
+    }
+
+    fn udf_names(&self) -> Vec<String> {
+        self.functions.keys().cloned().collect() // order follows HashMap iteration
+    }
+
+    fn udaf_names(&self) -> Vec<String> {
+        self.aggregate_functions.keys().cloned().collect()
+    }
+
+    fn udwf_names(&self) -> Vec<String> {
+        self.window_functions.keys().cloned().collect()
+    }
+
+    fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
+        &self.expr_planners
+    }
+}
+
+impl FunctionRegistry for StreamSchemaProvider {
+    fn udfs(&self) -> HashSet<String> {
+        self.functions.keys().cloned().collect()
+    }
+
+    fn udf(&self, name: &str) -> Result<Arc<ScalarUDF>> {
+        // An unknown name is reported as a plan error.
+        match self.functions.get(name) {
+            Some(udf) => Ok(Arc::clone(udf)),
+            None => plan_err!("No UDF with name {name}"),
+        }
+    }
+
+    fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>> {
+        // An unknown name is reported as a plan error.
+        match self.aggregate_functions.get(name) {
+            Some(udaf) => Ok(Arc::clone(udaf)),
+            None => plan_err!("No UDAF with name {name}"),
+        }
+    }
+
+    fn udwf(&self, name: &str) -> Result<Arc<WindowUDF>> {
+        // An unknown name is reported as a plan error.
+        match self.window_functions.get(name) {
+            Some(udwf) => Ok(Arc::clone(udwf)),
+            None => plan_err!("No UDWF with name {name}"),
+        }
+    }
+
+    fn register_function_rewrite(
+        &mut self,
+        rewrite: Arc<dyn FunctionRewrite + Send + Sync>,
+    ) -> Result<()> {
+        self.analyzer.add_function_rewrite(rewrite);
+        Ok(())
+    }
+
+    fn register_udf(&mut self, udf: Arc<ScalarUDF>) -> Result<Option<Arc<ScalarUDF>>> {
+        Ok(self.functions.insert(udf.name().to_string(), udf))
+    }
+
+    fn register_udaf(&mut self, udaf: Arc<AggregateUDF>) -> Result<Option<Arc<AggregateUDF>>> {
+        // Keyed by the UDAF's own name; a previous entry under that name is handed back.
+        let key = udaf.name().to_string();
+        Ok(self.aggregate_functions.insert(key, udaf))
+    }
+
+    fn register_udwf(&mut self, udwf: Arc<WindowUDF>) -> Result<Option<Arc<WindowUDF>>> {
+        Ok(self.window_functions.insert(udwf.name().to_string(), udwf))
+    }
+
+    fn register_expr_planner(&mut self, expr_planner: Arc<dyn ExprPlanner>) -> Result<()> {
+        self.expr_planners.push(expr_planner);
+        Ok(())
+    }
+
+    fn expr_planners(&self) -> Vec<Arc<dyn ExprPlanner>> {
+        self.expr_planners.clone()
+    }
+}
diff --git a/src/sql/planner/parse.rs b/src/sql/planner/parse.rs
new file mode 100644
index 00000000..dfaec9a6
--- /dev/null
+++ b/src/sql/planner/parse.rs
@@ -0,0 +1,183 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use datafusion::common::{Result, plan_err};
+use datafusion::error::DataFusionError;
+use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement};
+use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
+use datafusion::sql::sqlparser::parser::Parser;
+
+use crate::coordinator::{
+    CreateFunction, DropFunction, ShowFunctions, StartFunction, Statement as CoordinatorStatement,
+    StopFunction, StreamingSql,
+};
+
+/// Stage 1 of SQL handling: raw text → `Box<dyn Statement>`.
+///
+/// The trimmed text is parsed with `FunctionStreamDialect` and the first parsed statement is
+/// handed to `classify_statement`; statements after the first are not looked at. Empty input,
+/// parser failures and an empty parse result are all reported as plan errors.
+pub fn parse_sql(query: &str) -> Result<Box<dyn CoordinatorStatement>> {
+    let sql = query.trim();
+    if sql.is_empty() {
+        return plan_err!("Query is empty");
+    }
+
+    // The dialect value carries no fields, so it is built inline for the call.
+    let parsed = Parser::parse_sql(&FunctionStreamDialect {}, sql)
+        .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?;
+
+    // Only the head of the batch is used; taking it by value drops the remainder unexamined.
+    let Some(first) = parsed.into_iter().next() else {
+        return plan_err!("No SQL statements found");
+    };
+
+    classify_statement(first)
+}
+
+/// Classify a parsed DataFusion Statement into the coordinator's Statement type.
+///
+/// FunctionStream DDL (CREATE/DROP/START/STOP FUNCTION, SHOW FUNCTIONS) becomes the matching
+/// coordinator type; everything else is wrapped in StreamingSql. DROP FUNCTION must name
+/// exactly one function, otherwise a plan error is returned.
+fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>> {
+    match stmt {
+        DFStatement::CreateFunctionWith { options } => {
+            let properties = sql_options_to_map(&options);
+            let create_fn = CreateFunction::from_properties(properties)
+                .map_err(|e| DataFusionError::Plan(format!("CREATE FUNCTION: {e}")))?;
+            Ok(Box::new(create_fn))
+        }
+        DFStatement::StartFunction { name } => Ok(Box::new(StartFunction::new(name.to_string()))),
+        DFStatement::StopFunction { name } => Ok(Box::new(StopFunction::new(name.to_string()))),
+        DFStatement::DropFunction { func_desc, .. } => {
+            let [desc] = &func_desc[..] else {
+                return plan_err!("DROP FUNCTION expects exactly one function name");
+            };
+            let name = desc.name.to_string();
+            Ok(Box::new(DropFunction::new(name)))
+        }
+        DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())),
+        other => Ok(Box::new(StreamingSql::new(other))),
+    }
+}
+
+/// Collects the `KeyValue` entries of `options` into a map; other option shapes are skipped.
+fn sql_options_to_map(options: &[SqlOption]) -> HashMap<String, String> {
+    let mut properties = HashMap::new();
+    for option in options {
+        if let SqlOption::KeyValue { key, value } = option {
+            // The value is rendered with `Display`, then every leading and trailing single
+            // quote is stripped; quotes inside the value are left alone.
+            let rendered = value.to_string();
+            properties.insert(key.value.clone(), rendered.trim_matches('\'').to_string());
+        }
+    }
+    properties
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn is_streaming_sql(stmt: &dyn CoordinatorStatement) -> bool {
+        let debug = format!("{:?}", stmt); // relies on the statement's `Debug` output
+        debug.starts_with("StreamingSql")
+    }
+
+    fn is_ddl(stmt: &dyn CoordinatorStatement) -> bool {
+        !is_streaming_sql(stmt) // anything not rendered as `StreamingSql` counts as DDL here
+    }
+
+    #[test]
+    fn test_parse_create_function() {
+        let sql =
+            "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')";
+        let stmt = parse_sql(sql).unwrap();
+        assert!(is_ddl(stmt.as_ref()));
+    }
+
+    #[test]
+    fn test_parse_create_function_minimal() {
+        let sql = "CREATE FUNCTION WITH ('function_path'='./processor.wasm')";
+        let stmt = parse_sql(sql).unwrap();
+        assert!(is_ddl(stmt.as_ref()));
+    }
+
+    #[test]
+    fn test_parse_drop_function() {
+        let sql = "DROP FUNCTION my_task";
+        let stmt = parse_sql(sql).unwrap();
+        assert!(is_ddl(stmt.as_ref()));
+    }
+
+    #[test]
+    fn test_parse_start_function() {
+        let sql = "START FUNCTION my_task";
+        let stmt = parse_sql(sql).unwrap();
+        assert!(is_ddl(stmt.as_ref()));
+    }
+
+    #[test]
+    fn test_parse_stop_function() {
+        let sql = "STOP FUNCTION my_task";
+        let stmt = parse_sql(sql).unwrap();
+        assert!(is_ddl(stmt.as_ref()));
+    }
+
+    #[test]
+    fn test_parse_show_functions() {
+        let sql = "SHOW FUNCTIONS";
+        let stmt = parse_sql(sql).unwrap();
+        assert!(is_ddl(stmt.as_ref()));
+    }
+
+    #[test]
+    fn test_parse_case_insensitive() {
+        let sql1 = "create function with ('function_path'='./test.wasm')";
+        assert!(is_ddl(parse_sql(sql1).unwrap().as_ref()));
+
+        let sql2 = "show functions";
+        assert!(is_ddl(parse_sql(sql2).unwrap().as_ref()));
+
+        let sql3 = "start function my_task";
+        assert!(is_ddl(parse_sql(sql3).unwrap().as_ref()));
+    }
+
+    #[test]
+    fn test_parse_streaming_sql() {
+        let sql =
+            "SELECT count(*), tumble(interval '1 minute') as window FROM events GROUP BY window";
+        let stmt = parse_sql(sql).unwrap();
+        assert!(is_streaming_sql(stmt.as_ref()));
+    }
+
+    #[test]
+    fn test_parse_empty() {
+        assert!(parse_sql("").is_err());
+        assert!(parse_sql("  ").is_err()); // whitespace-only input is trimmed to empty
+    }
+
+    #[test]
+    fn test_parse_with_extra_properties() {
+        let sql = r#"CREATE FUNCTION WITH (
+            'function_path'='./test.wasm',
+            'config_path'='./config.yml',
+            'parallelism'='4',
+            'memory-limit'='256mb'
+        )"#;
+        let stmt = parse_sql(sql).unwrap();
+        assert!(is_ddl(stmt.as_ref()));
+    }
+}
diff --git a/src/sql/planner/plan/aggregate.rs b/src/sql/planner/plan/aggregate.rs
new file mode 100644
index 00000000..6ed7499d
--- /dev/null
+++ b/src/sql/planner/plan/aggregate.rs
@@ -0,0 +1,275 @@
+use std::sync::Arc;
+
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{DFSchema, Result, not_impl_err, plan_err};
+use datafusion::functions_aggregate::expr_fn::max;
+use datafusion::logical_expr;
+use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan};
+use datafusion::prelude::col;
+use tracing::debug;
+
+use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::extension::aggregate::AggregateExtension;
+use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::planner::plan::WindowDetectingVisitor;
+use crate::sql::planner::types::{
+    DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
+    schema_from_df_fields_with_metadata,
+};
+
+pub(crate) struct AggregateRewriter<'a> {
+    pub schema_provider: &'a StreamSchemaProvider, // only forwarded to the non-windowed rewrite
+}
+
+impl AggregateRewriter<'_> {
+    /// Rewrite a non-windowed aggregate into an updating aggregate with key calculation.
+    pub fn rewrite_non_windowed_aggregate(
+        input: Arc<LogicalPlan>,
+        mut key_fields: Vec<DFField>,
+        group_expr: Vec<Expr>,
+        mut aggr_expr: Vec<Expr>,
+        schema: Arc<DFSchema>,
+        _schema_provider: &StreamSchemaProvider,
+    ) -> Result<Transformed<LogicalPlan>> {
+        let key_count = key_fields.len();
+        key_fields.extend(fields_with_qualifiers(input.schema()));
+        // Projection schema: the key fields first, followed by every input column.
+        let key_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &key_fields,
+            schema.metadata().clone(),
+        )?);
+
+        let mut key_projection_expressions: Vec<_> = group_expr
+            .iter()
+            .zip(key_fields.iter())
+            .map(|(expr, f)| expr.clone().alias(f.name().to_string()))
+            .collect();
+
+        key_projection_expressions.extend(
+            fields_with_qualifiers(input.schema())
+                .iter()
+                .map(|field| Expr::Column(field.qualified_column())),
+        );
+        // `input` is not needed after this point, so it is moved rather than cloned.
+        let key_projection =
+            LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema(
+                key_projection_expressions,
+                input,
+                key_schema,
+            )?);
+
+        let key_plan = LogicalPlan::Extension(Extension {
+            node: Arc::new(KeyCalculationExtension::new(
+                key_projection,
+                KeysOrExprs::Keys((0..key_count).collect()),
+            )),
+        });
+
+        let Ok(timestamp_field) = key_plan
+            .schema()
+            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
+        else {
+            return plan_err!("no timestamp field found in schema");
+        };
+        // Add `max(<timestamp>)` aliased to the timestamp name as an extra aggregate.
+        let timestamp_field: DFField = timestamp_field.into();
+        let column = timestamp_field.qualified_column();
+        aggr_expr.push(max(col(column)).alias(TIMESTAMP_FIELD));
+
+        let mut output_schema_fields = fields_with_qualifiers(&schema);
+        output_schema_fields.push(timestamp_field);
+        let output_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &output_schema_fields,
+            schema.metadata().clone(),
+        )?);
+
+        let aggregate = Aggregate::try_new_with_schema(
+            Arc::new(key_plan),
+            group_expr,
+            aggr_expr,
+            output_schema,
+        )?;
+
+        debug!(
+            "non-windowed aggregate field names: {:?}",
+            aggregate
+                .schema
+                .fields()
+                .iter()
+                .map(|f| f.name())
+                .collect::<Vec<_>>()
+        );
+
+        let final_plan = LogicalPlan::Aggregate(aggregate);
+        Ok(Transformed::yes(final_plan))
+    }
+}
+
+impl TreeNodeRewriter for AggregateRewriter<'_> {
+    type Node = LogicalPlan;
+    // Rewrites `Aggregate` nodes; every other node is returned as `Transformed::no`.
+    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
+        let LogicalPlan::Aggregate(Aggregate {
+            input,
+            mut group_expr,
+            aggr_expr,
+            schema,
+            ..
+        }) = node
+        else {
+            return Ok(Transformed::no(node));
+        };
+        // Group-by expressions for which `find_window` yields a window, with their positions.
+        let mut window_group_expr: Vec<_> = group_expr
+            .iter()
+            .enumerate()
+            .filter_map(|(i, expr)| {
+                find_window(expr)
+                    .map(|option| option.map(|inner| (i, inner)))
+                    .transpose()
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        if window_group_expr.len() > 1 {
+            return not_impl_err!(
+                "do not support {} window expressions in group by",
+                window_group_expr.len()
+            );
+        }
+        // One `_key_<name>` field per group-by column, taken from the aggregate's output schema.
+        let mut key_fields: Vec<DFField> = fields_with_qualifiers(&schema)
+            .iter()
+            .take(group_expr.len())
+            .map(|field| {
+                DFField::new(
+                    field.qualifier().cloned(),
+                    format!("_key_{}", field.name()),
+                    field.data_type().clone(),
+                    field.is_nullable(),
+                )
+            })
+            .collect();
+        // Collect the window (if any) that the visitor detects in the input plan.
+        let mut window_detecting_visitor = WindowDetectingVisitor::default();
+        input.visit_with_subqueries(&mut window_detecting_visitor)?;
+
+        let window = window_detecting_visitor.window;
+        let window_behavior = match (window.is_some(), !window_group_expr.is_empty()) {
+            (true, true) => {
+                let input_window = window.unwrap();
+                let (window_index, group_by_window_type) = window_group_expr.pop().unwrap();
+                if group_by_window_type != input_window {
+                    return Err(datafusion::error::DataFusionError::NotImplemented(
+                        "window in group by does not match input window".to_string(),
+                    ));
+                }
+                let matching_field = window_detecting_visitor.fields.iter().next();
+                match matching_field {
+                    Some(field) => {
+                        group_expr[window_index] = Expr::Column(field.qualified_column());
+                        WindowBehavior::InData
+                    }
+                    None => {
+                        if matches!(input_window, WindowType::Session { .. }) {
+                            return plan_err!("can't reinvoke session window in nested aggregates");
+                        }
+                        group_expr.remove(window_index);
+                        key_fields.remove(window_index);
+                        let window_field = schema.qualified_field(window_index).into();
+                        WindowBehavior::FromOperator {
+                            window: input_window,
+                            window_field,
+                            window_index,
+                            is_nested: true,
+                        }
+                    }
+                }
+            }
+            (true, false) => WindowBehavior::InData,
+            (false, true) => {
+                let (window_index, window_type) = window_group_expr.pop().unwrap();
+                group_expr.remove(window_index);
+                key_fields.remove(window_index);
+                let window_field = schema.qualified_field(window_index).into();
+                WindowBehavior::FromOperator {
+                    window: window_type,
+                    window_field,
+                    window_index,
+                    is_nested: false,
+                }
+            }
+            (false, false) => {
+                return Self::rewrite_non_windowed_aggregate(
+                    input,
+                    key_fields,
+                    group_expr,
+                    aggr_expr,
+                    schema,
+                    self.schema_provider,
+                );
+            }
+        };
+        // Project the key expressions in front of the input columns, then wrap in key calculation.
+        let key_count = key_fields.len();
+        key_fields.extend(fields_with_qualifiers(input.schema()));
+
+        let key_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &key_fields,
+            schema.metadata().clone(),
+        )?);
+
+        let mut key_projection_expressions: Vec<_> = group_expr
+            .iter()
+            .zip(key_fields.iter())
+            .map(|(expr, f)| expr.clone().alias(f.name().to_string()))
+            .collect();
+
+        key_projection_expressions.extend(
+            fields_with_qualifiers(input.schema())
+                .iter()
+                .map(|field| Expr::Column(field.qualified_column())),
+        );
+
+        let key_projection =
+            LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema(
+                key_projection_expressions,
+                input.clone(),
+                key_schema,
+            )?);
+
+        let key_plan = LogicalPlan::Extension(Extension {
+            node: Arc::new(KeyCalculationExtension::new(
+                key_projection,
+                KeysOrExprs::Keys((0..key_count).collect()),
+            )),
+        });
+        // For `FromOperator`, the window column is removed from the inner aggregate's schema.
+        let mut aggregate_schema_fields = fields_with_qualifiers(&schema);
+        if let WindowBehavior::FromOperator { window_index, .. } = &window_behavior {
+            aggregate_schema_fields.remove(*window_index);
+        }
+        let internal_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &aggregate_schema_fields,
+            schema.metadata().clone(),
+        )?);
+
+        let rewritten_aggregate = Aggregate::try_new_with_schema(
+            Arc::new(key_plan),
+            group_expr,
+            aggr_expr,
+            internal_schema,
+        )?;
+
+        let aggregate_extension = AggregateExtension::new(
+            window_behavior,
+            LogicalPlan::Aggregate(rewritten_aggregate),
+            (0..key_count).collect(),
+        );
+        let final_plan = LogicalPlan::Extension(Extension {
+            node: Arc::new(aggregate_extension),
+        });
+        // Called only so that an error propagates; the returned window is discarded.
+        WindowDetectingVisitor::get_window(&final_plan)?;
+        Ok(Transformed::yes(final_plan))
+    }
+}
diff --git a/src/sql/planner/plan/join.rs b/src/sql/planner/plan/join.rs
new file mode 100644
index 00000000..f8225905
--- /dev/null
+++ b/src/sql/planner/plan/join.rs
@@ -0,0 +1,242 @@
+use std::sync::Arc;
+
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{
+    Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference,
+    not_impl_err,
+};
+use datafusion::logical_expr;
+use datafusion::logical_expr::expr::Alias;
+use datafusion::logical_expr::{
+    BinaryExpr, Case, Expr, Extension, Join, LogicalPlan, Projection, build_join_schema,
+};
+use datafusion::prelude::coalesce;
+
+use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::extension::join::JoinExtension;
+use crate::sql::planner::extension::key_calculation::KeyCalculationExtension;
+use crate::sql::planner::plan::WindowDetectingVisitor;
+use crate::sql::planner::types::{
+    WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata,
+};
+
+pub(crate) struct JoinRewriter<'a> { // plan rewriter that acts on `LogicalPlan::Join` nodes
+    pub schema_provider: &'a StreamSchemaProvider, // read for `planning_options.ttl` when a join is not instant
+}
+
+impl JoinRewriter<'_> {
+    fn check_join_windowing(join: &Join) -> Result<bool> { // Ok(true): both sides share a non-session window; Ok(false): unwindowed inner join
+        let left_window = WindowDetectingVisitor::get_window(&join.left)?;
+        let right_window = WindowDetectingVisitor::get_window(&join.right)?;
+        match (left_window, right_window) {
+            (None, None) => { // neither side is windowed: only inner joins are accepted
+                if join.join_type == JoinType::Inner {
+                    Ok(false)
+                } else {
+                    Err(DataFusionError::NotImplemented(
+                        "can't handle non-inner joins without windows".into(),
+                    ))
+                }
+            }
+            (None, Some(_)) => Err(DataFusionError::NotImplemented(
+                "can't handle mixed windowing between left (non-windowed) and right (windowed)"
+                    .into(),
+            )),
+            (Some(_), None) => Err(DataFusionError::NotImplemented(
+                "can't handle mixed windowing between left (windowed) and right (non-windowed)"
+                    .into(),
+            )),
+            (Some(left_window), Some(right_window)) => { // both windowed: windows must be equal and not session windows
+                if left_window != right_window {
+                    return Err(DataFusionError::NotImplemented(
+                        "can't handle mixed windowing between left and right".into(),
+                    ));
+                }
+                if let WindowType::Session { .. } = left_window {
+                    return Err(DataFusionError::NotImplemented(
+                        "can't handle session windows in joins".into(),
+                    ));
+                }
+                Ok(true)
+            }
+        }
+    }
+
+    fn create_join_key_plan( // wraps `input` in a projection of key columns plus all input columns, inside a KeyCalculationExtension
+        input: Arc<LogicalPlan>,
+        join_expressions: Vec<Expr>,
+        name: &'static str,
+    ) -> Result<LogicalPlan> {
+        let key_count = join_expressions.len();
+
+        let join_expressions: Vec<_> = join_expressions
+            .into_iter()
+            .enumerate()
+            .map(|(index, expr)| { // each join expression is aliased as `_stream._key_{index}`
+                expr.alias_qualified(
+                    Some(TableReference::bare("_stream")),
+                    format!("_key_{index}"),
+                )
+            })
+            .chain( // the key columns are followed by every input column, unchanged
+                fields_with_qualifiers(input.schema())
+                    .iter()
+                    .map(|field| Expr::Column(field.qualified_column())),
+            )
+            .collect();
+
+        let projection = Projection::try_new(join_expressions, input)?;
+        let key_calculation_extension = KeyCalculationExtension::new_named_and_trimmed(
+            LogicalPlan::Projection(projection),
+            (0..key_count).collect(), // the keys are the first `key_count` projection outputs
+            name.to_string(),
+        );
+        Ok(LogicalPlan::Extension(Extension {
+            node: Arc::new(key_calculation_extension),
+        }))
+    }
+
+    fn post_join_timestamp_projection(&mut self, input: LogicalPlan) -> Result<LogicalPlan> { // collapses the two `_timestamp` columns of a join into one
+        let schema = input.schema().clone();
+        let mut schema_with_timestamp = fields_with_qualifiers(&schema);
+        let timestamp_fields = schema_with_timestamp
+            .iter()
+            .filter(|field| field.name() == "_timestamp")
+            .cloned()
+            .collect::<Vec<_>>();
+
+        if timestamp_fields.len() != 2 { // exactly two `_timestamp` fields are required
+            return not_impl_err!("join must have two timestamp fields");
+        }
+
+        schema_with_timestamp.retain(|field| field.name() != "_timestamp"); // drop both; a single merged column is appended below
+        let mut projection_expr = schema_with_timestamp
+            .iter()
+            .map(|field| {
+                Expr::Column(Column {
+                    relation: field.qualifier().cloned(),
+                    name: field.name().to_string(),
+                    spans: Spans::default(),
+                })
+            })
+            .collect::<Vec<_>>();
+
+        schema_with_timestamp.push(timestamp_fields[0].clone()); // the output reuses the first timestamp field's definition
+
+        let output_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &schema_with_timestamp,
+            schema.metadata().clone(),
+        )?);
+
+        let left_field = &timestamp_fields[0];
+        let left_column = Expr::Column(Column {
+            relation: left_field.qualifier().cloned(),
+            name: left_field.name().to_string(),
+            spans: Spans::default(),
+        });
+        let right_field = &timestamp_fields[1];
+        let right_column = Expr::Column(Column {
+            relation: right_field.qualifier().cloned(),
+            name: right_field.name().to_string(),
+            spans: Spans::default(),
+        });
+
+        let max_timestamp = Expr::Case(Case { // CASE (left >= right) WHEN true THEN left WHEN false THEN right ELSE coalesce(left, right)
+            expr: Some(Box::new(Expr::BinaryExpr(BinaryExpr {
+                left: Box::new(left_column.clone()),
+                op: logical_expr::Operator::GtEq,
+                right: Box::new(right_column.clone()),
+            }))),
+            when_then_expr: vec![
+                (
+                    Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)), None)),
+                    Box::new(left_column.clone()),
+                ),
+                (
+                    Box::new(Expr::Literal(ScalarValue::Boolean(Some(false)), None)),
+                    Box::new(right_column.clone()),
+                ),
+            ],
+            else_expr: Some(Box::new(coalesce(vec![ // used when the comparison is neither true nor false
+                left_column.clone(),
+                right_column.clone(),
+            ]))),
+        });
+
+        projection_expr.push(Expr::Alias(Alias { // the merged value takes the first timestamp field's qualifier and name
+            expr: Box::new(max_timestamp),
+            relation: timestamp_fields[0].qualifier().cloned(),
+            name: timestamp_fields[0].name().to_string(),
+            metadata: None,
+        }));
+
+        Ok(LogicalPlan::Projection(Projection::try_new_with_schema(
+            projection_expr,
+            Arc::new(input),
+            output_schema,
+        )?))
+    }
+}
+
+impl TreeNodeRewriter for JoinRewriter<'_> {
+    type Node = LogicalPlan;
+
+    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> { // rewrites a Join into a JoinExtension; other nodes pass through
+        let LogicalPlan::Join(join) = node else {
+            return Ok(Transformed::no(node));
+        };
+
+        let is_instant = Self::check_join_windowing(&join)?; // true when both inputs are windowed
+
+        let Join { // the pattern only matches ON-constraint joins with `null_equals_null == false`
+            left,
+            right,
+            on,
+            filter,
+            join_type,
+            join_constraint: JoinConstraint::On,
+            schema: _,
+            null_equals_null: false,
+        } = join
+        else {
+            return not_impl_err!("can't handle join constraint other than ON"); // NOTE(review): also reached when null_equals_null is true
+        };
+
+        if on.is_empty() && !is_instant { // a non-windowed join needs at least one equijoin pair
+            return not_impl_err!("Updating joins must include an equijoin condition");
+        }
+
+        let (left_expressions, right_expressions): (Vec<_>, Vec<_>) =
+            on.clone().into_iter().unzip(); // `on` itself is reused in the rewritten join below
+
+        let left_input = Self::create_join_key_plan(left, left_expressions, "left")?;
+        let right_input = Self::create_join_key_plan(right, right_expressions, "right")?;
+
+        let rewritten_join = LogicalPlan::Join(Join {
+            schema: Arc::new(build_join_schema( // schema is rebuilt because both inputs gained key columns
+                left_input.schema(),
+                right_input.schema(),
+                &join_type,
+            )?),
+            left: Arc::new(left_input),
+            right: Arc::new(right_input),
+            on,
+            join_type,
+            join_constraint: JoinConstraint::On,
+            null_equals_null: false,
+            filter,
+        });
+
+        let final_logical_plan = self.post_join_timestamp_projection(rewritten_join)?;
+
+        let join_extension = JoinExtension {
+            rewritten_join: final_logical_plan,
+            is_instant,
+            ttl: (!is_instant).then_some(self.schema_provider.planning_options.ttl), // TTL is only set for non-instant joins
+        };
+
+        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+            node: Arc::new(join_extension),
+        })))
+    }
+}
diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs
new file mode 100644
index 00000000..8d1dd388
--- /dev/null
+++ b/src/sql/planner/plan/mod.rs
@@ -0,0 +1,449 @@
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use datafusion::common::tree_node::{Transformed, TreeNodeRecursion};
+use datafusion::common::{
+    Column, DataFusionError, Result, Spans, TableReference, plan_err,
+    tree_node::{TreeNode, TreeNodeRewriter, TreeNodeVisitor},
+};
+use datafusion::logical_expr::{
+    Aggregate, Expr, Extension, Filter, LogicalPlan, SubqueryAlias, expr::Alias,
+};
+
+use crate::sql::planner::extension::StreamExtension;
+use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension};
+use crate::sql::planner::extension::join::JOIN_NODE_NAME;
+use crate::sql::planner::extension::remote_table::RemoteTableExtension;
+use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
+use crate::sql::planner::types::{
+    DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
+};
+
+use self::aggregate::AggregateRewriter;
+use self::join::JoinRewriter;
+use self::window_fn::WindowFunctionRewriter;
+
+pub(crate) mod aggregate;
+pub(crate) mod join;
+pub(crate) mod window_fn;
+
+use super::StreamSchemaProvider;
+use tracing::debug;
+
+/// Stage 3 of planning: turns a DataFusion `LogicalPlan` into its streaming form.
+///
+/// Runs `StreamRewriter` over the plan (subqueries included), validates the
+/// result with `TimeWindowUdfChecker`, and logs it as graphviz at debug level.
+pub fn rewrite_plan(
+    plan: LogicalPlan,
+    schema_provider: &StreamSchemaProvider,
+) -> Result<LogicalPlan> {
+    let mut rewriter = StreamRewriter { schema_provider };
+    let streaming_plan = plan.rewrite_with_subqueries(&mut rewriter)?.data;
+
+    let mut udf_checker = TimeWindowUdfChecker {};
+    streaming_plan.visit_with_subqueries(&mut udf_checker)?;
+
+    debug!(
+        "Streaming logical plan:\n{}",
+        streaming_plan.display_graphviz()
+    );
+
+    Ok(streaming_plan)
+}
+
+/// Plan visitor that records the window type found in a logical plan and the fields carrying it
+#[derive(Debug, Default)]
+pub(crate) struct WindowDetectingVisitor {
+    pub(crate) window: Option<WindowType>, // window detected so far, if any
+    pub(crate) fields: HashSet<DFField>, // output fields currently tracked as holding the window value
+}
+
+impl WindowDetectingVisitor {
+    pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result<Option<WindowType>> {
+        // Start from an empty visitor: no window seen, no window fields tracked.
+        let mut detector = Self::default();
+        logical_plan.visit_with_subqueries(&mut detector)?;
+        // Only the detected window is reported; the tracked fields are discarded.
+        let Self { window, fields: _ } = detector;
+        Ok(window)
+    }
+}
+
+fn extract_column(expr: &Expr) -> Option<&Column> {
+    // Look through alias wrappers; anything but a bare column yields `None`.
+    if let Expr::Alias(Alias { expr: inner, .. }) = expr {
+        return extract_column(inner);
+    }
+    expr.try_as_col()
+}
+
+impl TreeNodeVisitor<'_> for WindowDetectingVisitor {
+    type Node = LogicalPlan;
+
+    fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> { // only join extension nodes get special handling on the way down
+        let LogicalPlan::Extension(Extension { node }) = node else {
+            return Ok(TreeNodeRecursion::Continue);
+        };
+
+        if node.name() == JOIN_NODE_NAME {
+            let input_windows: HashSet<_> = node // one entry per distinct window result among the join inputs
+                .inputs()
+                .iter()
+                .map(|input| Self::get_window(input))
+                .collect::<Result<HashSet<_>>>()?;
+            if input_windows.len() > 1 { // the inputs disagree on windowing
+                return Err(DataFusionError::Plan(
+                    "can't handle mixed windowing between left and right".to_string(),
+                ));
+            }
+            self.window = input_windows
+                .into_iter()
+                .next()
+                .expect("join has at least one input");
+            return Ok(TreeNodeRecursion::Jump); // the inputs were already inspected through get_window above
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+
+    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> { // updates `window` and `fields` per node kind on the way up
+        match node {
+            LogicalPlan::Projection(projection) => {
+                let window_expressions = projection // (output index, window) for every projected expression that carries a window
+                    .expr
+                    .iter()
+                    .enumerate()
+                    .filter_map(|(index, expr)| {
+                        if let Some(column) = extract_column(expr) {
+                            let input_field = projection
+                                .input
+                                .schema()
+                                .field_with_name(column.relation.as_ref(), &column.name);
+                            let input_field = match input_field {
+                                Ok(field) => field,
+                                Err(err) => return Some(Err(err)),
+                            };
+                            if self.fields.contains( // the column forwards a field already tracked as the window
+                                &(column.relation.clone(), Arc::new(input_field.clone())).into(),
+                            ) {
+                                return self.window.clone().map(|window| Ok((index, window)));
+                            }
+                        }
+                        find_window(expr) // otherwise look for a window expression inside `expr`
+                            .map(|option| option.map(|inner| (index, inner)))
+                            .transpose()
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+                self.fields.clear(); // tracked fields are re-derived against this node's output schema
+                for (index, window) in window_expressions {
+                    if let Some(existing_window) = &self.window {
+                        if *existing_window != window {
+                            return plan_err!(
+                                "can't window by both {:?} and {:?}",
+                                existing_window,
+                                window
+                            );
+                        }
+                        self.fields
+                            .insert(projection.schema.qualified_field(index).into());
+                    } else { // a projection never establishes a window by itself
+                        return plan_err!(
+                            "can't call a windowing function without grouping by it in an aggregate"
+                        );
+                    }
+                }
+            }
+            LogicalPlan::SubqueryAlias(subquery_alias) => {
+                self.fields = self // re-map each tracked field to the alias's output field at the same index
+                    .fields
+                    .drain()
+                    .map(|field| {
+                        Ok(subquery_alias
+                            .schema
+                            .qualified_field(
+                                subquery_alias
+                                    .input
+                                    .schema()
+                                    .index_of_column(&field.qualified_column())?,
+                            )
+                            .into())
+                    })
+                    .collect::<Result<HashSet<_>>>()?;
+            }
+            LogicalPlan::Aggregate(Aggregate {
+                input,
+                group_expr,
+                aggr_expr: _,
+                schema,
+                ..
+            }) => {
+                let window_expressions = group_expr // (group index, window) for every group expression that carries a window
+                    .iter()
+                    .enumerate()
+                    .filter_map(|(index, expr)| {
+                        if let Some(column) = extract_column(expr) {
+                            let input_field = input
+                                .schema()
+                                .field_with_name(column.relation.as_ref(), &column.name);
+                            let input_field = match input_field {
+                                Ok(field) => field,
+                                Err(err) => return Some(Err(err)),
+                            };
+                            if self
+                                .fields
+                                .contains(&(column.relation.as_ref(), input_field).into())
+                            {
+                                return self.window.clone().map(|window| Ok((index, window)));
+                            }
+                        }
+                        find_window(expr)
+                            .map(|option| option.map(|inner| (index, inner)))
+                            .transpose()
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+                self.fields.clear();
+                for (index, window) in window_expressions {
+                    if let Some(existing_window) = &self.window {
+                        if *existing_window != window {
+                            return Err(DataFusionError::Plan(
+                                "window expressions do not match".to_string(),
+                            ));
+                        }
+                    } else { // unlike a projection, an aggregate may introduce the window
+                        self.window = Some(window);
+                    }
+                    self.fields.insert(schema.qualified_field(index).into());
+                }
+            }
+            LogicalPlan::Extension(Extension { node }) => {
+                if node.name() == AGGREGATE_EXTENSION_NAME {
+                    let aggregate_extension = node
+                        .as_any()
+                        .downcast_ref::<AggregateExtension>()
+                        .expect("should be aggregate extension");
+
+                    match &aggregate_extension.window_behavior {
+                        WindowBehavior::FromOperator {
+                            window,
+                            window_field,
+                            window_index: _,
+                            is_nested,
+                        } => {
+                            if self.window.is_some() && !*is_nested { // a window already exists and this aggregate is not marked nested
+                                return Err(DataFusionError::Plan(
+                                    "aggregate node should not be recalculating window, as input is windowed.".to_string(),
+                                ));
+                            }
+                            self.window = Some(window.clone());
+                            self.fields.insert(window_field.clone());
+                        }
+                        WindowBehavior::InData => {
+                            let input_fields = self.fields.clone();
+                            self.fields.clear();
+                            for field in fields_with_qualifiers(node.schema()) { // keep only tracked fields that survive into the output schema
+                                if input_fields.contains(&field) {
+                                    self.fields.insert(field);
+                                }
+                            }
+                            if self.fields.is_empty() {
+                                return Err(DataFusionError::Plan(
+                                    "must have window in aggregate. Make sure you are calling one of the windowing functions (hop, tumble, session) or using the window field of the input".to_string(),
+                                ));
+                            }
+                        }
+                    }
+                }
+            }
+            _ => {}
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
+
+/// Main rewriter for streaming SQL plans.
+/// Adds timestamp columns to projections, delegates aggregates, joins and window
+/// nodes to their dedicated rewriters, and rejects unsupported plan nodes.
+pub struct StreamRewriter<'a> {
+    pub(crate) schema_provider: &'a StreamSchemaProvider, // handed on to the aggregate and join rewriters
+}
+
+impl TreeNodeRewriter for StreamRewriter<'_> {
+    type Node = LogicalPlan;
+
+    fn f_up(&mut self, mut node: Self::Node) -> Result<Transformed<Self::Node>> { // dispatches on the node kind; unlisted kinds pass through unchanged
+        match node {
+            LogicalPlan::Projection(ref mut projection) => {
+                if !has_timestamp_field(&projection.schema) { // forward the input's timestamp column when the projection lacks one
+                    let timestamp_field: DFField = projection
+                        .input
+                        .schema()
+                        .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
+                        .map_err(|_| {
+                            DataFusionError::Plan(format!(
+                                "No timestamp field found in projection input ({})",
+                                projection.input.display()
+                            ))
+                        })?
+                        .into();
+                    projection.schema = add_timestamp_field(
+                        projection.schema.clone(),
+                        timestamp_field.qualifier().cloned(),
+                    )
+                    .map_err(|err| err.context("in projection"))?; // propagate instead of panicking
+                    projection.expr.push(Expr::Column(Column {
+                        relation: timestamp_field.qualifier().cloned(),
+                        name: TIMESTAMP_FIELD.to_string(),
+                        spans: Spans::default(),
+                    }));
+                }
+
+                // Rewrite row_time() calls to timestamp column references
+                let rewritten = projection
+                    .expr
+                    .iter()
+                    .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {}))
+                    .collect::<Result<Vec<_>>>()?;
+                if rewritten.iter().any(|r| r.transformed) { // only replace the expressions when at least one changed
+                    projection.expr = rewritten.into_iter().map(|r| r.data).collect();
+                }
+                return Ok(Transformed::yes(node));
+            }
+            LogicalPlan::Aggregate(aggregate) => {
+                return AggregateRewriter {
+                    schema_provider: self.schema_provider,
+                }
+                .f_up(LogicalPlan::Aggregate(aggregate));
+            }
+            LogicalPlan::Join(join) => {
+                return JoinRewriter {
+                    schema_provider: self.schema_provider,
+                }
+                .f_up(LogicalPlan::Join(join));
+            }
+            LogicalPlan::Filter(f) => {
+                let expr = f // replace IS NOT NULL checks on window expressions in the predicate
+                    .predicate
+                    .clone()
+                    .rewrite(&mut TimeWindowNullCheckRemover {})?;
+                return Ok(if expr.transformed {
+                    Transformed::yes(LogicalPlan::Filter(Filter::try_new(expr.data, f.input)?))
+                } else {
+                    Transformed::no(LogicalPlan::Filter(f))
+                });
+            }
+            LogicalPlan::Window(_) => {
+                return WindowFunctionRewriter {}.f_up(node);
+            }
+            LogicalPlan::Sort(_) => {
+                return plan_err!(
+                    "ORDER BY is not currently supported in streaming SQL ({})",
+                    node.display()
+                );
+            }
+            LogicalPlan::Repartition(_) => {
+                return plan_err!(
+                    "Repartitions are not currently supported ({})",
+                    node.display()
+                );
+            }
+            LogicalPlan::Union(mut union) => {
+                union.schema = union.inputs[0].schema().clone(); // the union adopts its first input's schema
+                for input in union.inputs.iter_mut() {
+                    if let LogicalPlan::Extension(Extension { node }) = input.as_ref() {
+                        let stream_extension: &dyn StreamExtension = node.try_into().unwrap(); // NOTE(review): panics if the conversion fails
+                        if !stream_extension.transparent() { // non-transparent extensions are left unwrapped
+                            continue;
+                        }
+                    }
+                    let remote_table_extension = Arc::new(RemoteTableExtension { // wrap the input, exposing the union schema
+                        input: input.as_ref().clone(),
+                        name: TableReference::bare("union_input"),
+                        schema: union.schema.clone(),
+                        materialize: false,
+                    });
+                    *input = Arc::new(LogicalPlan::Extension(Extension {
+                        node: remote_table_extension,
+                    }));
+                }
+                return Ok(Transformed::yes(LogicalPlan::Union(union)));
+            }
+            LogicalPlan::SubqueryAlias(sa) => {
+                return Ok(Transformed::yes(LogicalPlan::SubqueryAlias( // rebuilt from the (possibly rewritten) input
+                    SubqueryAlias::try_new(sa.input, sa.alias)?,
+                )));
+            }
+            LogicalPlan::Limit(_) => {
+                return plan_err!(
+                    "LIMIT is not currently supported in streaming SQL ({})",
+                    node.display()
+                );
+            }
+            LogicalPlan::Explain(_) => {
+                return plan_err!("EXPLAIN is not supported ({})", node.display());
+            }
+            LogicalPlan::Analyze(_) => {
+                return plan_err!("ANALYZE is not supported ({})", node.display());
+            }
+            _ => {}
+        }
+        Ok(Transformed::no(node))
+    }
+}
+
+/// Expression rewriter that replaces every `row_time()` call with an
+/// unqualified reference to the `TIMESTAMP_FIELD` column.
+struct RowTimeRewriter;
+
+impl TreeNodeRewriter for RowTimeRewriter {
+    type Node = Expr;
+
+    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
+        let is_row_time =
+            matches!(&node, Expr::ScalarFunction(call) if call.func.name() == "row_time");
+        if !is_row_time {
+            return Ok(Transformed::no(node));
+        }
+        let timestamp = Column::new_unqualified(TIMESTAMP_FIELD.to_string());
+        Ok(Transformed::yes(Expr::Column(timestamp)))
+    }
+}
+
+/// Replaces `IS NOT NULL` checks whose operand contains a window expression
+/// (as reported by `find_window`) with the literal `true`.
+pub(crate) struct TimeWindowNullCheckRemover;
+
+impl TreeNodeRewriter for TimeWindowNullCheckRemover {
+    type Node = Expr;
+
+    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
+        let Expr::IsNotNull(operand) = &node else {
+            return Ok(Transformed::no(node));
+        };
+        if find_window(operand)?.is_none() {
+            return Ok(Transformed::no(node));
+        }
+        let always_true = datafusion::common::ScalarValue::Boolean(Some(true));
+        Ok(Transformed::yes(Expr::Literal(always_true, None)))
+    }
+}
+
+/// Plan visitor that rejects projections whose expressions still contain a window UDF.
+pub(crate) struct TimeWindowUdfChecker;
+
+impl TreeNodeVisitor<'_> for TimeWindowUdfChecker {
+    type Node = LogicalPlan;
+
+    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        let LogicalPlan::Projection(projection) = node else {
+            return Ok(TreeNodeRecursion::Continue);
+        };
+        for expr in &projection.expr {
+            let Some(window) = find_window(expr)? else { continue };
+            return plan_err!(
+                "Window function {:?} can only be used as a GROUP BY expression in an aggregate",
+                window
+            );
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
diff --git a/src/sql/planner/plan/window_fn.rs b/src/sql/planner/plan/window_fn.rs
new file mode 100644
index 00000000..0bd3314f
--- /dev/null
+++ b/src/sql/planner/plan/window_fn.rs
@@ -0,0 +1,178 @@
+use std::sync::Arc;
+
+use datafusion::common::tree_node::Transformed;
+use datafusion::common::{Result as DFResult, plan_err, tree_node::TreeNodeRewriter};
+use datafusion::logical_expr;
+use datafusion::logical_expr::expr::WindowFunctionParams;
+use datafusion::logical_expr::{
+    Expr, Extension, LogicalPlan, Projection, Sort, Window, expr::WindowFunction,
+};
+use tracing::debug;
+
+use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::planner::extension::window_fn::WindowFunctionExtension;
+use crate::sql::planner::plan::{WindowDetectingVisitor, extract_column};
+use crate::sql::planner::types::{WindowType, fields_with_qualifiers, schema_from_df_fields};
+
+pub(crate) struct WindowFunctionRewriter; // stateless rewriter for `LogicalPlan::Window` nodes
+
+/// Returns the window function inside `expr` and its output name; an alias name takes precedence.
+fn get_window_and_name(expr: &Expr) -> DFResult<(WindowFunction, String)> {
+    match expr {
+        // The outermost alias supplies the name; the function comes from beneath it.
+        Expr::Alias(alias) => Ok((get_window_and_name(&alias.expr)?.0, alias.name.clone())),
+        Expr::WindowFunction(window_function) => {
+            Ok((*window_function.clone(), expr.name_for_alias()?))
+        }
+        // Only window functions and aliases of them are accepted, so the message says so.
+        _ => plan_err!("Expected a window function or alias expression, not {:?}", expr),
+    }
+}
+
+impl TreeNodeRewriter for WindowFunctionRewriter {
+    type Node = LogicalPlan;
+
+    fn f_up(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> { // rewrites a Window node into a WindowFunctionExtension; other nodes pass through
+        let LogicalPlan::Window(window) = node else {
+            return Ok(Transformed::no(node));
+        };
+
+        debug!(
+            "Rewriting window function: {:?}",
+            LogicalPlan::Window(window.clone())
+        );
+
+        let mut window_detecting_visitor = WindowDetectingVisitor::default(); // used directly because both `window` and `fields` are needed
+        window
+            .input
+            .visit_with_subqueries(&mut window_detecting_visitor)?;
+
+        let Some(input_window) = window_detecting_visitor.window else {
+            return plan_err!("Window functions require already windowed input");
+        };
+        if matches!(input_window, WindowType::Session { .. }) {
+            return plan_err!("Window functions do not support session windows");
+        }
+
+        let input_window_fields = window_detecting_visitor.fields;
+
+        let Window {
+            input, window_expr, ..
+        } = window;
+
+        if window_expr.len() != 1 {
+            return plan_err!("Window functions require exactly one window expression");
+        }
+
+        let (WindowFunction { fun, params }, original_name) = get_window_and_name(&window_expr[0])?;
+
+        let mut window_field: Vec<_> = params // partition_by entries that refer to a tracked window field, with their index
+            .partition_by
+            .iter()
+            .enumerate()
+            .filter_map(|(index, expr)| {
+                if let Some(column) = extract_column(expr) {
+                    let Ok(input_field) = input
+                        .schema()
+                        .field_with_name(column.relation.as_ref(), &column.name)
+                    else {
+                        return Some(plan_err!(
+                            "Column {} not found in input schema",
+                            column.name
+                        ));
+                    };
+                    if input_window_fields.contains(&(column.relation.as_ref(), input_field).into())
+                    {
+                        return Some(Ok((input_field.clone(), index)));
+                    }
+                }
+                None
+            })
+            .collect::<DFResult<_>>()?;
+
+        if window_field.len() != 1 {
+            return plan_err!(
+                "Window function requires exactly one window expression in partition_by"
+            );
+        }
+
+        let (_window_field, index) = window_field.pop().unwrap(); // cannot fail: the length was checked to be 1 above
+        let mut additional_keys = params.partition_by.clone();
+        additional_keys.remove(index); // the window column itself is dropped from the partition keys
+        let key_count = additional_keys.len();
+
+        let params = WindowFunctionParams { // same parameters, with partition_by reduced to the non-window keys
+            args: params.args,
+            partition_by: additional_keys.clone(),
+            order_by: params.order_by,
+            window_frame: params.window_frame,
+            null_treatment: params.null_treatment,
+        };
+
+        let new_window_func = WindowFunction { fun, params };
+
+        let mut key_projection_expressions: Vec<_> = additional_keys // each remaining key is aliased as `_key_{index}`
+            .iter()
+            .enumerate()
+            .map(|(index, expression)| expression.clone().alias(format!("_key_{index}")))
+            .collect();
+
+        key_projection_expressions.extend( // followed by every input column, unchanged
+            fields_with_qualifiers(input.schema())
+                .iter()
+                .map(|field| Expr::Column(field.qualified_column())),
+        );
+
+        let auto_schema = // built only to obtain the field definitions of the key columns
+            Projection::try_new(key_projection_expressions.clone(), input.clone())?.schema;
+        let mut key_fields = fields_with_qualifiers(&auto_schema)
+            .iter()
+            .take(additional_keys.len())
+            .cloned()
+            .collect::<Vec<_>>();
+        key_fields.extend(fields_with_qualifiers(input.schema()));
+        let key_schema = Arc::new(schema_from_df_fields(&key_fields)?);
+
+        let key_projection = LogicalPlan::Projection(Projection::try_new_with_schema(
+            key_projection_expressions,
+            input.clone(),
+            key_schema,
+        )?);
+
+        let key_plan = LogicalPlan::Extension(Extension {
+            node: Arc::new(KeyCalculationExtension::new(
+                key_projection,
+                KeysOrExprs::Keys((0..key_count).collect()), // the keys are the first `key_count` projection outputs
+            )),
+        });
+
+        let mut sort_expressions: Vec<_> = additional_keys // sort by the partition keys first, then by the function's own ORDER BY
+            .iter()
+            .map(|partition| logical_expr::expr::Sort {
+                expr: partition.clone(),
+                asc: true,
+                nulls_first: false,
+            })
+            .collect();
+        sort_expressions.extend(new_window_func.params.order_by.clone());
+
+        let shuffle = LogicalPlan::Sort(Sort {
+            expr: sort_expressions,
+            input: Arc::new(key_plan),
+            fetch: None,
+        });
+
+        let window_expr = // alias back to the original output name if the rewrite changed it
+            Expr::WindowFunction(Box::new(new_window_func)).alias_if_changed(original_name)?;
+
+        let rewritten_window_plan =
+            LogicalPlan::Window(Window::try_new(vec![window_expr], Arc::new(shuffle))?);
+
+        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+            node: Arc::new(WindowFunctionExtension::new(
+                rewritten_window_plan,
+                (0..key_count).collect(),
+            )),
+        })))
+    }
+}
diff --git a/src/sql/planner/schemas.rs b/src/sql/planner/schemas.rs
new file mode 100644
index 00000000..0440cc85
--- /dev/null
+++ b/src/sql/planner/schemas.rs
@@ -0,0 +1,59 @@
+use crate::sql::planner::types::{DFField, TIMESTAMP_FIELD};
+use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
+use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference};
+use std::{collections::HashMap, sync::Arc};
+
+/// Builds the Arrow struct type used to represent a window.
+///
+/// The struct has two non-nullable fields, `start` and `end`, each a
+/// nanosecond-precision timestamp without a time zone.
+pub fn window_arrow_struct() -> DataType {
+    // Both bounds share type and nullability; only the field name differs.
+    let bound = |name: &str| {
+        Arc::new(Field::new(
+            name,
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            false,
+        ))
+    };
+
+    DataType::Struct(vec![bound("start"), bound("end")].into())
+}
+
+/// Returns `schema` with a `TIMESTAMP_FIELD` column joined onto it.
+///
+/// When a field with that name is already present the schema is handed back
+/// untouched. Otherwise a non-nullable nanosecond timestamp field carrying
+/// `qualifier` is joined to the existing fields.
+///
+/// # Errors
+/// Propagates any error raised while building or joining the schemas.
+pub(crate) fn add_timestamp_field(
+    schema: DFSchemaRef,
+    qualifier: Option<TableReference>,
+) -> DFResult<DFSchemaRef> {
+    if has_timestamp_field(&schema) {
+        return Ok(schema);
+    }
+
+    // A one-column schema holding only the timestamp, to be joined below.
+    let timestamp_only = DFSchema::new_with_metadata(
+        vec![
+            DFField::new(
+                qualifier,
+                TIMESTAMP_FIELD,
+                DataType::Timestamp(TimeUnit::Nanosecond, None),
+                false,
+            )
+            .into(),
+        ],
+        HashMap::new(),
+    )?;
+
+    let extended = schema.join(&timestamp_only)?;
+    Ok(Arc::new(extended))
+}
+
+/// Reports whether any field of `schema` is named `TIMESTAMP_FIELD`.
+///
+/// Only the field name is compared; the qualifier and data type are ignored.
+pub(crate) fn has_timestamp_field(schema: &DFSchemaRef) -> bool {
+    for field in schema.fields().iter() {
+        if field.name().as_str() == TIMESTAMP_FIELD {
+            return true;
+        }
+    }
+    false
+}
+
+/// Returns a new Arrow schema made of `schema`'s fields followed by a
+/// non-nullable nanosecond `TIMESTAMP_FIELD` column.
+///
+/// Schema-level metadata of the input is not carried over, because the result
+/// is built from the field list alone.
+// NOTE(review): unlike `add_timestamp_field`, this appends unconditionally, so
+// an input that already has the column ends up with two — confirm callers
+// never pass such a schema.
+pub fn add_timestamp_field_arrow(schema: Schema) -> SchemaRef {
+    let timestamp = Arc::new(Field::new(
+        TIMESTAMP_FIELD,
+        DataType::Timestamp(TimeUnit::Nanosecond, None),
+        false,
+    ));
+
+    let fields: Vec<_> = schema
+        .fields()
+        .iter()
+        .cloned()
+        .chain(std::iter::once(timestamp))
+        .collect();
+
+    Arc::new(Schema::new(fields))
+}
diff --git a/src/sql/planner/sql_to_plan.rs b/src/sql/planner/sql_to_plan.rs
new file mode 100644
index 00000000..049cd18e
--- /dev/null
+++ b/src/sql/planner/sql_to_plan.rs
@@ -0,0 +1,22 @@
+use datafusion::common::Result;
+use datafusion::logical_expr::LogicalPlan;
+use datafusion::sql::sqlparser::ast::Statement;
+use tracing::debug;
+
+use crate::sql::planner::StreamSchemaProvider;
+
+/// Stage 2: Statement → LogicalPlan
+///
+/// Lowers a parsed SQL statement into a DataFusion logical plan, with
+/// `schema_provider` handed to `SqlToRel` as its planning context.
+/// The resulting plan is logged at debug level in Graphviz form.
+///
+/// # Errors
+/// Returns whatever error `SqlToRel::sql_statement_to_plan` reports.
+pub fn statement_to_plan(
+    statement: Statement,
+    schema_provider: &StreamSchemaProvider,
+) -> Result<LogicalPlan> {
+    let planner = datafusion::sql::planner::SqlToRel::new(schema_provider);
+
+    planner
+        .sql_statement_to_plan(statement)
+        .inspect(|plan| debug!("Logical plan:\n{}", plan.display_graphviz()))
+}
diff --git a/src/sql/planner/types.rs b/src/sql/planner/types.rs
new file mode 100644
index 00000000..2330c0de
--- /dev/null
+++ b/src/sql/planner/types.rs
@@ -0,0 +1,513 @@
+use std::collections::HashMap;
+use std::fmt::{Debug, Formatter};
+use std::sync::Arc;
+use std::time::Duration;
+
+use datafusion::arrow::datatypes::{
+    DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DataType, Field, FieldRef, IntervalUnit,
+    Schema, SchemaRef, TimeUnit,
+};
+use datafusion::common::{Column, DFSchema, Result, TableReference, plan_datafusion_err, plan_err};
+use datafusion::logical_expr::{
+    ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
+};
+use std::any::Any;
+
+pub const TIMESTAMP_FIELD: &str = "_timestamp";
+
+/// Processing mode selector with two variants, `Append` and `Update`.
+// NOTE(review): presumably distinguishes append-only output from updating
+// output — confirm against the planner code that consumes this enum.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ProcessingMode {
+    Append,
+    Update,
+}
+
+/// Kind of window recognised by the planner.
+///
+/// `find_window` produces `Tumbling`, `Sliding` and `Session` from calls to
+/// the `tumble()`, `hop()` and `session()` functions respectively.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum WindowType {
+    /// Described by a single `width`; produced by `tumble(width)` and by
+    /// `hop(slide, width)` when the slide equals the width.
+    Tumbling { width: Duration },
+    /// Described by a `width` and a `slide`; produced by `hop(slide, width)`
+    /// when the two differ.
+    Sliding { width: Duration, slide: Duration },
+    /// Described by a `gap`; produced by `session(gap)`.
+    Session { gap: Duration },
+    // NOTE(review): never produced by `find_window`; meaning must be
+    // confirmed against the code that constructs it.
+    Instant,
+}
+
+/// Describes where an operator's window comes from.
+// NOTE(review): the variant semantics below are inferred from names only —
+// confirm against the planner code that matches on this enum.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) enum WindowBehavior {
+    /// The window is carried by the operator itself: its `WindowType`, the
+    /// field that holds it, that field's index, and a nesting flag.
+    FromOperator {
+        window: WindowType,
+        window_field: DFField,
+        window_index: usize,
+        is_nested: bool,
+    },
+    /// Presumably the window is already present in the input data.
+    InData,
+}
+
+/// An Arrow field paired with an optional table qualifier.
+///
+/// Converts to and from the `(Option<TableReference>, FieldRef)` tuple via the
+/// `From` impls defined alongside this type.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct DFField {
+    /// Table the field belongs to, if any.
+    qualifier: Option<TableReference>,
+    /// The shared Arrow field (name, data type, nullability, metadata).
+    field: FieldRef,
+}
+
+impl From<(Option<TableReference>, FieldRef)> for DFField {
+    /// Wraps an already shared field together with its optional qualifier.
+    fn from((qualifier, field): (Option<TableReference>, FieldRef)) -> Self {
+        Self { qualifier, field }
+    }
+}
+
+impl From<(Option<&TableReference>, &Field)> for DFField {
+    /// Builds an owned `DFField` from borrowed parts by cloning both the
+    /// qualifier and the field.
+    fn from((qualifier, field): (Option<&TableReference>, &Field)) -> Self {
+        Self {
+            field: Arc::new(field.clone()),
+            qualifier: qualifier.cloned(),
+        }
+    }
+}
+
+impl From<DFField> for (Option<TableReference>, FieldRef) {
+    /// Splits a `DFField` back into its qualifier and field.
+    fn from(DFField { qualifier, field }: DFField) -> Self {
+        (qualifier, field)
+    }
+}
+
+impl DFField {
+    /// Creates a field named `name` with the given type and nullability,
+    /// tagged with `qualifier`.
+    pub fn new(
+        qualifier: Option<TableReference>,
+        name: impl Into<String>,
+        data_type: DataType,
+        nullable: bool,
+    ) -> Self {
+        let field = Arc::new(Field::new(name, data_type, nullable));
+        Self { qualifier, field }
+    }
+
+    /// Creates a field that has no qualifier.
+    pub fn new_unqualified(name: &str, data_type: DataType, nullable: bool) -> Self {
+        Self::new(None, name, data_type, nullable)
+    }
+
+    /// Name of the underlying Arrow field.
+    pub fn name(&self) -> &String {
+        self.field.name()
+    }
+
+    /// Data type of the underlying Arrow field.
+    pub fn data_type(&self) -> &DataType {
+        self.field.data_type()
+    }
+
+    /// Whether the underlying Arrow field is nullable.
+    pub fn is_nullable(&self) -> bool {
+        self.field.is_nullable()
+    }
+
+    /// Metadata attached to the underlying Arrow field.
+    pub fn metadata(&self) -> &HashMap<String, String> {
+        self.field.metadata()
+    }
+
+    /// Returns `qualifier.name` when a qualifier is set, otherwise just the name.
+    pub fn qualified_name(&self) -> String {
+        match &self.qualifier {
+            Some(qualifier) => format!("{}.{}", qualifier, self.field.name()),
+            None => self.field.name().to_owned(),
+        }
+    }
+
+    /// Column reference that carries this field's qualifier.
+    pub fn qualified_column(&self) -> Column {
+        self.column_with_relation(self.qualifier.clone())
+    }
+
+    /// Column reference with the qualifier dropped.
+    pub fn unqualified_column(&self) -> Column {
+        self.column_with_relation(None)
+    }
+
+    /// Borrowed view of the qualifier, if any.
+    pub fn qualifier(&self) -> Option<&TableReference> {
+        self.qualifier.as_ref()
+    }
+
+    /// The shared Arrow field.
+    pub fn field(&self) -> &FieldRef {
+        &self.field
+    }
+
+    /// Consumes the field and returns it without a qualifier.
+    pub fn strip_qualifier(self) -> Self {
+        Self {
+            qualifier: None,
+            ..self
+        }
+    }
+
+    /// Consumes the field and returns it with nullability set to `nullable`.
+    pub fn with_nullable(mut self, nullable: bool) -> Self {
+        let updated = Field::clone(&self.field).with_nullable(nullable);
+        self.field = Arc::new(updated);
+        self
+    }
+
+    /// Consumes the field and returns it with `metadata` set on the Arrow field.
+    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
+        let updated = Field::clone(&self.field).with_metadata(metadata);
+        self.field = Arc::new(updated);
+        self
+    }
+
+    /// Builds a column named after this field with the given relation.
+    fn column_with_relation(&self, relation: Option<TableReference>) -> Column {
+        Column {
+            relation,
+            name: self.field.name().clone(),
+            spans: Default::default(),
+        }
+    }
+}
+
+/// An Arrow schema annotated with the position of its timestamp column and,
+/// optionally, a set of key column positions.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct StreamSchema {
+    /// The underlying Arrow schema.
+    pub schema: SchemaRef,
+    /// Index into `schema` of the timestamp column. The `from_*` constructors
+    /// find it by looking up `TIMESTAMP_FIELD` by name.
+    pub timestamp_index: usize,
+    /// Indices of the key columns; `None` for an unkeyed schema.
+    // NOTE(review): presumably these are the partitioning keys — confirm.
+    pub key_indices: Option<Vec<usize>>,
+}
+
+impl StreamSchema {
+    /// Assembles a schema from already known parts; nothing is validated.
+    pub fn new(schema: SchemaRef, timestamp_index: usize, key_indices: Option<Vec<usize>>) -> Self {
+        Self {
+            schema,
+            timestamp_index,
+            key_indices,
+        }
+    }
+
+    /// Like [`StreamSchema::new`] but without key columns.
+    pub fn new_unkeyed(schema: SchemaRef, timestamp_index: usize) -> Self {
+        Self::new(schema, timestamp_index, None)
+    }
+
+    /// Builds an unkeyed schema from a list of fields.
+    // NOTE(review): when no `TIMESTAMP_FIELD` column exists this silently
+    // uses index 0 instead of failing like the `from_schema_*` constructors —
+    // confirm that callers always supply the column.
+    pub fn from_fields(fields: Vec<Field>) -> Self {
+        let schema = Arc::new(Schema::new(fields));
+        let timestamp_index = Self::locate_timestamp(&schema).unwrap_or(0);
+        Self::new_unkeyed(schema, timestamp_index)
+    }
+
+    /// Builds a keyed schema, locating the timestamp column by name.
+    ///
+    /// # Errors
+    /// Returns a plan error when `schema` has no `TIMESTAMP_FIELD` column.
+    pub fn from_schema_keys(schema: SchemaRef, key_indices: Vec<usize>) -> Result<Self> {
+        let Some(timestamp_index) = Self::locate_timestamp(&schema) else {
+            return Err(datafusion::error::DataFusionError::Plan(format!(
+                "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}"
+            )));
+        };
+        Ok(Self::new(schema, timestamp_index, Some(key_indices)))
+    }
+
+    /// Builds an unkeyed schema, locating the timestamp column by name.
+    ///
+    /// # Errors
+    /// Returns a plan error when `schema` has no `TIMESTAMP_FIELD` column.
+    pub fn from_schema_unkeyed(schema: SchemaRef) -> Result<Self> {
+        let Some(timestamp_index) = Self::locate_timestamp(&schema) else {
+            return Err(datafusion::error::DataFusionError::Plan(format!(
+                "no {TIMESTAMP_FIELD} field in schema"
+            )));
+        };
+        Ok(Self::new_unkeyed(schema, timestamp_index))
+    }
+
+    /// Index of the column named `TIMESTAMP_FIELD`, if the schema has one.
+    fn locate_timestamp(schema: &Schema) -> Option<usize> {
+        schema
+            .column_with_name(TIMESTAMP_FIELD)
+            .map(|(index, _)| index)
+    }
+}
+
+/// A scalar UDF that only carries a name, a signature and a return-type
+/// function; invoking it is not supported (see its `ScalarUDFImpl` impl).
+// The boxed return-type closure is what trips `clippy::type_complexity`.
+#[allow(clippy::type_complexity)]
+pub(crate) struct PlaceholderUdf {
+    /// Function name reported through `ScalarUDFImpl::name`.
+    name: String,
+    /// Accepted argument signature.
+    signature: Signature,
+    /// Computes the return type from the argument types.
+    return_type: Arc<dyn Fn(&[DataType]) -> Result<DataType> + Send + Sync + 'static>,
+}
+
+impl Debug for PlaceholderUdf {
+    /// Renders as `PlaceholderUDF<name>`; the signature and closure are omitted.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str("PlaceholderUDF<")?;
+        f.write_str(&self.name)?;
+        f.write_str(">")
+    }
+}
+
+impl ScalarUDFImpl for PlaceholderUdf {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Name the function was registered under.
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Argument signature supplied at construction time.
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Delegates to the stored return-type function.
+    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
+        (self.return_type)(args)
+    }
+
+    /// Always fails: a placeholder exists only for planning.
+    ///
+    /// Reaching this at execution time means the placeholder was never
+    /// replaced. That is reported as an internal error rather than a panic so
+    /// a planner bug fails the query instead of aborting the calling thread.
+    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        datafusion::common::internal_err!(
+            "PlaceholderUdf `{}` should never be called at execution time",
+            self.name
+        )
+    }
+}
+
+impl PlaceholderUdf {
+    /// Creates a placeholder UDF named `name` that accepts exactly `args` and
+    /// always reports `ret` as its return type.
+    ///
+    /// The signature is marked `Volatility::Volatile`.
+    pub fn with_return(
+        name: impl Into<String>,
+        args: Vec<DataType>,
+        ret: DataType,
+    ) -> Arc<ScalarUDF> {
+        let placeholder = PlaceholderUdf {
+            name: name.into(),
+            signature: Signature::exact(args, Volatility::Volatile),
+            // The argument types are ignored; the return type is fixed.
+            return_type: Arc::new(move |_| Ok(ret.clone())),
+        };
+
+        Arc::new(ScalarUDF::new_from_impl(placeholder))
+    }
+}
+
+/// SQL planner configuration.
+#[derive(Clone, Debug)]
+pub struct SqlConfig {
+    /// Parallelism used when none is specified; defaults to 4.
+    // NOTE(review): presumably the per-operator parallelism — confirm.
+    pub default_parallelism: usize,
+}
+
+impl Default for SqlConfig {
+    /// Defaults `default_parallelism` to 4.
+    fn default() -> Self {
+        const DEFAULT_PARALLELISM: usize = 4;
+
+        Self {
+            default_parallelism: DEFAULT_PARALLELISM,
+        }
+    }
+}
+
+/// Options applied while planning a query.
+#[derive(Clone)]
+pub struct PlanningOptions {
+    /// Time-to-live duration; defaults to 24 hours.
+    // NOTE(review): presumably the state retention period — confirm.
+    pub ttl: Duration,
+}
+
+impl Default for PlanningOptions {
+    /// Defaults `ttl` to 24 hours.
+    fn default() -> Self {
+        const SECONDS_PER_DAY: u64 = 24 * 60 * 60;
+
+        Self {
+            ttl: Duration::from_secs(SECONDS_PER_DAY),
+        }
+    }
+}
+
+/// Converts a SQL AST data type into an Arrow `DataType`.
+///
+/// Arrays written with angle or square brackets become a `List` whose
+/// nullable element field is named `field`. Everything else is handled by
+/// `convert_simple_data_type`.
+///
+/// # Errors
+/// Returns a plan error for arrays without an element type, and propagates
+/// errors from converting the element or the simple type.
+pub fn convert_data_type(sql_type: &datafusion::sql::sqlparser::ast::DataType) -> Result<DataType> {
+    use datafusion::sql::sqlparser::ast::ArrayElemTypeDef;
+    use datafusion::sql::sqlparser::ast::DataType as SQLDataType;
+
+    // Anything that is not an array goes straight to the simple converter.
+    let SQLDataType::Array(element_def) = sql_type else {
+        return convert_simple_data_type(sql_type);
+    };
+
+    match element_def {
+        ArrayElemTypeDef::AngleBracket(element_sql_type)
+        | ArrayElemTypeDef::SquareBracket(element_sql_type, _) => {
+            let element_type = convert_data_type(element_sql_type)?;
+            let element_field = Field::new("field", element_type, true);
+            Ok(DataType::List(Arc::new(element_field)))
+        }
+        ArrayElemTypeDef::None => {
+            plan_err!("Arrays with unspecified type is not supported")
+        }
+        // Any other array spelling is left to the simple converter.
+        _ => convert_simple_data_type(sql_type),
+    }
+}
+
+/// Converts a non-array SQL AST data type into an Arrow `DataType`.
+///
+/// Covers booleans, signed and unsigned integers, floats, character types
+/// (all mapped to `Utf8`), timestamps, dates, times, decimals, `BYTEA`,
+/// intervals and structs.
+///
+/// Timestamps and times are accepted both without a time-zone clause and with
+/// an explicit `WITHOUT TIME ZONE`; the two spellings are treated the same.
+/// Timestamp precision must be 0, 3, 6 or 9. Times accept no precision.
+///
+/// # Errors
+/// Returns a plan error for unsupported types, unsupported timestamp
+/// precisions, invalid decimal parameters and anonymous struct fields.
+fn convert_simple_data_type(
+    sql_type: &datafusion::sql::sqlparser::ast::DataType,
+) -> Result<DataType> {
+    use datafusion::sql::sqlparser::ast::DataType as SQLDataType;
+    use datafusion::sql::sqlparser::ast::{ExactNumberInfo, TimezoneInfo};
+
+    match sql_type {
+        SQLDataType::Boolean | SQLDataType::Bool => Ok(DataType::Boolean),
+        SQLDataType::TinyInt(_) => Ok(DataType::Int8),
+        SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16),
+        SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => Ok(DataType::Int32),
+        SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64),
+        SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8),
+        SQLDataType::SmallIntUnsigned(_) | SQLDataType::Int2Unsigned(_) => Ok(DataType::UInt16),
+        SQLDataType::IntUnsigned(_)
+        | SQLDataType::UnsignedInteger
+        | SQLDataType::Int4Unsigned(_) => Ok(DataType::UInt32),
+        SQLDataType::BigIntUnsigned(_) | SQLDataType::Int8Unsigned(_) => Ok(DataType::UInt64),
+        SQLDataType::Float(_) | SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
+        SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
+            Ok(DataType::Float64)
+        }
+        SQLDataType::Char(_)
+        | SQLDataType::Varchar(_)
+        | SQLDataType::Text
+        | SQLDataType::String(_) => Ok(DataType::Utf8),
+        // `TIMESTAMP` and `TIMESTAMP WITHOUT TIME ZONE` mean the same thing,
+        // matching how `TIME` is handled below.
+        SQLDataType::Timestamp(None, TimezoneInfo::None | TimezoneInfo::WithoutTimeZone)
+        | SQLDataType::Datetime(_) => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
+        SQLDataType::Timestamp(
+            Some(precision),
+            TimezoneInfo::None | TimezoneInfo::WithoutTimeZone,
+        ) => match *precision {
+            0 => Ok(DataType::Timestamp(TimeUnit::Second, None)),
+            3 => Ok(DataType::Timestamp(TimeUnit::Millisecond, None)),
+            6 => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)),
+            9 => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
+            _ => {
+                plan_err!(
+                    "unsupported precision {} -- supported: 0 (seconds), 3 (ms), 6 (us), 9 (ns)",
+                    precision
+                )
+            }
+        },
+        SQLDataType::Date => Ok(DataType::Date32),
+        // Times carrying a time zone fall through to the catch-all error.
+        SQLDataType::Time(None, TimezoneInfo::None | TimezoneInfo::WithoutTimeZone) => {
+            Ok(DataType::Time64(TimeUnit::Nanosecond))
+        }
+        SQLDataType::Numeric(exact_number_info) | SQLDataType::Decimal(exact_number_info) => {
+            let (precision, scale) = match *exact_number_info {
+                ExactNumberInfo::None => (None, None),
+                ExactNumberInfo::Precision(precision) => (Some(precision), None),
+                ExactNumberInfo::PrecisionAndScale(precision, scale) => {
+                    (Some(precision), Some(scale))
+                }
+            };
+            make_decimal_type(precision, scale)
+        }
+        SQLDataType::Bytea => Ok(DataType::Binary),
+        SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
+        SQLDataType::Struct(fields, _) => {
+            // Every struct member must be named; members are always nullable.
+            let fields: Vec<_> = fields
+                .iter()
+                .map(|f| {
+                    Ok::<_, datafusion::error::DataFusionError>(Arc::new(Field::new(
+                        f.field_name
+                            .as_ref()
+                            .ok_or_else(|| {
+                                plan_datafusion_err!("anonymous struct fields are not allowed")
+                            })?
+                            .to_string(),
+                        convert_data_type(&f.field_type)?,
+                        true,
+                    )))
+                })
+                .collect::<Result<_>>()?;
+            Ok(DataType::Struct(fields.into()))
+        }
+        _ => plan_err!("Unsupported SQL type {sql_type:?}"),
+    }
+}
+
+/// Builds a `Decimal128` type from optional SQL precision and scale.
+///
+/// * precision and scale given: both are used.
+/// * only precision given: scale defaults to 0.
+/// * neither given: `DECIMAL128_MAX_PRECISION` and `DECIMAL_DEFAULT_SCALE`.
+///
+/// # Errors
+/// Returns a plan error when only a scale is given, or when the values do not
+/// satisfy `0 < precision <= 38` and `scale <= precision`.
+fn make_decimal_type(precision: Option<u64>, scale: Option<u64>) -> Result<DataType> {
+    let (precision, scale) = match (precision, scale) {
+        (Some(p), Some(s)) => (p, s),
+        (Some(p), None) => (p, 0),
+        (None, Some(_)) => return plan_err!("Cannot specify only scale for decimal data type"),
+        (None, None) => {
+            return Ok(DataType::Decimal128(
+                DECIMAL128_MAX_PRECISION,
+                DECIMAL_DEFAULT_SCALE,
+            ));
+        }
+    };
+
+    // Narrow with checked conversions so out-of-range input is rejected.
+    // Plain `as` casts would wrap it into a valid-looking value first
+    // (e.g. precision 294 -> 38, scale 255 -> -1).
+    match (u8::try_from(precision), i8::try_from(scale)) {
+        (Ok(p), Ok(s))
+            if p != 0 && p <= DECIMAL128_MAX_PRECISION && s.unsigned_abs() <= p =>
+        {
+            Ok(DataType::Decimal128(p, s))
+        }
+        _ => plan_err!(
+            "Decimal(precision = {precision}, scale = {scale}) should satisfy `0 < precision <= 38`, and `scale <= precision`."
+        ),
+    }
+}
+
+/// Collects every field of `schema`, in order, as a `DFField` that carries a
+/// clone of its qualifier.
+pub fn fields_with_qualifiers(schema: &DFSchema) -> Vec<DFField> {
+    schema
+        .iter()
+        .map(|(qualifier, field)| DFField::from((qualifier.cloned(), Arc::clone(field))))
+        .collect()
+}
+
+/// Builds a `DFSchema` from `fields` with empty schema metadata.
+///
+/// # Errors
+/// Propagates any error from `schema_from_df_fields_with_metadata`.
+pub fn schema_from_df_fields(fields: &[DFField]) -> Result<DFSchema> {
+    let no_metadata = HashMap::new();
+    schema_from_df_fields_with_metadata(fields, no_metadata)
+}
+
+/// Builds a `DFSchema` from `fields`, attaching `metadata` to the schema.
+///
+/// # Errors
+/// Propagates any error from `DFSchema::new_with_metadata`.
+pub fn schema_from_df_fields_with_metadata(
+    fields: &[DFField],
+    metadata: HashMap<String, String>,
+) -> Result<DFSchema> {
+    let qualified_fields: Vec<(Option<TableReference>, FieldRef)> = fields
+        .iter()
+        .cloned()
+        .map(<(Option<TableReference>, FieldRef)>::from)
+        .collect();
+
+    DFSchema::new_with_metadata(qualified_fields, metadata)
+}
+
+/// Extracts a non-negative `Duration` from an interval literal.
+///
+/// Accepts `IntervalDayTime` and `IntervalMonthDayNano` literals. Days are
+/// counted as exactly 24 hours. Components may have mixed signs as long as
+/// their sum is not negative.
+///
+/// # Errors
+/// * not-implemented error when a month component is non-zero;
+/// * plan error when the expression is not one of the interval literals above;
+/// * plan error when the total is negative or cannot be represented.
+pub fn get_duration(expression: &Expr) -> Result<Duration> {
+    use datafusion::common::ScalarValue;
+
+    const NANOS_PER_SEC: i128 = 1_000_000_000;
+    const NANOS_PER_MILLI: i128 = 1_000_000;
+    const NANOS_PER_DAY: i128 = 24 * 60 * 60 * NANOS_PER_SEC;
+
+    // Sum in a wide signed type: the interval components are signed, and
+    // casting them straight to `u64` would turn negatives into huge values.
+    let total_nanos: i128 = match expression {
+        Expr::Literal(ScalarValue::IntervalDayTime(Some(val)), _) => {
+            i128::from(val.days) * NANOS_PER_DAY + i128::from(val.milliseconds) * NANOS_PER_MILLI
+        }
+        Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(val)), _) => {
+            if val.months != 0 {
+                return datafusion::common::not_impl_err!(
+                    "Windows do not support durations specified as months"
+                );
+            }
+            i128::from(val.days) * NANOS_PER_DAY + i128::from(val.nanoseconds)
+        }
+        _ => {
+            return plan_err!(
+                "unsupported Duration expression, expect duration literal, not {}",
+                expression
+            );
+        }
+    };
+
+    if total_nanos < 0 {
+        return plan_err!("duration must not be negative, got {}", expression);
+    }
+
+    let (Ok(secs), Ok(subsec_nanos)) = (
+        u64::try_from(total_nanos / NANOS_PER_SEC),
+        u32::try_from(total_nanos % NANOS_PER_SEC),
+    ) else {
+        return plan_err!("duration {} is out of range", expression);
+    };
+
+    Ok(Duration::new(secs, subsec_nanos))
+}
+
+/// Recognises a window function call and returns the window it describes.
+///
+/// Aliases are looked through. The recognised functions are:
+/// * `hop(slide, width)`: `Sliding`, or `Tumbling` when slide equals width;
+/// * `tumble(width)`: `Tumbling`;
+/// * `session(gap)`: `Session`.
+///
+/// Returns `Ok(None)` for any other expression.
+///
+/// # Errors
+/// Returns a plan error when a window function has the wrong number of
+/// arguments, when `hop()` is given a zero slide or a width that is not a
+/// multiple of the slide, and propagates errors from `get_duration`.
+pub fn find_window(expression: &Expr) -> Result<Option<WindowType>> {
+    use datafusion::logical_expr::expr::Alias;
+    use datafusion::logical_expr::expr::ScalarFunction;
+
+    match expression {
+        Expr::ScalarFunction(ScalarFunction { func: fun, args }) => {
+            match (fun.name(), args.as_slice()) {
+                ("hop", [slide, width]) => {
+                    let slide = get_duration(slide)?;
+                    let width = get_duration(width)?;
+                    // A zero slide would make the remainder below divide by zero.
+                    if slide.is_zero() {
+                        return plan_err!("hop() slide must be greater than zero");
+                    }
+                    if width.as_nanos() % slide.as_nanos() != 0 {
+                        return plan_err!(
+                            "hop() width {:?} must be a multiple of slide {:?}",
+                            width,
+                            slide
+                        );
+                    }
+                    if slide == width {
+                        Ok(Some(WindowType::Tumbling { width }))
+                    } else {
+                        Ok(Some(WindowType::Sliding { width, slide }))
+                    }
+                }
+                ("tumble", [width]) => {
+                    let width = get_duration(width)?;
+                    Ok(Some(WindowType::Tumbling { width }))
+                }
+                ("session", [gap]) => {
+                    let gap = get_duration(gap)?;
+                    Ok(Some(WindowType::Session { gap }))
+                }
+                // Wrong arity is reported as a planning error, not a panic.
+                ("hop", _) => plan_err!("wrong number of arguments for hop(), expected two"),
+                ("tumble", _) => {
+                    plan_err!("wrong number of arguments for tumble(), expect one")
+                }
+                ("session", _) => {
+                    plan_err!("wrong number of arguments for session(), expected one")
+                }
+                _ => Ok(None),
+            }
+        }
+        Expr::Alias(Alias { expr, .. }) => find_window(expr),
+        _ => Ok(None),
+    }
+}

From 18c76c80faad1d442319705d988893b9a0a57a64 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Mon, 16 Mar 2026 22:57:02 +0800
Subject: [PATCH 02/44] update

---
 Cargo.lock                                    |  124 +-
 Cargo.toml                                    |   14 +-
 protocol/Cargo.toml                           |    1 +
 protocol/build.rs                             |   48 +-
 protocol/proto/fs_api.proto                   |  289 ++++
 protocol/src/lib.rs                           |   25 +-
 src/api/checkpoints.rs                        |   96 ++
 src/api/connections.rs                        |  604 ++++++++
 src/api/metrics.rs                            |   41 +
 src/api/mod.rs                                |   43 +
 src/api/pipelines.rs                          |  156 ++
 src/api/public_ids.rs                         |   57 +
 src/api/schema_resolver.rs                    |   82 ++
 src/api/udfs.rs                               |   56 +
 src/api/var_str.rs                            |   79 +
 src/datastream/logical.rs                     |    2 +-
 src/lib.rs                                    |    2 +
 src/main.rs                                   |    3 +
 src/sql/catalog/connector.rs                  |   59 +
 src/sql/catalog/connector_table.rs            |  199 +++
 src/sql/catalog/field_spec.rs                 |   52 +
 src/sql/catalog/insert.rs                     |   55 +
 src/sql/catalog/mod.rs                        |   25 +
 src/sql/catalog/optimizer.rs                  |   95 ++
 src/sql/catalog/table.rs                      |  202 +++
 src/sql/catalog/utils.rs                      |   78 +
 src/sql/functions/mod.rs                      |  600 ++++++++
 src/sql/mod.rs                                |    5 +
 src/sql/physical/mod.rs                       | 1265 +++++++++++++++++
 src/sql/planner/extension/aggregate.rs        |    2 +-
 src/sql/planner/extension/debezium.rs         |  250 ++++
 src/sql/planner/extension/join.rs             |    2 +-
 src/sql/planner/extension/key_calculation.rs  |    2 +-
 src/sql/planner/extension/lookup.rs           |  127 ++
 src/sql/planner/extension/mod.rs              |  209 ++-
 src/sql/planner/extension/projection.rs       |    2 +-
 src/sql/planner/extension/remote_table.rs     |    2 +-
 src/sql/planner/extension/sink.rs             |  135 ++
 src/sql/planner/extension/table_source.rs     |   94 ++
 .../planner/extension/updating_aggregate.rs   |   89 ++
 src/sql/planner/extension/watermark_node.rs   |    2 +-
 src/sql/planner/extension/window_fn.rs        |    2 +-
 src/sql/planner/mod.rs                        |  598 ++++----
 src/sql/planner/physical_planner.rs           |  396 ++++++
 src/sql/planner/plan/aggregate.rs             |    2 +-
 src/sql/planner/plan/join.rs                  |    4 +-
 src/sql/planner/plan/mod.rs                   |    2 +-
 src/sql/planner/plan/window_fn.rs             |    2 +-
 src/sql/planner/rewrite/async_udf_rewriter.rs |  118 ++
 src/sql/planner/rewrite/mod.rs                |   27 +
 src/sql/planner/rewrite/row_time.rs           |   39 +
 .../planner/rewrite/sink_input_rewriter.rs    |   46 +
 .../rewrite/source_metadata_visitor.rs        |   57 +
 src/sql/planner/rewrite/source_rewriter.rs    |  272 ++++
 src/sql/planner/rewrite/time_window.rs        |   83 ++
 src/sql/planner/rewrite/unnest_rewriter.rs    |  178 +++
 src/sql/planner/schema_provider.rs            |  360 +++++
 src/sql/planner/schemas.rs                    |   64 +-
 src/sql/planner/types.rs                      |  513 -------
 src/sql/planner/udafs.rs                      |   31 +
 src/sql/types/data_type.rs                    |  144 ++
 src/sql/types/df_field.rs                     |  141 ++
 src/sql/types/mod.rs                          |   50 +
 src/sql/types/placeholder_udf.rs              |   58 +
 src/sql/types/stream_schema.rs                |   76 +
 src/sql/types/window.rs                       |   95 ++
 src/storage/task/rocksdb_storage.rs           |   44 +-
 src/types/arrow_ext.rs                        |  169 +++
 src/types/control.rs                          |  152 ++
 src/types/date.rs                             |   70 +
 src/types/debezium.rs                         |  136 ++
 src/types/df.rs                               |  394 +++++
 src/types/errors.rs                           |   67 +
 src/types/formats.rs                          |  234 +++
 src/types/hash.rs                             |   88 ++
 src/types/message.rs                          |   42 +
 src/types/mod.rs                              |   71 +
 src/types/operator_config.rs                  |   30 +
 src/types/task_info.rs                        |   80 ++
 src/types/time_utils.rs                       |   62 +
 src/types/worker.rs                           |   14 +
 81 files changed, 9339 insertions(+), 945 deletions(-)
 create mode 100644 protocol/proto/fs_api.proto
 create mode 100644 src/api/checkpoints.rs
 create mode 100644 src/api/connections.rs
 create mode 100644 src/api/metrics.rs
 create mode 100644 src/api/mod.rs
 create mode 100644 src/api/pipelines.rs
 create mode 100644 src/api/public_ids.rs
 create mode 100644 src/api/schema_resolver.rs
 create mode 100644 src/api/udfs.rs
 create mode 100644 src/api/var_str.rs
 create mode 100644 src/sql/catalog/connector.rs
 create mode 100644 src/sql/catalog/connector_table.rs
 create mode 100644 src/sql/catalog/field_spec.rs
 create mode 100644 src/sql/catalog/insert.rs
 create mode 100644 src/sql/catalog/mod.rs
 create mode 100644 src/sql/catalog/optimizer.rs
 create mode 100644 src/sql/catalog/table.rs
 create mode 100644 src/sql/catalog/utils.rs
 create mode 100644 src/sql/functions/mod.rs
 create mode 100644 src/sql/physical/mod.rs
 create mode 100644 src/sql/planner/extension/debezium.rs
 create mode 100644 src/sql/planner/extension/lookup.rs
 create mode 100644 src/sql/planner/extension/sink.rs
 create mode 100644 src/sql/planner/extension/table_source.rs
 create mode 100644 src/sql/planner/extension/updating_aggregate.rs
 create mode 100644 src/sql/planner/physical_planner.rs
 create mode 100644 src/sql/planner/rewrite/async_udf_rewriter.rs
 create mode 100644 src/sql/planner/rewrite/mod.rs
 create mode 100644 src/sql/planner/rewrite/row_time.rs
 create mode 100644 src/sql/planner/rewrite/sink_input_rewriter.rs
 create mode 100644 src/sql/planner/rewrite/source_metadata_visitor.rs
 create mode 100644 src/sql/planner/rewrite/source_rewriter.rs
 create mode 100644 src/sql/planner/rewrite/time_window.rs
 create mode 100644 src/sql/planner/rewrite/unnest_rewriter.rs
 create mode 100644 src/sql/planner/schema_provider.rs
 delete mode 100644 src/sql/planner/types.rs
 create mode 100644 src/sql/planner/udafs.rs
 create mode 100644 src/sql/types/data_type.rs
 create mode 100644 src/sql/types/df_field.rs
 create mode 100644 src/sql/types/mod.rs
 create mode 100644 src/sql/types/placeholder_udf.rs
 create mode 100644 src/sql/types/stream_schema.rs
 create mode 100644 src/sql/types/window.rs
 create mode 100644 src/types/arrow_ext.rs
 create mode 100644 src/types/control.rs
 create mode 100644 src/types/date.rs
 create mode 100644 src/types/debezium.rs
 create mode 100644 src/types/df.rs
 create mode 100644 src/types/errors.rs
 create mode 100644 src/types/formats.rs
 create mode 100644 src/types/hash.rs
 create mode 100644 src/types/message.rs
 create mode 100644 src/types/mod.rs
 create mode 100644 src/types/operator_config.rs
 create mode 100644 src/types/task_info.rs
 create mode 100644 src/types/time_utils.rs
 create mode 100644 src/types/worker.rs

diff --git a/Cargo.lock b/Cargo.lock
index 6cf6182a..cb19233d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -651,11 +651,22 @@ dependencies = [
 
 [[package]]
 name = "bincode"
-version = "1.3.3"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
 dependencies = [
+ "bincode_derive",
  "serde",
+ "unty",
+]
+
+[[package]]
+name = "bincode_derive"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
+dependencies = [
+ "virtue",
 ]
 
 [[package]]
@@ -832,7 +843,7 @@ dependencies = [
  "cap-primitives",
  "cap-std",
  "io-lifetimes",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -861,7 +872,7 @@ dependencies = [
  "maybe-owned",
  "rustix 1.1.3",
  "rustix-linux-procfs",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
  "winx",
 ]
 
@@ -941,7 +952,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
 dependencies = [
  "iana-time-zone",
+ "js-sys",
  "num-traits",
+ "wasm-bindgen",
  "windows-link",
 ]
 
@@ -2236,7 +2249,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
 dependencies = [
  "cfg-if",
  "rustix 1.1.3",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -2317,7 +2330,7 @@ checksum = "94e7099f6313ecacbe1256e8ff9d617b75d1bcb16a6fddef94866d225a01a14a"
 dependencies = [
  "io-lifetimes",
  "rustix 1.1.3",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -2325,13 +2338,15 @@ name = "function-stream"
 version = "0.6.0"
 dependencies = [
  "anyhow",
- "arrow-array 52.2.0",
- "arrow-ipc 52.2.0",
+ "arrow",
+ "arrow-array 55.2.0",
+ "arrow-ipc 55.2.0",
  "arrow-json 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson)",
- "arrow-schema 52.2.0",
+ "arrow-schema 55.2.0",
  "async-trait",
  "base64",
  "bincode",
+ "chrono",
  "clap",
  "cornucopia",
  "cornucopia_async",
@@ -2346,6 +2361,7 @@ dependencies = [
  "datafusion-physical-expr",
  "datafusion-physical-plan",
  "datafusion-proto",
+ "futures",
  "itertools 0.14.0",
  "jiter",
  "log",
@@ -2357,11 +2373,13 @@ dependencies = [
  "pest_derive",
  "petgraph 0.7.1",
  "proctitle",
+ "prost",
  "protocol",
  "rdkafka",
  "rocksdb",
  "serde",
  "serde_json",
+ "serde_json_path",
  "serde_yaml",
  "sqlparser",
  "strum",
@@ -2377,6 +2395,7 @@ dependencies = [
  "uuid",
  "wasmtime",
  "wasmtime-wasi",
+ "xxhash-rust",
 ]
 
 [[package]]
@@ -2964,6 +2983,15 @@ version = "3.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
 
+[[package]]
+name = "inventory"
+version = "0.3.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227"
+dependencies = [
+ "rustversion",
+]
+
 [[package]]
 name = "io-extras"
 version = "0.18.4"
@@ -2971,7 +2999,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2285ddfe3054097ef4b2fe909ef8c3bcd1ea52a8f0d274416caebeef39f04a65"
 dependencies = [
  "io-lifetimes",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -4179,6 +4207,7 @@ dependencies = [
  "env_logger",
  "log",
  "prost",
+ "serde",
  "tonic",
  "tonic-build",
 ]
@@ -4524,7 +4553,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys 0.4.15",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -4705,6 +4734,56 @@ dependencies = [
  "zmij",
 ]
 
+[[package]]
+name = "serde_json_path"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b992cea3194eea663ba99a042d61cea4bd1872da37021af56f6a37e0359b9d33"
+dependencies = [
+ "inventory",
+ "nom",
+ "regex",
+ "serde",
+ "serde_json",
+ "serde_json_path_core",
+ "serde_json_path_macros",
+ "thiserror 2.0.17",
+]
+
+[[package]]
+name = "serde_json_path_core"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dde67d8dfe7d4967b5a95e247d4148368ddd1e753e500adb34b3ffe40c6bc1bc"
+dependencies = [
+ "inventory",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.17",
+]
+
+[[package]]
+name = "serde_json_path_macros"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "517acfa7f77ddaf5c43d5f119c44a683774e130b4247b7d3210f8924506cfac8"
+dependencies = [
+ "inventory",
+ "serde_json_path_core",
+ "serde_json_path_macros_internal",
+]
+
+[[package]]
+name = "serde_json_path_macros_internal"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aafbefbe175fa9bf03ca83ef89beecff7d2a95aaacd5732325b90ac8c3bd7b90"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "serde_spanned"
 version = "1.0.4"
@@ -4918,7 +4997,6 @@ dependencies = [
  "cfg-if",
  "libc",
  "psm",
- "windows-sys 0.52.0",
  "windows-sys 0.59.0",
 ]
 
@@ -5041,7 +5119,7 @@ dependencies = [
  "fd-lock",
  "io-lifetimes",
  "rustix 0.38.44",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
  "winx",
 ]
 
@@ -5671,6 +5749,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47"
 
+[[package]]
+name = "unty"
+version = "0.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
+
 [[package]]
 name = "url"
 version = "2.5.7"
@@ -5724,6 +5808,12 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
+[[package]]
+name = "virtue"
+version = "0.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -6586,7 +6676,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f3fd376f71958b862e7afb20cfe5a22830e1963462f3a17f49d82a6c1d1f42d"
 dependencies = [
  "bitflags 2.10.0",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -6640,6 +6730,12 @@ dependencies = [
  "tap",
 ]
 
+[[package]]
+name = "xxhash-rust"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+
 [[package]]
 name = "xz2"
 version = "0.1.7"
diff --git a/Cargo.toml b/Cargo.toml
index 0d906ca6..8b38dfe4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,6 +35,7 @@ tonic = { version = "0.12", features = ["default"] }
 async-trait = "0.1"
 num_cpus = "1.0"
 protocol = { path = "./protocol" }
+prost = "0.13"
 rdkafka = { version = "0.38", features = ["cmake-build", "ssl", "gssapi"] }
 crossbeam-channel = "0.5"
 pest = "2.7"
@@ -44,13 +45,18 @@ wasmtime = { version = "41.0.3", features = ["component-model", "async"] }
 base64 = "0.22"
 wasmtime-wasi = "41.0.3"
 rocksdb = { version = "0.21", features = ["multi-threaded-cf", "lz4"] }
-bincode = "1.3"
+bincode = { version = "2", features = ["serde"] }
+chrono = "0.4"
 tokio-stream = "0.1.18"
 lru = "0.12"
 parking_lot = "0.12"
-arrow-array = "52"
-arrow-ipc = "52"
-arrow-schema = "52"
+arrow = { version = "55", default-features = false }
+arrow-array = "55"
+arrow-ipc = "55"
+arrow-schema = { version = "55", features = ["serde"] }
+futures = "0.3"
+serde_json_path = "0.7"
+xxhash-rust = { version = "0.8", features = ["xxh3"] }
 proctitle = "0.1"
 unicase = "2.7"
 petgraph = "0.7"
diff --git a/protocol/Cargo.toml b/protocol/Cargo.toml
index fde9de52..5fa7d0f0 100644
--- a/protocol/Cargo.toml
+++ b/protocol/Cargo.toml
@@ -9,6 +9,7 @@ repository = "https://github.com/your-username/rust-function-stream"
 [dependencies]
 prost = "0.13"
 tonic = { version = "0.12", features = ["default"] }
+serde = { version = "1.0", features = ["derive"] }
 log = "0.4"
 
 [build-dependencies]
diff --git a/protocol/build.rs b/protocol/build.rs
index 17e77d30..e258f456 100644
--- a/protocol/build.rs
+++ b/protocol/build.rs
@@ -10,54 +10,56 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
-    // Initialize logger for build script
     env_logger::init();
 
-    // Create output directories in the protocol package directory
-    // Use CARGO_MANIFEST_DIR to get the package root directory
     let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")?;
     let out_dir = Path::new(&manifest_dir).join("generated");
-    let proto_file = Path::new(&manifest_dir).join("proto/function_stream.proto");
-
-    // Note: Cargo doesn't directly support cleaning custom directories via cargo clean.
-    // The generated directory will be automatically regenerated on each build if needed.
-    // To clean it manually, use: ./clean.sh or make clean or rm -rf protocol/generated
 
     log::info!("Generated code will be placed in: {}", out_dir.display());
-    log::info!("Proto file: {}", proto_file.display());
 
-    // Create output directories
     let cli_dir = out_dir.join("cli");
     let service_dir = out_dir.join("service");
 
     std::fs::create_dir_all(&cli_dir)?;
     std::fs::create_dir_all(&service_dir)?;
-    log::info!(
-        "Created output directories: {} and {}",
-        cli_dir.display(),
-        service_dir.display()
-    );
 
-    // Generate code for CLI - only client code needed
+    // 1. function_stream.proto → CLI (client) and Service (server)
     tonic_build::configure()
         .out_dir(&cli_dir)
-        .build_client(true) // Enable client code generation
-        .build_server(false) // Disable server code generation
+        .build_client(true)
+        .build_server(false)
         .compile_protos(&["proto/function_stream.proto"], &["proto"])?;
 
-    // Generate code for Service - only server code needed
     tonic_build::configure()
         .out_dir(&service_dir)
-        .build_client(false) // Disable client code generation
-        .build_server(true) // Enable server code generation
+        .build_client(false)
+        .build_server(true)
         .compile_protos(&["proto/function_stream.proto"], &["proto"])?;
 
+    // 2. fs_api.proto → with file descriptor set + serde for REST/JSON
+    let api_dir = out_dir.join("api");
+    std::fs::create_dir_all(&api_dir)?;
+
+    // OUT_DIR is set by Cargo for build scripts; propagate the error instead of panicking.
+    let descriptor_path = PathBuf::from(std::env::var("OUT_DIR")?).join("fs_api_descriptor.bin");
+
+    tonic_build::configure()
+        .out_dir(&api_dir)
+        .protoc_arg("--experimental_allow_proto3_optional")
+        .file_descriptor_set_path(&descriptor_path)
+        .type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]")
+        .type_attribute(".", "#[serde(rename_all = \"camelCase\")]")
+        .build_client(false)
+        .build_server(false)
+        .compile_protos(&["proto/fs_api.proto"], &["proto"])?;
+
     log::info!("Protocol Buffers code generated successfully");
     println!("cargo:rustc-env=PROTO_GEN_DIR={}", out_dir.display());
-    println!("cargo:rerun-if-changed={}", proto_file.display());
+    println!("cargo:rerun-if-changed=proto/function_stream.proto");
+    println!("cargo:rerun-if-changed=proto/fs_api.proto");
 
     Ok(())
 }
diff --git a/protocol/proto/fs_api.proto b/protocol/proto/fs_api.proto
new file mode 100644
index 00000000..24525583
--- /dev/null
+++ b/protocol/proto/fs_api.proto
@@ -0,0 +1,289 @@
+// Licensed under the Apache License, Version 2.0
+// Adapted from Arroyo's api.proto for FunctionStream
+
+syntax = "proto3";
+package fs_api;
+
+// ─────────────────────── Operators ───────────────────────
+
+message ConnectorOp {
+  string connector = 1;
+  string config = 2;
+  string description = 3;
+}
+
+message ProjectionOperator {
+  string name = 1;
+  FsSchema input_schema = 2;
+  FsSchema output_schema = 3;
+  repeated bytes exprs = 4;
+}
+
+message TumblingWindowAggregateOperator {
+  string name = 1;
+  uint64 width_micros = 2;
+  bytes binning_function = 3;
+  FsSchema input_schema = 4;
+  FsSchema partial_schema = 5;
+  bytes partial_aggregation_plan = 6;
+  bytes final_aggregation_plan = 7;
+  optional bytes final_projection = 8;
+}
+
+message SlidingWindowAggregateOperator {
+  string name = 1;
+  uint64 width_micros = 2;
+  uint64 slide_micros = 3;
+  bytes binning_function = 4;
+  FsSchema input_schema = 5;
+  FsSchema partial_schema = 6;
+  bytes partial_aggregation_plan = 7;
+  bytes final_aggregation_plan = 8;
+  bytes final_projection = 9;
+}
+
+message SessionWindowAggregateOperator {
+  string name = 1;
+  uint64 gap_micros = 2;
+  string window_field_name = 3;
+  uint64 window_index = 4;
+  FsSchema input_schema = 5;
+  FsSchema unkeyed_aggregate_schema = 6;
+  bytes partial_aggregation_plan = 7;
+  bytes final_aggregation_plan = 8;
+}
+
+message JoinOperator {
+  string name = 1;
+  FsSchema left_schema = 2;
+  FsSchema right_schema = 3;
+  FsSchema output_schema = 4;
+  bytes join_plan = 5;
+  optional uint64 ttl_micros = 6;
+}
+
+message LookupJoinCondition {
+  bytes left_expr = 1;
+  string right_key = 2;
+}
+
+message LookupJoinOperator {
+  FsSchema input_schema = 1;
+  FsSchema lookup_schema = 2;
+  ConnectorOp connector = 3;
+  repeated LookupJoinCondition key_exprs = 4;
+  JoinType join_type = 5;
+  optional uint64 ttl_micros = 6;
+  optional uint64 max_capacity_bytes = 7;
+}
+
+message WindowFunctionOperator {
+  string name = 1;
+  FsSchema input_schema = 2;
+  bytes binning_function = 3;
+  bytes window_function_plan = 4;
+}
+
+enum AsyncUdfOrdering {
+  UNORDERED = 0;
+  ORDERED = 1;
+}
+
+message AsyncUdfOperator {
+  string name = 1;
+  DylibUdfConfig udf = 2;
+  repeated bytes arg_exprs = 3;
+  repeated bytes final_exprs = 4;
+  AsyncUdfOrdering ordering = 5;
+  uint32 max_concurrency = 6;
+  uint64 timeout_micros = 7;
+}
+
+message UpdatingAggregateOperator {
+  string name = 1;
+  FsSchema input_schema = 2;
+  FsSchema final_schema = 3;
+  bytes aggregate_exec = 5;
+  bytes metadata_expr = 6;
+  uint64 flush_interval_micros = 7;
+  uint64 ttl_micros = 8;
+}
+
+// ─────────────────────── Watermark ───────────────────────
+
+message ExpressionWatermarkConfig {
+  uint64 period_micros = 1;
+  optional uint64 idle_time_micros = 2;
+  FsSchema input_schema = 3;
+  bytes expression = 4;
+}
+
+// ─────────────────────── Windows ───────────────────────
+
+message Window {
+  oneof window {
+    SlidingWindow sliding_window = 2;
+    TumblingWindow tumbling_window = 3;
+    InstantWindow instant_window = 4;
+    SessionWindow session_window = 5;
+  }
+}
+
+message SlidingWindow {
+  uint64 size_micros = 1;
+  uint64 slide_micros = 2;
+}
+
+message TumblingWindow {
+  uint64 size_micros = 1;
+}
+
+message InstantWindow {}
+
+message SessionWindow {
+  uint64 gap_micros = 1;
+}
+
+// ─────────────────────── Enums ───────────────────────
+
+enum JoinType {
+  INNER = 0;
+  LEFT = 1;
+  RIGHT = 2;
+  FULL = 3;
+}
+
+enum OffsetMode {
+  EARLIEST = 0;
+  LATEST = 1;
+}
+
+enum EdgeType {
+  UNUSED = 0;
+  FORWARD = 1;
+  SHUFFLE = 2;
+  LEFT_JOIN = 3;
+  RIGHT_JOIN = 4;
+}
+
+// ─────────────────── Physical Extension Nodes ───────────────────
+
+message MemExecNode {
+  string table_name = 1;
+  string schema = 2; // json-encoded
+}
+
+message UnnestExecNode {
+  string schema = 1; // json-encoded
+}
+
+message DebeziumDecodeNode {
+  string schema = 1; // json-encoded
+  repeated uint64 primary_keys = 2;
+}
+
+message DebeziumEncodeNode {
+  string schema = 1; // json-encoded
+}
+
+message FsExecNode {
+  oneof node {
+    MemExecNode mem_exec = 1;
+    UnnestExecNode unnest_exec = 2;
+    DebeziumDecodeNode debezium_decode = 3;
+    DebeziumEncodeNode debezium_encode = 4;
+  }
+}
+
+// ─────────────────── Checkpoints ───────────────────
+
+enum TaskCheckpointEventType {
+  ALIGNMENT_STARTED = 0;
+  CHECKPOINT_STARTED = 1;
+  CHECKPOINT_OPERATOR_SETUP_FINISHED = 2;
+  CHECKPOINT_SYNC_FINISHED = 3;
+  CHECKPOINT_PRE_COMMIT = 4;
+}
+
+message TaskCheckpointEvent {
+  uint64 time = 1;
+  TaskCheckpointEventType event_type = 2;
+}
+
+message TaskCheckpointDetail {
+  uint32 subtask_index = 1;
+  uint64 start_time = 2;
+  optional uint64 finish_time = 3;
+  optional uint64 bytes = 4;
+  repeated TaskCheckpointEvent events = 5;
+}
+
+message OperatorCheckpointDetail {
+  string operator_id = 1;
+  uint64 start_time = 2;
+  optional uint64 finish_time = 3;
+  bool has_state = 4;
+  optional uint64 started_metadata_write = 6;
+  map<uint32, TaskCheckpointDetail> tasks = 5;
+}
+
+// ─────────────────── UDF Config ───────────────────
+
+message DylibUdfConfig {
+  string dylib_path = 1;
+  repeated bytes arg_types = 2;
+  bytes return_type = 3;
+  bool aggregate = 4;
+  bool is_async = 5;
+}
+
+message PythonUdfConfig {
+  string name = 1;
+  repeated bytes arg_types = 2;
+  bytes return_type = 3;
+  string definition = 4;
+}
+
+message FsProgramConfig {
+  map<string, DylibUdfConfig> udf_dylibs = 1;
+  map<string, PythonUdfConfig> python_udfs = 2;
+}
+
+// ─────────────────── Arrow Program ───────────────────
+
+message FsProgram {
+  repeated FsNode nodes = 1;
+  repeated FsEdge edges = 2;
+  FsProgramConfig program_config = 3;
+}
+
+message FsSchema {
+  string arrow_schema = 1;  // json-encoded Arrow Schema
+  uint32 timestamp_index = 2;
+  repeated uint32 key_indices = 3;
+  bool has_keys = 4;
+  repeated uint32 routing_key_indices = 5;
+  bool has_routing_keys = 6;
+}
+
+message ChainedOperator {
+  string operator_id = 1;
+  string operator_name = 2;
+  bytes operator_config = 3;
+}
+
+message FsNode {
+  int32 node_index = 1;
+  uint32 node_id = 2;
+  uint32 parallelism = 3;
+  string description = 4;
+  repeated ChainedOperator operators = 5;
+  repeated FsSchema edges = 6;
+}
+
+message FsEdge {
+  int32 source = 1;
+  int32 target = 2;
+  FsSchema schema = 4;
+  EdgeType edge_type = 5;
+}
diff --git a/protocol/src/lib.rs b/protocol/src/lib.rs
index b0c6da06..f924a5c6 100644
--- a/protocol/src/lib.rs
+++ b/protocol/src/lib.rs
@@ -10,25 +10,30 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Protocol Buffers protocol definitions for function stream
-// This module exports the generated Protocol Buffers code
+// ─────────────── FunctionStream Service (original) ───────────────
 
-// CLI module - exports client code
 #[path = "../generated/cli/function_stream.rs"]
 pub mod cli;
 
-// Service module - exports server code
 #[path = "../generated/service/function_stream.rs"]
 pub mod service;
 
-// Re-export commonly used types from both modules
-// Data structures are the same in both, so we can re-export from either
 pub use cli::function_stream_service_client;
-
-// Re-export client-specific types
 pub use cli::function_stream_service_client::FunctionStreamServiceClient;
-
-// Re-export server-specific types
 pub use service::function_stream_service_server::{
     FunctionStreamService, FunctionStreamServiceServer,
 };
+
+// ─────────────── Streaming Pipeline API (fs_api.proto) ───────────────
+
+pub mod grpc {
+    /// Serde-annotated API types for streaming operators, schemas, programs.
+    #[allow(clippy::all)]
+    pub mod api {
+        include!("../generated/api/fs_api.rs");
+    }
+}
+
+/// File descriptor set for fs_api.proto (for gRPC reflection / REST gateway).
+pub const FS_API_FILE_DESCRIPTOR_SET: &[u8] =
+    tonic::include_file_descriptor_set!("fs_api_descriptor");
diff --git a/src/api/checkpoints.rs b/src/api/checkpoints.rs
new file mode 100644
index 00000000..8462f311
--- /dev/null
+++ b/src/api/checkpoints.rs
@@ -0,0 +1,96 @@
+use crate::types::to_micros;
+use serde::{Deserialize, Serialize};
+use std::time::SystemTime;
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct Checkpoint {
+    pub epoch: u32,
+    pub backend: String,
+    pub start_time: u64,
+    pub finish_time: Option<u64>,
+    pub events: Vec<CheckpointEventSpan>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct CheckpointEventSpan {
+    pub start_time: u64,
+    pub finish_time: u64,
+    pub event: String,
+    pub description: String,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct SubtaskCheckpointGroup {
+    pub index: u32,
+    pub bytes: u64,
+    pub event_spans: Vec<CheckpointEventSpan>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct OperatorCheckpointGroup {
+    pub operator_id: String,
+    pub bytes: u64,
+    pub started_metadata_write: Option<u64>,
+    pub finish_time: Option<u64>,
+    pub subtasks: Vec<SubtaskCheckpointGroup>,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
+pub enum JobCheckpointEventType {
+    Checkpointing,
+    CheckpointingOperators,
+    WritingMetadata,
+    Compacting,
+    Committing,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct JobCheckpointSpan {
+    pub event: JobCheckpointEventType,
+    pub start_time: u64,
+    pub finish_time: Option<u64>,
+}
+
+impl JobCheckpointSpan {
+    /// Opens a span for `event`, started at the current system time and unfinished.
+    pub fn now(event: JobCheckpointEventType) -> Self {
+        Self {
+            event,
+            start_time: to_micros(SystemTime::now()),
+            finish_time: None,
+        }
+    }
+
+    /// Stamps the finish time on the first call; later calls leave it unchanged.
+    pub fn finish(&mut self) {
+        self.finish_time.get_or_insert_with(|| to_micros(SystemTime::now()));
+    }
+}
+
+impl From<JobCheckpointSpan> for CheckpointEventSpan {
+    /// Maps the span to its API form; an unfinished span reports a `finish_time` of 0.
+    fn from(value: JobCheckpointSpan) -> Self {
+        let text: &str = match value.event {
+            JobCheckpointEventType::Checkpointing => "The entire checkpointing process",
+            JobCheckpointEventType::CheckpointingOperators => {
+                "The time spent checkpointing operator states"
+            }
+            JobCheckpointEventType::WritingMetadata => "Writing the final checkpoint metadata",
+            JobCheckpointEventType::Compacting => "Compacting old checkpoints",
+            JobCheckpointEventType::Committing => {
+                "Running two-phase commit for transactional connectors"
+            }
+        };
+
+        Self {
+            start_time: value.start_time,
+            finish_time: value.finish_time.unwrap_or(0),
+            event: format!("{:?}", value.event),
+            description: String::from(text),
+        }
+    }
+}
diff --git a/src/api/connections.rs b/src/api/connections.rs
new file mode 100644
index 00000000..eb69690e
--- /dev/null
+++ b/src/api/connections.rs
@@ -0,0 +1,604 @@
+use crate::types::formats::{BadData, Format, Framing};
+use crate::types::{FsExtensionType, FsSchema};
+use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit};
+use serde::ser::SerializeMap;
+use serde::{Deserialize, Serialize, Serializer};
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::fmt::{Display, Formatter};
+use std::sync::Arc;
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct Connector {
+    pub id: String,
+    pub name: String,
+    pub icon: String,
+    pub description: String,
+    pub table_config: String,
+    pub enabled: bool,
+    pub source: bool,
+    pub sink: bool,
+    pub custom_schemas: bool,
+    pub testing: bool,
+    pub hidden: bool,
+    pub connection_config: Option<String>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct ConnectionProfile {
+    pub id: String,
+    pub name: String,
+    pub connector: String,
+    pub config: serde_json::Value,
+    pub description: String,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct ConnectionProfilePost {
+    pub name: String,
+    pub connector: String,
+    pub config: serde_json::Value,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[serde(rename_all = "snake_case")]
+pub enum ConnectionType {
+    Source,
+    Sink,
+    Lookup,
+}
+
+impl Display for ConnectionType {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Source => "SOURCE",
+            Self::Sink => "SINK",
+            Self::Lookup => "LOOKUP",
+        })
+    }
+}
+
+impl TryFrom<String> for ConnectionType {
+    type Error = String;
+    /// Accepts `source`, `sink` or `lookup` in any letter case; rejects anything else.
+    fn try_from(value: String) -> Result<Self, Self::Error> {
+        Ok(match value.to_lowercase().as_str() {
+            "source" => Self::Source,
+            "sink" => Self::Sink,
+            "lookup" => Self::Lookup,
+            _ => return Err(format!("Invalid connection type: {value}")),
+        })
+    }
+}
+
+// ─────────────────── Field Types ───────────────────
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum FieldType {
+    Int32,
+    Int64,
+    Uint32,
+    Uint64,
+    #[serde(alias = "f32")]
+    Float32,
+    #[serde(alias = "f64")]
+    Float64,
+    Decimal128(DecimalField),
+    Bool,
+    #[serde(alias = "utf8")]
+    String,
+    #[serde(alias = "binary")]
+    Bytes,
+    Timestamp(TimestampField),
+    Json,
+    Struct(StructField),
+    List(ListField),
+}
+
+impl FieldType {
+    /// Renders the SQL type name for this field type.
+    ///
+    /// Lists render as `<item>[]` and structs as `STRUCT <name type, ...>`, recursively.
+    pub fn sql_type(&self) -> String {
+        match self {
+            Self::Int32 => String::from("INTEGER"),
+            Self::Int64 => String::from("BIGINT"),
+            Self::Uint32 => String::from("INTEGER UNSIGNED"),
+            Self::Uint64 => String::from("BIGINT UNSIGNED"),
+            Self::Float32 => String::from("FLOAT"),
+            Self::Float64 => String::from("DOUBLE"),
+            Self::Decimal128(dec) => format!("DECIMAL({}, {})", dec.precision, dec.scale),
+            Self::Bool => String::from("BOOLEAN"),
+            Self::String => String::from("TEXT"),
+            Self::Bytes => String::from("BINARY"),
+            Self::Timestamp(ts) => format!("TIMESTAMP({})", ts.unit.precision()),
+            Self::Json => String::from("JSON"),
+            Self::List(list) => format!("{}[]", list.items.field_type.sql_type()),
+            Self::Struct(record) => {
+                let mut columns = Vec::with_capacity(record.fields.len());
+                for member in &record.fields {
+                    columns.push(format!("{} {}", member.name, member.field_type.sql_type()));
+                }
+                format!("STRUCT <{}>", columns.join(", "))
+            }
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum TimestampUnit {
+    #[serde(alias = "s")]
+    Second,
+    #[default]
+    #[serde(alias = "ms")]
+    Millisecond,
+    #[serde(alias = "µs", alias = "us")]
+    Microsecond,
+    #[serde(alias = "ns")]
+    Nanosecond,
+}
+
+impl TimestampUnit {
+    pub fn precision(&self) -> u8 {
+        match self {
+            Self::Second => 0,
+            Self::Millisecond => 3,
+            Self::Microsecond => 6,
+            Self::Nanosecond => 9,
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub struct TimestampField {
+    #[serde(default)]
+    pub unit: TimestampUnit,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub struct DecimalField {
+    pub precision: u8,
+    pub scale: i8,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub struct StructField {
+    pub fields: Vec<SourceField>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub struct ListField {
+    pub items: Box<ListFieldItem>,
+}
+
+fn default_item_name() -> String {
+    String::from("item")
+}
+
+#[derive(Deserialize, Clone, Debug, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub struct ListFieldItem {
+    #[serde(default = "default_item_name")]
+    pub name: String,
+    #[serde(flatten)]
+    pub field_type: FieldType,
+    #[serde(default)]
+    pub required: bool,
+    #[serde(default)]
+    pub sql_name: Option<String>,
+}
+
+impl From<ListFieldItem> for Field {
+    /// Converts via an equivalent `SourceField`; the item's `sql_name` is not carried over.
+    fn from(value: ListFieldItem) -> Self {
+        Field::from(SourceField {
+            name: value.name,
+            field_type: value.field_type,
+            required: value.required,
+            sql_name: None,
+            metadata_key: None,
+        })
+    }
+}
+
+impl Serialize for ListFieldItem {
+    /// Emits a flat map: `name`, the flattened type entries, `required`, then a
+    /// `sql_name` derived from the field type (the stored `sql_name` is not written).
+    fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
+        let derived_sql_name = self.field_type.sql_type();
+        let mut map = s.serialize_map(None)?;
+        map.serialize_entry("name", &self.name)?;
+        serialize_field_type_flat(&self.field_type, &mut map)?;
+        map.serialize_entry("required", &self.required)?;
+        map.serialize_entry("sql_name", &derived_sql_name)?;
+        map.end()
+    }
+}
+
+impl TryFrom<Field> for ListFieldItem {
+    type Error = String;
+
+    /// Converts through `SourceField`, keeping only the name, type and `required` flag.
+    fn try_from(value: Field) -> Result<Self, Self::Error> {
+        SourceField::try_from(value).map(|converted| Self {
+            name: converted.name,
+            field_type: converted.field_type,
+            required: converted.required,
+            sql_name: None,
+        })
+    }
+}
+
+/// Writes `ft` into an already-open map as flat entries rather than a nested value.
+///
+/// The `type` entry is always written first; variants with a payload then add their
+/// own entries: `precision`/`scale`, `unit`, `fields` or `items`. The first entry that
+/// fails to serialize aborts the function with that error.
+fn serialize_field_type_flat<M: SerializeMap>(ft: &FieldType, map: &mut M) -> Result<(), M::Error> {
+    match ft {
+        // Payload-free variants only contribute their tag.
+        FieldType::Int32 => map.serialize_entry("type", "int32"),
+        FieldType::Int64 => map.serialize_entry("type", "int64"),
+        FieldType::Uint32 => map.serialize_entry("type", "uint32"),
+        FieldType::Uint64 => map.serialize_entry("type", "uint64"),
+        FieldType::Float32 => map.serialize_entry("type", "float32"),
+        FieldType::Float64 => map.serialize_entry("type", "float64"),
+        FieldType::Bool => map.serialize_entry("type", "bool"),
+        FieldType::String => map.serialize_entry("type", "string"),
+        FieldType::Bytes => map.serialize_entry("type", "bytes"),
+        FieldType::Json => map.serialize_entry("type", "json"),
+        FieldType::Decimal128(decimal) => {
+            map.serialize_entry("type", "decimal128")?;
+            map.serialize_entry("precision", &decimal.precision)?;
+            map.serialize_entry("scale", &decimal.scale)
+        }
+        FieldType::Timestamp(timestamp) => {
+            map.serialize_entry("type", "timestamp")?;
+            map.serialize_entry("unit", &timestamp.unit)
+        }
+        FieldType::Struct(record) => {
+            map.serialize_entry("type", "struct")?;
+            map.serialize_entry("fields", &record.fields)
+        }
+        FieldType::List(list) => {
+            map.serialize_entry("type", "list")?;
+            map.serialize_entry("items", &list.items)
+        }
+    }
+}
+
+// ─────────────────── Source Field ───────────────────
+
+#[derive(Deserialize, Clone, Debug, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub struct SourceField {
+    pub name: String,
+    #[serde(flatten)]
+    pub field_type: FieldType,
+    #[serde(default)]
+    pub required: bool,
+    #[serde(default)]
+    pub sql_name: Option<String>,
+    #[serde(default)]
+    pub metadata_key: Option<String>,
+}
+
+impl Serialize for SourceField {
+    /// Emits a flat map: `name`, the flattened type entries, `required`, the optional
+    /// `metadata_key`, then a `sql_name` derived from the field type.
+    fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
+        let mut map = s.serialize_map(None)?;
+        map.serialize_entry("name", &self.name)?;
+        serialize_field_type_flat(&self.field_type, &mut map)?;
+        map.serialize_entry("required", &self.required)?;
+        if let Some(key) = self.metadata_key.as_ref() {
+            map.serialize_entry("metadata_key", key)?;
+        }
+        let derived_sql_name = self.field_type.sql_type();
+        map.serialize_entry("sql_name", &derived_sql_name)?;
+        map.end()
+    }
+}
+
+impl From<SourceField> for Field {
+    /// Builds an Arrow `Field` named after the source field; it is nullable unless
+    /// `required` is set. `Json` maps to `Utf8` with the `FsExtensionType::JSON`
+    /// extension passed to `FsExtensionType::add_metadata`; every other type is
+    /// passed with no extension. Timestamps are created without a time zone.
+    fn from(f: SourceField) -> Self {
+        let mut extension = None;
+        let data_type = match f.field_type {
+            FieldType::Int32 => DataType::Int32,
+            FieldType::Int64 => DataType::Int64,
+            FieldType::Uint32 => DataType::UInt32,
+            FieldType::Uint64 => DataType::UInt64,
+            FieldType::Float32 => DataType::Float32,
+            FieldType::Float64 => DataType::Float64,
+            FieldType::Bool => DataType::Boolean,
+            FieldType::String => DataType::Utf8,
+            FieldType::Bytes => DataType::Binary,
+            FieldType::Decimal128(d) => DataType::Decimal128(d.precision, d.scale),
+            FieldType::Timestamp(ts) => {
+                let unit = match ts.unit {
+                    TimestampUnit::Second => TimeUnit::Second,
+                    TimestampUnit::Millisecond => TimeUnit::Millisecond,
+                    TimestampUnit::Microsecond => TimeUnit::Microsecond,
+                    TimestampUnit::Nanosecond => TimeUnit::Nanosecond,
+                };
+                DataType::Timestamp(unit, None)
+            }
+            FieldType::Json => {
+                extension = Some(FsExtensionType::JSON);
+                DataType::Utf8
+            }
+            FieldType::Struct(s) => {
+                let members: Vec<Field> = s.fields.into_iter().map(Field::from).collect();
+                DataType::Struct(Fields::from(members))
+            }
+            FieldType::List(list) => DataType::List(Arc::new(Field::from(*list.items))),
+        };
+
+        FsExtensionType::add_metadata(extension, Field::new(f.name, data_type, !f.required))
+    }
+}
+
+impl TryFrom<Field> for SourceField {
+    type Error = String;
+
+    /// Converts an Arrow `Field` back into a `SourceField`.
+    ///
+    /// Supported types are booleans, 32/64-bit signed and unsigned integers, 32/64-bit
+    /// floats, `Decimal128`, `Binary`/`LargeBinary`, timestamps of any unit, `Utf8`
+    /// (plain, or tagged `FsExtensionType::JSON`), structs and lists. Struct members
+    /// and list items are converted recursively.
+    ///
+    /// Returns `Err` with an `Unsupported data type` message for any other pairing of
+    /// data type and extension. `sql_name` and `metadata_key` are always `None` on the
+    /// result, and `required` is the inverse of the field's nullability.
+    fn try_from(f: Field) -> Result<Self, Self::Error> {
+        let field_type = match (f.data_type(), FsExtensionType::from_map(f.metadata())) {
+            (DataType::Boolean, None) => FieldType::Bool,
+            (DataType::Int32, None) => FieldType::Int32,
+            (DataType::Int64, None) => FieldType::Int64,
+            (DataType::UInt32, None) => FieldType::Uint32,
+            (DataType::UInt64, None) => FieldType::Uint64,
+            (DataType::Float32, None) => FieldType::Float32,
+            (DataType::Float64, None) => FieldType::Float64,
+            (DataType::Decimal128(precision, scale), None) => {
+                let (precision, scale) = (*precision, *scale);
+                FieldType::Decimal128(DecimalField { precision, scale })
+            }
+            (DataType::Binary, None) | (DataType::LargeBinary, None) => FieldType::Bytes,
+            (DataType::Timestamp(time_unit, _), None) => {
+                // Only the unit is kept; the time zone component is ignored.
+                let unit = match time_unit {
+                    TimeUnit::Second => TimestampUnit::Second,
+                    TimeUnit::Millisecond => TimestampUnit::Millisecond,
+                    TimeUnit::Microsecond => TimestampUnit::Microsecond,
+                    TimeUnit::Nanosecond => TimestampUnit::Nanosecond,
+                };
+                FieldType::Timestamp(TimestampField { unit })
+            }
+            (DataType::Utf8, None) => FieldType::String,
+            (DataType::Utf8, Some(FsExtensionType::JSON)) => FieldType::Json,
+            (DataType::Struct(children), None) => {
+                let mut members = Vec::with_capacity(children.len());
+                for child in children.iter() {
+                    members.push(SourceField::try_from(Field::clone(child))?);
+                }
+                FieldType::Struct(StructField { fields: members })
+            }
+            (DataType::List(item), None) => FieldType::List(ListField {
+                items: Box::new(ListFieldItem::try_from(Field::clone(item))?),
+            }),
+            dt => return Err(format!("Unsupported data type {dt:?}")),
+        };
+
+        Ok(SourceField {
+            name: f.name().clone(),
+            field_type,
+            required: !f.is_nullable(),
+            sql_name: None,
+            metadata_key: None,
+        })
+    }
+}
+
+// ─────────────────── Schema Definitions ───────────────────
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
+#[serde(rename_all = "snake_case", tag = "type")] // internally tagged: a `type` key holds the snake_case variant name
+pub enum SchemaDefinition {
+    JsonSchema {
+        schema: String, // schema text, held as a plain string
+    },
+    ProtobufSchema {
+        schema: String,
+        #[serde(default)] // an absent `dependencies` key deserializes to an empty map
+        dependencies: HashMap<String, String>,
+    },
+    AvroSchema {
+        schema: String,
+    },
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub struct ConnectionSchema { // format-vs-fields rules are checked by `validate`, not by deserialization
+    pub format: Option<Format>,
+    #[serde(default)]
+    pub bad_data: Option<BadData>,
+    #[serde(default)]
+    pub framing: Option<Framing>,
+    #[serde(default)] // an absent `fields` key yields an empty list
+    pub fields: Vec<SourceField>,
+    #[serde(default)]
+    pub definition: Option<SchemaDefinition>,
+    #[serde(default)]
+    pub inferred: Option<bool>,
+    #[serde(default)] // an absent `primary_keys` key yields an empty set
+    pub primary_keys: HashSet<String>,
+}
+
+impl ConnectionSchema {
+    /// Builds a schema from its parts and returns it only if [`Self::validate`] accepts it.
+    pub fn try_new(
+        format: Option<Format>,
+        bad_data: Option<BadData>,
+        framing: Option<Framing>,
+        fields: Vec<SourceField>,
+        definition: Option<SchemaDefinition>,
+        inferred: Option<bool>,
+        primary_keys: HashSet<String>,
+    ) -> anyhow::Result<Self> {
+        Self {
+            format,
+            bad_data,
+            framing,
+            fields,
+            definition,
+            inferred,
+            primary_keys,
+        }
+        .validate()
+    }
+
+    /// Checks the format-specific constraints on the non-metadata fields.
+    ///
+    /// A `raw_string` schema, and a `json` schema with `unstructured` set, must each expose
+    /// exactly one non-metadata field named `value` (of type `String` and `Json` respectively).
+    /// Returns `self` unchanged when the constraints hold.
+    pub fn validate(self) -> anyhow::Result<Self> {
+        let payload: Vec<_> = self
+            .fields
+            .iter()
+            .filter(|field| field.metadata_key.is_none())
+            .collect();
+        // True when the payload is exactly one field called `value` of the wanted type.
+        let lone_value_of = |wanted: &FieldType| {
+            matches!(payload.as_slice(), [only] if only.field_type == *wanted && only.name == "value")
+        };
+        match &self.format {
+            Some(Format::RawString(_)) if !lone_value_of(&FieldType::String) => {
+                anyhow::bail!(
+                    "raw_string format requires a schema with a single field called `value` of type TEXT"
+                );
+            }
+            Some(Format::Json(json)) if json.unstructured && !lone_value_of(&FieldType::Json) => {
+                anyhow::bail!(
+                    "json format with unstructured flag enabled requires a schema with a single field called `value` of type JSON"
+                );
+            }
+            _ => {}
+        }
+        Ok(self)
+    }
+
+    /// Converts a copy of the declared fields into a shared [`FsSchema`].
+    pub fn fs_schema(&self) -> Arc<FsSchema> {
+        let fields: Vec<Field> = self.fields.iter().cloned().map(Into::into).collect();
+        Arc::new(FsSchema::from_fields(fields))
+    }
+}
+
+impl From<ConnectionSchema> for FsSchema {
+    /// Consumes the connection schema, converting each declared field in order.
+    fn from(val: ConnectionSchema) -> Self {
+        FsSchema::from_fields(val.fields.into_iter().map(Into::into).collect::<Vec<Field>>())
+    }
+}
+
+// ─────────────────── Connection Table ───────────────────
+
+#[derive(Serialize, Clone, Debug)] // only `Serialize` is derived: this type is output-only
+#[serde(rename_all = "snake_case")]
+pub struct ConnectionTable {
+    #[serde(skip_serializing)] // the numeric id never appears in serialized output
+    pub id: i64,
+    #[serde(rename = "id")] // the serialized `id` key carries this string instead
+    pub pub_id: String,
+    pub name: String,
+    pub created_at: u64, // NOTE(review): time unit is not stated here — confirm
+    pub connector: String,
+    pub connection_profile: Option<ConnectionProfile>,
+    pub table_type: ConnectionType,
+    pub config: serde_json::Value, // arbitrary JSON; no schema is imposed by this type
+    pub schema: ConnectionSchema,
+    pub consumers: u32,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct ConnectionTablePost {
+    pub name: String,
+    pub connector: String,
+    pub connection_profile_id: Option<String>, // optional: absent or null deserializes to `None`
+    pub config: serde_json::Value, // arbitrary JSON; no schema is imposed by this type
+    pub schema: Option<ConnectionSchema>, // optional: absent or null deserializes to `None`
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct ConnectionAutocompleteResp {
+    pub values: BTreeMap<String, Vec<String>>, // `BTreeMap` iterates (and so serializes) in sorted key order
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct TestSourceMessage {
+    pub error: bool, // set to `true` by the `error` and `fail` constructors
+    pub done: bool, // set to `true` by the `done` and `fail` constructors
+    pub message: String,
+}
+
+impl TestSourceMessage {
+    /// Shared constructor behind the four public helpers below.
+    fn flagged(error: bool, done: bool, message: impl Into<String>) -> Self {
+        Self {
+            error,
+            done,
+            message: message.into(),
+        }
+    }
+
+    /// Message with neither `error` nor `done` set.
+    pub fn info(message: impl Into<String>) -> Self {
+        Self::flagged(false, false, message)
+    }
+
+    /// Message with `error` set and `done` clear.
+    pub fn error(message: impl Into<String>) -> Self {
+        Self::flagged(true, false, message)
+    }
+
+    /// Message with `done` set and `error` clear.
+    pub fn done(message: impl Into<String>) -> Self {
+        Self::flagged(false, true, message)
+    }
+
+    /// Message with both `error` and `done` set.
+    pub fn fail(message: impl Into<String>) -> Self {
+        Self::flagged(true, true, message)
+    }
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct ConfluentSchema {
+    pub schema: String, // schema text, held as a plain string
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct ConfluentSchemaQueryParams {
+    pub endpoint: String, // NOTE(review): presumably the schema-registry URL — confirm
+    pub topic: String, // required, like `endpoint`: neither field has a default
+}
diff --git a/src/api/metrics.rs b/src/api/metrics.rs
new file mode 100644
index 00000000..25d129e5
--- /dev/null
+++ b/src/api/metrics.rs
@@ -0,0 +1,41 @@
+use serde::{Deserialize, Serialize};
+
+#[derive(Serialize, Deserialize, Copy, Clone, Debug, Hash, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")] // wire names are e.g. `bytes_recv`, `tx_queue_size`, `tx_queue_rem`
+pub enum MetricName {
+    BytesRecv,
+    BytesSent,
+    MessagesRecv,
+    MessagesSent,
+    Backpressure,
+    TxQueueSize,
+    TxQueueRem,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct Metric {
+    pub time: u64, // NOTE(review): unit and epoch of this timestamp are not stated here — confirm
+    pub value: f64,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct SubtaskMetrics {
+    pub index: u32, // NOTE(review): presumably the subtask index within its operator — confirm
+    pub metrics: Vec<Metric>, // samples for this subtask; ordering is not enforced by the type
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct MetricGroup {
+    pub name: MetricName, // which metric every entry in `subtasks` reports
+    pub subtasks: Vec<SubtaskMetrics>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct OperatorMetricGroup {
+    pub node_id: u32, // same integer type as `PipelineNode::node_id`
+    pub metric_groups: Vec<MetricGroup>,
+}
diff --git a/src/api/mod.rs b/src/api/mod.rs
new file mode 100644
index 00000000..85cbcaaa
--- /dev/null
+++ b/src/api/mod.rs
@@ -0,0 +1,43 @@
+//! REST/RPC API types for the FunctionStream system.
+//!
+//! Adapted from Arroyo's `arroyo-rpc/src/api_types` and utility modules.
+
+pub mod checkpoints;
+pub mod connections;
+pub mod metrics;
+pub mod pipelines;
+pub mod public_ids;
+pub mod schema_resolver;
+pub mod udfs;
+pub mod var_str;
+
+use serde::{Deserialize, Serialize};
+
+pub use checkpoints::*;
+pub use connections::{
+    ConnectionProfile, ConnectionSchema, ConnectionType, Connector, FieldType, SchemaDefinition,
+    SourceField,
+};
+pub use metrics::*;
+pub use pipelines::*;
+pub use udfs::*;
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "camelCase")] // unlike the snake_case payload types: `has_more` goes on the wire as `hasMore`
+pub struct PaginatedCollection<T> {
+    pub data: Vec<T>,
+    pub has_more: bool,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "camelCase")] // no visible effect today: the only key, `data`, is a single word
+pub struct NonPaginatedCollection<T> {
+    pub data: Vec<T>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct PaginationQueryParams {
+    pub starting_after: Option<String>, // optional: absent deserializes to `None`
+    pub limit: Option<u32>, // optional; no upper bound is enforced by this type
+}
diff --git a/src/api/pipelines.rs b/src/api/pipelines.rs
new file mode 100644
index 00000000..3c77ce7a
--- /dev/null
+++ b/src/api/pipelines.rs
@@ -0,0 +1,156 @@
+use super::udfs::Udf;
+use crate::types::control::ErrorDomain;
+use serde::{Deserialize, Serialize};
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct ValidateQueryPost {
+    pub query: String,
+    pub udfs: Option<Vec<Udf>>, // optional: absent or null deserializes to `None`
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct QueryValidationResult {
+    pub graph: Option<PipelineGraph>, // optional: absent or null deserializes to `None`
+    pub errors: Vec<String>, // required on input: there is no `#[serde(default)]` on this field
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct PipelinePost {
+    pub name: String,
+    pub query: String,
+    pub udfs: Option<Vec<Udf>>,
+    pub parallelism: u64, // note: `PipelineNode::parallelism` is a `u32`
+    pub checkpoint_interval_micros: Option<u64>, // NOTE(review): microseconds, going by the name — confirm
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct PreviewPost {
+    pub query: String,
+    pub udfs: Option<Vec<Udf>>,
+    #[serde(default)] // an absent `enable_sinks` key deserializes to `false`
+    pub enable_sinks: bool,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct PipelinePatch { // every field is optional, so an empty object deserializes successfully
+    pub parallelism: Option<u64>,
+    pub checkpoint_interval_micros: Option<u64>, // NOTE(review): microseconds, going by the name — confirm
+    pub stop: Option<StopType>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct PipelineRestart { // both flags are optional; what `None` means is decided by the consumer
+    pub force: Option<bool>,
+    pub ignore_state: Option<bool>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct Pipeline {
+    pub id: String,
+    pub name: String,
+    pub query: String,
+    pub udfs: Vec<Udf>, // required here, whereas the request types carry `Option<Vec<Udf>>`
+    pub checkpoint_interval_micros: u64, // NOTE(review): microseconds, going by the name — confirm
+    pub stop: StopType,
+    pub created_at: u64, // NOTE(review): time unit is not stated here — confirm
+    pub action: Option<StopType>,
+    pub action_text: String,
+    pub action_in_progress: bool,
+    pub graph: PipelineGraph,
+    pub preview: bool,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct PipelineGraph {
+    pub nodes: Vec<PipelineNode>,
+    pub edges: Vec<PipelineEdge>, // NOTE(review): `src_id`/`dest_id` presumably refer to `nodes[..].node_id` — confirm
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct PipelineNode {
+    pub node_id: u32,
+    pub operator: String,
+    pub description: String,
+    pub parallelism: u32, // note: `PipelinePost::parallelism` is a `u64`
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct PipelineEdge {
+    pub src_id: u32, // same integer type as `PipelineNode::node_id`
+    pub dest_id: u32,
+    pub key_type: String, // type and edge kind are free-form strings at this layer
+    pub value_type: String,
+    pub edge_type: String,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")] // wire names: `none`, `checkpoint`, `graceful`, `immediate`, `force`
+pub enum StopType {
+    None, // a variant of this enum, distinct from `Option::None`
+    Checkpoint,
+    Graceful,
+    Immediate,
+    Force,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct FailureReason {
+    pub error: String,
+    pub domain: ErrorDomain, // defined in `crate::types::control`
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct Job {
+    pub id: String,
+    pub running_desired: bool,
+    pub state: String, // free-form string; the set of states is not constrained by this type
+    pub run_id: u64,
+    pub start_time: Option<u64>, // NOTE(review): time unit is not stated here — confirm
+    pub finish_time: Option<u64>,
+    pub tasks: Option<u64>,
+    pub failure_reason: Option<FailureReason>,
+    pub created_at: u64,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")] // wire names: `info`, `warn`, `error`
+pub enum JobLogLevel {
+    Info,
+    Warn,
+    Error,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct JobLogMessage {
+    pub id: String,
+    pub created_at: u64, // NOTE(review): time unit is not stated here — confirm
+    pub operator_id: Option<String>, // optional, as are `task_index` and `error_domain`
+    pub task_index: Option<u64>,
+    pub level: JobLogLevel,
+    pub message: String,
+    pub details: String,
+    pub error_domain: Option<ErrorDomain>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct OutputData {
+    pub operator_id: String,
+    pub subtask_idx: u32,
+    pub timestamps: Vec<u64>, // NOTE(review): presumably one entry per row of `batch` — confirm
+    pub start_id: u64,
+    pub batch: String, // NOTE(review): the encoding of the batch inside this string is not visible here — confirm
+}
diff --git a/src/api/public_ids.rs b/src/api/public_ids.rs
new file mode 100644
index 00000000..15a9f72e
--- /dev/null
+++ b/src/api/public_ids.rs
@@ -0,0 +1,57 @@
+use std::time::{SystemTime, UNIX_EPOCH};
+
+const ID_LENGTH: usize = 10;
+
+const ALPHABET: &[u8; 62] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+pub enum IdTypes {
+    ApiKey,
+    ConnectionProfile,
+    Schema,
+    Pipeline,
+    JobConfig,
+    Checkpoint,
+    JobStatus,
+    ClusterInfo,
+    JobLogMessage,
+    ConnectionTable,
+    ConnectionTablePipeline,
+    Udf,
+}
+
+/// Generates an identifier of the form `<prefix>_<suffix>` with a 10-character alphanumeric suffix.
+///
+/// The suffix is pseudo-random (SplitMix64 output), not cryptographic, and uniqueness is only
+/// probabilistic. The seed mixes the wall clock with a per-process call counter, so two calls
+/// that read the same clock value still start from different seeds.
+pub fn generate_id(id_type: IdTypes) -> String {
+    use std::sync::atomic::{AtomicU64, Ordering};
+    static CALLS: AtomicU64 = AtomicU64::new(0);
+    let prefix = match id_type {
+        IdTypes::ApiKey => "ak",
+        IdTypes::ConnectionProfile => "cp",
+        IdTypes::Schema => "sch",
+        IdTypes::Pipeline => "pl",
+        IdTypes::JobConfig => "job",
+        IdTypes::Checkpoint => "chk",
+        IdTypes::JobStatus => "js",
+        IdTypes::ClusterInfo => "ci",
+        IdTypes::JobLogMessage => "jlm",
+        IdTypes::ConnectionTable => "ct",
+        IdTypes::ConnectionTablePipeline => "ctp",
+        IdTypes::Udf => "udf",
+    };
+    // A clock before the epoch reads as 0; only the low 64 bits of the reading are kept.
+    let nanos = SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_nanos();
+    let call = CALLS.fetch_add(1, Ordering::Relaxed);
+    let mut state = (nanos as u64) ^ call.wrapping_mul(0xBF58_476D_1CE4_E5B9);
+
+    let mut id = format!("{prefix}_");
+    for _ in 0..ID_LENGTH {
+        state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
+        let mut z = (state ^ (state >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
+        z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
+        z ^= z >> 31;
+        id.push(ALPHABET[(z % ALPHABET.len() as u64) as usize] as char);
+    }
+    id
+}
diff --git a/src/api/schema_resolver.rs b/src/api/schema_resolver.rs
new file mode 100644
index 00000000..a9124900
--- /dev/null
+++ b/src/api/schema_resolver.rs
@@ -0,0 +1,82 @@
+use async_trait::async_trait;
+
+/// Trait for resolving schemas by ID (e.g., from a schema registry).
+#[async_trait]
+pub trait SchemaResolver: Send {
+    async fn resolve_schema(&self, id: u32) -> Result<Option<String>, String>; // `Err` carries a message string; NOTE(review): `Ok(None)` presumably means "no schema for this id" — confirm
+}
+
+/// A resolver that always fails — used when no schema registry is configured.
+///
+/// The type carries no state, so `Default` is derived rather than written by
+/// hand (a manual impl here is what clippy's `derivable_impls` lint reports).
+/// Its [`SchemaResolver`] implementation returns `Err` for every schema id
+/// it is asked about.
+#[derive(Default)]
+pub struct FailingSchemaResolver;
+
+#[async_trait]
+impl SchemaResolver for FailingSchemaResolver {
+    /// Never succeeds: every lookup is reported as an error naming the requested id.
+    async fn resolve_schema(&self, id: u32) -> Result<Option<String>, String> {
+        let reason = format!("Schema with id {id} not available, and no schema registry configured");
+        Err(reason)
+    }
+}
+
+/// A resolver that knows exactly one `(id, schema)` pair, supplied at construction.
+pub struct FixedSchemaResolver {
+    id: u32, // the only id this resolver answers for
+    schema: String, // schema text returned for that id
+}
+
+impl FixedSchemaResolver {
+    pub fn new(id: u32, schema: String) -> Self {
+        Self { id, schema }
+    }
+}
+
+#[async_trait]
+impl SchemaResolver for FixedSchemaResolver {
+    /// Returns a copy of the stored schema for the configured id; any other id is an error.
+    async fn resolve_schema(&self, id: u32) -> Result<Option<String>, String> {
+        if id != self.id {
+            return Err(format!("Unexpected schema id {}, expected {}", id, self.id));
+        }
+        Ok(Some(self.schema.clone()))
+    }
+}
+
+/// A caching wrapper around any `SchemaResolver`.
+pub struct CachingSchemaResolver<R: SchemaResolver> {
+    inner: R,
+    cache: tokio::sync::RwLock<std::collections::HashMap<u32, String>>,
+}
+
+impl<R: SchemaResolver> CachingSchemaResolver<R> {
+    /// Wraps `inner` with an initially empty cache; entries are added by
+    /// `resolve_schema` and never removed by this type.
+    pub fn new(inner: R) -> Self {
+        let cache = tokio::sync::RwLock::new(std::collections::HashMap::new());
+        Self { inner, cache }
+    }
+}
+
+#[async_trait]
+impl<R: SchemaResolver + Sync> SchemaResolver for CachingSchemaResolver<R> {
+    /// Serves `id` from the cache when present; otherwise asks the inner resolver and
+    /// remembers a `Some` answer. Errors and `None` results are passed through uncached.
+    async fn resolve_schema(&self, id: u32) -> Result<Option<String>, String> {
+        // The read guard is a temporary of this statement, so it is released before the inner call.
+        let cached = self.cache.read().await.get(&id).cloned();
+        if cached.is_some() {
+            return Ok(cached);
+        }
+
+        let resolved = self.inner.resolve_schema(id).await?;
+        if let Some(schema) = &resolved {
+            self.cache.write().await.insert(id, schema.clone());
+        }
+        Ok(resolved)
+    }
+}
diff --git a/src/api/udfs.rs b/src/api/udfs.rs
new file mode 100644
index 00000000..41085168
--- /dev/null
+++ b/src/api/udfs.rs
@@ -0,0 +1,56 @@
+use serde::{Deserialize, Serialize};
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct Udf {
+    pub definition: String,
+    #[serde(default)] // an absent `language` key falls back to `UdfLanguage::default()`, i.e. `Rust`
+    pub language: UdfLanguage,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct ValidateUdfPost { // same two fields as `Udf`
+    pub definition: String,
+    #[serde(default)] // an absent `language` key falls back to `UdfLanguage::default()`, i.e. `Rust`
+    pub language: UdfLanguage,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct UdfValidationResult {
+    pub udf_name: Option<String>, // optional: absent or null deserializes to `None`
+    pub errors: Vec<String>, // required on input: there is no `#[serde(default)]` on this field
+}
+
+#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default, Eq, PartialEq)]
+#[serde(rename_all = "snake_case")] // wire names: `python`, `rust`
+pub enum UdfLanguage {
+    Python,
+    #[default] // `UdfLanguage::default()` is `Rust`, even though `Python` is declared first
+    Rust,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct UdfPost {
+    pub prefix: String,
+    #[serde(default)] // an absent `language` key falls back to `UdfLanguage::default()`, i.e. `Rust`
+    pub language: UdfLanguage,
+    pub definition: String,
+    pub description: Option<String>, // optional: absent or null deserializes to `None`
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+#[serde(rename_all = "snake_case")]
+pub struct GlobalUdf {
+    pub id: String,
+    pub prefix: String,
+    pub name: String,
+    pub language: UdfLanguage, // required here: unlike `Udf`/`UdfPost`, no `#[serde(default)]`
+    pub created_at: u64, // NOTE(review): time unit is not stated here — confirm
+    pub updated_at: u64,
+    pub definition: String,
+    pub description: Option<String>,
+    pub dylib_url: Option<String>,
+}
diff --git a/src/api/var_str.rs b/src/api/var_str.rs
new file mode 100644
index 00000000..c4256e38
--- /dev/null
+++ b/src/api/var_str.rs
@@ -0,0 +1,79 @@
+use serde::{Deserialize, Serialize};
+use std::env;
+
+/// A string that may contain `{{ VAR }}` placeholders for environment variable substitution.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(transparent)] // (de)serialized as the bare string, not as an object with a `raw_val` key
+pub struct VarStr {
+    raw_val: String, // unsubstituted text, exactly as supplied
+}
+
+impl VarStr {
+    /// Wraps the unprocessed string; nothing is substituted until `sub_env_vars` runs.
+    pub fn new(raw_val: String) -> Self {
+        VarStr { raw_val }
+    }
+
+    pub fn raw(&self) -> &str {
+        &self.raw_val
+    }
+
+    /// Substitute `{{ VAR_NAME }}` patterns with the corresponding environment variable values.
+    ///
+    /// Substituted values are not rescanned, blank placeholders and an unterminated `{{`
+    /// are left as they are, and an unset variable is an error.
+    pub fn sub_env_vars(&self) -> anyhow::Result<String> {
+        let mut result = self.raw_val.clone();
+        let mut start = 0;
+
+        while let Some(open) = result[start..].find("{{") {
+            let open_abs = start + open;
+            let Some(close) = result[open_abs..].find("}}") else {
+                break;
+            };
+            let end_abs = open_abs + close + 2;
+
+            let var_name = result[open_abs + 2..end_abs - 2].trim();
+            if var_name.is_empty() {
+                start = end_abs;
+                continue;
+            }
+
+            let Ok(value) = env::var(var_name) else {
+                anyhow::bail!("Environment variable {} not found", var_name);
+            };
+            // Splice by byte range: a textual search-and-replace could instead hit an identical
+            // placeholder that an earlier substitution copied in verbatim.
+            result.replace_range(open_abs..end_abs, &value);
+            start = open_abs + value.len();
+        }
+
+        Ok(result)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_no_placeholders() {
+        let input = "This is a test string with no placeholders"; // contains no `{{`, so nothing is substituted
+        assert_eq!(
+            VarStr::new(input.to_string()).sub_env_vars().unwrap(),
+            input
+        );
+    }
+
+    #[test]
+    fn test_with_placeholders() {
+        unsafe { env::set_var("FS_TEST_VAR", "environment variable") }; // NOTE(review): mutates process-wide state; assumes no concurrently running test touches the environment — confirm
+        let input = "This is a {{ FS_TEST_VAR }}";
+        let expected = "This is a environment variable"; // the whole `{{ ... }}` span, braces included, is replaced
+        assert_eq!(
+            VarStr::new(input.to_string()).sub_env_vars().unwrap(),
+            expected
+        );
+        unsafe { env::remove_var("FS_TEST_VAR") }; // not reached if the assertion above panics
+    }
+}
diff --git a/src/datastream/logical.rs b/src/datastream/logical.rs
index 60101bdd..a6486760 100644
--- a/src/datastream/logical.rs
+++ b/src/datastream/logical.rs
@@ -1,7 +1,7 @@
 use itertools::Itertools;
 
 use crate::datastream::optimizers::Optimizer;
-use crate::sql::planner::types::StreamSchema;
+use crate::sql::types::StreamSchema;
 use datafusion::arrow::datatypes::DataType;
 use petgraph::Direction;
 use petgraph::dot::Dot;
diff --git a/src/lib.rs b/src/lib.rs
index e8596864..a41536c5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,6 +14,7 @@
 
 #![allow(dead_code)]
 
+pub mod api;
 pub mod config;
 pub mod coordinator;
 pub mod datastream;
@@ -22,3 +23,4 @@ pub mod runtime;
 pub mod server;
 pub mod sql;
 pub mod storage;
+pub mod types;
diff --git a/src/main.rs b/src/main.rs
index 562b1526..29935d62 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -12,13 +12,16 @@
 
 #![allow(dead_code)]
 
+mod api;
 mod config;
 mod coordinator;
+mod datastream;
 mod logging;
 mod runtime;
 mod server;
 mod sql;
 mod storage;
+mod types;
 
 use anyhow::{Context, Result};
 use std::thread;
diff --git a/src/sql/catalog/connector.rs b/src/sql/catalog/connector.rs
new file mode 100644
index 00000000..01176d47
--- /dev/null
+++ b/src/sql/catalog/connector.rs
@@ -0,0 +1,59 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt;
+
+/// Describes the role of a connection in the streaming pipeline.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum ConnectionType {
+    Source,
+    Sink,
+    Lookup,
+}
+
+impl fmt::Display for ConnectionType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self { // lowercase role name, written verbatim
+            Self::Source => "source",
+            Self::Sink => "sink",
+            Self::Lookup => "lookup",
+        })
+    }
+}
+
+/// A connector operation that describes how to interact with an external system.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct ConnectorOp {
+    pub connector: String,
+    pub config: String,
+    pub description: String,
+}
+
+impl ConnectorOp {
+    /// Builds an op whose `description` starts out as a copy of the connector name.
+    pub fn new(connector: impl Into<String>, config: impl Into<String>) -> Self {
+        let connector: String = connector.into();
+        Self {
+            description: connector.clone(),
+            connector,
+            config: config.into(),
+        }
+    }
+}
+
+/// Configuration for a connection profile (e.g., Kafka broker, Pulsar endpoint).
+#[derive(Debug, Clone, PartialEq, Eq)] // no `Hash` derive: the `HashMap` field does not implement it
+pub struct ConnectionProfile {
+    pub name: String,
+    pub connector: String,
+    pub config: std::collections::HashMap<String, String>, // string-to-string settings; keys are not validated by this type
+}
diff --git a/src/sql/catalog/connector_table.rs b/src/sql/catalog/connector_table.rs
new file mode 100644
index 00000000..8dae1745
--- /dev/null
+++ b/src/sql/catalog/connector_table.rs
@@ -0,0 +1,199 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use datafusion::arrow::datatypes::{FieldRef, Schema};
+use datafusion::common::{Result, plan_err};
+use datafusion::logical_expr::Expr;
+
+use super::connector::{ConnectionType, ConnectorOp};
+use super::field_spec::FieldSpec;
+use crate::multifield_partial_ord;
+use crate::sql::types::ProcessingMode;
+
+/// Represents a table backed by an external connector (e.g., Kafka, Pulsar, NATS).
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct ConnectorTable {
+    pub id: Option<i64>, // `None` when built through `new`
+    pub connector: String,
+    pub name: String,
+    pub connection_type: ConnectionType, // only `Source` tables are accepted by `as_sql_source`
+    pub fields: Vec<FieldSpec>, // declaration order is kept by `physical_schema`
+    pub config: String, // copied unchanged into `ConnectorOp` by `connector_op`
+    pub description: String,
+    pub event_time_field: Option<String>, // must name a timestamp field (checked in `timestamp_override`)
+    pub watermark_field: Option<String>, // must name a timestamp field (checked in `watermark_column`)
+    pub idle_time: Option<Duration>,
+    pub primary_keys: Arc<Vec<String>>,
+    pub inferred_fields: Option<Vec<FieldRef>>,
+    pub partition_exprs: Arc<Option<Vec<Expr>>>,
+}
+
+multifield_partial_ord!( // NOTE(review): macro is defined elsewhere; presumably builds `PartialOrd` from the listed fields — confirm
+    ConnectorTable,
+    id,
+    connector,
+    name,
+    connection_type,
+    config,
+    description,
+    event_time_field,
+    watermark_field,
+    idle_time,
+    primary_keys // `fields`, `inferred_fields` and `partition_exprs` are not part of this list
+);
+
+impl ConnectorTable {
+    /// Creates a table with the given identity and every optional setting left empty.
+    pub fn new(
+        name: impl Into<String>,
+        connector: impl Into<String>,
+        connection_type: ConnectionType,
+    ) -> Self {
+        Self {
+            id: None,
+            connector: connector.into(),
+            name: name.into(),
+            connection_type,
+            fields: Vec::new(),
+            config: String::new(),
+            description: String::new(),
+            event_time_field: None,
+            watermark_field: None,
+            idle_time: None,
+            primary_keys: Arc::new(Vec::new()),
+            inferred_fields: None,
+            partition_exprs: Arc::new(None),
+        }
+    }
+
+    /// Returns `true` if at least one field is virtual.
+    pub fn has_virtual_fields(&self) -> bool {
+        self.fields.iter().any(|f| f.is_virtual())
+    }
+
+    /// Whether the table produces updates; currently hard-wired to `false`.
+    pub fn is_updating(&self) -> bool {
+        // TODO: check format for debezium/update mode
+        false
+    }
+
+    /// Arrow schema built from the non-virtual fields, in declaration order.
+    pub fn physical_schema(&self) -> Schema {
+        let physical = self.fields.iter().filter(|f| !f.is_virtual());
+        Schema::new(physical.map(|f| f.field().clone()).collect::<Vec<_>>())
+    }
+
+    /// Copies the connector name, config and description into a [`ConnectorOp`].
+    pub fn connector_op(&self) -> ConnectorOp {
+        ConnectorOp {
+            connector: self.connector.clone(),
+            config: self.config.clone(),
+            description: self.description.clone(),
+        }
+    }
+
+    /// `Update` when [`Self::is_updating`] holds, `Append` otherwise.
+    pub fn processing_mode(&self) -> ProcessingMode {
+        if self.is_updating() {
+            ProcessingMode::Update
+        } else {
+            ProcessingMode::Append
+        }
+    }
+
+    /// Column expression for `event_time_field`, or `None` when it is unset.
+    /// Errors in update mode, or if the field is missing or not a timestamp.
+    pub fn timestamp_override(&self) -> Result<Option<Expr>> {
+        let Some(field_name) = &self.event_time_field else {
+            return Ok(None);
+        };
+        if self.is_updating() {
+            return plan_err!("can't use event_time_field with update mode");
+        }
+        self.time_column(field_name).map(Some)
+    }
+
+    /// Checks `field_name` with [`Self::get_time_field`], then returns a column reference to it.
+    fn time_column(&self, field_name: &str) -> Result<Expr> {
+        self.get_time_field(field_name)?;
+        Ok(Expr::Column(datafusion::common::Column::from_name(field_name)))
+    }
+
+    /// Finds the field named `field_name`, requiring a timestamp data type.
+    fn get_time_field(&self, field_name: &str) -> Result<&FieldSpec> {
+        use datafusion::arrow::datatypes::DataType;
+        let is_wanted = |f: &&FieldSpec| {
+            let field = f.field();
+            field.name() == field_name && matches!(field.data_type(), DataType::Timestamp(..))
+        };
+        self.fields.iter().find(is_wanted).ok_or_else(|| {
+            datafusion::error::DataFusionError::Plan(format!(
+                "field {field_name} not found or not a timestamp"
+            ))
+        })
+    }
+
+    /// Column expression for `watermark_field`, or `None` when it is unset.
+    /// Errors if the field is missing or not a timestamp.
+    pub fn watermark_column(&self) -> Result<Option<Expr>> {
+        self.watermark_field.as_deref().map(|name| self.time_column(name)).transpose()
+    }
+
+    /// Resolves this table into a [`SourceOperator`].
+    /// Errors for sink and lookup tables, for update-mode sources with virtual fields,
+    /// and whenever the event-time or watermark field fails validation.
+    pub fn as_sql_source(&self) -> Result<SourceOperator> {
+        match self.connection_type {
+            ConnectionType::Source => {}
+            // Report the actual role: lookup tables used to be rejected as a "sink".
+            role @ (ConnectionType::Sink | ConnectionType::Lookup) => {
+                return plan_err!("cannot read from {}", role);
+            }
+        }
+
+        if self.is_updating() && self.has_virtual_fields() {
+            return plan_err!("can't read from a source with virtual fields and update mode");
+        }
+
+        let timestamp_override = self.timestamp_override()?;
+        let watermark_column = self.watermark_column()?;
+        Ok(SourceOperator {
+            name: self.name.clone(),
+            connector_op: self.connector_op(),
+            processing_mode: self.processing_mode(),
+            idle_time: self.idle_time,
+            struct_fields: self
+                .fields
+                .iter()
+                .filter(|f| !f.is_virtual())
+                .map(|f| Arc::new(f.field().clone()))
+                .collect(),
+            timestamp_override,
+            watermark_column,
+        })
+    }
+}
+
+/// A fully resolved source operator ready for execution graph construction.
+#[derive(Debug, Clone)]
+pub struct SourceOperator {
+    pub name: String,
+    pub connector_op: ConnectorOp,
+    pub processing_mode: ProcessingMode,
+    pub idle_time: Option<Duration>,
+    pub struct_fields: Vec<FieldRef>, // non-virtual fields only, as filled in by `ConnectorTable::as_sql_source`
+    pub timestamp_override: Option<Expr>, // column reference to the event-time field, when one is configured
+    pub watermark_column: Option<Expr>, // column reference to the watermark field, when one is configured
+}
diff --git a/src/sql/catalog/field_spec.rs b/src/sql/catalog/field_spec.rs
new file mode 100644
index 00000000..2fe8a50e
--- /dev/null
+++ b/src/sql/catalog/field_spec.rs
@@ -0,0 +1,52 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::arrow::datatypes::Field;
+use datafusion::logical_expr::Expr;
+
+/// Describes how a field in a connector table should be interpreted.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum FieldSpec {
+    /// A regular struct field that maps to a column in the data.
+    Struct(Field),
+    /// A metadata field extracted from message metadata (e.g., Kafka headers).
+    Metadata { field: Field, key: String }, // `key` is what `metadata_key` returns
+    /// A virtual field computed from an expression over other fields.
+    Virtual { field: Field, expression: Box<Expr> }, // the only variant for which `is_virtual` is true
+}
+
+impl FieldSpec {
+    pub fn is_virtual(&self) -> bool { // true only for the `Virtual` variant
+        matches!(self, Self::Virtual { .. })
+    }
+
+    pub fn field(&self) -> &Field { // every variant carries exactly one Arrow field
+        match self {
+            Self::Struct(field) => field,
+            Self::Metadata { field, .. } | Self::Virtual { field, .. } => field,
+        }
+    }
+
+    /// Metadata key, present only on the `Metadata` variant.
+    pub fn metadata_key(&self) -> Option<&str> {
+        if let Self::Metadata { key, .. } = self {
+            return Some(key.as_str());
+        }
+        None
+    }
+}
+
+impl From<Field> for FieldSpec {
+    fn from(value: Field) -> Self { // a bare Arrow field becomes a regular `Struct` field
+        Self::Struct(value)
+    }
+}
diff --git a/src/sql/catalog/insert.rs b/src/sql/catalog/insert.rs
new file mode 100644
index 00000000..a4a3814a
--- /dev/null
+++ b/src/sql/catalog/insert.rs
@@ -0,0 +1,55 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::common::Result;
+use datafusion::logical_expr::{DmlStatement, LogicalPlan, WriteOp};
+use datafusion::sql::sqlparser::ast::Statement;
+
+use super::optimizer::produce_optimized_plan;
+use crate::sql::planner::StreamSchemaProvider;
+
+/// The write side of a streaming SQL statement: an `INSERT INTO` sink or a bare query.
+#[derive(Debug)]
+pub enum Insert {
+    /// Insert into a named sink table; `logical_plan` is the query feeding the sink.
+    InsertQuery {
+        sink_name: String,
+        logical_plan: LogicalPlan,
+    },
+    /// A query with no explicit INSERT target; `logical_plan` is the whole optimized plan.
+    Anonymous { logical_plan: LogicalPlan },
+}
+
+impl Insert {
+    /// Plans `statement` and classifies the result: an `INSERT INTO` plan becomes
+    /// `InsertQuery` (sink name plus the plan feeding it); anything else is `Anonymous`.
+    pub fn try_from_statement(
+        statement: &Statement,
+        schema_provider: &StreamSchemaProvider,
+    ) -> Result<Insert> {
+        let plan = produce_optimized_plan(statement, schema_provider)?;
+
+        if let LogicalPlan::Dml(DmlStatement {
+            table_name,
+            op: WriteOp::Insert(_),
+            input,
+            ..
+        }) = &plan
+        {
+            return Ok(Insert::InsertQuery {
+                sink_name: table_name.to_string(),
+                logical_plan: LogicalPlan::clone(input),
+            });
+        }
+        Ok(Insert::Anonymous { logical_plan: plan })
+    }
+}
diff --git a/src/sql/catalog/mod.rs b/src/sql/catalog/mod.rs
new file mode 100644
index 00000000..39c7bfcd
--- /dev/null
+++ b/src/sql/catalog/mod.rs
@@ -0,0 +1,25 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod connector;
+pub mod connector_table;
+pub mod field_spec;
+pub mod insert;
+pub mod optimizer;
+pub mod table;
+pub mod utils;
+
+pub use connector::{ConnectionType, ConnectorOp};
+pub use connector_table::{ConnectorTable, SourceOperator};
+pub use field_spec::FieldSpec;
+pub use insert::Insert;
+pub use table::Table;
diff --git a/src/sql/catalog/optimizer.rs b/src/sql/catalog/optimizer.rs
new file mode 100644
index 00000000..15abe61e
--- /dev/null
+++ b/src/sql/catalog/optimizer.rs
@@ -0,0 +1,95 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use datafusion::common::Result;
+use datafusion::common::config::ConfigOptions;
+use datafusion::logical_expr::LogicalPlan;
+use datafusion::optimizer::OptimizerContext;
+use datafusion::optimizer::OptimizerRule;
+use datafusion::optimizer::common_subexpr_eliminate::CommonSubexprEliminate;
+use datafusion::optimizer::decorrelate_lateral_join::DecorrelateLateralJoin;
+use datafusion::optimizer::decorrelate_predicate_subquery::DecorrelatePredicateSubquery;
+use datafusion::optimizer::eliminate_cross_join::EliminateCrossJoin;
+use datafusion::optimizer::eliminate_duplicated_expr::EliminateDuplicatedExpr;
+use datafusion::optimizer::eliminate_filter::EliminateFilter;
+use datafusion::optimizer::eliminate_group_by_constant::EliminateGroupByConstant;
+use datafusion::optimizer::eliminate_join::EliminateJoin;
+use datafusion::optimizer::eliminate_limit::EliminateLimit;
+use datafusion::optimizer::eliminate_nested_union::EliminateNestedUnion;
+use datafusion::optimizer::eliminate_one_union::EliminateOneUnion;
+use datafusion::optimizer::eliminate_outer_join::EliminateOuterJoin;
+use datafusion::optimizer::extract_equijoin_predicate::ExtractEquijoinPredicate;
+use datafusion::optimizer::filter_null_join_keys::FilterNullJoinKeys;
+use datafusion::optimizer::optimizer::Optimizer;
+use datafusion::optimizer::propagate_empty_relation::PropagateEmptyRelation;
+use datafusion::optimizer::push_down_filter::PushDownFilter;
+use datafusion::optimizer::push_down_limit::PushDownLimit;
+use datafusion::optimizer::replace_distinct_aggregate::ReplaceDistinctWithAggregate;
+use datafusion::optimizer::scalar_subquery_to_join::ScalarSubqueryToJoin;
+use datafusion::optimizer::simplify_expressions::SimplifyExpressions;
+use datafusion::sql::planner::SqlToRel;
+use datafusion::sql::sqlparser::ast::Statement;
+
+use crate::sql::planner::StreamSchemaProvider;
+
+/// Converts a SQL statement into an optimized DataFusion logical plan.
+///
+/// Runs the schema provider's analyzer, then a curated list of optimizer rules suitable
+/// for streaming SQL. Some rules (e.g. OptimizeProjections) are deliberately left out
+/// because they can drop event-time calculation fields.
+pub fn produce_optimized_plan(
+    statement: &Statement,
+    schema_provider: &StreamSchemaProvider,
+) -> Result<LogicalPlan> {
+    let sql_to_rel = SqlToRel::new(schema_provider);
+    let plan = sql_to_rel.sql_statement_to_plan(statement.clone())?;
+    // Analyze with default config options; the per-rule observer callback is a no-op.
+    let analyzed_plan = schema_provider.analyzer.execute_and_check(
+        plan,
+        &ConfigOptions::default(),
+        |_plan, _rule| {},
+    )?;
+    // Curated rule list; do not add OptimizeProjections (see the doc comment above).
+    let rules: Vec<Arc<dyn OptimizerRule + Send + Sync>> = vec![
+        Arc::new(EliminateNestedUnion::new()),
+        Arc::new(SimplifyExpressions::new()),
+        Arc::new(ReplaceDistinctWithAggregate::new()),
+        Arc::new(EliminateJoin::new()),
+        Arc::new(DecorrelatePredicateSubquery::new()),
+        Arc::new(ScalarSubqueryToJoin::new()),
+        Arc::new(DecorrelateLateralJoin::new()),
+        Arc::new(ExtractEquijoinPredicate::new()),
+        Arc::new(EliminateDuplicatedExpr::new()),
+        Arc::new(EliminateFilter::new()),
+        Arc::new(EliminateCrossJoin::new()),
+        Arc::new(EliminateLimit::new()),
+        Arc::new(PropagateEmptyRelation::new()),
+        Arc::new(EliminateOneUnion::new()),
+        Arc::new(FilterNullJoinKeys::default()),
+        Arc::new(EliminateOuterJoin::new()),
+        Arc::new(PushDownLimit::new()),
+        Arc::new(PushDownFilter::new()),
+        Arc::new(EliminateGroupByConstant::new()),
+        Arc::new(CommonSubexprEliminate::new()),
+    ];
+
+    let optimizer = Optimizer::with_rules(rules);
+    let optimized = optimizer.optimize(
+        analyzed_plan,
+        &OptimizerContext::default(),
+        |_plan, _rule| {},
+    )?;
+
+    Ok(optimized)
+}
diff --git a/src/sql/catalog/table.rs b/src/sql/catalog/table.rs
new file mode 100644
index 00000000..b1d60028
--- /dev/null
+++ b/src/sql/catalog/table.rs
@@ -0,0 +1,202 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::FieldRef;
+use datafusion::common::{Result, plan_err};
+use datafusion::logical_expr::{Extension, LogicalPlan};
+use datafusion::sql::sqlparser::ast::Statement;
+
+use super::connector_table::ConnectorTable;
+use super::optimizer::produce_optimized_plan;
+use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::extension::remote_table::RemoteTableExtension;
+use crate::sql::planner::plan::rewrite_plan;
+use crate::sql::types::DFField;
+
+/// Represents all table types in the FunctionStream SQL catalog.
+#[allow(clippy::enum_variant_names)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum Table {
+    /// A lookup table backed by an external connector.
+    LookupTable(ConnectorTable),
+    /// A source/sink table backed by an external connector.
+    ConnectorTable(ConnectorTable),
+    /// An in-memory table; a column-only `CREATE TABLE` yields one with no logical plan.
+    MemoryTable {
+        name: String,
+        fields: Vec<FieldRef>,
+        logical_plan: Option<LogicalPlan>,
+    },
+    /// A table defined by a query (CREATE VIEW / CREATE TABLE AS SELECT).
+    TableFromQuery {
+        name: String,
+        logical_plan: LogicalPlan,
+    },
+    /// A preview sink for debugging/inspection; always reported under the name `preview`.
+    PreviewSink { logical_plan: LogicalPlan },
+}
+
+impl Table {
+    /// Builds a `Table` from a CREATE TABLE or CREATE VIEW statement; `Ok(None)` otherwise.
+    pub fn try_from_statement(
+        statement: &Statement,
+        schema_provider: &StreamSchemaProvider,
+    ) -> Result<Option<Self>> {
+        use datafusion::logical_expr::{CreateMemoryTable, CreateView, DdlStatement};
+        use datafusion::sql::sqlparser::ast::CreateTable;
+        // A CREATE TABLE without a query becomes an in-memory table built from its columns.
+        if let Statement::CreateTable(CreateTable {
+            name,
+            columns,
+            query: None,
+            ..
+        }) = statement
+        {
+            let name = name.to_string();
+
+            if columns.is_empty() {
+                return plan_err!("CREATE TABLE requires at least one column");
+            }
+            // NOTE(review): column types that fail to convert silently fall back to Utf8.
+            let fields: Vec<FieldRef> = columns
+                .iter()
+                .map(|col| {
+                    let data_type = crate::sql::types::convert_data_type(&col.data_type)
+                        .map(|(dt, _)| dt)
+                        .unwrap_or(datafusion::arrow::datatypes::DataType::Utf8);
+                    let nullable = !col.options.iter().any(|opt| {
+                        matches!(
+                            opt.option,
+                            datafusion::sql::sqlparser::ast::ColumnOption::NotNull
+                        )
+                    });
+                    Arc::new(datafusion::arrow::datatypes::Field::new(
+                        col.name.value.clone(),
+                        data_type,
+                        nullable,
+                    ))
+                })
+                .collect();
+
+            return Ok(Some(Table::MemoryTable {
+                name,
+                fields,
+                logical_plan: None,
+            }));
+        }
+        // NOTE(review): planning errors also hit the `_` arm below and come back as `Ok(None)`.
+        match produce_optimized_plan(statement, schema_provider) {
+            Ok(LogicalPlan::Ddl(DdlStatement::CreateView(CreateView { name, input, .. })))
+            | Ok(LogicalPlan::Ddl(DdlStatement::CreateMemoryTable(CreateMemoryTable {
+                name,
+                input,
+                ..
+            }))) => {
+                let rewritten = rewrite_plan(input.as_ref().clone(), schema_provider)?;
+                let schema = rewritten.schema().clone();
+                let remote = RemoteTableExtension {
+                    input: rewritten,
+                    name: name.to_owned(),
+                    schema,
+                    materialize: true,
+                };
+                Ok(Some(Table::TableFromQuery {
+                    name: name.to_string(),
+                    logical_plan: LogicalPlan::Extension(Extension {
+                        node: Arc::new(remote),
+                    }),
+                }))
+            }
+            _ => Ok(None),
+        }
+    }
+    /// Returns the table's name; preview sinks always report `preview`.
+    pub fn name(&self) -> &str {
+        match self {
+            Table::ConnectorTable(table) | Table::LookupTable(table) => table.name.as_str(),
+            Table::MemoryTable { name, .. } | Table::TableFromQuery { name, .. } => name.as_str(),
+            Table::PreviewSink { .. } => "preview",
+        }
+    }
+
+    /// Returns the Arrow fields that make up this table's schema.
+    ///
+    /// - Memory tables return their declared fields.
+    /// - Connector and lookup tables return `inferred_fields` when set and otherwise the
+    ///   fields of their declared field specs.
+    /// - Query-defined tables and preview sinks return the fields of their logical plan's
+    ///   schema.
+    pub fn get_fields(&self) -> Vec<FieldRef> {
+        match self {
+            Table::MemoryTable { fields, .. } => fields.clone(),
+            // An inferred schema, when present, wins over the declared field specs.
+            Table::ConnectorTable(table) | Table::LookupTable(table) => {
+                table.inferred_fields.clone().unwrap_or_else(|| {
+                    table
+                        .fields
+                        .iter()
+                        .map(|spec| spec.field().clone().into())
+                        .collect()
+                })
+            }
+            // Both variants carry a plan, and the plan's schema supplies the fields.
+            Table::TableFromQuery { logical_plan, .. } | Table::PreviewSink { logical_plan } => {
+                logical_plan.schema().fields().iter().cloned().collect()
+            }
+        }
+    }
+    /// Records the inferred schema on a field-less `ConnectorTable`; a no-op for other tables.
+    pub fn set_inferred_fields(&mut self, fields: Vec<DFField>) -> Result<()> {
+        let Table::ConnectorTable(t) = self else {
+            return Ok(());
+        };
+        // Tables with declared fields keep them; inference only fills in an empty schema.
+        if !t.fields.is_empty() {
+            return Ok(());
+        }
+        // A previously inferred schema must match the new one by field name and data type.
+        if let Some(existing) = &t.inferred_fields {
+            let matches = existing.len() == fields.len()
+                && existing
+                    .iter()
+                    .zip(&fields)
+                    .all(|(a, b)| a.name() == b.name() && a.data_type() == b.data_type());
+
+            if !matches {
+                return plan_err!("all inserts into a table must share the same schema");
+            }
+        }
+
+        let fields: Vec<_> = fields.into_iter().map(|f| f.field().clone()).collect();
+        t.inferred_fields.replace(fields);
+
+        Ok(())
+    }
+    /// Returns the connector operation for this table; memory and query tables return an error.
+    pub fn connector_op(&self) -> Result<super::connector::ConnectorOp> {
+        match self {
+            Table::ConnectorTable(c) | Table::LookupTable(c) => Ok(c.connector_op()),
+            Table::MemoryTable { .. } => plan_err!("can't write to a memory table"),
+            Table::TableFromQuery { .. } => plan_err!("can't write to a query-defined table"),
+            Table::PreviewSink { .. } => Ok(super::connector::ConnectorOp::new("preview", "")),
+        }
+    }
+    /// Returns the partition expressions of a `ConnectorTable`, if set; `None` for other tables.
+    pub fn partition_exprs(&self) -> Option<&Vec<datafusion::logical_expr::Expr>> {
+        match self {
+            Table::ConnectorTable(c) => (*c.partition_exprs).as_ref(),
+            _ => None,
+        }
+    }
+}
diff --git a/src/sql/catalog/utils.rs b/src/sql/catalog/utils.rs
new file mode 100644
index 00000000..c0b8a7d0
--- /dev/null
+++ b/src/sql/catalog/utils.rs
@@ -0,0 +1,78 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
+use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference};
+
+use crate::sql::types::{DFField, TIMESTAMP_FIELD};
+
+/// Returns the Arrow struct type describing a window's bounds.
+///
+/// The struct has exactly two fields, in this order, both non-nullable and typed as
+/// nanosecond timestamps without a time zone:
+///
+/// - `start`
+/// - `end`
+pub fn window_arrow_struct() -> DataType {
+    // Both bounds share one type, so build it once.
+    let bound_type = DataType::Timestamp(TimeUnit::Nanosecond, None);
+
+    let bounds = vec![
+        Arc::new(Field::new("start", bound_type.clone(), false)),
+        Arc::new(Field::new("end", bound_type, false)),
+    ];
+
+    DataType::Struct(bounds.into())
+}
+
+/// Returns `schema` extended with a `_timestamp` column of nanosecond timestamp type.
+///
+/// A schema that already contains a `_timestamp` field is handed back unchanged;
+/// otherwise the new field is created with `qualifier` and joined onto the schema.
+pub fn add_timestamp_field(
+    schema: DFSchemaRef,
+    qualifier: Option<TableReference>,
+) -> DFResult<DFSchemaRef> {
+    if has_timestamp_field(&schema) {
+        return Ok(schema);
+    }
+
+    let timestamp_type = DataType::Timestamp(TimeUnit::Nanosecond, None);
+    let timestamp_field = DFField::new(qualifier, TIMESTAMP_FIELD, timestamp_type, false);
+    // `join` combines whole schemas, so wrap the field in a one-column schema first.
+    let timestamp_schema =
+        DFSchema::new_with_metadata(vec![timestamp_field.into()], HashMap::new())?;
+
+    schema.join(&timestamp_schema).map(Arc::new)
+}
+
+/// Returns `true` if any field of `schema` is named `_timestamp` (`TIMESTAMP_FIELD`).
+pub fn has_timestamp_field(schema: &DFSchemaRef) -> bool {
+    schema
+        .fields()
+        .iter()
+        .any(|field| field.name() == TIMESTAMP_FIELD)
+}
+
+/// Appends a `_timestamp` field to an Arrow schema, returning a new `SchemaRef`.
+///
+/// Unlike `add_timestamp_field`, no check for an existing `_timestamp` field is made, and
+/// only the fields are carried over into the new schema (schema-level metadata is not).
+pub fn add_timestamp_field_arrow(schema: Schema) -> SchemaRef {
+    let timestamp_type = DataType::Timestamp(TimeUnit::Nanosecond, None);
+    let mut fields = schema.fields().to_vec();
+    fields.push(Arc::new(Field::new(TIMESTAMP_FIELD, timestamp_type, false)));
+    Arc::new(Schema::new(fields))
+}
diff --git a/src/sql/functions/mod.rs b/src/sql/functions/mod.rs
new file mode 100644
index 00000000..84d3c7d4
--- /dev/null
+++ b/src/sql/functions/mod.rs
@@ -0,0 +1,600 @@
+use crate::sql::planner::StreamSchemaProvider;
+use datafusion::arrow::array::{
+    Array, ArrayRef, StringArray, UnionArray,
+    builder::{FixedSizeBinaryBuilder, ListBuilder, StringBuilder},
+    cast::{AsArray, as_string_array},
+    types::{Float64Type, Int64Type},
+};
+use datafusion::arrow::datatypes::{DataType, Field, UnionFields, UnionMode};
+use datafusion::arrow::row::{RowConverter, SortField};
+use datafusion::common::{DataFusionError, ScalarValue};
+use datafusion::common::{Result, TableReference};
+use datafusion::execution::FunctionRegistry;
+use datafusion::logical_expr::expr::{Alias, ScalarFunction};
+use datafusion::logical_expr::{
+    ColumnarValue, LogicalPlan, Projection, ScalarFunctionArgs, ScalarUDFImpl, Signature,
+    TypeSignature, Volatility, create_udf,
+};
+use datafusion::prelude::{Expr, col};
+use serde_json_path::JsonPath;
+use std::any::Any;
+use std::collections::HashMap;
+use std::fmt::{Debug, Write};
+use std::sync::{Arc, OnceLock};
+
+const SERIALIZE_JSON_UNION: &str = "serialize_json_union";
+
+/// Borrowed from DataFusion.
+///
+/// Creates a singleton `ScalarUDF` for `$UDF` (built via `Default`), stored in a static
+/// named `$GNAME`, and a function named `$NAME` that returns an `Arc` handle to it.
+///
+/// This ensures each `ScalarUDF` is constructed only once.
+#[macro_export]
+macro_rules! make_udf_function {
+    ($UDF:ty, $GNAME:ident, $NAME:ident) => {
+        /// Singleton instance of the function
+        static $GNAME: std::sync::OnceLock<std::sync::Arc<datafusion::logical_expr::ScalarUDF>> =
+            std::sync::OnceLock::new();
+
+        /// Return a [`ScalarUDF`] for [`$UDF`]
+        ///
+        /// [`ScalarUDF`]: datafusion::logical_expr::ScalarUDF
+        pub fn $NAME() -> std::sync::Arc<datafusion::logical_expr::ScalarUDF> {
+            $GNAME
+                .get_or_init(|| {
+                    std::sync::Arc::new(datafusion::logical_expr::ScalarUDF::new_from_impl(
+                        <$UDF>::default(),
+                    ))
+                })
+                .clone()
+        }
+    };
+}
+
+make_udf_function!(MultiHashFunction, MULTI_HASH, multi_hash);
+/// Registers the JSON and hashing UDFs on `registry`; panics if any registration fails.
+pub fn register_all(registry: &mut dyn FunctionRegistry) {
+    registry
+        .register_udf(Arc::new(create_udf(
+            "get_first_json_object",
+            vec![DataType::Utf8, DataType::Utf8],
+            DataType::Utf8,
+            Volatility::Immutable,
+            Arc::new(get_first_json_object),
+        )))
+        .unwrap();
+
+    registry
+        .register_udf(Arc::new(create_udf(
+            "extract_json",
+            vec![DataType::Utf8, DataType::Utf8],
+            DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))),
+            Volatility::Immutable,
+            Arc::new(extract_json),
+        )))
+        .unwrap();
+
+    registry
+        .register_udf(Arc::new(create_udf(
+            "extract_json_string",
+            vec![DataType::Utf8, DataType::Utf8],
+            DataType::Utf8,
+            Volatility::Immutable,
+            Arc::new(extract_json_string),
+        )))
+        .unwrap();
+    // This UDF accepts only the sparse JSON union type described by `union_fields`.
+    registry
+        .register_udf(Arc::new(create_udf(
+            SERIALIZE_JSON_UNION,
+            vec![DataType::Union(union_fields(), UnionMode::Sparse)],
+            DataType::Utf8,
+            Volatility::Immutable,
+            Arc::new(serialize_json_union),
+        )))
+        .unwrap();
+    // `multi_hash` is a hand-written `ScalarUDFImpl`, registered through its singleton.
+    registry.register_udf(multi_hash()).unwrap();
+}
+
+/// Compiles the JSONPath held in the scalar `path` argument of the UDF called `name`.
+///
+/// Fails with an execution error if the scalar is a null string, is not a `Utf8` value,
+/// or does not parse as a JSONPath; `name` only appears in the error text.
+fn parse_path(name: &str, path: &ScalarValue) -> Result<Arc<JsonPath>> {
+    match path {
+        ScalarValue::Utf8(Some(s)) => JsonPath::parse(s).map(Arc::new).map_err(|e| {
+            DataFusionError::Execution(format!("Invalid json path '{s}': {e:?}"))
+        }),
+        ScalarValue::Utf8(None) => Err(DataFusionError::Execution(format!(
+            "The path argument to {name} cannot be null"
+        ))),
+        // Every scalar other than `Utf8` lands here.
+        _ => Err(DataFusionError::Execution(format!(
+            "The path argument to {name} must be of type TEXT"
+        ))),
+    }
+}
+
+/// Hash function that takes any number of arguments and produces a fast (non-cryptographic)
+/// 128-bit xxh3 hash of each row, computed over the arguments' Arrow row-format encoding.
+#[derive(Debug)]
+pub struct MultiHashFunction {
+    signature: Signature,
+}
+
+impl MultiHashFunction {
+    pub fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        let mut hasher = xxhash_rust::xxh3::Xxh3::new();
+        // With only scalar inputs the result is a single scalar hash, not an array.
+        let all_scalar = args.iter().all(|a| matches!(a, ColumnarValue::Scalar(_)));
+        // Output length: the longest array argument (a scalar counts as one row).
+        let length = args
+            .iter()
+            .map(|t| match t {
+                ColumnarValue::Scalar(_) => 1,
+                ColumnarValue::Array(a) => a.len(),
+            })
+            .max()
+            .ok_or_else(|| {
+                DataFusionError::Plan("multi_hash must have at least one argument".to_string())
+            })?;
+        // Encode all arguments in Arrow row format so each row hashes as one byte string.
+        let row_builder = RowConverter::new(
+            args.iter()
+                .map(|t| SortField::new(t.data_type().clone()))
+                .collect(),
+        )?;
+        // Scalars are expanded to `length` rows so every column has the same size.
+        let arrays = args
+            .iter()
+            .map(|c| c.clone().into_array(length))
+            .collect::<Result<Vec<_>>>()?;
+        let rows = row_builder.convert_columns(&arrays)?;
+
+        if all_scalar {
+            hasher.update(rows.row(0).as_ref());
+            let result = hasher.digest128().to_be_bytes().to_vec();
+            hasher.reset();
+            Ok(ColumnarValue::Scalar(ScalarValue::FixedSizeBinary(
+                size_of::<u128>() as i32,
+                Some(result),
+            )))
+        } else {
+            let mut builder =
+                FixedSizeBinaryBuilder::with_capacity(length, size_of::<u128>() as i32);
+
+            for row in rows.iter() {
+                hasher.update(row.as_ref());
+                builder.append_value(hasher.digest128().to_be_bytes())?;
+                hasher.reset();
+            }
+
+            Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+        }
+    }
+}
+/// Accepts any number of arguments of any type (`VariadicAny`) with `Immutable` volatility.
+impl Default for MultiHashFunction {
+    fn default() -> Self {
+        Self {
+            signature: Signature::new(TypeSignature::VariadicAny, Volatility::Immutable),
+        }
+    }
+}
+
+impl ScalarUDFImpl for MultiHashFunction {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "multi_hash"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+    /// The digest is 128 bits wide, so the output is always `FixedSizeBinary(16)`.
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(DataType::FixedSizeBinary(size_of::<u128>() as i32))
+    }
+    /// Delegates to the inherent `invoke`, passing only the argument values.
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        self.invoke(&args.args)
+    }
+}
+
+/// Shared driver for JSON UDFs: applies `f` to the JSON text in `args[0]` using the JSONPath
+/// literal in `args[1]`. Null or unparsable JSON yields null; error messages cite `name`.
+fn json_function<T, ArrayT, F, ToS>(
+    name: &str,
+    f: F,
+    to_scalar: ToS,
+    args: &[ColumnarValue],
+) -> Result<ColumnarValue>
+where
+    ArrayT: Array + FromIterator<Option<T>> + 'static,
+    F: Fn(serde_json::Value, &JsonPath) -> Option<T>,
+    ToS: Fn(Option<T>) -> ScalarValue,
+{
+    assert_eq!(args.len(), 2);
+    let ColumnarValue::Scalar(path) = &args[1] else {
+        return Err(DataFusionError::Execution(format!(
+            "The path argument to {name} must be a literal"
+        )));
+    };
+    let path = parse_path(name, path)?;
+    Ok(match &args[0] {
+        ColumnarValue::Array(values) => {
+            let vs = as_string_array(values);
+            ColumnarValue::Array(Arc::new(
+                vs.iter()
+                    .map(|s| s.and_then(|s| f(serde_json::from_str(s).ok()?, &path)))
+                    .collect::<ArrayT>(),
+            ) as ArrayRef)
+        }
+        ColumnarValue::Scalar(value) => {
+            let ScalarValue::Utf8(value) = value else {
+                return Err(DataFusionError::Execution(format!(
+                    "The value argument to {name} must be of type TEXT"
+                )));
+            };
+            let result = value
+                .as_ref()
+                .and_then(|v| f(serde_json::from_str(v).ok()?, &path));
+            ColumnarValue::Scalar(to_scalar(result))
+        }
+    })
+}
+
+/// Evaluates the JSONPath literal in `args[1]` against the JSON text in `args[0]`.
+///
+/// Every match is serialized back to JSON text and collected into a list of strings;
+/// a null or unparsable input yields a null list. The path must be a scalar literal.
+pub fn extract_json(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    assert_eq!(args.len(), 2);
+
+    let ColumnarValue::Scalar(path) = &args[1] else {
+        return Err(DataFusionError::Execution(
+            "The path argument to extract_json must be a literal".to_string(),
+        ));
+    };
+    let path = parse_path("extract_json", path)?;
+
+    // All matches of `path` in one JSON document, or `None` if the text is not valid JSON.
+    let matches_in = |text, path: &JsonPath| {
+        Some(
+            path.query(&serde_json::from_str(text).ok()?)
+                .iter()
+                .map(|hit| Some(hit.to_string()))
+                .collect::<Vec<Option<String>>>(),
+        )
+    };
+
+    Ok(match &args[0] {
+        ColumnarValue::Array(values) => {
+            let values = as_string_array(values);
+            let mut builder = ListBuilder::with_capacity(StringBuilder::new(), values.len());
+            // One list per input row; null rows and invalid JSON become null lists.
+            for text in values.iter() {
+                builder.append_option(text.and_then(|text| matches_in(text, &path)));
+            }
+
+            ColumnarValue::Array(Arc::new(builder.finish()))
+        }
+        ColumnarValue::Scalar(value) => {
+            let ScalarValue::Utf8(text) = value else {
+                return Err(DataFusionError::Execution(
+                    "The value argument to extract_json must be of type TEXT".to_string(),
+                ));
+            };
+
+            let mut builder = ListBuilder::with_capacity(StringBuilder::new(), 1);
+            builder.append_option(text.as_ref().and_then(|text| matches_in(text, &path)));
+            ColumnarValue::Scalar(ScalarValue::List(Arc::new(builder.finish())))
+        }
+    })
+}
+/// Returns the first JSONPath match as JSON text (string matches keep their quotes), else null.
+pub fn get_first_json_object(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    json_function::<String, StringArray, _, _>(
+        "get_first_json_object",
+        |s, path| path.query(&s).first().map(|v| v.to_string()),
+        |s| s.as_deref().into(),
+        args,
+    )
+}
+/// Returns the first JSONPath match as an unquoted string; null if it is not a JSON string.
+pub fn extract_json_string(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    json_function::<String, StringArray, _, _>(
+        "extract_json_string",
+        |s, path| {
+            path.query(&s)
+                .first()
+                .and_then(|v| v.as_str().map(|s| s.to_string()))
+        },
+        |s| s.as_deref().into(),
+        args,
+    )
+}
+
+// This code is vendored from
+// https://github.com/datafusion-contrib/datafusion-functions-json/blob/main/src/common_union.rs
+// as the `is_json_union` function is not public. It should be kept in sync with that code so
+// that we are able to detect JSON unions and rewrite them to serialized JSON for sinks.
+/// Returns `true` only for the JSON union type: a sparse-mode union whose fields equal
+/// the ones produced by `union_fields`. Dense unions and all non-union types yield
+/// `false`.
+pub(crate) fn is_json_union(data_type: &DataType) -> bool {
+    matches!(data_type, DataType::Union(fields, UnionMode::Sparse) if *fields == union_fields())
+}
+
+pub(crate) const TYPE_ID_NULL: i8 = 0;
+const TYPE_ID_BOOL: i8 = 1;
+const TYPE_ID_INT: i8 = 2;
+const TYPE_ID_FLOAT: i8 = 3;
+const TYPE_ID_STR: i8 = 4;
+const TYPE_ID_ARRAY: i8 = 5;
+const TYPE_ID_OBJECT: i8 = 6;
+/// Field layout of the JSON union, built once on first use and cloned for each caller.
+fn union_fields() -> UnionFields {
+    static FIELDS: OnceLock<UnionFields> = OnceLock::new();
+    FIELDS
+        .get_or_init(|| {
+            let json_metadata: HashMap<String, String> =
+                HashMap::from_iter(vec![("is_json".to_string(), "true".to_string())]);
+            UnionFields::from_iter([
+                (
+                    TYPE_ID_NULL,
+                    Arc::new(Field::new("null", DataType::Null, true)),
+                ),
+                (
+                    TYPE_ID_BOOL,
+                    Arc::new(Field::new("bool", DataType::Boolean, false)),
+                ),
+                (
+                    TYPE_ID_INT,
+                    Arc::new(Field::new("int", DataType::Int64, false)),
+                ),
+                (
+                    TYPE_ID_FLOAT,
+                    Arc::new(Field::new("float", DataType::Float64, false)),
+                ),
+                (
+                    TYPE_ID_STR,
+                    Arc::new(Field::new("str", DataType::Utf8, false)),
+                ),
+                (
+                    TYPE_ID_ARRAY,
+                    Arc::new(
+                        Field::new("array", DataType::Utf8, false)
+                            .with_metadata(json_metadata.clone()),
+                    ),
+                ),
+                (
+                    TYPE_ID_OBJECT,
+                    Arc::new(
+                        Field::new("object", DataType::Utf8, false)
+                            .with_metadata(json_metadata.clone()),
+                    ),
+                ),
+            ])
+        })
+        .clone()
+}
+// End vendored code
+
+/// Serializes each element of a JSON union array to its JSON text.
+///
+/// A scalar argument is expanded to a one-element array, so the result is always an array.
+pub fn serialize_json_union(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    assert_eq!(args.len(), 1);
+    let input = match &args[0] {
+        ColumnarValue::Array(array) => Arc::clone(array),
+        ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(1)?,
+    };
+    let mut out = StringBuilder::with_capacity(input.len(), input.get_array_memory_size());
+    write_union(&mut out, &input)?;
+    Ok(ColumnarValue::Array(Arc::new(out.finish())))
+}
+/// Appends one JSON string per element of the union `array` to `b`; panics on any other type.
+fn write_union(b: &mut StringBuilder, array: &ArrayRef) -> Result<(), std::fmt::Error> {
+    assert!(
+        is_json_union(array.data_type()),
+        "array item is not a valid JSON union"
+    );
+    let json_union = array.as_any().downcast_ref::<UnionArray>().unwrap();
+    // `write_value` streams text into the builder; `append_value("")` then ends that row.
+    for i in 0..json_union.len() {
+        if json_union.is_null(i) {
+            b.append_null();
+        } else {
+            write_value(b, json_union.type_id(i), &json_union.value(i))?;
+            b.append_value("");
+        }
+    }
+
+    Ok(())
+}
+
+/// Writes the JSON text of the single value at index 0 of `a` into `b`, choosing the
+/// representation from the union type id `id`.
+///
+/// Panics (via `unreachable!`) when `id` is not one of the JSON union type ids.
+fn write_value(b: &mut StringBuilder, id: i8, a: &ArrayRef) -> Result<(), std::fmt::Error> {
+    match id {
+        TYPE_ID_NULL => b.write_str("null"),
+        TYPE_ID_BOOL => write!(b, "{}", a.as_boolean().value(0)),
+        TYPE_ID_INT => write!(b, "{}", a.as_primitive::<Int64Type>().value(0)),
+        TYPE_ID_FLOAT => write!(b, "{}", a.as_primitive::<Float64Type>().value(0)),
+        TYPE_ID_STR => {
+            // The stored text is assumed to already be a valid (escaped) JSON string,
+            // since these values can only be built by parsing valid JSON; only the
+            // surrounding quotes are added here.
+            b.write_char('"')?;
+            b.write_str(a.as_string::<i32>().value(0))?;
+            b.write_char('"')
+        }
+        // Arrays and objects are written out verbatim from their string storage.
+        TYPE_ID_ARRAY | TYPE_ID_OBJECT => b.write_str(a.as_string::<i32>().value(0)),
+        _ => unreachable!("invalid union type in JSON union: {}", id),
+    }
+}
+
+/// Wraps `node` in a projection that passes every column through unchanged, except that
+/// JSON-union columns are replaced by `SERIALIZE_JSON_UNION(col)` aliased back to the
+/// original column name.
+///
+/// Panics if the UDF is missing from `registry` (it is only looked up when a JSON-union
+/// column is present) or if the projection cannot be constructed.
+pub(crate) fn serialize_outgoing_json(
+    registry: &StreamSchemaProvider,
+    node: Arc<LogicalPlan>,
+) -> LogicalPlan {
+    let mut exprs = Vec::with_capacity(node.schema().fields().len());
+    for field in node.schema().fields().iter() {
+        let column = col(field.name());
+        let expr = if !is_json_union(field.data_type()) {
+            column
+        } else {
+            let serialize = ScalarFunction::new_udf(
+                registry.udf(SERIALIZE_JSON_UNION).unwrap(),
+                vec![column],
+            );
+            Expr::Alias(Alias::new(
+                Expr::ScalarFunction(serialize),
+                Option::<TableReference>::None,
+                field.name(),
+            ))
+        };
+        exprs.push(expr);
+    }
+
+    let projection = Projection::try_new(exprs, node).unwrap();
+    LogicalPlan::Projection(projection)
+}
+
+#[cfg(test)]
+mod test {
+    use datafusion::arrow::array::StringArray;
+    use datafusion::arrow::array::builder::{ListBuilder, StringBuilder};
+    use datafusion::common::ScalarValue;
+    use std::sync::Arc;
+
+    /// Document with a nested string at `$.c.d`; used by every scalar-input case.
+    const NESTED_DOC: &str = r#"{"a": 1, "b": 2, "c": { "d": "hello" }}"#;
+
+    /// `extract_json` yields a list of matches per row, empty where the path is absent.
+    #[test]
+    fn test_extract_json() {
+        let path = "$.c.d";
+        let input = Arc::new(StringArray::from(vec![
+            NESTED_DOC,
+            r#"{"a": 3, "b": 4}"#,
+            r#"{"a": 5, "b": 6}"#,
+        ]));
+
+        // Array input: one list per row.
+        let output = super::extract_json(&[
+            super::ColumnarValue::Array(input),
+            super::ColumnarValue::Scalar(path.into()),
+        ])
+        .unwrap();
+        let super::ColumnarValue::Array(actual) = output else {
+            panic!("Expected array, got scalar");
+        };
+
+        let mut expected = ListBuilder::new(StringBuilder::new());
+        expected.append_value(vec![Some("\"hello\"".to_string())]);
+        expected.append_value(Vec::<Option<String>>::new());
+        expected.append_value(Vec::<Option<String>>::new());
+        assert_eq!(*actual, expected.finish());
+
+        // Scalar input: a single scalar list.
+        let output = super::extract_json(&[
+            super::ColumnarValue::Scalar(NESTED_DOC.into()),
+            super::ColumnarValue::Scalar(path.into()),
+        ])
+        .unwrap();
+        let super::ColumnarValue::Scalar(ScalarValue::List(actual)) = output else {
+            panic!("Expected scalar list");
+        };
+
+        let mut expected = ListBuilder::with_capacity(StringBuilder::new(), 1);
+        expected.append_value(vec![Some("\"hello\"".to_string())]);
+        assert_eq!(*actual, expected.finish());
+    }
+
+    /// `get_first_json_object` yields the first match as JSON text, or null when absent.
+    #[test]
+    fn test_get_first_json_object() {
+        let input = Arc::new(StringArray::from(vec![
+            r#"{"a": 1, "b": 2}"#,
+            r#"{"a": 3}"#,
+            r#"{"a": 5, "b": 6}"#,
+        ]));
+
+        // Array input.
+        let output = super::get_first_json_object(&[
+            super::ColumnarValue::Array(input),
+            super::ColumnarValue::Scalar("$.b".into()),
+        ])
+        .unwrap();
+        let super::ColumnarValue::Array(actual) = output else {
+            panic!("Expected array, got scalar");
+        };
+        let expected = StringArray::from(vec![Some("2"), None, Some("6")]);
+        assert_eq!(*actual, expected);
+
+        // Scalar input: string matches keep their JSON quotes.
+        let output = super::get_first_json_object(&[
+            super::ColumnarValue::Scalar(NESTED_DOC.into()),
+            super::ColumnarValue::Scalar("$.c.d".into()),
+        ])
+        .unwrap();
+        let super::ColumnarValue::Scalar(actual) = output else {
+            panic!("Expected scalar");
+        };
+        let expected = ScalarValue::Utf8(Some("\"hello\"".to_string()));
+        assert_eq!(actual, expected);
+    }
+
+    /// `extract_json_string` yields the unquoted string at the path, or null when absent.
+    #[test]
+    fn test_extract_json_string() {
+        let path = "$.c.d";
+        let input = Arc::new(StringArray::from(vec![
+            NESTED_DOC,
+            r#"{"a": 3, "b": 4}"#,
+            r#"{"a": 5, "b": 6}"#,
+        ]));
+
+        // Array input.
+        let output = super::extract_json_string(&[
+            super::ColumnarValue::Array(input),
+            super::ColumnarValue::Scalar(path.into()),
+        ])
+        .unwrap();
+        let super::ColumnarValue::Array(actual) = output else {
+            panic!("Expected array, got scalar");
+        };
+        let expected = StringArray::from(vec![Some("hello"), None, None]);
+        assert_eq!(*actual, expected);
+
+        // Scalar input.
+        let output = super::extract_json_string(&[
+            super::ColumnarValue::Scalar(NESTED_DOC.into()),
+            super::ColumnarValue::Scalar(path.into()),
+        ])
+        .unwrap();
+        let super::ColumnarValue::Scalar(actual) = output else {
+            panic!("Expected scalar");
+        };
+        let expected = ScalarValue::Utf8(Some("hello".to_string()));
+        assert_eq!(actual, expected);
+    }
+}
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index 31b5c4b9..e0931530 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -10,9 +10,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+pub mod catalog;
+pub mod functions;
+pub mod physical;
 pub mod planner;
+pub mod types;
 
 pub use planner::StreamSchemaProvider;
 pub use planner::parse::parse_sql;
 pub use planner::plan::rewrite_plan;
 pub use planner::sql_to_plan::statement_to_plan;
+pub use planner::{CompiledSql, parse_and_get_arrow_program, parse_sql_statements};
diff --git a/src/sql/physical/mod.rs b/src/sql/physical/mod.rs
new file mode 100644
index 00000000..bfb37f11
--- /dev/null
+++ b/src/sql/physical/mod.rs
@@ -0,0 +1,1265 @@
+use datafusion::arrow::{
+    array::{
+        Array, AsArray, BooleanBuilder, PrimitiveArray, RecordBatch, StringArray, StructArray,
+        TimestampNanosecondArray, TimestampNanosecondBuilder, UInt32Builder,
+    },
+    buffer::NullBuffer,
+    compute::{concat, take},
+    datatypes::{DataType, Field, Fields, Schema, SchemaRef, TimeUnit},
+};
+use datafusion::common::{
+    DataFusionError, Result, ScalarValue, Statistics, UnnestOptions, not_impl_err, plan_err,
+};
+use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream};
+use datafusion::{
+    execution::TaskContext,
+    physical_plan::{
+        DisplayAs, ExecutionPlan, Partitioning, memory::MemoryStream,
+        stream::RecordBatchStreamAdapter,
+    },
+};
+use std::collections::HashMap;
+use std::{
+    any::Any,
+    mem,
+    pin::Pin,
+    sync::{Arc, OnceLock, RwLock},
+    task::{Context, Poll},
+};
+
+use crate::make_udf_function;
+use crate::sql::functions::MultiHashFunction;
+use crate::sql::planner::rewrite::UNNESTED_COL;
+use crate::sql::planner::schemas::window_arrow_struct;
+use crate::types::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
+use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type};
+use datafusion::catalog::memory::MemorySourceConfig;
+use datafusion::datasource::memory::DataSourceExec;
+use datafusion::logical_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
+};
+use datafusion::physical_expr::EquivalenceProperties;
+use datafusion::physical_plan::PlanProperties;
+use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec};
+use datafusion_proto::physical_plan::PhysicalExtensionCodec;
+use futures::{
+    ready,
+    stream::{Stream, StreamExt},
+};
+use prost::Message;
+use protocol::grpc::api::{
+    DebeziumDecodeNode, DebeziumEncodeNode, FsExecNode, MemExecNode, UnnestExecNode,
+    fs_exec_node::Node,
+};
+use std::fmt::Debug;
+use tokio::sync::mpsc::UnboundedReceiver;
+use tokio_stream::wrappers::UnboundedReceiverStream;
+
+// ─────────────────── Updating Meta Helpers ───────────────────
+
+/// Returns the child fields of the updating-meta struct: a nullable boolean `is_retract`
+/// and a nullable 16-byte fixed-size binary `id`.
+///
+/// The `Fields` value is built once and cloned on every call.
+pub fn updating_meta_fields() -> Fields {
+    static FIELDS: OnceLock<Fields> = OnceLock::new();
+    let cached = FIELDS.get_or_init(|| {
+        let is_retract = Field::new("is_retract", DataType::Boolean, true);
+        let id = Field::new("id", DataType::FixedSizeBinary(16), true);
+        Fields::from(vec![is_retract, id])
+    });
+    cached.clone()
+}
+
+/// Returns the non-nullable struct field named `UPDATING_META_FIELD` whose children are
+/// `updating_meta_fields()`.
+///
+/// The field is built once; each call hands out another `Arc` to the same instance.
+pub fn updating_meta_field() -> Arc<Field> {
+    static FIELD: OnceLock<Arc<Field>> = OnceLock::new();
+    let cached = FIELD.get_or_init(|| {
+        let meta_type = DataType::Struct(updating_meta_fields());
+        Arc::new(Field::new(UPDATING_META_FIELD, meta_type, false))
+    });
+    Arc::clone(cached)
+}
+
+// ─────────────────── WindowFunctionUdf ───────────────────
+
+/// Scalar UDF state for the `window` function; holds only the accepted signature.
+#[derive(Debug)]
+pub struct WindowFunctionUdf {
+    signature: Signature,
+}
+
+impl Default for WindowFunctionUdf {
+    /// Accepts exactly two nanosecond timestamps (no time zone) and is immutable.
+    fn default() -> Self {
+        let timestamp = DataType::Timestamp(TimeUnit::Nanosecond, None);
+        let accepted = TypeSignature::Exact(vec![timestamp.clone(), timestamp]);
+        Self {
+            signature: Signature::new(accepted, Volatility::Immutable),
+        }
+    }
+}
+
+impl ScalarUDFImpl for WindowFunctionUdf {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "window"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// The result type is always the window struct, regardless of argument types.
+    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
+        Ok(window_arrow_struct())
+    }
+
+    /// Packs the two timestamp arguments into a struct with non-nullable fields `start`
+    /// and `end`.
+    ///
+    /// Returns a plan error unless there are exactly two arguments and both are nanosecond
+    /// timestamps without a time zone. The result is a scalar only when both arguments are
+    /// scalars; otherwise a scalar argument is expanded to the length of the array one.
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        let columns = args.args;
+        let [start, end] = columns.as_slice() else {
+            return plan_err!(
+                "window function expected 2 arguments, got {}",
+                columns.len()
+            );
+        };
+
+        let timestamp = DataType::Timestamp(TimeUnit::Nanosecond, None);
+        for (ordinal, column) in [("first", start), ("second", end)] {
+            let actual = column.data_type();
+            if actual != timestamp {
+                return plan_err!(
+                    "window function expected {} argument to be a timestamp, got {:?}",
+                    ordinal,
+                    actual
+                );
+            }
+        }
+
+        let fields = Fields::from(vec![
+            Arc::new(Field::new("start", timestamp.clone(), false)),
+            Arc::new(Field::new("end", timestamp, false)),
+        ]);
+
+        // Bring both sides to arrays of equal length; the all-scalar case returns early
+        // so that it stays a scalar.
+        let (start, end) = match (start, end) {
+            (ColumnarValue::Scalar(start), ColumnarValue::Scalar(end)) => {
+                let single =
+                    StructArray::new(fields, vec![start.to_array()?, end.to_array()?], None);
+                return Ok(ColumnarValue::Scalar(ScalarValue::Struct(single.into())));
+            }
+            (ColumnarValue::Array(start), ColumnarValue::Array(end)) => {
+                (start.clone(), end.clone())
+            }
+            (ColumnarValue::Array(start), ColumnarValue::Scalar(end)) => {
+                let end = end.to_array_of_size(start.len())?;
+                (start.clone(), end)
+            }
+            (ColumnarValue::Scalar(start), ColumnarValue::Array(end)) => {
+                let start = start.to_array_of_size(end.len())?;
+                (start, end.clone())
+            }
+        };
+
+        let windows = StructArray::new(fields, vec![start, end], None);
+        Ok(ColumnarValue::Array(Arc::new(windows)))
+    }
+}
+
+// NOTE(review): presumably defines a `window()` accessor for `WindowFunctionUdf` backed by a
+// `WINDOW_FUNCTION` static — confirm against the `make_udf_function!` definition.
+make_udf_function!(WindowFunctionUdf, WINDOW_FUNCTION, window);
+
+// ─────────────────── Physical Extension Codec ───────────────────
+
+/// Extension codec for this module's custom physical plan nodes. `context` decides what a
+/// decoded `MemExec` node is turned into.
+#[derive(Debug)]
+pub struct FsPhysicalExtensionCodec {
+    pub context: DecodingContext,
+}
+
+impl Default for FsPhysicalExtensionCodec {
+    /// Starts with `DecodingContext::None`, under which decoding a `MemExec` node fails.
+    fn default() -> Self {
+        let context = DecodingContext::None;
+        Self { context }
+    }
+}
+
+/// Tells `FsPhysicalExtensionCodec::try_decode` what to substitute for a serialized
+/// `MemExec` node.
+#[derive(Debug)]
+pub enum DecodingContext {
+    /// No context: decoding a `MemExec` node returns an internal error.
+    None,
+    /// `MemExec` decodes to a non-executable `FsMemExec` placeholder.
+    Planning,
+    /// `MemExec` decodes to a `RwLockRecordBatchReader` over this batch slot.
+    SingleLockedBatch(Arc<RwLock<Option<RecordBatch>>>),
+    /// `MemExec` decodes to an `UnboundedRecordBatchReader` over this receiver slot.
+    UnboundedBatchStream(Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>),
+    /// `MemExec` decodes to a `RecordBatchVecReader` over this batch list.
+    LockedBatchVec(Arc<RwLock<Vec<RecordBatch>>>),
+    /// `MemExec` nodes whose table name is `"left"` / `"right"` decode to a
+    /// `RwLockRecordBatchReader` over the matching slot; other names are an error.
+    LockedJoinPair {
+        left: Arc<RwLock<Option<RecordBatch>>>,
+        right: Arc<RwLock<Option<RecordBatch>>>,
+    },
+    /// `MemExec` nodes whose table name is `"left"` / `"right"` decode to an
+    /// `UnboundedRecordBatchReader` over the matching receiver; other names are an error.
+    LockedJoinStream {
+        left: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+        right: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    },
+}
+
+/// Builds the plan properties shared by the nodes in this module: a single partition of
+/// unknown partitioning, incremental emission, and unbounded input that does not require
+/// infinite memory.
+fn make_properties(schema: SchemaRef) -> PlanProperties {
+    let equivalences = EquivalenceProperties::new(schema);
+    let partitioning = Partitioning::UnknownPartitioning(1);
+    let boundedness = Boundedness::Unbounded {
+        requires_infinite_memory: false,
+    };
+    PlanProperties::new(
+        equivalences,
+        partitioning,
+        EmissionType::Incremental,
+        boundedness,
+    )
+}
+
+/// Parses the JSON-encoded Arrow schema carried by an exec node.
+fn codec_schema_from_json(json: &str) -> Result<SchemaRef> {
+    serde_json::from_str::<Schema>(json)
+        .map(Arc::new)
+        .map_err(|e| DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")))
+}
+
+/// Serializes an Arrow schema to the JSON form stored in exec nodes, reporting failures as
+/// an internal error instead of panicking.
+fn codec_schema_to_json(schema: &Schema) -> Result<String> {
+    serde_json::to_string(schema).map_err(|e| {
+        DataFusionError::Internal(format!("couldn't serialize schema in exec codec: {e}"))
+    })
+}
+
+/// Returns the first child plan, or an internal error naming the `node_kind` that needs it.
+fn codec_first_input(
+    inputs: &[Arc<dyn ExecutionPlan>],
+    node_kind: &str,
+) -> Result<Arc<dyn ExecutionPlan>> {
+    inputs
+        .first()
+        .cloned()
+        .ok_or_else(|| DataFusionError::Internal(format!("no input for {node_kind} node")))
+}
+
+impl PhysicalExtensionCodec for FsPhysicalExtensionCodec {
+    /// Decodes an `FsExecNode` protobuf message into the matching execution plan.
+    ///
+    /// * `MemExec` becomes the reader selected by `self.context` (see `DecodingContext`);
+    ///   with `DecodingContext::None`, or an unknown table name in a join context, an
+    ///   internal error is returned.
+    /// * `UnnestExec` becomes an `UnnestExec` over the first input, unnesting the
+    ///   `UNNESTED_COL` column one level.
+    /// * `DebeziumDecode` / `DebeziumEncode` become `DebeziumUnrollingExec` /
+    ///   `ToDebeziumExec` over the first input.
+    ///
+    /// # Errors
+    /// Returns `DataFusionError::Internal` when the message or its schema cannot be parsed,
+    /// when the node is empty, when a required input is missing, or when a primary-key
+    /// index does not fit in `usize`.
+    fn try_decode(
+        &self,
+        buf: &[u8],
+        inputs: &[Arc<dyn ExecutionPlan>],
+        _registry: &dyn datafusion::execution::FunctionRegistry,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        let exec: FsExecNode = Message::decode(buf)
+            .map_err(|err| DataFusionError::Internal(format!("couldn't deserialize: {err}")))?;
+
+        match exec
+            .node
+            .ok_or_else(|| DataFusionError::Internal("exec node is empty".to_string()))?
+        {
+            Node::MemExec(mem_exec) => {
+                let schema = codec_schema_from_json(&mem_exec.schema)?;
+                match &self.context {
+                    DecodingContext::SingleLockedBatch(single_batch) => Ok(Arc::new(
+                        RwLockRecordBatchReader::new(schema, single_batch.clone()),
+                    )),
+                    DecodingContext::UnboundedBatchStream(unbounded_stream) => Ok(Arc::new(
+                        UnboundedRecordBatchReader::new(schema, unbounded_stream.clone()),
+                    )),
+                    DecodingContext::LockedBatchVec(locked_batches) => Ok(Arc::new(
+                        RecordBatchVecReader::new(schema, locked_batches.clone()),
+                    )),
+                    DecodingContext::Planning => {
+                        Ok(Arc::new(FsMemExec::new(mem_exec.table_name, schema)))
+                    }
+                    DecodingContext::None => Err(DataFusionError::Internal(
+                        "Need an internal context to decode".into(),
+                    )),
+                    DecodingContext::LockedJoinPair { left, right } => {
+                        // The table name picks which side of the join this node reads.
+                        match mem_exec.table_name.as_str() {
+                            "left" => {
+                                Ok(Arc::new(RwLockRecordBatchReader::new(schema, left.clone())))
+                            }
+                            "right" => Ok(Arc::new(RwLockRecordBatchReader::new(
+                                schema,
+                                right.clone(),
+                            ))),
+                            _ => Err(DataFusionError::Internal(format!(
+                                "unknown table name {}",
+                                mem_exec.table_name
+                            ))),
+                        }
+                    }
+                    DecodingContext::LockedJoinStream { left, right } => {
+                        match mem_exec.table_name.as_str() {
+                            "left" => Ok(Arc::new(UnboundedRecordBatchReader::new(
+                                schema,
+                                left.clone(),
+                            ))),
+                            "right" => Ok(Arc::new(UnboundedRecordBatchReader::new(
+                                schema,
+                                right.clone(),
+                            ))),
+                            _ => Err(DataFusionError::Internal(format!(
+                                "unknown table name {}",
+                                mem_exec.table_name
+                            ))),
+                        }
+                    }
+                }
+            }
+            Node::UnnestExec(unnest) => {
+                let schema = codec_schema_from_json(&unnest.schema)?;
+
+                let column = schema.index_of(UNNESTED_COL).map_err(|_| {
+                    DataFusionError::Internal(format!(
+                        "unnest node schema does not contain {UNNESTED_COL} col"
+                    ))
+                })?;
+
+                Ok(Arc::new(UnnestExec::new(
+                    codec_first_input(inputs, "unnest")?,
+                    vec![ListUnnest {
+                        index_in_input_schema: column,
+                        depth: 1,
+                    }],
+                    vec![],
+                    schema,
+                    UnnestOptions::default(),
+                )))
+            }
+            Node::DebeziumDecode(debezium) => {
+                let schema = codec_schema_from_json(&debezium.schema)?;
+                let input = codec_first_input(inputs, "debezium")?;
+                // Checked conversion: a plain `as usize` would silently truncate indices
+                // on targets where `usize` is narrower than 64 bits.
+                let primary_keys = debezium
+                    .primary_keys
+                    .into_iter()
+                    .map(|c| {
+                        usize::try_from(c).map_err(|_| {
+                            DataFusionError::Internal(format!(
+                                "primary key index {c} does not fit in usize"
+                            ))
+                        })
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+                Ok(Arc::new(DebeziumUnrollingExec {
+                    input,
+                    schema: schema.clone(),
+                    properties: make_properties(schema),
+                    primary_keys,
+                }))
+            }
+            Node::DebeziumEncode(debezium) => {
+                let schema = codec_schema_from_json(&debezium.schema)?;
+                Ok(Arc::new(ToDebeziumExec {
+                    input: codec_first_input(inputs, "debezium")?,
+                    schema: schema.clone(),
+                    properties: make_properties(schema),
+                }))
+            }
+        }
+    }
+
+    /// Encodes one of this module's plan nodes (`FsMemExec`, `UnnestExec`,
+    /// `DebeziumUnrollingExec`, `ToDebeziumExec`) as an `FsExecNode` appended to `buf`.
+    ///
+    /// # Errors
+    /// Returns `DataFusionError::Internal` when `node` is none of the supported types,
+    /// when its schema cannot be serialized to JSON, or when protobuf encoding fails.
+    fn try_encode(&self, node: Arc<dyn ExecutionPlan>, buf: &mut Vec<u8>) -> Result<()> {
+        let any = node.as_any();
+
+        // A node has exactly one concrete type, so stop at the first successful downcast.
+        let encoded = if let Some(table) = any.downcast_ref::<FsMemExec>() {
+            Node::MemExec(MemExecNode {
+                table_name: table.table_name.clone(),
+                schema: codec_schema_to_json(&table.schema)?,
+            })
+        } else if let Some(unnest) = any.downcast_ref::<UnnestExec>() {
+            Node::UnnestExec(UnnestExecNode {
+                schema: codec_schema_to_json(&unnest.schema())?,
+            })
+        } else if let Some(decode) = any.downcast_ref::<DebeziumUnrollingExec>() {
+            Node::DebeziumDecode(DebeziumDecodeNode {
+                schema: codec_schema_to_json(&decode.schema)?,
+                primary_keys: decode.primary_keys.iter().map(|&c| c as u64).collect(),
+            })
+        } else if let Some(encode) = any.downcast_ref::<ToDebeziumExec>() {
+            Node::DebeziumEncode(DebeziumEncodeNode {
+                schema: codec_schema_to_json(&encode.schema)?,
+            })
+        } else {
+            return Err(DataFusionError::Internal(format!(
+                "cannot serialize {node:?}"
+            )));
+        };
+
+        FsExecNode {
+            node: Some(encoded),
+        }
+        .encode(buf)
+        .map_err(|err| DataFusionError::Internal(format!("couldn't serialize exec node {err}")))
+    }
+}
+
+// ─────────────────── RwLockRecordBatchReader ───────────────────
+
+/// Leaf plan whose `execute` takes the single `RecordBatch` out of a shared slot and emits
+/// it as a one-batch stream.
+#[derive(Debug)]
+struct RwLockRecordBatchReader {
+    schema: SchemaRef,
+    locked_batch: Arc<RwLock<Option<RecordBatch>>>,
+    properties: PlanProperties,
+}
+
+impl RwLockRecordBatchReader {
+    /// Creates a reader over `locked_batch` that reports `schema` as its output schema.
+    fn new(schema: SchemaRef, locked_batch: Arc<RwLock<Option<RecordBatch>>>) -> Self {
+        let properties = make_properties(Arc::clone(&schema));
+        Self {
+            schema,
+            locked_batch,
+            properties,
+        }
+    }
+}
+
+impl DisplayAs for RwLockRecordBatchReader {
+    /// Prints a fixed label; the requested display format is ignored.
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        f.write_str("RW Lock RecordBatchReader")
+    }
+}
+
+impl ExecutionPlan for RwLockRecordBatchReader {
+    fn name(&self) -> &str {
+        "rw_lock_reader"
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        Arc::clone(&self.schema)
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    /// Leaf node: there are no inputs.
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        Vec::new()
+    }
+
+    /// Always fails; this node cannot be rebuilt with different children.
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        Err(DataFusionError::Internal(String::from("not supported")))
+    }
+
+    /// Takes the pending batch out of the shared slot and streams it.
+    ///
+    /// Panics if the lock is poisoned or if the slot is empty, e.g. because no batch was
+    /// stored or a previous `execute` already consumed it.
+    fn execute(
+        &self,
+        _partition: usize,
+        _context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let mut slot = self.locked_batch.write().unwrap();
+        let batch = slot
+            .take()
+            .expect("should have set a record batch before calling execute()");
+        // Release the lock before building the stream.
+        drop(slot);
+
+        let stream = MemoryStream::try_new(vec![batch], Arc::clone(&self.schema), None)?;
+        Ok(Box::pin(stream))
+    }
+
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema))
+    }
+
+    /// Nothing to reset; always succeeds.
+    fn reset(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// ─────────────────── UnboundedRecordBatchReader ───────────────────
+
+/// Leaf plan whose `execute` takes an unbounded channel receiver out of a shared slot and
+/// streams the batches arriving on it.
+#[derive(Debug)]
+struct UnboundedRecordBatchReader {
+    schema: SchemaRef,
+    receiver: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    properties: PlanProperties,
+}
+
+impl UnboundedRecordBatchReader {
+    /// Creates a reader over `receiver` that reports `schema` as its output schema.
+    fn new(
+        schema: SchemaRef,
+        receiver: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    ) -> Self {
+        let properties = make_properties(Arc::clone(&schema));
+        Self {
+            schema,
+            receiver,
+            properties,
+        }
+    }
+}
+
+impl DisplayAs for UnboundedRecordBatchReader {
+    /// Prints a fixed label; the requested display format is ignored.
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        f.write_str("unbounded record batch reader")
+    }
+}
+
+impl ExecutionPlan for UnboundedRecordBatchReader {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "unbounded_reader"
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn schema(&self) -> SchemaRef {
+        Arc::clone(&self.schema)
+    }
+
+    /// Leaf node: there are no inputs.
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        Vec::new()
+    }
+
+    /// Always fails; this node cannot be rebuilt with different children.
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        Err(DataFusionError::Internal(String::from("not supported")))
+    }
+
+    /// Takes the receiver out of the shared slot and exposes it as a record batch stream
+    /// in which every received batch is wrapped in `Ok`.
+    ///
+    /// Panics if the lock is poisoned or if the slot is empty, e.g. because a previous
+    /// `execute` already consumed the receiver.
+    fn execute(
+        &self,
+        _partition: usize,
+        _context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let output_schema = Arc::clone(&self.schema);
+        let receiver = self
+            .receiver
+            .write()
+            .unwrap()
+            .take()
+            .expect("unbounded receiver should be present before calling exec");
+        let batches =
+            UnboundedReceiverStream::new(receiver).map(Ok::<RecordBatch, DataFusionError>);
+
+        Ok(Box::pin(RecordBatchStreamAdapter::new(
+            output_schema,
+            batches,
+        )))
+    }
+
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema))
+    }
+
+    /// Nothing to reset; always succeeds.
+    fn reset(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// ─────────────────── RecordBatchVecReader ───────────────────
+
+/// Leaf plan whose `execute` drains a shared list of `RecordBatch`es and streams them
+/// through an in-memory data source.
+#[derive(Debug)]
+struct RecordBatchVecReader {
+    schema: SchemaRef,
+    receiver: Arc<RwLock<Vec<RecordBatch>>>,
+    properties: PlanProperties,
+}
+
+impl RecordBatchVecReader {
+    /// Creates a reader over `receiver` that reports `schema` as its output schema.
+    fn new(schema: SchemaRef, receiver: Arc<RwLock<Vec<RecordBatch>>>) -> Self {
+        let properties = make_properties(Arc::clone(&schema));
+        Self {
+            schema,
+            receiver,
+            properties,
+        }
+    }
+}
+
+impl DisplayAs for RecordBatchVecReader {
+    /// Prints a fixed label; the requested display format is ignored.
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        f.write_str("record batch vec reader")
+    }
+}
+
+impl ExecutionPlan for RecordBatchVecReader {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "vec_reader"
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn schema(&self) -> SchemaRef {
+        Arc::clone(&self.schema)
+    }
+
+    /// Leaf node: there are no inputs.
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        Vec::new()
+    }
+
+    /// Always fails; this node cannot be rebuilt with different children.
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        Err(DataFusionError::Internal(String::from("not supported")))
+    }
+
+    /// Moves all pending batches out of the shared list (leaving it empty) and executes
+    /// an in-memory source over them as a single partition.
+    ///
+    /// Panics if the lock is poisoned.
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let mut pending = self.receiver.write().unwrap();
+        let batches = mem::take(&mut *pending);
+        let memory = MemorySourceConfig::try_new(&[batches], Arc::clone(&self.schema), None)?;
+        drop(pending);
+
+        let source = DataSourceExec::new(Arc::new(memory));
+        source.execute(partition, context)
+    }
+
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema))
+    }
+
+    /// Nothing to reset; always succeeds.
+    fn reset(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// ─────────────────── FsMemExec ───────────────────
+
+/// Placeholder leaf node carrying only a table name and schema. It cannot be executed; the
+/// codec serializes it as a `MemExec` node and substitutes a real reader when decoding.
+#[derive(Debug, Clone)]
+pub struct FsMemExec {
+    pub table_name: String,
+    pub schema: SchemaRef,
+    properties: PlanProperties,
+}
+
+impl DisplayAs for FsMemExec {
+    /// Prints the schema behind a fixed prefix; the requested display format is ignored.
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        let schema = &self.schema;
+        write!(f, "EmptyPartitionStream: schema={schema}")
+    }
+}
+
+impl FsMemExec {
+    /// Creates a placeholder for `table_name` with the given output `schema`.
+    pub fn new(table_name: String, schema: SchemaRef) -> Self {
+        let properties = make_properties(Arc::clone(&schema));
+        Self {
+            table_name,
+            schema,
+            properties,
+        }
+    }
+}
+
+impl ExecutionPlan for FsMemExec {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "mem_exec"
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn schema(&self) -> SchemaRef {
+        Arc::clone(&self.schema)
+    }
+
+    /// Leaf node: there are no inputs.
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        Vec::new()
+    }
+
+    /// Always returns a not-implemented error.
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        not_impl_err!("with_new_children is not implemented for mem_exec; should not be called")
+    }
+
+    /// Always returns a plan error: this node exists only for planning and serialization.
+    fn execute(
+        &self,
+        _partition: usize,
+        _context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        plan_err!(
+            "EmptyPartitionStream cannot be executed, this is only used for physical planning before serialization"
+        )
+    }
+
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema))
+    }
+
+    /// Nothing to reset; always succeeds.
+    fn reset(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
+// ─────────────────── DebeziumUnrollingExec ───────────────────
+
+/// Plan node over a Debezium-shaped input (`before`, `after`, `op` and timestamp columns).
+/// Its output schema is the payload struct's fields followed by the updating-meta field
+/// and the timestamp field.
+#[derive(Debug)]
+pub struct DebeziumUnrollingExec {
+    input: Arc<dyn ExecutionPlan>,
+    schema: SchemaRef,
+    properties: PlanProperties,
+    // NOTE(review): presumably indices of the primary-key columns — confirm against callers.
+    primary_keys: Vec<usize>,
+}
+
+impl DebeziumUnrollingExec {
+    /// Validates the shape of `input` and derives the output schema.
+    ///
+    /// # Errors
+    /// Fails when any of the `before`, `after`, `op` or `TIMESTAMP_FIELD` columns is
+    /// missing, when `before` and `after` differ in type, when `op` is not `Utf8`, or when
+    /// `before` is not a struct.
+    pub fn try_new(input: Arc<dyn ExecutionPlan>, primary_keys: Vec<usize>) -> Result<Self> {
+        let input_schema = input.schema();
+
+        // Resolve the required columns in a fixed order so the first missing one is the
+        // one reported; the timestamp column only has to exist.
+        let before = input_schema.field(input_schema.index_of("before")?);
+        let after = input_schema.field(input_schema.index_of("after")?);
+        let op = input_schema.field(input_schema.index_of("op")?);
+        input_schema.index_of(TIMESTAMP_FIELD)?;
+
+        if before.data_type() != after.data_type() {
+            return Err(DataFusionError::Internal(
+                "before and after columns must have the same type".to_string(),
+            ));
+        }
+        if *op.data_type() != DataType::Utf8 {
+            return Err(DataFusionError::Internal(
+                "op column must be a string".to_string(),
+            ));
+        }
+        let DataType::Struct(payload) = before.data_type() else {
+            return Err(DataFusionError::Internal(
+                "before and after columns must be structs".to_string(),
+            ));
+        };
+
+        // Output columns: payload fields, then updating metadata, then the timestamp.
+        let timestamp = Arc::new(Field::new(
+            TIMESTAMP_FIELD,
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            false,
+        ));
+        let fields: Vec<_> = payload
+            .iter()
+            .cloned()
+            .chain([updating_meta_field(), timestamp])
+            .collect();
+
+        let schema = Arc::new(Schema::new(fields));
+        let properties = make_properties(Arc::clone(&schema));
+        Ok(Self {
+            input,
+            schema,
+            properties,
+            primary_keys,
+        })
+    }
+}
+
+impl DisplayAs for DebeziumUnrollingExec {
+    /// Prints a fixed label; the requested display format is ignored.
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        f.write_str("DebeziumUnrollingExec")
+    }
+}
+
+impl ExecutionPlan for DebeziumUnrollingExec {
+    fn name(&self) -> &str {
+        "debezium_unrolling_exec"
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self as &dyn Any
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        if children.len() != 1 {
+            return Err(DataFusionError::Internal(
+                "DebeziumUnrollingExec wrong number of children".to_string(),
+            ));
+        }
+        Ok(Arc::new(DebeziumUnrollingExec {
+            input: children[0].clone(),
+            schema: self.schema.clone(),
+            properties: self.properties.clone(),
+            primary_keys: self.primary_keys.clone(),
+        }))
+    }
+
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        Ok(Box::pin(DebeziumUnrollingStream::try_new(
+            self.input.execute(partition, context)?,
+            self.schema.clone(),
+            self.primary_keys.clone(),
+        )?))
+    }
+
+    fn reset(&self) -> Result<()> {
+        self.input.reset()
+    }
+}
+
+struct DebeziumUnrollingStream {
+    input: SendableRecordBatchStream,
+    schema: SchemaRef,
+    before_index: usize,
+    after_index: usize,
+    op_index: usize,
+    timestamp_index: usize,
+    primary_keys: Vec<usize>,
+}
+
+impl DebeziumUnrollingStream {
+    fn try_new(
+        input: SendableRecordBatchStream,
+        schema: SchemaRef,
+        primary_keys: Vec<usize>,
+    ) -> Result<Self> {
+        if primary_keys.is_empty() {
+            return plan_err!("there must be at least one primary key for a Debezium source");
+        }
+        let input_schema = input.schema();
+        let before_index = input_schema.index_of("before")?;
+        let after_index = input_schema.index_of("after")?;
+        let op_index = input_schema.index_of("op")?;
+        let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
+
+        Ok(Self {
+            input,
+            schema,
+            before_index,
+            after_index,
+            op_index,
+            timestamp_index,
+            primary_keys,
+        })
+    }
+
+    /// Expands one Debezium-envelope batch into flat rows.
+    ///
+    /// `c`/`r` emit the `after` image, `d` emits the `before` image as a
+    /// retraction, and `u` emits both (retraction first). Other `op` values are
+    /// errors. Each emitted row keeps its input row's timestamp.
+    fn unroll_batch(&self, batch: &RecordBatch) -> Result<RecordBatch> {
+        let before = batch.column(self.before_index).as_ref();
+        let after = batch.column(self.after_index).as_ref();
+        let op = batch
+            .column(self.op_index)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .ok_or_else(|| DataFusionError::Internal("op column is not a string".to_string()))?;
+
+        let timestamp = batch
+            .column(self.timestamp_index)
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| {
+                DataFusionError::Internal("timestamp column is not a timestamp".to_string())
+            })?;
+
+        let num_rows = batch.num_rows();
+        let combined_array = concat(&[before, after])?;
+        // `before` fills slots [0, num_rows), `after` the rest; an update emits two rows.
+        let mut take_indices = UInt32Builder::with_capacity(2 * num_rows);
+        let mut is_retract_builder = BooleanBuilder::with_capacity(2 * num_rows);
+        let mut timestamp_builder = TimestampNanosecondBuilder::with_capacity(2 * num_rows);
+
+        for i in 0..num_rows {
+            let (emit_before, emit_after) = match op.value(i) {
+                "c" | "r" => (false, true),
+                "u" => (true, true),
+                "d" => (true, false),
+                unknown => {
+                    return Err(DataFusionError::Internal(format!(
+                        "unexpected op value: {unknown}"
+                    )));
+                }
+            };
+            if emit_before {
+                take_indices.append_value(i as u32);
+                is_retract_builder.append_value(true);
+                timestamp_builder.append_value(timestamp.value(i));
+            }
+            if emit_after {
+                take_indices.append_value((i + num_rows) as u32);
+                is_retract_builder.append_value(false);
+                timestamp_builder.append_value(timestamp.value(i));
+            }
+        }
+        let take_indices = take_indices.finish();
+        let unrolled_array = take(&combined_array, &take_indices, None)?;
+
+        let mut columns = unrolled_array.as_struct().columns().to_vec();
+
+        let key_columns: Vec<_> = self
+            .primary_keys
+            .iter()
+            .map(|i| ColumnarValue::Array(columns[*i].clone()))
+            .collect();
+        let hash = MultiHashFunction::default().invoke(&key_columns)?;
+
+        // Size ids by the unrolled row count; an update turns one input row into two.
+        let ids = hash.into_array(take_indices.len())?;
+
+        let meta = StructArray::try_new(
+            updating_meta_fields(),
+            vec![Arc::new(is_retract_builder.finish()), ids],
+            None,
+        )?;
+        columns.push(Arc::new(meta));
+        columns.push(Arc::new(timestamp_builder.finish()));
+        Ok(RecordBatch::try_new(self.schema.clone(), columns)?)
+    }
+}
+
+impl Stream for DebeziumUnrollingStream {
+    type Item = Result<RecordBatch>;
+
+    /// Polls the input and unrolls each yielded batch; errors and end-of-stream pass through.
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+        let next = ready!(self.input.poll_next_unpin(cx));
+        Poll::Ready(next.map(|batch| self.unroll_batch(&batch?)))
+    }
+}
+
+impl RecordBatchStream for DebeziumUnrollingStream {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+}
+
+// ─────────────────── ToDebeziumExec ───────────────────
+
+#[derive(Debug)]
+pub struct ToDebeziumExec {
+    input: Arc<dyn ExecutionPlan>,
+    schema: SchemaRef,
+    properties: PlanProperties,
+}
+
+impl ToDebeziumExec {
+    pub fn try_new(input: Arc<dyn ExecutionPlan>) -> Result<Self> {
+        let input_schema = input.schema();
+        let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
+        let struct_fields: Vec<_> = input_schema
+            .fields()
+            .into_iter()
+            .enumerate()
+            .filter_map(|(index, field)| {
+                if field.name() == UPDATING_META_FIELD || index == timestamp_index {
+                    None
+                } else {
+                    Some(field.clone())
+                }
+            })
+            .collect();
+        let struct_data_type = DataType::Struct(struct_fields.into());
+        let before_field = Arc::new(Field::new("before", struct_data_type.clone(), true));
+        let after_field = Arc::new(Field::new("after", struct_data_type, true));
+        let op_field = Arc::new(Field::new("op", DataType::Utf8, false));
+        let timestamp_field = Arc::new(input_schema.field(timestamp_index).clone());
+
+        let output_schema = Arc::new(Schema::new(vec![
+            before_field,
+            after_field,
+            op_field,
+            timestamp_field,
+        ]));
+
+        Ok(Self {
+            input,
+            schema: output_schema.clone(),
+            properties: make_properties(output_schema),
+        })
+    }
+}
+
+impl DisplayAs for ToDebeziumExec {
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        write!(f, "ToDebeziumExec")
+    }
+}
+
+impl ExecutionPlan for ToDebeziumExec {
+    fn name(&self) -> &str {
+        "to_debezium_exec"
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self as &dyn Any
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        if children.len() != 1 {
+            return Err(DataFusionError::Internal(
+                "ToDebeziumExec wrong number of children".to_string(),
+            ));
+        }
+        Ok(Arc::new(ToDebeziumExec::try_new(children[0].clone())?))
+    }
+
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let updating_meta_index = self.input.schema().index_of(UPDATING_META_FIELD).ok();
+        let timestamp_index = self.input.schema().index_of(TIMESTAMP_FIELD)?;
+        let struct_projection = (0..self.input.schema().fields().len())
+            .filter(|index| {
+                updating_meta_index
+                    .map(|is_retract_index| *index != is_retract_index)
+                    .unwrap_or(true)
+                    && *index != timestamp_index
+            })
+            .collect();
+
+        Ok(Box::pin(ToDebeziumStream {
+            input: self.input.execute(partition, context)?,
+            schema: self.schema.clone(),
+            updating_meta_index,
+            timestamp_index,
+            struct_projection,
+        }))
+    }
+
+    fn reset(&self) -> Result<()> {
+        self.input.reset()
+    }
+}
+
+struct ToDebeziumStream {
+    input: SendableRecordBatchStream,
+    schema: SchemaRef,
+    updating_meta_index: Option<usize>,
+    timestamp_index: usize,
+    struct_projection: Vec<usize>,
+}
+
+impl ToDebeziumStream {
+    fn as_debezium_batch(&mut self, batch: &RecordBatch) -> Result<RecordBatch> {
+        let value_struct = batch.project(&self.struct_projection)?;
+        let timestamps = batch
+            .column(self.timestamp_index)
+            .as_primitive::<TimestampNanosecondType>();
+        // With an updating-meta column, collapse rows per id; otherwise every row is a create.
+        let columns: Vec<Arc<dyn Array>> = if let Some(metadata_index) = self.updating_meta_index {
+            let metadata = batch
+                .column(metadata_index)
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .ok_or_else(|| {
+                    DataFusionError::Internal("Invalid type for updating_meta column".to_string())
+                })?;
+
+            let is_retract = metadata.column(0).as_boolean();
+            let id = metadata.column(1).as_fixed_size_binary();
+            // Per id: (first row, last row, first is create, last is create, max timestamp).
+            let mut id_map: HashMap<&[u8], (usize, usize, bool, bool, i64)> = HashMap::new();
+            let mut order = vec![];
+            for i in 0..batch.num_rows() {
+                let row_id = id.value(i);
+                let is_create = !is_retract.value(i);
+                let timestamp = timestamps.value(i);
+
+                id_map
+                    .entry(row_id)
+                    .and_modify(|e| {
+                        e.1 = i;
+                        e.3 = is_create;
+                        e.4 = e.4.max(timestamp);
+                    })
+                    .or_insert_with(|| {
+                        order.push(row_id);
+                        (i, i, is_create, is_create, timestamp)
+                    });
+            }
+            // One output row per id, in first-seen order; `None` becomes a null struct.
+            let mut before = Vec::with_capacity(id_map.len());
+            let mut after = Vec::with_capacity(id_map.len());
+            let mut op = Vec::with_capacity(id_map.len());
+            let mut ts = TimestampNanosecondBuilder::with_capacity(id_map.len());
+
+            for row_id in order {
+                let (first_idx, last_idx, first_is_create, last_is_create, timestamp) =
+                    id_map.get(row_id).unwrap();
+                // create..create => "c", retract..retract => "d", retract..create => "u".
+                if *first_is_create && *last_is_create {
+                    before.push(None);
+                    after.push(Some(*last_idx));
+                    op.push("c");
+                } else if !(*first_is_create) && !(*last_is_create) {
+                    before.push(Some(*first_idx));
+                    after.push(None);
+                    op.push("d");
+                } else if !(*first_is_create) && *last_is_create {
+                    before.push(Some(*first_idx));
+                    after.push(Some(*last_idx));
+                    op.push("u");
+                } else {
+                    continue; // created then retracted within this batch: emit nothing
+                }
+
+                ts.append_value(*timestamp);
+            }
+
+            let before_array = Self::create_output_array(&value_struct, &before)?;
+            let after_array = Self::create_output_array(&value_struct, &after)?;
+            let op_array = StringArray::from(op);
+
+            vec![
+                Arc::new(before_array),
+                Arc::new(after_array),
+                Arc::new(op_array),
+                Arc::new(ts.finish()),
+            ]
+        } else {
+            let after_array = StructArray::try_new(
+                value_struct.schema().fields().clone(),
+                value_struct.columns().to_vec(),
+                None,
+            )?;
+
+            let before_array = StructArray::new_null(
+                value_struct.schema().fields().clone(),
+                value_struct.num_rows(),
+            );
+
+            vec![
+                Arc::new(before_array),
+                Arc::new(after_array),
+                Arc::new(StringArray::from(vec!["c"; value_struct.num_rows()])),
+                batch.column(self.timestamp_index).clone(),
+            ]
+        };
+
+        Ok(RecordBatch::try_new(self.schema.clone(), columns)?)
+    }
+
+    /// Gathers rows of `value_struct` into a single nullable struct column.
+    ///
+    /// Entry `k` of `indices` names the source row for output row `k`; a `None`
+    /// entry yields a null struct at that position.
+    fn create_output_array(
+        value_struct: &RecordBatch,
+        indices: &[Option<usize>],
+    ) -> Result<StructArray> {
+        // The gather indices are identical for every column, so build them once
+        // here rather than once per column inside the loop.
+        let take_indices: PrimitiveArray<UInt64Type> =
+            indices.iter().map(|&idx| idx.map(|i| i as u64)).collect();
+
+        let mut arrays: Vec<Arc<dyn Array>> = Vec::with_capacity(value_struct.num_columns());
+        for col in value_struct.columns() {
+            arrays.push(take(col.as_ref(), &take_indices, None)?);
+        }
+
+        let validity: Vec<bool> = indices.iter().map(Option::is_some).collect();
+        Ok(StructArray::try_new(
+            value_struct.schema().fields().clone(),
+            arrays,
+            Some(NullBuffer::from(validity)),
+        )?)
+    }
+}
+
+impl Stream for ToDebeziumStream {
+    type Item = Result<RecordBatch>;
+
+    /// Polls the input and converts each yielded batch; errors and end-of-stream pass through.
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+        let next = ready!(self.input.poll_next_unpin(cx));
+        Poll::Ready(next.map(|batch| self.as_debezium_batch(&batch?)))
+    }
+}
+
+impl RecordBatchStream for ToDebeziumStream {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+}
diff --git a/src/sql/planner/extension/aggregate.rs b/src/sql/planner/extension/aggregate.rs
index 911e595f..878d3cc5 100644
--- a/src/sql/planner/extension/aggregate.rs
+++ b/src/sql/planner/extension/aggregate.rs
@@ -11,7 +11,7 @@ use datafusion::logical_expr::{
 
 use crate::multifield_partial_ord;
 use crate::sql::planner::extension::{NamedNode, StreamExtension, TimestampAppendExtension};
-use crate::sql::planner::types::{
+use crate::sql::types::{
     DFField, StreamSchema, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers,
     schema_from_df_fields, schema_from_df_fields_with_metadata,
 };
diff --git a/src/sql/planner/extension/debezium.rs b/src/sql/planner/extension/debezium.rs
new file mode 100644
index 00000000..1760533c
--- /dev/null
+++ b/src/sql/planner/extension/debezium.rs
@@ -0,0 +1,250 @@
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
+use datafusion::common::{DFSchema, DFSchemaRef, Result, TableReference, plan_err};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use super::{NamedNode, StreamExtension};
+use crate::multifield_partial_ord;
+use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD};
+
+pub(crate) const DEBEZIUM_UNROLLING_EXTENSION_NAME: &str = "DebeziumUnrollingExtension";
+pub(crate) const TO_DEBEZIUM_EXTENSION_NAME: &str = "ToDebeziumExtension";
+
+/// Unrolls a Debezium-formatted (before/after/op) stream into individual rows
+/// with an updating metadata column.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct DebeziumUnrollingExtension {
+    pub(crate) input: LogicalPlan,
+    pub(crate) schema: DFSchemaRef,
+    pub primary_keys: Vec<usize>,
+    primary_key_names: Arc<Vec<String>>,
+}
+
+multifield_partial_ord!(
+    DebeziumUnrollingExtension,
+    input,
+    primary_keys,
+    primary_key_names
+);
+
+impl DebeziumUnrollingExtension {
+    pub(crate) fn as_debezium_schema(
+        input_schema: &DFSchemaRef,
+        qualifier: Option<TableReference>,
+    ) -> Result<DFSchemaRef> {
+        let timestamp_field = if input_schema.has_column_with_unqualified_name(TIMESTAMP_FIELD) {
+            Some(
+                input_schema
+                    .field_with_unqualified_name(TIMESTAMP_FIELD)?
+                    .clone(),
+            )
+        } else {
+            None
+        };
+        let struct_schema: Vec<_> = input_schema
+            .fields()
+            .iter()
+            .filter(|field| field.name() != TIMESTAMP_FIELD)
+            .cloned()
+            .collect();
+
+        let struct_type = DataType::Struct(struct_schema.into());
+
+        let before = Arc::new(Field::new("before", struct_type.clone(), true));
+        let after = Arc::new(Field::new("after", struct_type, true));
+        let op = Arc::new(Field::new("op", DataType::Utf8, true));
+        let mut fields = vec![before, after, op];
+
+        if let Some(ts) = timestamp_field {
+            fields.push(Arc::new(ts));
+        }
+
+        let schema = match qualifier {
+            Some(q) => DFSchema::try_from_qualified_schema(q, &Schema::new(fields))?,
+            None => DFSchema::try_from(Schema::new(fields))?,
+        };
+        Ok(Arc::new(schema))
+    }
+
+    /// Validates that `input` has a Debezium envelope (`before`/`after` structs of the same
+    /// type, a string `op`, a timestamp) and resolves `primary_keys` to struct field indices.
+    pub fn try_new(input: LogicalPlan, primary_keys: Arc<Vec<String>>) -> Result<Self> {
+        let input_schema = input.schema();
+
+        let Some(before_index) = input_schema.index_of_column_by_name(None, "before") else {
+            return plan_err!("DebeziumUnrollingExtension requires a before column");
+        };
+        let Some(after_index) = input_schema.index_of_column_by_name(None, "after") else {
+            return plan_err!("DebeziumUnrollingExtension requires an after column");
+        };
+        let Some(op_index) = input_schema.index_of_column_by_name(None, "op") else {
+            return plan_err!("DebeziumUnrollingExtension requires an op column");
+        };
+
+        let before_type = input_schema.field(before_index).data_type();
+        let after_type = input_schema.field(after_index).data_type();
+        if before_type != after_type {
+            return plan_err!(
+                "before and after columns must have the same type, not {} and {}",
+                before_type,
+                after_type
+            );
+        }
+
+        let op_type = input_schema.field(op_index).data_type();
+        if *op_type != DataType::Utf8 {
+            return plan_err!("op column must be a string, not {}", op_type);
+        }
+
+        let DataType::Struct(fields) = before_type else {
+            return plan_err!(
+                "before and after columns must be structs, not {}",
+                before_type
+            );
+        };
+
+        let primary_key_idx = primary_keys
+            .iter()
+            .map(|pk| match fields.find(pk) {
+                Some((i, _)) => Ok(i),
+                None => plan_err!("primary key field '{}' not found in Debezium schema", pk),
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let qualifier = match (
+            input_schema.qualified_field(before_index).0,
+            input_schema.qualified_field(after_index).0,
+        ) {
+            (Some(bq), Some(aq)) => {
+                if bq != aq {
+                    return plan_err!("before and after columns must have the same alias");
+                }
+                Some(bq.clone())
+            }
+            (None, None) => None,
+            _ => return plan_err!("before and after columns must both have an alias or neither"),
+        };
+
+        let mut out_fields = fields.to_vec();
+
+        let Some(input_ts_index) = input_schema.index_of_column_by_name(None, TIMESTAMP_FIELD)
+        else {
+            return plan_err!("DebeziumUnrollingExtension requires a timestamp field");
+        };
+        out_fields.push(Arc::new(input_schema.field(input_ts_index).clone()));
+
+        let arrow_schema = Schema::new(out_fields);
+        let schema = match qualifier {
+            Some(q) => DFSchema::try_from_qualified_schema(q, &arrow_schema)?,
+            None => DFSchema::try_from(arrow_schema)?,
+        };
+
+        Ok(Self {
+            input,
+            schema: Arc::new(schema),
+            primary_keys: primary_key_idx,
+            primary_key_names: primary_keys,
+        })
+    }
+}
+
+impl UserDefinedLogicalNodeCore for DebeziumUnrollingExtension {
+    fn name(&self) -> &str {
+        DEBEZIUM_UNROLLING_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "DebeziumUnrollingExtension")
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Self::try_new(inputs[0].clone(), self.primary_key_names.clone())
+    }
+}
+
+impl StreamExtension for DebeziumUnrollingExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn output_schema(&self) -> StreamSchema {
+        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    }
+
+    fn transparent(&self) -> bool {
+        true
+    }
+}
+
+/// Wraps an input stream into Debezium format (before/after/op) for updating sinks.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct ToDebeziumExtension {
+    pub(crate) input: Arc<LogicalPlan>,
+    pub(crate) schema: DFSchemaRef,
+}
+
+multifield_partial_ord!(ToDebeziumExtension, input);
+
+impl ToDebeziumExtension {
+    /// Wraps `input` in a Debezium envelope whose schema is derived from the
+    /// input's schema (unqualified). Schema-derivation failures are returned
+    /// to the caller as errors rather than panicking.
+    pub(crate) fn try_new(input: LogicalPlan) -> Result<Self> {
+        let schema = DebeziumUnrollingExtension::as_debezium_schema(input.schema(), None)?;
+        let input = Arc::new(input);
+        Ok(Self { input, schema })
+    }
+}
+
+impl UserDefinedLogicalNodeCore for ToDebeziumExtension {
+    fn name(&self) -> &str {
+        TO_DEBEZIUM_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "ToDebeziumExtension")
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Self::try_new(inputs[0].clone())
+    }
+}
+
+impl StreamExtension for ToDebeziumExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn output_schema(&self) -> StreamSchema {
+        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    }
+
+    fn transparent(&self) -> bool {
+        true
+    }
+}
diff --git a/src/sql/planner/extension/join.rs b/src/sql/planner/extension/join.rs
index 0b4fa13a..3857fee7 100644
--- a/src/sql/planner/extension/join.rs
+++ b/src/sql/planner/extension/join.rs
@@ -5,7 +5,7 @@ use datafusion::logical_expr::expr::Expr;
 use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore};
 
 use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::planner::types::StreamSchema;
+use crate::sql::types::StreamSchema;
 
 use std::sync::Arc;
 
diff --git a/src/sql/planner/extension/key_calculation.rs b/src/sql/planner/extension/key_calculation.rs
index f60c4d32..c90b6d1d 100644
--- a/src/sql/planner/extension/key_calculation.rs
+++ b/src/sql/planner/extension/key_calculation.rs
@@ -7,7 +7,7 @@ use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogi
 
 use crate::multifield_partial_ord;
 use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::planner::types::{
+use crate::sql::types::{
     StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata,
 };
 
diff --git a/src/sql/planner/extension/lookup.rs b/src/sql/planner/extension/lookup.rs
new file mode 100644
index 00000000..daa4b094
--- /dev/null
+++ b/src/sql/planner/extension/lookup.rs
@@ -0,0 +1,127 @@
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::common::{Column, DFSchemaRef, JoinType, Result, TableReference, internal_err};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use super::{NamedNode, StreamExtension};
+use crate::multifield_partial_ord;
+use crate::sql::catalog::connector_table::ConnectorTable;
+use crate::sql::types::StreamSchema;
+
+pub const SOURCE_EXTENSION_NAME: &str = "LookupSource";
+pub const JOIN_EXTENSION_NAME: &str = "LookupJoin";
+
+/// Represents a lookup table source in the streaming plan.
+/// Lookup sources provide point-query access to external state.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LookupSource {
+    pub(crate) table: ConnectorTable,
+    pub(crate) schema: DFSchemaRef,
+}
+
+multifield_partial_ord!(LookupSource, table);
+
+impl UserDefinedLogicalNodeCore for LookupSource {
+    fn name(&self) -> &str {
+        SOURCE_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "LookupSource: {}", self.schema)
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if !inputs.is_empty() {
+            return internal_err!("LookupSource cannot have inputs");
+        }
+        Ok(Self {
+            table: self.table.clone(),
+            schema: self.schema.clone(),
+        })
+    }
+}
+
+impl StreamExtension for LookupSource {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn output_schema(&self) -> StreamSchema {
+        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    }
+}
+
+/// Represents a lookup join: a streaming input joined against a lookup table.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LookupJoin {
+    pub(crate) input: LogicalPlan,
+    pub(crate) schema: DFSchemaRef,
+    pub(crate) connector: ConnectorTable,
+    pub(crate) on: Vec<(Expr, Column)>,
+    pub(crate) filter: Option<Expr>,
+    pub(crate) alias: Option<TableReference>,
+    pub(crate) join_type: JoinType,
+}
+
+multifield_partial_ord!(LookupJoin, input, connector, on, filter, alias);
+
+impl UserDefinedLogicalNodeCore for LookupJoin {
+    fn name(&self) -> &str {
+        JOIN_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+    // Exposes the left-hand `on` expressions followed by the optional filter.
+    fn expressions(&self) -> Vec<Expr> {
+        let mut e: Vec<_> = self.on.iter().map(|(l, _)| l.clone()).collect();
+        if let Some(filter) = &self.filter {
+            e.push(filter.clone());
+        }
+        e
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "LookupJoinExtension: {}", self.schema)
+    }
+    // NOTE(review): the exprs argument is discarded, so rewritten expressions are not applied.
+    fn with_exprs_and_inputs(&self, _: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self {
+            input: inputs[0].clone(), // panics if `inputs` is empty
+            schema: self.schema.clone(),
+            connector: self.connector.clone(),
+            on: self.on.clone(),
+            filter: self.filter.clone(),
+            alias: self.alias.clone(),
+            join_type: self.join_type,
+        })
+    }
+}
+
+impl StreamExtension for LookupJoin {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn output_schema(&self) -> StreamSchema {
+        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    }
+}
diff --git a/src/sql/planner/extension/mod.rs b/src/sql/planner/extension/mod.rs
index 96ac5f32..4de1892e 100644
--- a/src/sql/planner/extension/mod.rs
+++ b/src/sql/planner/extension/mod.rs
@@ -1,24 +1,53 @@
-use std::fmt::Debug;
+use std::fmt::{Debug, Formatter};
 use std::sync::Arc;
+use std::time::Duration;
 
+use datafusion::arrow::datatypes::{DataType, TimeUnit};
 use datafusion::common::{DFSchemaRef, DataFusionError, Result, TableReference};
 use datafusion::logical_expr::{
     Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore,
 };
 
+use crate::datastream::logical::{LogicalEdge, LogicalNode};
 use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
-use crate::sql::planner::types::StreamSchema;
+use crate::sql::types::{
+    DFField, StreamSchema, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields,
+};
+use crate::types::FsSchemaRef;
 
 pub(crate) mod aggregate;
+pub(crate) mod debezium;
 pub(crate) mod join;
 pub(crate) mod key_calculation;
+pub(crate) mod lookup;
 pub(crate) mod projection;
 pub(crate) mod remote_table;
+pub(crate) mod sink;
+pub(crate) mod table_source;
+pub(crate) mod updating_aggregate;
 pub(crate) mod watermark_node;
 pub(crate) mod window_fn;
 
+pub(crate) struct NodeWithIncomingEdges {
+    pub node: LogicalNode, // the planned node; this struct is what `plan_node` returns
+    pub edges: Vec<LogicalEdge>, // per the type name, the edges arriving at `node` — confirm
+}
+
 pub(crate) trait StreamExtension: Debug {
     fn node_name(&self) -> Option<NamedNode>;
+
+    /// Default planning hook: always fails with `NotImplemented`, naming the node via
+    /// its `Debug` output. None of the arguments is used by this default.
+    fn plan_node(
+        &self,
+        _planner: &super::physical_planner::Planner,
+        _index: usize,
+        _input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        let message = format!("plan_node not yet implemented for {:?}", self);
+        Err(DataFusionError::NotImplemented(message))
+    }
+
     fn output_schema(&self) -> StreamSchema;
     fn transparent(&self) -> bool {
         false
@@ -47,20 +76,34 @@ impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension {
 
     fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result<Self, Self::Error> {
         use aggregate::AggregateExtension;
+        use debezium::{DebeziumUnrollingExtension, ToDebeziumExtension};
         use join::JoinExtension;
         use key_calculation::KeyCalculationExtension;
+        use lookup::{LookupJoin, LookupSource};
         use projection::ProjectionExtension;
         use remote_table::RemoteTableExtension;
+        use sink::SinkExtension;
+        use table_source::TableSourceExtension;
+        use updating_aggregate::UpdatingAggregateExtension;
         use watermark_node::WatermarkNode;
         use window_fn::WindowFunctionExtension;
 
-        try_from_t::<WatermarkNode>(node)
+        try_from_t::<TableSourceExtension>(node)
+            .or_else(|_| try_from_t::<WatermarkNode>(node))
+            .or_else(|_| try_from_t::<SinkExtension>(node))
             .or_else(|_| try_from_t::<KeyCalculationExtension>(node))
             .or_else(|_| try_from_t::<AggregateExtension>(node))
             .or_else(|_| try_from_t::<RemoteTableExtension>(node))
             .or_else(|_| try_from_t::<JoinExtension>(node))
             .or_else(|_| try_from_t::<WindowFunctionExtension>(node))
+            .or_else(|_| try_from_t::<AsyncUDFExtension>(node))
+            .or_else(|_| try_from_t::<ToDebeziumExtension>(node))
+            .or_else(|_| try_from_t::<DebeziumUnrollingExtension>(node))
+            .or_else(|_| try_from_t::<UpdatingAggregateExtension>(node))
+            .or_else(|_| try_from_t::<LookupJoin>(node))
+            .or_else(|_| try_from_t::<LookupSource>(node))
             .or_else(|_| try_from_t::<ProjectionExtension>(node))
+            .or_else(|_| try_from_t::<IsRetractExtension>(node))
             .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name())))
     }
 }
@@ -151,3 +194,163 @@ impl UserDefinedLogicalNodeCore for TimestampAppendExtension {
         Ok(Self::new(inputs[0].clone(), self.qualifier.clone()))
     }
 }
+
+/// Replaces the last column of its input's schema with a timestamp field that
+/// carries `timestamp_qualifier`. NOTE(review): no `_updating_meta` field is added — confirm.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct IsRetractExtension {
+    pub(crate) input: LogicalPlan,
+    pub(crate) schema: DFSchemaRef,
+    pub(crate) timestamp_qualifier: Option<TableReference>,
+}
+
+multifield_partial_ord!(IsRetractExtension, input, timestamp_qualifier);
+
+impl IsRetractExtension {
+    /// Wraps `input`, replacing the last field of its schema with a non-nullable
+    /// nanosecond `TIMESTAMP_FIELD` qualified by `timestamp_qualifier`.
+    ///
+    /// Panics if `input` has no fields or the resulting schema cannot be built.
+    pub(crate) fn new(input: LogicalPlan, timestamp_qualifier: Option<TableReference>) -> Self {
+        let mut fields = fields_with_qualifiers(input.schema());
+        let timestamp_type = DataType::Timestamp(TimeUnit::Nanosecond, None);
+        // `last_mut` turns the empty-schema case into an explicit panic, not an underflow.
+        *fields.last_mut().expect("input schema has no timestamp column") =
+            DFField::new(timestamp_qualifier.clone(), TIMESTAMP_FIELD, timestamp_type, false);
+        let schema = Arc::new(schema_from_df_fields(&fields).unwrap());
+        Self {
+            input,
+            schema,
+            timestamp_qualifier,
+        }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for IsRetractExtension {
+    fn name(&self) -> &str {
+        "IsRetractExtension"
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "IsRetractExtension")
+    }
+    // Rebuilds the node, and with it the schema, around exactly one new input.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let Ok([input]) = <[LogicalPlan; 1]>::try_from(inputs) else {
+            return Err(DataFusionError::Internal("input size inconsistent".into()));
+        };
+        Ok(Self::new(input, self.timestamp_qualifier.clone()))
+    }
+}
+
+impl StreamExtension for IsRetractExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+    // Unkeyed stream schema built from `self.schema`; panics if the conversion fails.
+    fn output_schema(&self) -> StreamSchema {
+        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    }
+}
+
+pub(crate) const ASYNC_RESULT_FIELD: &str = "__async_result";
+
+/// Extension node for async UDF calls in streaming projections.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct AsyncUDFExtension {
+    pub(crate) input: Arc<LogicalPlan>, // the node's only child plan
+    pub(crate) name: String, // shown in the explain output; presumably the UDF's name
+    pub(crate) arg_exprs: Vec<Expr>, // listed first by `expressions()`
+    pub(crate) final_exprs: Vec<Expr>, // listed after `arg_exprs` by `expressions()`
+    pub(crate) ordered: bool,
+    pub(crate) max_concurrency: usize,
+    pub(crate) timeout: Duration,
+    pub(crate) final_schema: DFSchemaRef, // the node's output schema, returned by `schema()`
+}
+
+multifield_partial_ord!(
+    AsyncUDFExtension,
+    input,
+    name,
+    arg_exprs,
+    final_exprs,
+    ordered,
+    max_concurrency,
+    timeout
+);
+
+impl UserDefinedLogicalNodeCore for AsyncUDFExtension {
+    fn name(&self) -> &str {
+        "AsyncUDFNode"
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.final_schema
+    }
+
+    // Argument expressions come first, followed by the final expressions.
+    fn expressions(&self) -> Vec<Expr> {
+        let mut combined = self.arg_exprs.clone();
+        combined.extend(self.final_exprs.iter().cloned());
+        combined
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "AsyncUdfExtension<{}>: {}", self.name, self.final_schema)
+    }
+
+    // Only the input may change: `exprs` must equal what `expressions()` reports.
+    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [child] = inputs.as_slice() else {
+            return Err(DataFusionError::Internal("input size inconsistent".into()));
+        };
+        if UserDefinedLogicalNode::expressions(self) != exprs {
+            return Err(DataFusionError::Internal(
+                "Tried to recreate async UDF node with different expressions".into(),
+            ));
+        }
+
+        Ok(Self {
+            input: Arc::new(child.clone()),
+            name: self.name.clone(),
+            arg_exprs: self.arg_exprs.clone(),
+            final_exprs: self.final_exprs.clone(),
+            ordered: self.ordered,
+            max_concurrency: self.max_concurrency,
+            timeout: self.timeout,
+            final_schema: self.final_schema.clone(),
+        })
+    }
+}
+
+impl StreamExtension for AsyncUDFExtension {
+    // This node reports no `NamedNode`.
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    // Clones every field of `final_schema` out of its shared pointer into an
+    // owned list, then passes that list to `StreamSchema::from_fields`.
+    // The field order of `final_schema` is kept.
+    fn output_schema(&self) -> StreamSchema {
+        let schema_fields = self.final_schema.fields();
+        let owned_fields = schema_fields.iter().map(|field| (**field).clone()).collect();
+        StreamSchema::from_fields(owned_fields)
+    }
+}
diff --git a/src/sql/planner/extension/projection.rs b/src/sql/planner/extension/projection.rs
index f7ecb6ed..e6dc8ce7 100644
--- a/src/sql/planner/extension/projection.rs
+++ b/src/sql/planner/extension/projection.rs
@@ -6,7 +6,7 @@ use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogi
 
 use crate::multifield_partial_ord;
 use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::planner::types::{DFField, StreamSchema, schema_from_df_fields};
+use crate::sql::types::{DFField, StreamSchema, schema_from_df_fields};
 
 pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension";
 
diff --git a/src/sql/planner/extension/remote_table.rs b/src/sql/planner/extension/remote_table.rs
index 4935efd9..2d81cafc 100644
--- a/src/sql/planner/extension/remote_table.rs
+++ b/src/sql/planner/extension/remote_table.rs
@@ -6,7 +6,7 @@ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 
 use crate::multifield_partial_ord;
 use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::planner::types::StreamSchema;
+use crate::sql::types::StreamSchema;
 
 pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension";
 
diff --git a/src/sql/planner/extension/sink.rs b/src/sql/planner/extension/sink.rs
new file mode 100644
index 00000000..7820925f
--- /dev/null
+++ b/src/sql/planner/extension/sink.rs
@@ -0,0 +1,135 @@
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err};
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use super::debezium::ToDebeziumExtension;
+use super::remote_table::RemoteTableExtension;
+use super::{NamedNode, StreamExtension};
+use crate::multifield_partial_ord;
+use crate::sql::catalog::table::Table;
+use crate::sql::types::StreamSchema;
+
+pub(crate) const SINK_NODE_NAME: &str = "SinkExtension";
+
+/// Extension node representing a sink (output) in the streaming plan.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct SinkExtension {
+    pub(crate) name: TableReference, // becomes the `NamedNode::Sink` name of non-preview sinks
+    pub(crate) table: Table, // catalog table this sink targets
+    pub(crate) schema: DFSchemaRef, // `new` swaps in the debezium schema for updating connectors
+    pub(crate) inputs: Arc<Vec<LogicalPlan>>, // `new` stores exactly one plan here
+}
+
+multifield_partial_ord!(SinkExtension, name, inputs);
+
+impl SinkExtension {
+    /// Builds the sink node that feeds `input` into `table`.
+    ///
+    /// For an updating connector table the input is first wrapped in a
+    /// `ToDebeziumExtension`, and `schema` is replaced by that wrapper's schema.
+    /// Lookup and memory tables are rejected with a plan error. The input is then
+    /// passed through `add_remote_if_necessary`.
+    pub fn new(
+        name: TableReference,
+        table: Table,
+        mut schema: DFSchemaRef,
+        mut input: Arc<LogicalPlan>,
+    ) -> Result<Self> {
+        match &table {
+            Table::ConnectorTable(connector_table) => {
+                if connector_table.is_updating() {
+                    let to_debezium = ToDebeziumExtension::try_new(input.as_ref().clone())?;
+                    input = Arc::new(LogicalPlan::Extension(Extension {
+                        node: Arc::new(to_debezium),
+                    }));
+                    schema = input.schema().clone();
+                }
+            }
+            Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"),
+            Table::MemoryTable { .. } => return plan_err!("memory tables not supported as sinks"),
+            Table::TableFromQuery { .. } => {}
+            Table::PreviewSink { .. } => {
+                // Nothing is wrapped here, though updating inputs may need debezium wrapping too.
+            }
+        }
+
+        Self::add_remote_if_necessary(&schema, &mut input);
+
+        let inputs = Arc::new(vec![(*input).clone()]);
+        Ok(Self {
+            name,
+            table,
+            schema,
+            inputs,
+        })
+    }
+
+    /// Makes sure the sink is fed by a non-transparent stream extension.
+    ///
+    /// `input` is kept as is only when it is an extension node that converts to a
+    /// `StreamExtension` and is not transparent; otherwise it is wrapped in a
+    /// non-materialized `RemoteTableExtension` named "sink projection".
+    pub fn add_remote_if_necessary(schema: &DFSchemaRef, input: &mut Arc<LogicalPlan>) {
+        if let LogicalPlan::Extension(node) = input.as_ref() {
+            let as_stream: Result<&dyn StreamExtension, _> = (&node.node).try_into();
+            // Failed conversions and transparent nodes both fall through to the wrap below.
+            if matches!(as_stream, Ok(ext) if !ext.transparent()) {
+                return;
+            }
+        }
+        let remote = RemoteTableExtension {
+            input: input.as_ref().clone(),
+            name: TableReference::bare("sink projection"),
+            schema: schema.clone(),
+            materialize: false,
+        };
+        *input = Arc::new(LogicalPlan::Extension(Extension {
+            node: Arc::new(remote),
+        }));
+    }
+}
+
+impl UserDefinedLogicalNodeCore for SinkExtension {
+    fn name(&self) -> &str {
+        SINK_NODE_NAME
+    }
+    // One reference per plan stored in `self.inputs`.
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        self.inputs.iter().collect()
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+    // The sink exposes no expressions of its own.
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "SinkExtension({:?}): {}", self.name, self.schema)
+    }
+    // Keeps name, table and schema and replaces the whole input list; `_exprs` is unused.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self {
+            name: self.name.clone(),
+            table: self.table.clone(),
+            schema: self.schema.clone(),
+            inputs: Arc::new(inputs),
+        })
+    }
+}
+
+impl StreamExtension for SinkExtension {
+    // Preview sinks are unnamed; every other sink is named after its table reference.
+    fn node_name(&self) -> Option<NamedNode> {
+        let is_preview = matches!(self.table, Table::PreviewSink { .. });
+        (!is_preview).then(|| NamedNode::Sink(self.name.clone()))
+    }
+
+    // The sink's stream schema is built from an empty field list.
+    fn output_schema(&self) -> StreamSchema {
+        StreamSchema::from_fields(vec![])
+    }
+}
diff --git a/src/sql/planner/extension/table_source.rs b/src/sql/planner/extension/table_source.rs
new file mode 100644
index 00000000..cab3ae3d
--- /dev/null
+++ b/src/sql/planner/extension/table_source.rs
@@ -0,0 +1,94 @@
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use super::{NamedNode, StreamExtension};
+use crate::multifield_partial_ord;
+use crate::sql::catalog::connector_table::ConnectorTable;
+use crate::sql::catalog::field_spec::FieldSpec;
+use crate::sql::planner::schemas::add_timestamp_field;
+use crate::sql::types::{StreamSchema, schema_from_df_fields};
+
+pub(crate) const TABLE_SOURCE_NAME: &str = "TableSourceExtension";
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct TableSourceExtension {
+    pub(crate) name: TableReference, // column qualifier and `NamedNode::Source` name
+    pub(crate) table: ConnectorTable, // connector table whose fields define the schema
+    pub(crate) schema: DFSchemaRef, // computed once in `new`; reused by `with_exprs_and_inputs`
+}
+
+multifield_partial_ord!(TableSourceExtension, name, table);
+
+impl TableSourceExtension {
+    /// Builds the source node for `table`, qualifying every column with `name`.
+    ///
+    /// Skips virtual fields, switches to the debezium schema for updating tables, then
+    /// applies `add_timestamp_field`. Panics if any schema-building step fails.
+    pub fn new(name: TableReference, table: ConnectorTable) -> Self {
+        let qualifier = Some(name.clone());
+        let columns: Vec<_> = table
+            .fields
+            .iter()
+            .filter_map(|spec| match spec {
+                FieldSpec::Virtual { .. } => None,
+                FieldSpec::Struct(inner) | FieldSpec::Metadata { field: inner, .. } => {
+                    Some((qualifier.clone(), Arc::new(inner.clone())).into())
+                }
+            })
+            .collect();
+        let plain = Arc::new(schema_from_df_fields(&columns).unwrap());
+        let shaped = if table.is_updating() {
+            super::debezium::DebeziumUnrollingExtension::as_debezium_schema(
+                &plain,
+                qualifier.clone(),
+            )
+            .unwrap()
+        } else {
+            plain
+        };
+        let schema = add_timestamp_field(shaped, qualifier).unwrap();
+        Self { name, table, schema }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for TableSourceExtension {
+    fn name(&self) -> &str {
+        TABLE_SOURCE_NAME
+    }
+    // A source is a leaf: it has no input plans.
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "TableSourceExtension: {}", self.schema)
+    }
+    // Both arguments are ignored; the result is a field-by-field copy of `self`.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, _inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self {
+            name: self.name.clone(),
+            table: self.table.clone(),
+            schema: self.schema.clone(),
+        })
+    }
+}
+
+impl StreamExtension for TableSourceExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        Some(NamedNode::Source(self.name.clone()))
+    }
+    // Passes an empty key list to `from_schema_keys`; panics if that call fails.
+    fn output_schema(&self) -> StreamSchema {
+        StreamSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap()
+    }
+}
diff --git a/src/sql/planner/extension/updating_aggregate.rs b/src/sql/planner/extension/updating_aggregate.rs
new file mode 100644
index 00000000..758edc67
--- /dev/null
+++ b/src/sql/planner/extension/updating_aggregate.rs
@@ -0,0 +1,89 @@
+use std::sync::Arc;
+use std::time::Duration;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err};
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use super::{IsRetractExtension, NamedNode, StreamExtension};
+use crate::sql::types::StreamSchema;
+
+pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension";
+
+/// Extension node for updating (non-windowed) aggregations.
+/// Maintains state with TTL and emits retraction/update pairs.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub(crate) struct UpdatingAggregateExtension {
+    pub(crate) aggregate: LogicalPlan, // the node's only input
+    pub(crate) key_fields: Vec<usize>,
+    pub(crate) final_calculation: LogicalPlan, // wraps `aggregate`; supplies `schema()`
+    pub(crate) timestamp_qualifier: Option<TableReference>, // passed to `IsRetractExtension`
+    pub(crate) ttl: Duration,
+}
+
+impl UpdatingAggregateExtension {
+    /// Builds the node around `aggregate`. `final_calculation` is an extension plan
+    /// holding an `IsRetractExtension` created from a clone of `aggregate` and the
+    /// given `timestamp_qualifier`. This constructor currently always returns `Ok`.
+    pub fn new(
+        aggregate: LogicalPlan,
+        key_fields: Vec<usize>,
+        timestamp_qualifier: Option<TableReference>,
+        ttl: Duration,
+    ) -> Result<Self> {
+        let retract = IsRetractExtension::new(aggregate.clone(), timestamp_qualifier.clone());
+        let final_calculation = LogicalPlan::Extension(Extension {
+            node: Arc::new(retract),
+        });
+        Ok(Self {
+            aggregate,
+            key_fields,
+            final_calculation,
+            timestamp_qualifier,
+            ttl,
+        })
+    }
+}
+
+impl UserDefinedLogicalNodeCore for UpdatingAggregateExtension {
+    fn name(&self) -> &str {
+        UPDATING_AGGREGATE_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.aggregate]
+    }
+    // The schema is the one computed for `final_calculation`.
+    fn schema(&self) -> &DFSchemaRef {
+        self.final_calculation.schema()
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "UpdatingAggregateExtension")
+    }
+    // Rebuilds the node through `new`, so `final_calculation` is recomputed for the new input.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [aggregate] = inputs.as_slice() else {
+            return plan_err!("UpdatingAggregateExtension expects exactly one input");
+        };
+        Self::new(
+            aggregate.clone(),
+            self.key_fields.clone(),
+            self.timestamp_qualifier.clone(),
+            self.ttl,
+        )
+    }
+}
+
+impl StreamExtension for UpdatingAggregateExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+    // Unkeyed stream schema built from `self.schema()`; panics if the conversion fails.
+    fn output_schema(&self) -> StreamSchema {
+        StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap()
+    }
+}
diff --git a/src/sql/planner/extension/watermark_node.rs b/src/sql/planner/extension/watermark_node.rs
index eb776ff2..a06bdb9a 100644
--- a/src/sql/planner/extension/watermark_node.rs
+++ b/src/sql/planner/extension/watermark_node.rs
@@ -8,7 +8,7 @@ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 use crate::multifield_partial_ord;
 use crate::sql::planner::extension::{NamedNode, StreamExtension};
 use crate::sql::planner::schemas::add_timestamp_field;
-use crate::sql::planner::types::{StreamSchema, TIMESTAMP_FIELD};
+use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD};
 
 pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode";
 
diff --git a/src/sql/planner/extension/window_fn.rs b/src/sql/planner/extension/window_fn.rs
index 6e6e1c36..95832183 100644
--- a/src/sql/planner/extension/window_fn.rs
+++ b/src/sql/planner/extension/window_fn.rs
@@ -4,7 +4,7 @@ use datafusion::common::{DFSchemaRef, Result};
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 
 use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::planner::types::StreamSchema;
+use crate::sql::types::StreamSchema;
 
 pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension";
 
diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs
index ead5e212..d80d3a8d 100644
--- a/src/sql/planner/mod.rs
+++ b/src/sql/planner/mod.rs
@@ -2,354 +2,360 @@
 
 pub(crate) mod extension;
 pub mod parse;
+pub(crate) mod physical_planner;
 pub mod plan;
+pub mod rewrite;
+pub mod schema_provider;
 pub mod schemas;
 pub mod sql_to_plan;
-pub mod types;
+
+pub(crate) mod mod_prelude {
+    pub use super::StreamSchemaProvider;
+}
+
+pub use schema_provider::{LogicalBatchInput, StreamSchemaProvider, StreamTable};
 
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
-use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema};
+use datafusion::common::tree_node::TreeNode;
 use datafusion::common::{Result, plan_err};
-use datafusion::datasource::DefaultTableSource;
 use datafusion::error::DataFusionError;
-use datafusion::execution::{FunctionRegistry, SessionStateDefaults};
-use datafusion::logical_expr::expr_rewriter::FunctionRewrite;
-use datafusion::logical_expr::planner::ExprPlanner;
-use datafusion::logical_expr::{
-    AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF,
-};
-use datafusion::optimizer::Analyzer;
+use datafusion::execution::SessionStateBuilder;
+use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::prelude::SessionConfig;
 use datafusion::sql::TableReference;
-use datafusion::sql::planner::ContextProvider;
-use unicase::UniCase;
-
-use crate::sql::planner::schemas::window_arrow_struct;
-use crate::sql::planner::types::{PlaceholderUdf, PlanningOptions};
-
-/// Catalog provider for streaming SQL queries.
-/// Manages tables, UDFs, and configuration for streaming SQL planning.
-#[derive(Clone, Default)]
-pub struct StreamSchemaProvider {
-    pub source_defs: HashMap<String, String>,
-    tables: HashMap<UniCase<String>, StreamTable>,
-    pub functions: HashMap<String, Arc<ScalarUDF>>,
-    pub aggregate_functions: HashMap<String, Arc<AggregateUDF>>,
-    pub window_functions: HashMap<String, Arc<WindowUDF>>,
-    config_options: datafusion::config::ConfigOptions,
-    pub expr_planners: Vec<Arc<dyn ExprPlanner>>,
-    pub planning_options: PlanningOptions,
-    pub analyzer: Analyzer,
-}
+use datafusion::sql::sqlparser::ast::{OneOrManyWithParens, Statement};
+use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
+use datafusion::sql::sqlparser::parser::Parser;
+use tracing::debug;
+
+use crate::datastream::logical::{LogicalProgram, ProgramConfig};
+use crate::datastream::optimizers::ChainingOptimizer;
+use crate::sql::catalog::insert::Insert;
+use crate::sql::catalog::table::Table as CatalogTable;
+use crate::sql::functions::{is_json_union, serialize_outgoing_json};
+use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::planner::extension::projection::ProjectionExtension;
+use crate::sql::planner::extension::sink::SinkExtension;
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use crate::sql::planner::plan::rewrite_plan;
+use crate::sql::planner::rewrite::{SinkInputRewriter, SourceMetadataVisitor};
+use crate::sql::types::SqlConfig;
+
+// ── Compilation pipeline ──────────────────────────────────────────────
 
-/// Represents a table registered in the streaming SQL context
 #[derive(Clone, Debug)]
-pub enum StreamTable {
-    Source {
-        name: String,
-        schema: Arc<Schema>,
-        event_time_field: Option<String>,
-        watermark_field: Option<String>,
-    },
-    Sink {
-        name: String,
-        schema: Arc<Schema>,
-    },
-    Memory {
-        name: String,
-        logical_plan: Option<LogicalPlan>,
-    },
+pub struct CompiledSql {
+    pub program: LogicalProgram,
+    pub connection_ids: Vec<i64>,
 }
 
-impl StreamTable {
-    pub fn name(&self) -> &str {
-        match self {
-            StreamTable::Source { name, .. } => name,
-            StreamTable::Sink { name, .. } => name,
-            StreamTable::Memory { name, .. } => name,
-        }
-    }
-
-    pub fn get_fields(&self) -> Vec<Arc<Field>> {
-        match self {
-            StreamTable::Source { schema, .. } => schema.fields().to_vec(),
-            StreamTable::Sink { schema, .. } => schema.fields().to_vec(),
-            StreamTable::Memory { .. } => vec![],
-        }
-    }
+pub fn parse_sql_statements(
+    sql: &str,
+) -> std::result::Result<Vec<Statement>, datafusion::sql::sqlparser::parser::ParserError> {
+    Parser::parse_sql(&FunctionStreamDialect {}, sql)
 }
 
-#[derive(Debug)]
-struct LogicalBatchInput {
-    table_name: String,
-    schema: Arc<Schema>,
-}
-
-impl datafusion::datasource::TableProvider for LogicalBatchInput {
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    fn schema(&self) -> Arc<Schema> {
-        self.schema.clone()
-    }
-
-    fn table_type(&self) -> datafusion::datasource::TableType {
-        datafusion::datasource::TableType::Base
-    }
-
-    fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>(
-        &'life0 self,
-        _state: &'life1 dyn datafusion::catalog::Session,
-        _projection: Option<&'life2 Vec<usize>>,
-        _filters: &'life3 [Expr],
-        _limit: Option<usize>,
-    ) -> std::pin::Pin<
-        Box<
-            dyn std::future::Future<
-                    Output = Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>>,
-                > + Send
-                + 'async_trait,
-        >,
-    >
-    where
-        'life0: 'async_trait,
-        'life1: 'async_trait,
-        'life2: 'async_trait,
-        'life3: 'async_trait,
-        Self: 'async_trait,
+fn try_handle_set_variable(
+    statement: &Statement,
+    schema_provider: &mut StreamSchemaProvider,
+) -> Result<bool> {
+    if let Statement::SetVariable {
+        variables, value, ..
+    } = statement
     {
-        unimplemented!("LogicalBatchInput is for planning only")
-    }
-}
-
-fn create_table(table_name: String, schema: Arc<Schema>) -> Arc<dyn TableSource> {
-    let table_provider = LogicalBatchInput { table_name, schema };
-    let wrapped = Arc::new(table_provider);
-    let provider = DefaultTableSource::new(wrapped);
-    Arc::new(provider)
-}
-
-impl StreamSchemaProvider {
-    pub fn new() -> Self {
-        let mut registry = Self {
-            ..Default::default()
+        let OneOrManyWithParens::One(opt) = variables else {
+            return plan_err!("invalid syntax for `SET` call");
         };
 
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "hop",
-                vec![
-                    DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
-                    DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
-                ],
-                window_arrow_struct(),
-            ))
-            .unwrap();
-
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "tumble",
-                vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
-                window_arrow_struct(),
-            ))
-            .unwrap();
-
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "session",
-                vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
-                window_arrow_struct(),
-            ))
-            .unwrap();
-
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "unnest",
-                vec![DataType::List(Arc::new(Field::new(
-                    "field",
-                    DataType::Utf8,
-                    true,
-                )))],
-                DataType::Utf8,
-            ))
-            .unwrap();
-
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "row_time",
-                vec![],
-                DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None),
-            ))
-            .unwrap();
-
-        for p in SessionStateDefaults::default_scalar_functions() {
-            registry.register_udf(p).unwrap();
-        }
-        for p in SessionStateDefaults::default_aggregate_functions() {
-            registry.register_udaf(p).unwrap();
-        }
-        for p in SessionStateDefaults::default_window_functions() {
-            registry.register_udwf(p).unwrap();
-        }
-        for p in SessionStateDefaults::default_expr_planners() {
-            registry.register_expr_planner(p).unwrap();
+        if opt.to_string() != "updating_ttl" {
+            return plan_err!(
+                "invalid option '{}'; supported options are 'updating_ttl'",
+                opt
+            );
         }
 
-        registry
-    }
-
-    pub fn add_source_table(
-        &mut self,
-        name: String,
-        schema: Arc<Schema>,
-        event_time_field: Option<String>,
-        watermark_field: Option<String>,
-    ) {
-        self.tables.insert(
-            UniCase::new(name.clone()),
-            StreamTable::Source {
-                name,
-                schema,
-                event_time_field,
-                watermark_field,
-            },
-        );
-    }
-
-    pub fn add_sink_table(&mut self, name: String, schema: Arc<Schema>) {
-        self.tables.insert(
-            UniCase::new(name.clone()),
-            StreamTable::Sink { name, schema },
-        );
-    }
+        if value.len() != 1 {
+            return plan_err!("invalid `SET updating_ttl` call; expected exactly one expression");
+        }
 
-    fn insert_table(&mut self, table: StreamTable) {
-        self.tables
-            .insert(UniCase::new(table.name().to_string()), table);
-    }
+        let duration = duration_from_sql_expr(&value[0])?;
+        schema_provider.planning_options.ttl = duration;
 
-    pub fn get_table(&self, table_name: impl Into<String>) -> Option<&StreamTable> {
-        self.tables.get(&UniCase::new(table_name.into()))
+        return Ok(true);
     }
 
-    pub fn get_table_mut(&mut self, table_name: impl Into<String>) -> Option<&mut StreamTable> {
-        self.tables.get_mut(&UniCase::new(table_name.into()))
-    }
+    Ok(false)
 }
 
-impl ContextProvider for StreamSchemaProvider {
-    fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
-        let table = self
-            .get_table(name.to_string())
-            .ok_or_else(|| DataFusionError::Plan(format!("Table {name} not found")))?;
-
-        let fields = table.get_fields();
-        let schema = Arc::new(Schema::new_with_metadata(
-            fields
-                .iter()
-                .map(|f| f.as_ref().clone())
-                .collect::<Vec<Field>>(),
-            HashMap::new(),
-        ));
-        Ok(create_table(name.to_string(), schema))
-    }
-
-    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
-        self.functions.get(name).cloned()
+/// Converts a SQL expression into a [`std::time::Duration`].
+///
+/// Two shapes are accepted, both of which are handed to `parse_interval_to_duration`:
+/// * an `INTERVAL` expression whose value is a single-quoted string, and
+/// * a bare single-quoted string.
+///
+/// # Errors
+///
+/// Returns a planning error for any other expression, for an `INTERVAL` whose value is not
+/// a single-quoted string, or when the string itself cannot be parsed.
+fn duration_from_sql_expr(
+    expr: &datafusion::sql::sqlparser::ast::Expr,
+) -> Result<std::time::Duration> {
+    use datafusion::sql::sqlparser::ast::{Expr as SqlExpr, Value as SqlValue, ValueWithSpan};
+
+    match expr {
+        SqlExpr::Interval(interval) => {
+            // Only a string literal is understood as the interval's value.
+            let SqlExpr::Value(ValueWithSpan {
+                value: SqlValue::SingleQuotedString(text),
+                ..
+            }) = interval.value.as_ref()
+            else {
+                return plan_err!(
+                    "expected interval string literal, found {}",
+                    interval.value
+                );
+            };
+            parse_interval_to_duration(text)
+        }
+        SqlExpr::Value(ValueWithSpan {
+            value: SqlValue::SingleQuotedString(text),
+            ..
+        }) => parse_interval_to_duration(text),
+        unsupported => plan_err!("expected an interval expression, found {unsupported}"),
     }
+}
 
-    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
-        self.aggregate_functions.get(name).cloned()
+/// Parses an interval string of the form `<value> <unit>` (for example `10 minutes`) into a
+/// [`std::time::Duration`].
+///
+/// `<value>` must parse as a `u64`. `<unit>` is matched case-insensitively against
+/// `second`/`seconds`/`s`, `minute`/`minutes`/`min`, `hour`/`hours`/`h` and `day`/`days`/`d`.
+///
+/// # Errors
+///
+/// Returns a planning error when the string does not consist of exactly two
+/// whitespace-separated tokens, when the value is not a valid `u64`, when the unit is not
+/// supported, or when the interval does not fit in a `u64` number of seconds.
+fn parse_interval_to_duration(s: &str) -> Result<std::time::Duration> {
+    // `split_whitespace` already ignores leading and trailing whitespace.
+    let parts: Vec<&str> = s.split_whitespace().collect();
+    let &[number, unit] = parts.as_slice() else {
+        return plan_err!("invalid interval string '{s}'; expected '<value> <unit>'");
+    };
+    let value: u64 = number
+        .parse()
+        .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {number}")))?;
+    let seconds_per_unit: u64 = match unit.to_lowercase().as_str() {
+        "second" | "seconds" | "s" => 1,
+        "minute" | "minutes" | "min" => 60,
+        "hour" | "hours" | "h" => 3600,
+        "day" | "days" | "d" => 86400,
+        unit => return plan_err!("unsupported interval unit '{unit}'"),
+    };
+
+    // The value comes straight from user SQL: multiply with overflow checking so that an
+    // oversized interval is reported instead of panicking (debug) or wrapping (release).
+    match value.checked_mul(seconds_per_unit) {
+        Some(seconds) => Ok(std::time::Duration::from_secs(seconds)),
+        None => plan_err!("interval '{s}' is too large"),
     }
+}
 
-    fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
-        None
+/// Groups the inputs of every named [`SinkExtension`] found at the root of `extensions`.
+///
+/// Plans that are not extensions, extensions that are not sinks, and sinks without a node
+/// name are ignored. Inputs of sinks sharing the same name are appended to the same entry.
+fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap<NamedNode, Vec<LogicalPlan>> {
+    let mut inputs_by_sink: HashMap<NamedNode, Vec<LogicalPlan>> = HashMap::new();
+    for plan in extensions {
+        let LogicalPlan::Extension(ext) = plan else {
+            continue;
+        };
+        let Some(sink) = ext.node.as_any().downcast_ref::<SinkExtension>() else {
+            continue;
+        };
+        let Some(sink_name) = sink.node_name() else {
+            continue;
+        };
+        inputs_by_sink
+            .entry(sink_name)
+            .or_default()
+            .extend(sink.inputs().into_iter().cloned());
     }
+    inputs_by_sink
+}
 
-    fn options(&self) -> &datafusion::config::ConfigOptions {
-        &self.config_options
-    }
+/// Inserts a key calculation in front of a sink whose table declares partition expressions.
+///
+/// Plans that are not a [`SinkExtension`], and sinks whose table reports no (or an empty list
+/// of) partition expressions, are returned unchanged. Otherwise every input of the plan is
+/// wrapped in a [`KeyCalculationExtension`] keyed by those expressions, the keyed inputs feed
+/// a shuffled `"unkey"` [`ProjectionExtension`] selecting the sink schema's columns, and the
+/// sink is rebuilt on top of that projection.
+fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<LogicalPlan> {
+    use datafusion::prelude::col;
+
+    let LogicalPlan::Extension(ref ext) = plan else {
+        return Ok(plan);
+    };
+    let Some(sink) = ext.node.as_any().downcast_ref::<SinkExtension>() else {
+        return Ok(plan);
+    };
+    let Some(partition_exprs) = sink.table.partition_exprs() else {
+        return Ok(plan);
+    };
+    if partition_exprs.is_empty() {
+        return Ok(plan);
+    }
+
+    // One key calculation per sink input, all keyed by the same partition expressions.
+    let keyed_inputs = plan
+        .inputs()
+        .into_iter()
+        .map(|input| {
+            let key_calculation = KeyCalculationExtension {
+                name: Some("key-calc-partition".to_string()),
+                schema: input.schema().clone(),
+                input: input.clone(),
+                keys: KeysOrExprs::Exprs(partition_exprs.clone()),
+            };
+            Ok(LogicalPlan::Extension(Extension {
+                node: Arc::new(key_calculation),
+            }))
+        })
+        .collect::<Result<_>>()?;
+
+    // Project back onto the sink's own columns after the shuffle.
+    let sink_columns = sink
+        .schema()
+        .iter()
+        .map(|(_, field)| col(field.name()))
+        .collect();
+    let unkey_projection =
+        ProjectionExtension::new(keyed_inputs, Some("unkey".to_string()), sink_columns)
+            .shuffled();
+    let unkey = LogicalPlan::Extension(Extension {
+        node: Arc::new(unkey_projection),
+    });
+
+    let rebuilt_sink = sink.with_exprs_and_inputs(vec![], vec![unkey])?;
+    Ok(LogicalPlan::Extension(Extension {
+        node: Arc::new(rebuilt_sink),
+    }))
+}
 
-    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>> {
-        self.window_functions.get(name).cloned()
+/// Rewrites the sink plans and adds key calculations where a sink needs them.
+///
+/// The inputs of all named sinks are collected first, then every plan is run through a
+/// [`SinkInputRewriter`] sharing that map. Plans for which the rewriter reports
+/// `was_removed` are left out of the result; every remaining plan is passed through
+/// `maybe_add_key_extension_to_sink`.
+fn rewrite_sinks(extensions: Vec<LogicalPlan>) -> Result<Vec<LogicalPlan>> {
+    let mut sink_inputs = build_sink_inputs(&extensions);
+    let mut retained = vec![];
+    for extension in extensions {
+        let mut rewriter = SinkInputRewriter::new(&mut sink_inputs);
+        let rewritten = extension.rewrite(&mut rewriter)?.data;
+        if rewriter.was_removed {
+            continue;
+        }
+        retained.push(rewritten);
     }
 
-    fn udf_names(&self) -> Vec<String> {
-        self.functions.keys().cloned().collect()
-    }
+    retained
+        .into_iter()
+        .map(maybe_add_key_extension_to_sink)
+        .collect()
+}
 
-    fn udaf_names(&self) -> Vec<String> {
-        self.aggregate_functions.keys().cloned().collect()
-    }
+pub async fn parse_and_get_arrow_program( // Plans every statement in `query` and compiles the result into a `LogicalProgram`.
+    query: String, // SQL text; `parse_sql_statements` may yield several statements
+    mut schema_provider: StreamSchemaProvider, // mutated here: SET options and tables defined by the query are stored in it
+    _config: SqlConfig, // currently unused
+) -> Result<CompiledSql> {
+    let mut config = SessionConfig::new(); // planning session: all repartitioning disabled, one target partition
+    config
+        .options_mut()
+        .optimizer
+        .enable_round_robin_repartition = false;
+    config.options_mut().optimizer.repartition_aggregations = false;
+    config.options_mut().optimizer.repartition_windows = false;
+    config.options_mut().optimizer.repartition_sorts = false;
+    config.options_mut().optimizer.repartition_joins = false;
+    config.options_mut().execution.target_partitions = 1;
+
+    let session_state = SessionStateBuilder::new()
+        .with_config(config)
+        .with_default_features()
+        .with_physical_optimizer_rules(vec![]) // no physical optimizer rules are installed
+        .build();
+
+    let mut inserts = vec![];
+    for statement in parse_sql_statements(&query)? {
+        if try_handle_set_variable(&statement, &mut schema_provider)? {
+            continue; // statement was consumed as a SET option; nothing to plan
+        }
 
-    fn udwf_names(&self) -> Vec<String> {
-        self.window_functions.keys().cloned().collect()
+        if let Some(table) = CatalogTable::try_from_statement(&statement, &schema_provider)? {
+            schema_provider.insert_catalog_table(table); // statement defined a catalog table
+        } else {
+            inserts.push(Insert::try_from_statement(&statement, &schema_provider)?); // everything else must convert to an `Insert`
+        };
     }
 
-    fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
-        &self.expr_planners
+    if inserts.is_empty() {
+        return plan_err!("The provided SQL does not contain a query");
     }
-}
 
-impl FunctionRegistry for StreamSchemaProvider {
-    fn udfs(&self) -> HashSet<String> {
-        self.functions.keys().cloned().collect()
-    }
+    let mut used_connections = HashSet::new(); // connection ids referenced by sources and sinks; returned in `CompiledSql`
+    let mut extensions = vec![]; // one sink extension plan per insert that targets a real or preview sink
 
-    fn udf(&self, name: &str) -> Result<Arc<ScalarUDF>> {
-        if let Some(f) = self.functions.get(name) {
-            Ok(Arc::clone(f))
-        } else {
-            plan_err!("No UDF with name {name}")
-        }
-    }
+    for insert in inserts {
+        let (plan, sink_name) = match insert {
+            Insert::InsertQuery {
+                sink_name,
+                logical_plan,
+            } => (logical_plan, Some(sink_name)),
+            Insert::Anonymous { logical_plan } => (logical_plan, None), // no sink name: handled as a preview query below
+        };
 
-    fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>> {
-        if let Some(f) = self.aggregate_functions.get(name) {
-            Ok(Arc::clone(f))
-        } else {
-            plan_err!("No UDAF with name {name}")
-        }
-    }
+        let mut plan_rewrite = rewrite_plan(plan, &schema_provider)?;
 
-    fn udwf(&self, name: &str) -> Result<Arc<WindowUDF>> {
-        if let Some(f) = self.window_functions.get(name) {
-            Ok(Arc::clone(f))
-        } else {
-            plan_err!("No UDWF with name {name}")
+        if plan_rewrite
+            .schema()
+            .fields()
+            .iter()
+            .any(|f| is_json_union(f.data_type()))
+        {
+            plan_rewrite = serialize_outgoing_json(&schema_provider, Arc::new(plan_rewrite)); // only when an output field has a JSON-union type
         }
-    }
 
-    fn register_function_rewrite(
-        &mut self,
-        rewrite: Arc<dyn FunctionRewrite + Send + Sync>,
-    ) -> Result<()> {
-        self.analyzer.add_function_rewrite(rewrite);
-        Ok(())
+        debug!("Plan = {}", plan_rewrite.display_graphviz());
+
+        let mut metadata = SourceMetadataVisitor::new(&schema_provider);
+        plan_rewrite.visit_with_subqueries(&mut metadata)?;
+        used_connections.extend(metadata.connection_ids.iter()); // connection ids collected by the visitor
+
+        let sink = match sink_name {
+            Some(sink_name) => {
+                let table = schema_provider
+                    .get_catalog_table_mut(&sink_name)
+                    .ok_or_else(|| {
+                        DataFusionError::Plan(format!("Connection {sink_name} not found"))
+                    })?;
+                match table {
+                    CatalogTable::ConnectorTable(c) => {
+                        if let Some(id) = c.id {
+                            used_connections.insert(id);
+                        }
+
+                        SinkExtension::new(
+                            TableReference::bare(sink_name),
+                            table.clone(),
+                            plan_rewrite.schema().clone(),
+                            Arc::new(plan_rewrite),
+                        )
+                    }
+                    CatalogTable::MemoryTable { logical_plan, .. } => {
+                        if logical_plan.is_some() {
+                            return plan_err!("Can only insert into a memory table once");
+                        }
+                        logical_plan.replace(plan_rewrite); // the plan is stored on the memory table; no sink is emitted
+                        continue;
+                    }
+                    CatalogTable::LookupTable(_) => {
+                        plan_err!("lookup (temporary) tables cannot be inserted into")
+                    }
+                    CatalogTable::TableFromQuery { .. } => {
+                        plan_err!(
+                            "shouldn't be inserting more data into a table made with CREATE TABLE AS"
+                        )
+                    }
+                    CatalogTable::PreviewSink { .. } => {
+                        plan_err!("queries shouldn't be able insert into preview sink.")
+                    }
+                }
+            }
+            None => SinkExtension::new( // anonymous query: output goes to a sink named "preview"
+                TableReference::parse_str("preview"),
+                CatalogTable::PreviewSink {
+                    logical_plan: plan_rewrite.clone(),
+                },
+                plan_rewrite.schema().clone(),
+                Arc::new(plan_rewrite),
+            ),
+        };
+        extensions.push(LogicalPlan::Extension(Extension {
+            node: Arc::new(sink?), // the error arms above surface here
+        }));
     }
 
-    fn register_udf(&mut self, udf: Arc<ScalarUDF>) -> Result<Option<Arc<ScalarUDF>>> {
-        Ok(self.functions.insert(udf.name().to_string(), udf))
-    }
+    let extensions = rewrite_sinks(extensions)?;
 
-    fn register_udaf(&mut self, udaf: Arc<AggregateUDF>) -> Result<Option<Arc<AggregateUDF>>> {
-        Ok(self
-            .aggregate_functions
-            .insert(udaf.name().to_string(), udaf))
+    let mut plan_to_graph_visitor =
+        physical_planner::PlanToGraphVisitor::new(&schema_provider, &session_state);
+    for extension in extensions {
+        plan_to_graph_visitor.add_plan(extension)?; // all plans are added to the same graph
     }
+    let graph = plan_to_graph_visitor.into_graph();
 
-    fn register_udwf(&mut self, udwf: Arc<WindowUDF>) -> Result<Option<Arc<WindowUDF>>> {
-        Ok(self.window_functions.insert(udwf.name().to_string(), udwf))
-    }
+    let mut program = LogicalProgram::new(graph, ProgramConfig::default());
 
-    fn register_expr_planner(&mut self, expr_planner: Arc<dyn ExprPlanner>) -> Result<()> {
-        self.expr_planners.push(expr_planner);
-        Ok(())
-    }
+    program.optimize(&ChainingOptimizer {});
 
-    fn expr_planners(&self) -> Vec<Arc<dyn ExprPlanner>> {
-        self.expr_planners.clone()
-    }
+    Ok(CompiledSql {
+        program,
+        connection_ids: used_connections.into_iter().collect(),
+    })
 }
diff --git a/src/sql/planner/physical_planner.rs b/src/sql/planner/physical_planner.rs
new file mode 100644
index 00000000..e7e1cf60
--- /dev/null
+++ b/src/sql/planner/physical_planner.rs
@@ -0,0 +1,396 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::thread;
+use std::time::Duration;
+
+use datafusion::arrow::datatypes::IntervalMonthDayNanoType;
+use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor};
+use datafusion::common::{
+    DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, Spans, plan_err,
+};
+use datafusion::execution::context::SessionState;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::functions::datetime::date_bin;
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNode};
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner};
+use datafusion_proto::protobuf::{PhysicalExprNode, PhysicalPlanNode};
+use datafusion_proto::{
+    physical_plan::AsExecutionPlan,
+    protobuf::{AggregateMode, physical_plan_node::PhysicalPlanType},
+};
+use petgraph::graph::{DiGraph, NodeIndex};
+use prost::Message;
+use tokio::runtime::Builder;
+use tokio::sync::oneshot;
+
+use async_trait::async_trait;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+
+use crate::datastream::logical::{LogicalEdge, LogicalGraph, LogicalNode};
+use crate::sql::physical::{
+    DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec,
+};
+use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::extension::debezium::{
+    DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME,
+};
+use crate::sql::planner::extension::key_calculation::KeyCalculationExtension;
+use crate::sql::planner::extension::{NamedNode, NodeWithIncomingEdges, StreamExtension};
+use crate::sql::planner::schemas::add_timestamp_field_arrow;
+use crate::types::{FsSchema, FsSchemaRef};
+
+pub(crate) struct PlanToGraphVisitor<'a> { // Plan visitor that turns `StreamExtension` nodes into a logical graph.
+    graph: DiGraph<LogicalNode, LogicalEdge>, // graph under construction; handed out by `into_graph`
+    output_schemas: HashMap<NodeIndex, FsSchemaRef>, // output schema recorded for every planned node
+    named_nodes: HashMap<NamedNode, NodeIndex>, // named extensions that were already planned, so they are reused
+    traversal: Vec<Vec<NodeIndex>>, // stack of input-node lists: pushed in `f_down`, popped in `f_up`
+    planner: Planner<'a>, // passed to `StreamExtension::plan_node`
+}
+
+impl<'a> PlanToGraphVisitor<'a> {
+    /// Creates a visitor with an empty graph and no planned nodes.
+    ///
+    /// `schema_provider` and `session_state` are forwarded to the internal [`Planner`].
+    pub fn new(schema_provider: &'a StreamSchemaProvider, session_state: &'a SessionState) -> Self {
+        let planner = Planner::new(schema_provider, session_state);
+        Self {
+            graph: DiGraph::default(),
+            output_schemas: HashMap::default(),
+            named_nodes: HashMap::default(),
+            traversal: Vec::new(),
+            planner,
+        }
+    }
+}
+
+pub(crate) struct Planner<'a> { // Bundles DataFusion's physical planner with the state it plans against.
+    schema_provider: &'a StreamSchemaProvider, // passed to `try_into_physical_plan` in `split_physical_plan`
+    planner: DefaultPhysicalPlanner, // built in `new` with `FsExtensionPlanner` as its extension planner
+    session_state: &'a SessionState, // session used for every physical plan and expression created here
+}
+
+impl<'a> Planner<'a> {
+    /// Builds a planner whose [`DefaultPhysicalPlanner`] plans extension nodes through
+    /// [`FsExtensionPlanner`].
+    pub(crate) fn new(
+        schema_provider: &'a StreamSchemaProvider,
+        session_state: &'a SessionState,
+    ) -> Self {
+        Self {
+            schema_provider,
+            planner: DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(
+                FsExtensionPlanner {},
+            )]),
+            session_state,
+        }
+    }
+
+    /// Runs DataFusion's asynchronous physical planning for `plan` to completion from
+    /// synchronous code.
+    ///
+    /// The planning future is driven on a dedicated scoped thread that owns its own
+    /// current-thread Tokio runtime; the calling thread blocks until that thread has
+    /// finished. In debug builds the thread is given a 10 MB stack.
+    ///
+    /// # Errors
+    ///
+    /// Returns the planner's error, or an I/O error when the thread cannot be spawned or
+    /// the runtime cannot be built.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the planning thread itself panics (propagated by `thread::scope`).
+    pub(crate) fn sync_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
+        let fut = self.planner.create_physical_plan(plan, self.session_state);
+        let (tx, mut rx) = oneshot::channel();
+        thread::scope(|s| {
+            let builder = thread::Builder::new();
+            let builder = if cfg!(debug_assertions) {
+                builder.stack_size(10_000_000)
+            } else {
+                builder
+            };
+            builder
+                .spawn_scoped(s, move || {
+                    let result = match Builder::new_current_thread().enable_all().build() {
+                        Ok(rt) => rt.block_on(fut),
+                        Err(e) => Err(DataFusionError::from(e)),
+                    };
+                    // `rx` outlives the scope, so the receiver is still alive here; a failed
+                    // send would leave nothing useful to do anyway.
+                    let _ = tx.send(result);
+                })
+                // The handle borrows the scope and cannot leave it; the scope joins the
+                // thread on exit.
+                .map(|_| ())
+        })
+        .map_err(DataFusionError::from)?;
+
+        // The scope has joined the thread, so its result has already been sent.
+        rx.try_recv().map_err(|e| {
+            DataFusionError::Internal(format!("planning thread produced no result: {e}"))
+        })?
+    }
+
+    /// Lowers the logical `expr` to a physical expression resolved against
+    /// `input_dfschema`, using the wrapped DataFusion planner and this planner's session
+    /// state.
+    pub(crate) fn create_physical_expr(
+        &self,
+        expr: &Expr,
+        input_dfschema: &DFSchema,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        let session_state = self.session_state;
+        self.planner
+            .create_physical_expr(expr, input_dfschema, session_state)
+    }
+
+    /// Plans `expr` against `schema` and returns the protobuf encoding of the resulting
+    /// physical expression, serialized with [`DefaultPhysicalExtensionCodec`].
+    pub(crate) fn serialize_as_physical_expr(
+        &self,
+        expr: &Expr,
+        schema: &DFSchema,
+    ) -> Result<Vec<u8>> {
+        self.create_physical_expr(expr, schema)
+            .and_then(|physical| {
+                serialize_physical_expr(&physical, &DefaultPhysicalExtensionCodec {})
+            })
+            .map(|proto| proto.encode_to_vec())
+    }
+
+    /// Splits the physical plan of `aggregate` into its partial and final halves.
+    ///
+    /// `aggregate` is planned with [`Planner::sync_plan`] and must yield a physical
+    /// aggregate node in `Final` mode. Its input is taken out as the partial aggregation,
+    /// and the final node is re-rooted on an [`FsMemExec`] named `"partial"` that carries
+    /// the partial plan's output schema.
+    ///
+    /// * `key_indices` - key column indices stored in the returned partial schema.
+    /// * `add_timestamp_field` - when `true`, the partial schema is passed through
+    ///   `add_timestamp_field_arrow` and the timestamp index is the previous field count
+    ///   (presumably the field is appended at the end; confirm against that helper).
+    ///   When `false`, the last existing field is used as the timestamp column.
+    ///
+    /// # Errors
+    ///
+    /// Returns a planning error when the plan is not a final aggregate, when it has no
+    /// input, or when `add_timestamp_field` is `false` and the partial schema has no
+    /// fields. Planning, (de)serialization and runtime-environment errors are propagated.
+    pub(crate) fn split_physical_plan(
+        &self,
+        key_indices: Vec<usize>,
+        aggregate: &LogicalPlan,
+        add_timestamp_field: bool,
+    ) -> Result<SplitPlanOutput> {
+        let physical_plan = self.sync_plan(aggregate)?;
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::Planning,
+        };
+        let mut physical_plan_node =
+            PhysicalPlanNode::try_from_physical_plan(physical_plan, &codec)?;
+        let PhysicalPlanType::Aggregate(mut final_aggregate_proto) = physical_plan_node
+            .physical_plan_type
+            .take()
+            .ok_or_else(|| DataFusionError::Plan("missing physical plan type".to_string()))?
+        else {
+            return plan_err!("unexpected physical plan type");
+        };
+        let AggregateMode::Final = final_aggregate_proto.mode() else {
+            return plan_err!("unexpected physical plan type");
+        };
+
+        let partial_aggregation_plan = *final_aggregate_proto
+            .input
+            .take()
+            .ok_or_else(|| DataFusionError::Plan("missing input".to_string()))?;
+
+        // The partial plan is decoded only to learn its output schema.
+        let runtime_env = RuntimeEnvBuilder::new().build()?;
+        let partial_aggregation_exec_plan = partial_aggregation_plan.try_into_physical_plan(
+            self.schema_provider,
+            &runtime_env,
+            &codec,
+        )?;
+
+        let partial_schema = partial_aggregation_exec_plan.schema();
+        let final_input_table_provider = FsMemExec::new("partial".into(), partial_schema.clone());
+
+        final_aggregate_proto.input = Some(Box::new(PhysicalPlanNode::try_from_physical_plan(
+            Arc::new(final_input_table_provider),
+            &codec,
+        )?));
+
+        let finish_plan = PhysicalPlanNode {
+            physical_plan_type: Some(PhysicalPlanType::Aggregate(final_aggregate_proto)),
+        };
+
+        let field_count = partial_schema.fields().len();
+        let (partial_schema, timestamp_index) = if add_timestamp_field {
+            (
+                add_timestamp_field_arrow((*partial_schema).clone()),
+                field_count,
+            )
+        } else {
+            // `field_count - 1` would underflow on an empty schema.
+            let Some(last_index) = field_count.checked_sub(1) else {
+                return plan_err!(
+                    "partial aggregate schema has no fields, so it has no timestamp column"
+                );
+            };
+            (partial_schema.clone(), last_index)
+        };
+
+        let partial_schema = FsSchema::new_keyed(partial_schema, timestamp_index, key_indices);
+
+        Ok(SplitPlanOutput {
+            partial_aggregation_plan,
+            partial_schema,
+            finish_plan,
+        })
+    }
+
+    /// Builds the serialized physical expression `date_bin(<width>, _timestamp)`.
+    ///
+    /// `width` becomes a month-day-nano interval with zero months and days, and the
+    /// expression is planned against `input_schema`, which must contain an unqualified
+    /// `_timestamp` column for planning to succeed.
+    ///
+    /// # Errors
+    ///
+    /// Returns a planning error when `width` does not fit in an `i64` number of
+    /// nanoseconds; planning and serialization errors are propagated.
+    pub fn binning_function_proto(
+        &self,
+        width: Duration,
+        input_schema: DFSchemaRef,
+    ) -> Result<PhysicalExprNode> {
+        // `as_nanos` yields a `u128`; a plain `as i64` cast would silently truncate it.
+        let width_nanos = i64::try_from(width.as_nanos()).map_err(|_| {
+            DataFusionError::Plan(format!(
+                "bin width {width:?} does not fit in an i64 number of nanoseconds"
+            ))
+        })?;
+
+        let date_bin = date_bin().call(vec![
+            Expr::Literal(
+                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(
+                    0,
+                    0,
+                    width_nanos,
+                ))),
+                None,
+            ),
+            Expr::Column(datafusion::common::Column {
+                relation: None,
+                name: "_timestamp".into(),
+                spans: Spans::new(),
+            }),
+        ]);
+
+        let binning_function = self.create_physical_expr(&date_bin, &input_schema)?;
+        serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})
+    }
+}
+
+/// Extension planner that gives DataFusion a physical node for the stream extensions.
+struct FsExtensionPlanner {}
+
+#[async_trait]
+impl ExtensionPlanner for FsExtensionPlanner {
+    /// Plans a single extension node.
+    ///
+    /// * A transparent [`StreamExtension`] named like the Debezium unrolling or
+    ///   to-Debezium extension is planned as the matching exec on top of the first
+    ///   physical input.
+    /// * Any other transparent extension yields `Ok(None)`.
+    /// * Every other node becomes an [`FsMemExec`] with the node's schema, named after the
+    ///   [`KeyCalculationExtension`] name when there is one and `"memory"` otherwise.
+    ///
+    /// # Errors
+    ///
+    /// Returns a planning error when a Debezium extension has no physical input or is not
+    /// of the expected concrete type; errors from the exec constructors are propagated.
+    async fn plan_extension(
+        &self,
+        _planner: &dyn PhysicalPlanner,
+        node: &dyn UserDefinedLogicalNode,
+        _logical_inputs: &[&LogicalPlan],
+        physical_inputs: &[Arc<dyn ExecutionPlan>],
+        _session_state: &SessionState,
+    ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
+        if let Ok::<&dyn StreamExtension, _>(stream_extension) = node.try_into() {
+            if stream_extension.transparent() {
+                match node.name() {
+                    DEBEZIUM_UNROLLING_EXTENSION_NAME => {
+                        let Some(unrolling) =
+                            node.as_any().downcast_ref::<DebeziumUnrollingExtension>()
+                        else {
+                            return plan_err!(
+                                "extension {} is not a DebeziumUnrollingExtension",
+                                node.name()
+                            );
+                        };
+                        let Some(input) = physical_inputs.first() else {
+                            return plan_err!("extension {} has no physical input", node.name());
+                        };
+                        return Ok(Some(Arc::new(DebeziumUnrollingExec::try_new(
+                            input.clone(),
+                            unrolling.primary_keys.clone(),
+                        )?)));
+                    }
+                    TO_DEBEZIUM_EXTENSION_NAME => {
+                        let Some(input) = physical_inputs.first() else {
+                            return plan_err!("extension {} has no physical input", node.name());
+                        };
+                        return Ok(Some(Arc::new(ToDebeziumExec::try_new(input.clone())?)));
+                    }
+                    _ => return Ok(None),
+                }
+            }
+        }
+
+        // Only the placeholder path needs the schema, so it is converted here rather than
+        // up front.
+        let schema = node.schema().as_ref().into();
+        let name = node
+            .as_any()
+            .downcast_ref::<KeyCalculationExtension>()
+            .and_then(|key_extension| key_extension.name.clone());
+        Ok(Some(Arc::new(FsMemExec::new(
+            name.unwrap_or_else(|| "memory".to_string()),
+            Arc::new(schema),
+        ))))
+    }
+}
+
+impl PlanToGraphVisitor<'_> {
+    /// Appends `index` to the innermost open frame of the traversal stack.
+    ///
+    /// Does nothing when the stack is empty.
+    fn add_index_to_traversal(&mut self, index: NodeIndex) {
+        let Some(frame) = self.traversal.last_mut() else {
+            return;
+        };
+        frame.push(index);
+    }
+
+    /// Visits `plan` with this visitor, adding its extensions to the graph.
+    ///
+    /// The traversal stack is cleared first, so leftovers from a previous plan cannot
+    /// leak into this one.
+    pub(crate) fn add_plan(&mut self, plan: LogicalPlan) -> Result<()> {
+        self.traversal.clear();
+        plan.visit(self).map(|_recursion| ())
+    }
+
+    /// Consumes the visitor and returns the graph built so far.
+    pub fn into_graph(self) -> LogicalGraph {
+        let Self { graph, .. } = self;
+        graph
+    }
+
+    /// Plans `extension` as a new graph node fed by `input_nodes`.
+    ///
+    /// The output schemas recorded for `input_nodes` are handed to
+    /// `StreamExtension::plan_node`; the resulting node is added to the graph and
+    /// connected to each input with the corresponding returned edge. The node's output
+    /// schema, and its name when it has one, are recorded for later lookups.
+    ///
+    /// # Errors
+    ///
+    /// Returns a planning error when a named extension was already planned or when an
+    /// input node has no recorded schema; errors from `plan_node` are propagated with the
+    /// operator attached as context.
+    pub fn build_extension(
+        &mut self,
+        input_nodes: Vec<NodeIndex>,
+        extension: &dyn StreamExtension,
+    ) -> Result<()> {
+        match extension.node_name() {
+            Some(node_name) if self.named_nodes.contains_key(&node_name) => {
+                return plan_err!(
+                    "extension {:?} has already been planned, shouldn't try again.",
+                    node_name
+                );
+            }
+            _ => {}
+        }
+
+        let mut input_schemas = Vec::with_capacity(input_nodes.len());
+        for index in &input_nodes {
+            let schema = self
+                .output_schemas
+                .get(index)
+                .ok_or_else(|| DataFusionError::Plan("missing input node".to_string()))?;
+            input_schemas.push(schema.clone());
+        }
+
+        let planned = extension
+            .plan_node(&self.planner, self.graph.node_count(), input_schemas)
+            .map_err(|e| e.context(format!("planning operator {extension:?}")))?;
+        let NodeWithIncomingEdges { node, edges } = planned;
+
+        let node_index = self.graph.add_node(node);
+        self.add_index_to_traversal(node_index);
+
+        // Inputs and edges are paired positionally.
+        for (source, edge) in input_nodes.into_iter().zip(edges) {
+            self.graph.add_edge(source, node_index, edge);
+        }
+
+        let output_schema = extension.output_schema().into();
+        self.output_schemas.insert(node_index, output_schema);
+
+        if let Some(node_name) = extension.node_name() {
+            self.named_nodes.insert(node_name, node_index);
+        }
+        Ok(())
+    }
+}
+
+impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> {
+    type Node = LogicalPlan;
+
+    /// Pre-order hook.
+    ///
+    /// Nodes that are not extensions, and transparent stream extensions, are passed over.
+    /// A named extension that was already planned is recorded as an input of the
+    /// innermost open frame and its subtree is skipped with `Jump`. Any other extension
+    /// with inputs opens a new frame that collects the indices of its inputs.
+    fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        let LogicalPlan::Extension(Extension { node }) = node else {
+            return Ok(TreeNodeRecursion::Continue);
+        };
+
+        let stream_extension: &dyn StreamExtension = node
+            .try_into()
+            .map_err(|e: DataFusionError| e.context("converting extension"))?;
+        if stream_extension.transparent() {
+            return Ok(TreeNodeRecursion::Continue);
+        }
+
+        if let Some(name) = stream_extension.node_name() {
+            if let Some(node_index) = self.named_nodes.get(&name) {
+                self.add_index_to_traversal(*node_index);
+                return Ok(TreeNodeRecursion::Jump);
+            }
+        }
+
+        if !node.inputs().is_empty() {
+            self.traversal.push(vec![]);
+        }
+
+        Ok(TreeNodeRecursion::Continue)
+    }
+
+    /// Post-order hook.
+    ///
+    /// Applies the same filtering as `f_down` (non-extensions, transparent extensions and
+    /// already planned named extensions are left alone). For every other extension the
+    /// frame opened in `f_down` is popped and used as the list of input nodes for
+    /// `build_extension`.
+    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        let LogicalPlan::Extension(Extension { node }) = node else {
+            return Ok(TreeNodeRecursion::Continue);
+        };
+
+        let stream_extension: &dyn StreamExtension = node
+            .try_into()
+            .map_err(|e: DataFusionError| e.context("planning extension"))?;
+
+        if stream_extension.transparent() {
+            return Ok(TreeNodeRecursion::Continue);
+        }
+
+        if let Some(name) = stream_extension.node_name() {
+            if self.named_nodes.contains_key(&name) {
+                return Ok(TreeNodeRecursion::Continue);
+            }
+        }
+
+        // A frame exists only for extensions with inputs (see `f_down`).
+        let input_nodes = if !node.inputs().is_empty() {
+            self.traversal.pop().unwrap_or_default()
+        } else {
+            vec![]
+        };
+        self.build_extension(input_nodes, stream_extension)?;
+
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
+
+pub(crate) struct SplitPlanOutput { // Result of `Planner::split_physical_plan`.
+    pub(crate) partial_aggregation_plan: PhysicalPlanNode, // the input taken out of the final aggregate node
+    pub(crate) partial_schema: FsSchema, // keyed schema built from the partial plan's output schema
+    pub(crate) finish_plan: PhysicalPlanNode, // final aggregate re-rooted on an `FsMemExec` named "partial"
+}
diff --git a/src/sql/planner/plan/aggregate.rs b/src/sql/planner/plan/aggregate.rs
index 6ed7499d..aad17edb 100644
--- a/src/sql/planner/plan/aggregate.rs
+++ b/src/sql/planner/plan/aggregate.rs
@@ -12,7 +12,7 @@ use crate::sql::planner::StreamSchemaProvider;
 use crate::sql::planner::extension::aggregate::AggregateExtension;
 use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
 use crate::sql::planner::plan::WindowDetectingVisitor;
-use crate::sql::planner::types::{
+use crate::sql::types::{
     DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
     schema_from_df_fields_with_metadata,
 };
diff --git a/src/sql/planner/plan/join.rs b/src/sql/planner/plan/join.rs
index f8225905..04a27e9b 100644
--- a/src/sql/planner/plan/join.rs
+++ b/src/sql/planner/plan/join.rs
@@ -16,9 +16,7 @@ use crate::sql::planner::StreamSchemaProvider;
 use crate::sql::planner::extension::join::JoinExtension;
 use crate::sql::planner::extension::key_calculation::KeyCalculationExtension;
 use crate::sql::planner::plan::WindowDetectingVisitor;
-use crate::sql::planner::types::{
-    WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata,
-};
+use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata};
 
 pub(crate) struct JoinRewriter<'a> {
     pub schema_provider: &'a StreamSchemaProvider,
diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs
index 8d1dd388..d497ca65 100644
--- a/src/sql/planner/plan/mod.rs
+++ b/src/sql/planner/plan/mod.rs
@@ -15,7 +15,7 @@ use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, Aggreg
 use crate::sql::planner::extension::join::JOIN_NODE_NAME;
 use crate::sql::planner::extension::remote_table::RemoteTableExtension;
 use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
-use crate::sql::planner::types::{
+use crate::sql::types::{
     DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
 };
 
diff --git a/src/sql/planner/plan/window_fn.rs b/src/sql/planner/plan/window_fn.rs
index 0bd3314f..66f673d1 100644
--- a/src/sql/planner/plan/window_fn.rs
+++ b/src/sql/planner/plan/window_fn.rs
@@ -12,7 +12,7 @@ use tracing::debug;
 use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
 use crate::sql::planner::extension::window_fn::WindowFunctionExtension;
 use crate::sql::planner::plan::{WindowDetectingVisitor, extract_column};
-use crate::sql::planner::types::{WindowType, fields_with_qualifiers, schema_from_df_fields};
+use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields};
 
 pub(crate) struct WindowFunctionRewriter;
 
diff --git a/src/sql/planner/rewrite/async_udf_rewriter.rs b/src/sql/planner/rewrite/async_udf_rewriter.rs
new file mode 100644
index 00000000..def3c4ef
--- /dev/null
+++ b/src/sql/planner/rewrite/async_udf_rewriter.rs
@@ -0,0 +1,118 @@
+use crate::sql::planner::extension::remote_table::RemoteTableExtension;
+use crate::sql::planner::extension::{ASYNC_RESULT_FIELD, AsyncUDFExtension};
+use crate::sql::planner::mod_prelude::StreamSchemaProvider;
+use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
+use datafusion::common::{Column, Result as DFResult, TableReference, plan_err};
+use datafusion::logical_expr::expr::ScalarFunction;
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan};
+use std::sync::Arc;
+use std::time::Duration;
+
+type AsyncSplitResult = (String, AsyncOptions, Vec<Expr>);
+
+#[derive(Debug, Clone, Copy)]
+pub struct AsyncOptions {
+    pub ordered: bool,
+    pub max_concurrency: usize,
+    pub timeout: Duration,
+}
+
+pub struct AsyncUdfRewriter<'a> {
+    provider: &'a StreamSchemaProvider,
+}
+
+impl<'a> AsyncUdfRewriter<'a> {
+    /// Creates a rewriter that looks up async UDF options through `provider`.
+    pub fn new(provider: &'a StreamSchemaProvider) -> Self {
+        Self { provider }
+    }
+
+    /// Swaps the async UDF call inside `expr`, if any, for a column reference to
+    /// `ASYNC_RESULT_FIELD` and returns the call's name, options and arguments
+    /// alongside the rewritten expression. More than one call is a planning error.
+    fn split_async(
+        expr: Expr,
+        provider: &StreamSchemaProvider,
+    ) -> DFResult<(Expr, Option<AsyncSplitResult>)> {
+        let mut call: Option<AsyncSplitResult> = None;
+        let rewritten = expr.transform_up(|node| {
+            let Expr::ScalarFunction(ScalarFunction { func, args }) = &node else {
+                return Ok(Transformed::no(node));
+            };
+            let Some(opts) = provider.get_async_udf_options(func.name()) else {
+                return Ok(Transformed::no(node));
+            };
+            if call.is_some() {
+                return plan_err!(
+                    "multiple async calls in the same expression, which is not allowed"
+                );
+            }
+            call = Some((func.name().to_string(), opts, args.clone()));
+            Ok(Transformed::yes(Expr::Column(Column::new_unqualified(ASYNC_RESULT_FIELD))))
+        })?;
+        Ok((rewritten.data, call))
+    }
+}
+
+impl TreeNodeRewriter for AsyncUdfRewriter<'_> {
+    type Node = LogicalPlan;
+
+    /// Moves a projection's async UDF call into an `AsyncUDFExtension`; rejects it elsewhere.
+    fn f_up(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        let LogicalPlan::Projection(mut projection) = node else {
+            for e in node.expressions() {
+                if let (_, Some((udf, _, _))) = Self::split_async(e, self.provider)? {
+                    return plan_err!(
+                        "async UDFs are only supported in projections, but {udf} was called in another context"
+                    );
+                }
+            }
+            return Ok(Transformed::no(node));
+        };
+
+        let mut args = None;
+        for e in projection.expr.iter_mut() {
+            let (new_e, Some(udf)) = Self::split_async(e.clone(), self.provider)? else {
+                continue;
+            };
+            if let Some((prev, _, _)) = args.replace(udf) {
+                return plan_err!(
+                    "Projection contains multiple async UDFs, which is not supported \
+                    \n(hint: two async UDF calls, {} and {}, appear in the same SELECT statement)",
+                    prev,
+                    args.unwrap().0
+                );
+            }
+            *e = new_e;
+        }
+
+        let Some((name, opts, arg_exprs)) = args else {
+            return Ok(Transformed::no(LogicalPlan::Projection(projection)));
+        };
+
+        let input = if matches!(*projection.input, LogicalPlan::Projection(..)) {
+            Arc::new(LogicalPlan::Extension(Extension {
+                node: Arc::new(RemoteTableExtension {
+                    input: (*projection.input).clone(),
+                    name: TableReference::bare("subquery_projection"),
+                    schema: projection.input.schema().clone(),
+                    materialize: false,
+                }),
+            }))
+        } else {
+            projection.input
+        };
+
+        let async_node = AsyncUDFExtension {
+            input,
+            name,
+            arg_exprs,
+            final_exprs: projection.expr,
+            ordered: opts.ordered,
+            max_concurrency: opts.max_concurrency,
+            timeout: opts.timeout,
+            final_schema: projection.schema,
+        };
+        Ok(Transformed::yes(LogicalPlan::Extension(Extension { node: Arc::new(async_node) })))
+    }
+}
diff --git a/src/sql/planner/rewrite/mod.rs b/src/sql/planner/rewrite/mod.rs
new file mode 100644
index 00000000..20b2e9bb
--- /dev/null
+++ b/src/sql/planner/rewrite/mod.rs
@@ -0,0 +1,27 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod async_udf_rewriter;
+pub mod row_time;
+pub mod sink_input_rewriter;
+pub mod source_metadata_visitor;
+pub mod source_rewriter;
+pub mod time_window;
+pub mod unnest_rewriter;
+
+pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter};
+pub use row_time::RowTimeRewriter;
+pub use sink_input_rewriter::SinkInputRewriter;
+pub use source_metadata_visitor::SourceMetadataVisitor;
+pub use source_rewriter::SourceRewriter;
+pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker, is_time_window};
+pub use unnest_rewriter::{UNNESTED_COL, UnnestRewriter};
diff --git a/src/sql/planner/rewrite/row_time.rs b/src/sql/planner/rewrite/row_time.rs
new file mode 100644
index 00000000..51309feb
--- /dev/null
+++ b/src/sql/planner/rewrite/row_time.rs
@@ -0,0 +1,39 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{Column, Result as DFResult};
+use datafusion::logical_expr::Expr;
+
+use crate::sql::types::TIMESTAMP_FIELD;
+
+/// Rewrites `row_time()` scalar function calls to a column reference on `_timestamp`.
+pub struct RowTimeRewriter {}
+
+impl TreeNodeRewriter for RowTimeRewriter {
+    type Node = Expr;
+
+    /// Top-down hook: a scalar function named `row_time` is swapped for the unqualified
+    /// `TIMESTAMP_FIELD` column, aliased as `row_time()`; anything else passes through.
+    fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        let is_row_time = match &node {
+            Expr::ScalarFunction(call) => call.name() == "row_time",
+            _ => false,
+        };
+        if !is_row_time {
+            return Ok(Transformed::no(node));
+        }
+        let timestamp = Column::new_unqualified(TIMESTAMP_FIELD);
+        let replacement = Expr::Column(timestamp).alias("row_time()");
+        Ok(Transformed::yes(replacement))
+    }
+}
diff --git a/src/sql/planner/rewrite/sink_input_rewriter.rs b/src/sql/planner/rewrite/sink_input_rewriter.rs
new file mode 100644
index 00000000..e6b6a0bd
--- /dev/null
+++ b/src/sql/planner/rewrite/sink_input_rewriter.rs
@@ -0,0 +1,46 @@
+use crate::sql::planner::extension::sink::SinkExtension;
+use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use datafusion::common::Result as DFResult;
+use datafusion::common::tree_node::{Transformed, TreeNodeRecursion, TreeNodeRewriter};
+use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
+use std::collections::HashMap;
+use std::sync::Arc;
+
+type SinkInputs = HashMap<NamedNode, Vec<LogicalPlan>>;
+
+/// Merges inputs for sinks with the same name to avoid duplicate sinks in the plan.
+pub struct SinkInputRewriter<'a> {
+    sink_inputs: &'a mut SinkInputs,
+    pub was_removed: bool,
+}
+
+impl<'a> SinkInputRewriter<'a> {
+    pub fn new(sink_inputs: &'a mut SinkInputs) -> Self {
+        Self {
+            sink_inputs,
+            was_removed: false,
+        }
+    }
+}
+
+impl TreeNodeRewriter for SinkInputRewriter<'_> {
+    type Node = LogicalPlan;
+
+    /// For a named `SinkExtension`, takes the inputs recorded under its name and rebuilds
+    /// the sink over them (returning `Jump`); if none are recorded, sets `was_removed`.
+    fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        if let LogicalPlan::Extension(ext) = &node
+            && let Some(sink) = ext.node.as_any().downcast_ref::<SinkExtension>()
+            && let Some(sink_name) = sink.node_name()
+        {
+            let Some(merged) = self.sink_inputs.remove(&sink_name) else {
+                self.was_removed = true;
+                return Ok(Transformed::no(node));
+            };
+            let rebuilt = Arc::new(sink.with_exprs_and_inputs(vec![], merged)?);
+            let plan = LogicalPlan::Extension(Extension { node: rebuilt });
+            return Ok(Transformed::new(plan, true, TreeNodeRecursion::Jump));
+        }
+        Ok(Transformed::no(node))
+    }
+}
diff --git a/src/sql/planner/rewrite/source_metadata_visitor.rs b/src/sql/planner/rewrite/source_metadata_visitor.rs
new file mode 100644
index 00000000..168ff712
--- /dev/null
+++ b/src/sql/planner/rewrite/source_metadata_visitor.rs
@@ -0,0 +1,57 @@
+use crate::sql::planner::extension::sink::SinkExtension;
+use crate::sql::planner::extension::table_source::TableSourceExtension;
+use crate::sql::planner::mod_prelude::StreamSchemaProvider;
+use datafusion::common::Result as DFResult;
+use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor};
+use datafusion::logical_expr::{Extension, LogicalPlan};
+use std::collections::HashSet;
+
+/// Collects connection IDs from source and sink nodes in the logical plan.
+pub struct SourceMetadataVisitor<'a> {
+    schema_provider: &'a StreamSchemaProvider,
+    pub connection_ids: HashSet<i64>,
+}
+
+impl<'a> SourceMetadataVisitor<'a> {
+    /// Creates a visitor with an empty `connection_ids` set.
+    pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self {
+        Self {
+            schema_provider,
+            connection_ids: HashSet::new(),
+        }
+    }
+
+    /// Returns the `id` of the connector table behind a `TableSourceExtension` or
+    /// `SinkExtension` node; `None` for other nodes, unknown tables and non-connector tables.
+    fn get_connection_id(&self, node: &LogicalPlan) -> Option<i64> {
+        let LogicalPlan::Extension(Extension { node: ext }) = node else {
+            return None;
+        };
+
+        // Dispatch on the node's reported name, then downcast to read its table name.
+        let any = ext.as_any();
+        let table_name = match ext.name() {
+            "TableSourceExtension" => any.downcast_ref::<TableSourceExtension>()?.name.to_string(),
+            "SinkExtension" => any.downcast_ref::<SinkExtension>()?.name.to_string(),
+            _ => return None,
+        };
+
+        let table = self.schema_provider.get_catalog_table(&table_name)?;
+        // `id` is itself optional, so a connector table can still produce `None`.
+        match table {
+            crate::sql::catalog::table::Table::ConnectorTable(t) => t.id,
+            _ => None,
+        }
+    }
+}
+
+impl TreeNodeVisitor<'_> for SourceMetadataVisitor<'_> {
+    type Node = LogicalPlan;
+
+    fn f_down(&mut self, node: &Self::Node) -> DFResult<TreeNodeRecursion> {
+        if let Some(id) = self.get_connection_id(node) {
+            self.connection_ids.insert(id);
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
diff --git a/src/sql/planner/rewrite/source_rewriter.rs b/src/sql/planner/rewrite/source_rewriter.rs
new file mode 100644
index 00000000..209c3288
--- /dev/null
+++ b/src/sql/planner/rewrite/source_rewriter.rs
@@ -0,0 +1,272 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use datafusion::common::ScalarValue;
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{Column, DataFusionError, Result as DFResult, TableReference, plan_err};
+use datafusion::logical_expr::{
+    self, BinaryExpr, Expr, Extension, LogicalPlan, Projection, TableScan,
+};
+
+use crate::sql::catalog::connector_table::ConnectorTable;
+use crate::sql::catalog::field_spec::FieldSpec;
+use crate::sql::catalog::table::Table;
+use crate::sql::catalog::utils::add_timestamp_field;
+use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::extension::remote_table::RemoteTableExtension;
+use crate::sql::planner::extension::watermark_node::WatermarkNode;
+use crate::sql::types::TIMESTAMP_FIELD;
+
+/// Rewrites table scans into proper source nodes with projections and watermarks.
+pub struct SourceRewriter<'a> {
+    pub(crate) schema_provider: &'a StreamSchemaProvider,
+}
+
+impl SourceRewriter<'_> {
+    /// Builds the watermark expression for `table`.
+    ///
+    /// With a declared watermark field this is that field's column (or, for a virtual
+    /// field, its defining expression); otherwise it is `TIMESTAMP_FIELD` minus one second.
+    ///
+    /// # Errors
+    /// Returns a planning error if the declared watermark field is not among `table.fields`.
+    fn watermark_expression(table: &ConnectorTable) -> DFResult<Expr> {
+        let Some(watermark_field) = table.watermark_field.as_ref() else {
+            // Default watermark: event timestamp minus one second (as nanoseconds).
+            let one_second = Duration::from_secs(1).as_nanos() as i64;
+            let lag = Expr::Literal(ScalarValue::DurationNanosecond(Some(one_second)), None);
+            return Ok(Expr::BinaryExpr(BinaryExpr {
+                left: Box::new(Expr::Column(Column::new_unqualified(TIMESTAMP_FIELD))),
+                op: logical_expr::Operator::Minus,
+                right: Box::new(lag),
+            }));
+        };
+
+        let spec = table
+            .fields
+            .iter()
+            .find(|f| f.field().name() == watermark_field)
+            .ok_or_else(|| {
+                DataFusionError::Plan(format!("Watermark field {watermark_field} not found"))
+            })?;
+        Ok(match spec {
+            FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => {
+                Expr::Column(Column {
+                    relation: None,
+                    name: field.name().to_string(),
+                    spans: Default::default(),
+                })
+            }
+            FieldSpec::Virtual { expression, .. } => *expression.clone(),
+        })
+    }
+
+    fn projection_expressions(
+        table: &ConnectorTable,
+        qualifier: &TableReference,
+        projection: &Option<Vec<usize>>,
+    ) -> DFResult<Vec<Expr>> {
+        let mut expressions: Vec<Expr> = table
+            .fields
+            .iter()
+            .map(|field| match field {
+                FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => {
+                    Expr::Column(Column {
+                        relation: Some(qualifier.clone()),
+                        name: field.name().to_string(),
+                        spans: Default::default(),
+                    })
+                }
+                FieldSpec::Virtual { field, expression } => expression
+                    .clone()
+                    .alias_qualified(Some(qualifier.clone()), field.name().to_string()),
+            })
+            .collect();
+
+        if let Some(proj) = projection {
+            expressions = proj.iter().map(|i| expressions[*i].clone()).collect();
+        }
+
+        if let Some(event_time_field) = table.event_time_field.clone() {
+            let expr = table
+                .fields
+                .iter()
+                .find_map(|f| {
+                    if f.field().name() == &event_time_field {
+                        return match f {
+                            FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => {
+                                Some(Expr::Column(Column {
+                                    relation: Some(qualifier.clone()),
+                                    name: field.name().to_string(),
+                                    spans: Default::default(),
+                                }))
+                            }
+                            FieldSpec::Virtual { expression, .. } => Some(*expression.clone()),
+                        };
+                    }
+                    None
+                })
+                .ok_or_else(|| {
+                    DataFusionError::Plan(format!("Event time field {event_time_field} not found"))
+                })?;
+
+            expressions
+                .push(expr.alias_qualified(Some(qualifier.clone()), TIMESTAMP_FIELD.to_string()));
+        } else {
+            expressions.push(Expr::Column(Column::new(
+                Some(qualifier.clone()),
+                TIMESTAMP_FIELD,
+            )));
+        }
+
+        Ok(expressions)
+    }
+
+    fn projection(&self, table_scan: &TableScan, table: &ConnectorTable) -> DFResult<LogicalPlan> {
+        let qualifier = table_scan.table_name.clone();
+
+        // TODO: replace with TableSourceExtension when available
+        let source_input = LogicalPlan::TableScan(table_scan.clone());
+
+        Ok(LogicalPlan::Projection(Projection::try_new(
+            Self::projection_expressions(table, &qualifier, &table_scan.projection)?,
+            Arc::new(source_input),
+        )?))
+    }
+
+    fn mutate_connector_table(
+        &self,
+        table_scan: &TableScan,
+        table: &ConnectorTable,
+    ) -> DFResult<Transformed<LogicalPlan>> {
+        let input = self.projection(table_scan, table)?;
+
+        let schema = input.schema().clone();
+        let remote = LogicalPlan::Extension(Extension {
+            node: Arc::new(RemoteTableExtension {
+                input,
+                name: table_scan.table_name.to_owned(),
+                schema,
+                materialize: true,
+            }),
+        });
+
+        let watermark_node = WatermarkNode::new(
+            remote,
+            table_scan.table_name.clone(),
+            Self::watermark_expression(table)?,
+        )
+        .map_err(|err| {
+            DataFusionError::Internal(format!("failed to create watermark expression: {err}"))
+        })?;
+
+        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+            node: Arc::new(watermark_node),
+        })))
+    }
+
+    fn mutate_table_from_query(
+        &self,
+        table_scan: &TableScan,
+        logical_plan: &LogicalPlan,
+    ) -> DFResult<Transformed<LogicalPlan>> {
+        let column_expressions: Vec<_> = if let Some(projection) = &table_scan.projection {
+            logical_plan
+                .schema()
+                .columns()
+                .into_iter()
+                .enumerate()
+                .filter_map(|(i, col)| {
+                    if projection.contains(&i) {
+                        Some(Expr::Column(col))
+                    } else {
+                        None
+                    }
+                })
+                .collect()
+        } else {
+            logical_plan
+                .schema()
+                .columns()
+                .into_iter()
+                .map(Expr::Column)
+                .collect()
+        };
+
+        let target_columns: Vec<_> = table_scan.projected_schema.columns().into_iter().collect();
+
+        let expressions = column_expressions
+            .into_iter()
+            .zip(target_columns)
+            .map(|(expr, col)| expr.alias_qualified(col.relation, col.name))
+            .collect();
+
+        let projection = LogicalPlan::Projection(Projection::try_new_with_schema(
+            expressions,
+            Arc::new(logical_plan.clone()),
+            table_scan.projected_schema.clone(),
+        )?);
+
+        Ok(Transformed::yes(projection))
+    }
+}
+
+impl TreeNodeRewriter for SourceRewriter<'_> {
+    type Node = LogicalPlan;
+
+    fn f_up(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        let LogicalPlan::TableScan(mut table_scan) = node else {
+            return Ok(Transformed::no(node));
+        };
+
+        let table_name = table_scan.table_name.table();
+        let table = self
+            .schema_provider
+            .get_catalog_table(table_name)
+            .ok_or_else(|| DataFusionError::Plan(format!("Table {table_name} not found")))?;
+
+        match table {
+            Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table),
+            Table::LookupTable(_table) => {
+                // TODO: implement LookupSource extension
+                plan_err!("Lookup tables are not yet supported")
+            }
+            Table::MemoryTable {
+                name,
+                fields: _,
+                logical_plan,
+            } => {
+                let Some(logical_plan) = logical_plan else {
+                    return plan_err!(
+                        "Can't query from memory table {} without first inserting into it",
+                        name
+                    );
+                };
+                table_scan.projected_schema = add_timestamp_field(
+                    table_scan.projected_schema.clone(),
+                    Some(table_scan.table_name.clone()),
+                )?;
+                self.mutate_table_from_query(&table_scan, logical_plan)
+            }
+            Table::TableFromQuery {
+                name: _,
+                logical_plan,
+            } => self.mutate_table_from_query(&table_scan, logical_plan),
+            Table::PreviewSink { .. } => Err(DataFusionError::Plan(
+                "can't select from a preview sink".to_string(),
+            )),
+        }
+    }
+}
diff --git a/src/sql/planner/rewrite/time_window.rs b/src/sql/planner/rewrite/time_window.rs
new file mode 100644
index 00000000..104c0cca
--- /dev/null
+++ b/src/sql/planner/rewrite/time_window.rs
@@ -0,0 +1,83 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::common::tree_node::{
+    Transformed, TreeNodeRecursion, TreeNodeRewriter, TreeNodeVisitor,
+};
+use datafusion::common::{DataFusionError, Result as DFResult, ScalarValue, plan_err};
+use datafusion::logical_expr::expr::ScalarFunction;
+use datafusion::logical_expr::{Expr, LogicalPlan};
+
+/// Returns the time window function name if the expression is one (tumble/hop/session).
+pub fn is_time_window(expr: &Expr) -> Option<&str> {
+    // Only scalar function calls can be window functions.
+    let Expr::ScalarFunction(ScalarFunction { func, .. }) = expr else {
+        return None;
+    };
+    // Hand back the function's own name so the `&str` borrows from `expr`.
+    let name = func.name();
+    matches!(name, "tumble" | "hop" | "session").then_some(name)
+}
+
+struct TimeWindowExprChecker {}
+
+impl TreeNodeVisitor<'_> for TimeWindowExprChecker {
+    type Node = Expr;
+
+    fn f_down(&mut self, node: &Self::Node) -> DFResult<TreeNodeRecursion> {
+        if let Some(w) = is_time_window(node) {
+            return plan_err!(
+                "time window function {} is not allowed in this context. \
+                 Are you missing a GROUP BY clause?",
+                w
+            );
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
+
+/// Visitor that checks an entire LogicalPlan for misplaced time window UDFs.
+pub struct TimeWindowUdfChecker {}
+
+impl TreeNodeVisitor<'_> for TimeWindowUdfChecker {
+    type Node = LogicalPlan;
+
+    fn f_down(&mut self, node: &Self::Node) -> DFResult<TreeNodeRecursion> {
+        use datafusion::common::tree_node::TreeNode;
+        node.expressions().iter().try_for_each(|expr| {
+            let mut checker = TimeWindowExprChecker {};
+            expr.visit(&mut checker)?;
+            Ok::<(), DataFusionError>(())
+        })?;
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
+
+/// Removes `IS NOT NULL` checks wrapping time window functions,
+/// replacing them with `true` since time windows are never null.
+pub struct TimeWindowNullCheckRemover {}
+
+impl TreeNodeRewriter for TimeWindowNullCheckRemover {
+    type Node = Expr;
+
+    /// Turns `<time window call> IS NOT NULL` into the literal `true`; other nodes are untouched.
+    fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        let wraps_window = match &node {
+            Expr::IsNotNull(inner) => is_time_window(inner).is_some(),
+            _ => false,
+        };
+        if !wraps_window {
+            return Ok(Transformed::no(node));
+        }
+        Ok(Transformed::yes(Expr::Literal(ScalarValue::Boolean(Some(true)), None)))
+    }
+}
diff --git a/src/sql/planner/rewrite/unnest_rewriter.rs b/src/sql/planner/rewrite/unnest_rewriter.rs
new file mode 100644
index 00000000..2a9eabda
--- /dev/null
+++ b/src/sql/planner/rewrite/unnest_rewriter.rs
@@ -0,0 +1,178 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::DataType;
+use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
+use datafusion::common::{Column, Result as DFResult, plan_err};
+use datafusion::logical_expr::expr::ScalarFunction;
+use datafusion::logical_expr::{ColumnUnnestList, Expr, LogicalPlan, Projection, Unnest};
+
+use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields};
+
+pub const UNNESTED_COL: &str = "__unnested";
+
+/// Rewrites projections containing `unnest()` calls into proper Unnest logical plans.
+pub struct UnnestRewriter {}
+
+impl UnnestRewriter {
+    /// Replaces the `unnest(arg)` call inside `expr`, if any, with a reference to
+    /// `UNNESTED_COL` and returns `arg` next to the rewritten expression.
+    ///
+    /// Returns a planning error when `expr` holds more than one `unnest` call or a call
+    /// whose argument count is not exactly one.
+    fn split_unnest(expr: Expr) -> DFResult<(Expr, Option<Expr>)> {
+        let mut captured: Option<Expr> = None;
+
+        let expr = expr.transform_up(|e| {
+            let Expr::ScalarFunction(ScalarFunction { func: udf, args }) = &e else {
+                return Ok(Transformed::no(e));
+            };
+            if udf.name() != "unnest" {
+                return Ok(Transformed::no(e));
+            }
+            // The arity comes straight from user SQL, so report it instead of panicking.
+            let [arg] = args.as_slice() else {
+                let n = args.len();
+                return plan_err!("Unnest has wrong number of arguments (expected 1, found {n})");
+            };
+            if captured.replace(arg.clone()).is_some() {
+                return plan_err!("Multiple unnests in expression, which is not allowed");
+            }
+            Ok(Transformed::yes(Expr::Column(Column::new_unqualified(UNNESTED_COL))))
+        })?;
+
+        Ok((expr.data, captured))
+    }
+}
+
+impl TreeNodeRewriter for UnnestRewriter {
+    type Node = LogicalPlan;
+
+    /// Lowers a projection's `unnest(..)` into an `Unnest` plan node; rejects `unnest` elsewhere.
+    fn f_up(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        let LogicalPlan::Projection(projection) = &node else {
+            // A split that errors is treated like a found unnest: both are misplaced here.
+            let misplaced = |e: Expr| !matches!(Self::split_unnest(e), Ok((_, None)));
+            if node.expressions().into_iter().any(misplaced) {
+                return plan_err!("unnest is only supported in SELECT statements");
+            }
+            return Ok(Transformed::no(node));
+        };
+
+        let mut unnest = None;
+        let exprs = projection
+            .expr
+            .clone()
+            .into_iter()
+            .enumerate()
+            .map(|(i, expr)| {
+                let (expr, opt) = Self::split_unnest(expr)?;
+                let is_unnest = if let Some(e) = opt {
+                    if let Some(prev) = unnest.replace((e, i))
+                        && &prev != unnest.as_ref().unwrap()
+                    {
+                        return plan_err!(
+                            "Projection contains multiple unnests, which is not currently supported"
+                        );
+                    }
+                    true
+                } else {
+                    false
+                };
+
+                Ok((expr, is_unnest))
+            })
+            .collect::<DFResult<Vec<_>>>()?;
+
+        if let Some((unnest_inner, unnest_idx)) = unnest {
+            let produce_list = Arc::new(LogicalPlan::Projection(Projection::try_new(
+                exprs
+                    .iter()
+                    .cloned()
+                    .map(|(e, is_unnest)| {
+                        if is_unnest {
+                            unnest_inner.clone().alias(UNNESTED_COL)
+                        } else {
+                            e
+                        }
+                    })
+                    .collect(),
+                projection.input.clone(),
+            )?));
+
+            let unnest_fields = fields_with_qualifiers(produce_list.schema())
+                .iter()
+                .enumerate()
+                .map(|(i, f)| {
+                    if i == unnest_idx {
+                        let DataType::List(inner) = f.data_type() else {
+                            return plan_err!(
+                                "Argument '{}' to unnest is not a List",
+                                f.qualified_name()
+                            );
+                        };
+                        Ok(DFField::new_unqualified(
+                            UNNESTED_COL,
+                            inner.data_type().clone(),
+                            inner.is_nullable(),
+                        ))
+                    } else {
+                        Ok((*f).clone())
+                    }
+                })
+                .collect::<DFResult<Vec<_>>>()?;
+
+            let unnest_node = LogicalPlan::Unnest(Unnest {
+                exec_columns: vec![
+                    DFField::from(produce_list.schema().qualified_field(unnest_idx))
+                        .qualified_column(),
+                ],
+                input: produce_list,
+                list_type_columns: vec![(
+                    unnest_idx,
+                    ColumnUnnestList {
+                        output_column: Column::new_unqualified(UNNESTED_COL),
+                        depth: 1,
+                    },
+                )],
+                struct_type_columns: vec![],
+                dependency_indices: vec![],
+                schema: Arc::new(schema_from_df_fields(&unnest_fields)?),
+                options: Default::default(),
+            });
+
+            let output_node = LogicalPlan::Projection(Projection::try_new(
+                exprs
+                    .iter()
+                    .enumerate()
+                    .map(|(i, (expr, has_unnest))| {
+                        if *has_unnest {
+                            expr.clone()
+                        } else {
+                            Expr::Column(
+                                DFField::from(unnest_node.schema().qualified_field(i))
+                                    .qualified_column(),
+                            )
+                        }
+                    })
+                    .collect(),
+                Arc::new(unnest_node),
+            )?);
+
+            Ok(Transformed::yes(output_node))
+        } else {
+            Ok(Transformed::no(node))
+        }
+    }
+}
diff --git a/src/sql/planner/schema_provider.rs b/src/sql/planner/schema_provider.rs
new file mode 100644
index 00000000..d860fd6c
--- /dev/null
+++ b/src/sql/planner/schema_provider.rs
@@ -0,0 +1,360 @@
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema};
+use datafusion::common::{Result, plan_err};
+use datafusion::datasource::DefaultTableSource;
+use datafusion::error::DataFusionError;
+use datafusion::execution::{FunctionRegistry, SessionStateDefaults};
+use datafusion::logical_expr::expr_rewriter::FunctionRewrite;
+use datafusion::logical_expr::planner::ExprPlanner;
+use datafusion::logical_expr::{
+    AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF,
+};
+use datafusion::optimizer::Analyzer;
+use datafusion::sql::TableReference;
+use datafusion::sql::planner::ContextProvider;
+use unicase::UniCase;
+
+use crate::sql::catalog::table::Table as CatalogTable;
+use crate::sql::planner::schemas::window_arrow_struct;
+use crate::sql::types::{PlaceholderUdf, PlanningOptions};
+
+#[derive(Clone, Default)] // registry of tables, functions and planner settings
+pub struct StreamSchemaProvider {
+    pub source_defs: HashMap<String, String>,
+    tables: HashMap<UniCase<String>, StreamTable>, // stream tables; name lookup ignores case
+    catalog_tables: HashMap<UniCase<String>, CatalogTable>, // catalog tables; same key scheme
+    pub functions: HashMap<String, Arc<ScalarUDF>>, // `register_udf` keys by `udf.name()`
+    pub aggregate_functions: HashMap<String, Arc<AggregateUDF>>, // keyed by `udaf.name()`
+    pub window_functions: HashMap<String, Arc<WindowUDF>>, // keyed by `udwf.name()`
+    config_options: datafusion::config::ConfigOptions, // returned by `options()`
+    pub expr_planners: Vec<Arc<dyn ExprPlanner>>, // appended to by `register_expr_planner`
+    pub planning_options: PlanningOptions,
+    pub analyzer: Analyzer, // collects rewrites from `register_function_rewrite`
+}
+
+#[derive(Clone, Debug)] // one entry of `StreamSchemaProvider::tables`
+pub enum StreamTable {
+    Source {
+        name: String,
+        schema: Arc<Schema>,
+        event_time_field: Option<String>, // stored as passed to `add_source_table`
+        watermark_field: Option<String>, // stored as passed to `add_source_table`
+    },
+    Sink {
+        name: String,
+        schema: Arc<Schema>,
+    },
+    Memory {
+        name: String,
+        logical_plan: Option<LogicalPlan>, // no schema is stored; `get_fields` yields no fields
+    },
+}
+
+impl StreamTable {
+    /// Returns the table's name; every variant carries one.
+    pub fn name(&self) -> &str {
+        match self {
+            Self::Source { name, .. } | Self::Sink { name, .. } | Self::Memory { name, .. } => name,
+        }
+    }
+
+    /// Returns the fields of the table's schema; `Memory` tables store no schema
+    /// and therefore yield an empty list.
+    pub fn get_fields(&self) -> Vec<Arc<Field>> {
+        match self {
+            Self::Source { schema, .. } | Self::Sink { schema, .. } => schema.fields().to_vec(),
+            Self::Memory { .. } => Vec::new(),
+        }
+    }
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct LogicalBatchInput {
+    pub table_name: String, // passed to `FsMemExec::new` when the table is scanned
+    pub schema: Arc<Schema>, // reported by `schema()` and passed to `FsMemExec::new`
+}
+
+#[async_trait::async_trait]
+impl datafusion::datasource::TableProvider for LogicalBatchInput {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn schema(&self) -> Arc<Schema> {
+        Arc::clone(&self.schema)
+    }
+
+    fn table_type(&self) -> datafusion::datasource::TableType {
+        datafusion::datasource::TableType::Temporary
+    }
+
+    /// Returns an `FsMemExec` for this table; projection, filters and limit are ignored.
+    async fn scan(
+        &self,
+        _state: &dyn datafusion::catalog::Session,
+        _projection: Option<&Vec<usize>>,
+        _filters: &[Expr],
+        _limit: Option<usize>,
+    ) -> Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>> {
+        let (name, schema) = (self.table_name.clone(), Arc::clone(&self.schema));
+        let exec = crate::sql::physical::FsMemExec::new(name, schema);
+        Ok(Arc::new(exec))
+    }
+}
+
+/// Wraps a `LogicalBatchInput` for the given table in a `DefaultTableSource`.
+fn create_table(table_name: String, schema: Arc<Schema>) -> Arc<dyn TableSource> {
+    // The provider only records the name and schema; both are moved into it.
+    let input = Arc::new(LogicalBatchInput { table_name, schema });
+    Arc::new(DefaultTableSource::new(input))
+}
+
+impl StreamSchemaProvider {
+    /// Creates a provider with the planner's built-in functions registered.
+    ///
+    /// Registration order:
+    ///
+    /// 1. the placeholder scalar UDFs `hop`, `tumble`, `session`, `unnest` and
+    ///    `row_time`;
+    /// 2. DataFusion's `SessionStateDefaults` scalar functions;
+    /// 3. its default aggregate functions;
+    /// 4. its default window functions;
+    /// 5. its default expression planners.
+    ///
+    /// Functions are keyed by name, so a later registration replaces an earlier
+    /// one that has the same name.
+    ///
+    /// # Panics
+    ///
+    /// Panics if any registration returns an error.
+    pub fn new() -> Self {
+        let mut provider = Self::default();
+
+        let interval = || DataType::Interval(datatypes::IntervalUnit::MonthDayNano);
+        let utf8_list = DataType::List(Arc::new(Field::new("field", DataType::Utf8, true)));
+        let nanos = DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None);
+
+        // (name, argument types, return type), in registration order.
+        let placeholders = [
+            ("hop", vec![interval(), interval()], window_arrow_struct()),
+            ("tumble", vec![interval()], window_arrow_struct()),
+            ("session", vec![interval()], window_arrow_struct()),
+            ("unnest", vec![utf8_list], DataType::Utf8),
+            ("row_time", vec![], nanos),
+        ];
+        for (name, arg_types, return_type) in placeholders {
+            let udf = PlaceholderUdf::with_return(name, arg_types, return_type);
+            provider.register_udf(udf).unwrap();
+        }
+
+        // DataFusion's stock functions and expression planners.
+        for udf in SessionStateDefaults::default_scalar_functions() {
+            provider.register_udf(udf).unwrap();
+        }
+        for udaf in SessionStateDefaults::default_aggregate_functions() {
+            provider.register_udaf(udaf).unwrap();
+        }
+        for udwf in SessionStateDefaults::default_window_functions() {
+            provider.register_udwf(udwf).unwrap();
+        }
+        for planner in SessionStateDefaults::default_expr_planners() {
+            provider.register_expr_planner(planner).unwrap();
+        }
+
+        provider
+    }
+
+    /// Registers a source table under `name`.
+    ///
+    /// `event_time_field` and `watermark_field` are stored on the table as given.
+    /// A table already registered under the same name (ignoring case) is replaced.
+    pub fn add_source_table(
+        &mut self,
+        name: String,
+        schema: Arc<Schema>,
+        event_time_field: Option<String>,
+        watermark_field: Option<String>,
+    ) {
+        self.insert_table(StreamTable::Source {
+            name,
+            schema,
+            event_time_field,
+            watermark_field,
+        });
+    }
+
+    /// Registers a sink table under `name`, replacing any table already registered
+    /// under the same name (ignoring case).
+    pub fn add_sink_table(&mut self, name: String, schema: Arc<Schema>) {
+        self.insert_table(StreamTable::Sink { name, schema });
+    }
+
+    /// Stores `table` under its own name; `UniCase` keys make the name
+    /// case-insensitive, and an existing entry for that name is replaced.
+    pub fn insert_table(&mut self, table: StreamTable) {
+        let key = UniCase::new(table.name().to_string());
+        self.tables.insert(key, table);
+    }
+
+    /// Looks up a stream table by name, ignoring case.
+    pub fn get_table(&self, table_name: impl Into<String>) -> Option<&StreamTable> {
+        let key = UniCase::new(table_name.into());
+        self.tables.get(&key)
+    }
+
+    /// Mutable counterpart of `get_table`.
+    pub fn get_table_mut(&mut self, table_name: impl Into<String>) -> Option<&mut StreamTable> {
+        let key = UniCase::new(table_name.into());
+        self.tables.get_mut(&key)
+    }
+
+    /// Stores a catalog table under its own name (case-insensitive), replacing any
+    /// existing entry for that name.
+    pub fn insert_catalog_table(&mut self, table: CatalogTable) {
+        let key = UniCase::new(table.name().to_string());
+        self.catalog_tables.insert(key, table);
+    }
+
+    /// Looks up a catalog table by name, ignoring case.
+    pub fn get_catalog_table(&self, table_name: impl Into<String>) -> Option<&CatalogTable> {
+        let key = UniCase::new(table_name.into());
+        self.catalog_tables.get(&key)
+    }
+
+    /// Mutable counterpart of `get_catalog_table`.
+    pub fn get_catalog_table_mut(
+        &mut self,
+        table_name: impl Into<String>,
+    ) -> Option<&mut CatalogTable> {
+        let key = UniCase::new(table_name.into());
+        self.catalog_tables.get_mut(&key)
+    }
+
+    /// Returns the async options for the UDF called `_name`.
+    ///
+    /// Lookup is not implemented yet, so this currently always returns `None`.
+    pub fn get_async_udf_options(
+        &self,
+        _name: &str,
+    ) -> Option<crate::sql::planner::rewrite::AsyncOptions> {
+        // TODO: implement async UDF lookup
+        None
+    }
+}
+
+impl ContextProvider for StreamSchemaProvider {
+    /// Builds a table source from the fields of the registered stream table `name`.
+    /// Fails with a `Plan` error when no such table is registered.
+    fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
+        let Some(table) = self.get_table(name.to_string()) else {
+            return Err(DataFusionError::Plan(format!("Table {name} not found")));
+        };
+        // Only the fields are carried over; the schema's metadata map stays empty.
+        let schema = Schema::new(table.get_fields());
+        Ok(create_table(name.to_string(), Arc::new(schema)))
+    }
+
+    /// Scalar UDF registered under exactly `name`, if any.
+    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
+        self.functions.get(name).map(Arc::clone)
+    }
+
+    /// Aggregate UDF registered under exactly `name`, if any.
+    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
+        self.aggregate_functions.get(name).map(Arc::clone)
+    }
+
+    /// No variable types are provided: always `None`.
+    fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
+        None
+    }
+
+    fn options(&self) -> &datafusion::config::ConfigOptions {
+        &self.config_options
+    }
+
+    /// Window UDF registered under exactly `name`, if any.
+    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>> {
+        self.window_functions.get(name).map(Arc::clone)
+    }
+
+    /// Names of all registered scalar UDFs, in unspecified order.
+    fn udf_names(&self) -> Vec<String> {
+        self.functions.keys().cloned().collect()
+    }
+
+    fn udaf_names(&self) -> Vec<String> {
+        self.aggregate_functions.keys().cloned().collect()
+    }
+
+    fn udwf_names(&self) -> Vec<String> {
+        self.window_functions.keys().cloned().collect()
+    }
+
+    fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
+        self.expr_planners.as_slice()
+    }
+}
+
+impl FunctionRegistry for StreamSchemaProvider {
+    fn udfs(&self) -> HashSet<String> {
+        self.functions.keys().cloned().collect()
+    }
+
+    /// Scalar UDF registered as `name`, or a plan error if there is none.
+    fn udf(&self, name: &str) -> Result<Arc<ScalarUDF>> {
+        match self.functions.get(name) {
+            Some(udf) => Ok(Arc::clone(udf)),
+            None => plan_err!("No UDF with name {name}"),
+        }
+    }
+
+    /// Aggregate UDF registered as `name`, or a plan error if there is none.
+    fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>> {
+        match self.aggregate_functions.get(name) {
+            Some(udaf) => Ok(Arc::clone(udaf)),
+            None => plan_err!("No UDAF with name {name}"),
+        }
+    }
+
+    /// Window UDF registered as `name`, or a plan error if there is none.
+    fn udwf(&self, name: &str) -> Result<Arc<WindowUDF>> {
+        match self.window_functions.get(name) {
+            Some(udwf) => Ok(Arc::clone(udwf)),
+            None => plan_err!("No UDWF with name {name}"),
+        }
+    }
+
+    fn register_function_rewrite(
+        &mut self,
+        rewrite: Arc<dyn FunctionRewrite + Send + Sync>,
+    ) -> Result<()> {
+        self.analyzer.add_function_rewrite(rewrite);
+        Ok(())
+    }
+
+    /// Registers `udf` under `udf.name()` only; returns the UDF it replaced, if any.
+    fn register_udf(&mut self, udf: Arc<ScalarUDF>) -> Result<Option<Arc<ScalarUDF>>> {
+        Ok(self.functions.insert(udf.name().to_string(), udf))
+    }
+
+    fn register_udaf(&mut self, udaf: Arc<AggregateUDF>) -> Result<Option<Arc<AggregateUDF>>> {
+        let name = udaf.name().to_string();
+        Ok(self.aggregate_functions.insert(name, udaf))
+    }
+
+    fn register_udwf(&mut self, udwf: Arc<WindowUDF>) -> Result<Option<Arc<WindowUDF>>> {
+        Ok(self.window_functions.insert(udwf.name().to_string(), udwf))
+    }
+
+    fn register_expr_planner(&mut self, expr_planner: Arc<dyn ExprPlanner>) -> Result<()> {
+        self.expr_planners.push(expr_planner);
+        Ok(())
+    }
+
+    fn expr_planners(&self) -> Vec<Arc<dyn ExprPlanner>> {
+        self.expr_planners.to_vec()
+    }
+}
diff --git a/src/sql/planner/schemas.rs b/src/sql/planner/schemas.rs
index 0440cc85..f903db83 100644
--- a/src/sql/planner/schemas.rs
+++ b/src/sql/planner/schemas.rs
@@ -1,59 +1,5 @@
-use crate::sql::planner::types::{DFField, TIMESTAMP_FIELD};
-use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
-use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference};
-use std::{collections::HashMap, sync::Arc};
-
-pub fn window_arrow_struct() -> DataType {
-    DataType::Struct(
-        vec![
-            Arc::new(Field::new(
-                "start",
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            )),
-            Arc::new(Field::new(
-                "end",
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            )),
-        ]
-        .into(),
-    )
-}
-
-pub(crate) fn add_timestamp_field(
-    schema: DFSchemaRef,
-    qualifier: Option<TableReference>,
-) -> DFResult<DFSchemaRef> {
-    if has_timestamp_field(&schema) {
-        return Ok(schema);
-    }
-
-    let timestamp_field = DFField::new(
-        qualifier,
-        TIMESTAMP_FIELD,
-        DataType::Timestamp(TimeUnit::Nanosecond, None),
-        false,
-    );
-    Ok(Arc::new(schema.join(&DFSchema::new_with_metadata(
-        vec![timestamp_field.into()],
-        HashMap::new(),
-    )?)?))
-}
-
-pub(crate) fn has_timestamp_field(schema: &DFSchemaRef) -> bool {
-    schema
-        .fields()
-        .iter()
-        .any(|field| field.name() == TIMESTAMP_FIELD)
-}
-
-pub fn add_timestamp_field_arrow(schema: Schema) -> SchemaRef {
-    let mut fields = schema.fields().to_vec();
-    fields.push(Arc::new(Field::new(
-        TIMESTAMP_FIELD,
-        DataType::Timestamp(TimeUnit::Nanosecond, None),
-        false,
-    )));
-    Arc::new(Schema::new(fields))
-}
+// Re-export schema utilities from catalog::utils.
+// Kept for backward compatibility with existing planner imports.
+pub use crate::sql::catalog::utils::{
+    add_timestamp_field, add_timestamp_field_arrow, has_timestamp_field, window_arrow_struct,
+};
diff --git a/src/sql/planner/types.rs b/src/sql/planner/types.rs
deleted file mode 100644
index 2330c0de..00000000
--- a/src/sql/planner/types.rs
+++ /dev/null
@@ -1,513 +0,0 @@
-use std::collections::HashMap;
-use std::fmt::{Debug, Formatter};
-use std::sync::Arc;
-use std::time::Duration;
-
-use datafusion::arrow::datatypes::{
-    DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DataType, Field, FieldRef, IntervalUnit,
-    Schema, SchemaRef, TimeUnit,
-};
-use datafusion::common::{Column, DFSchema, Result, TableReference, plan_datafusion_err, plan_err};
-use datafusion::logical_expr::{
-    ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
-};
-use std::any::Any;
-
-pub const TIMESTAMP_FIELD: &str = "_timestamp";
-
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum ProcessingMode {
-    Append,
-    Update,
-}
-
-#[derive(Clone, Debug, PartialEq, Eq, Hash)]
-pub enum WindowType {
-    Tumbling { width: Duration },
-    Sliding { width: Duration, slide: Duration },
-    Session { gap: Duration },
-    Instant,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) enum WindowBehavior {
-    FromOperator {
-        window: WindowType,
-        window_field: DFField,
-        window_index: usize,
-        is_nested: bool,
-    },
-    InData,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct DFField {
-    qualifier: Option<TableReference>,
-    field: FieldRef,
-}
-
-impl From<(Option<TableReference>, FieldRef)> for DFField {
-    fn from(value: (Option<TableReference>, FieldRef)) -> Self {
-        Self {
-            qualifier: value.0,
-            field: value.1,
-        }
-    }
-}
-
-impl From<(Option<&TableReference>, &Field)> for DFField {
-    fn from(value: (Option<&TableReference>, &Field)) -> Self {
-        Self {
-            qualifier: value.0.cloned(),
-            field: Arc::new(value.1.clone()),
-        }
-    }
-}
-
-impl From<DFField> for (Option<TableReference>, FieldRef) {
-    fn from(value: DFField) -> Self {
-        (value.qualifier, value.field)
-    }
-}
-
-impl DFField {
-    pub fn new(
-        qualifier: Option<TableReference>,
-        name: impl Into<String>,
-        data_type: DataType,
-        nullable: bool,
-    ) -> Self {
-        Self {
-            qualifier,
-            field: Arc::new(Field::new(name, data_type, nullable)),
-        }
-    }
-
-    pub fn new_unqualified(name: &str, data_type: DataType, nullable: bool) -> Self {
-        DFField {
-            qualifier: None,
-            field: Arc::new(Field::new(name, data_type, nullable)),
-        }
-    }
-
-    pub fn name(&self) -> &String {
-        self.field.name()
-    }
-
-    pub fn data_type(&self) -> &DataType {
-        self.field.data_type()
-    }
-
-    pub fn is_nullable(&self) -> bool {
-        self.field.is_nullable()
-    }
-
-    pub fn metadata(&self) -> &HashMap<String, String> {
-        self.field.metadata()
-    }
-
-    pub fn qualified_name(&self) -> String {
-        if let Some(qualifier) = &self.qualifier {
-            format!("{}.{}", qualifier, self.field.name())
-        } else {
-            self.field.name().to_owned()
-        }
-    }
-
-    pub fn qualified_column(&self) -> Column {
-        Column {
-            relation: self.qualifier.clone(),
-            name: self.field.name().to_string(),
-            spans: Default::default(),
-        }
-    }
-
-    pub fn unqualified_column(&self) -> Column {
-        Column {
-            relation: None,
-            name: self.field.name().to_string(),
-            spans: Default::default(),
-        }
-    }
-
-    pub fn qualifier(&self) -> Option<&TableReference> {
-        self.qualifier.as_ref()
-    }
-
-    pub fn field(&self) -> &FieldRef {
-        &self.field
-    }
-
-    pub fn strip_qualifier(mut self) -> Self {
-        self.qualifier = None;
-        self
-    }
-
-    pub fn with_nullable(mut self, nullable: bool) -> Self {
-        let f = self.field().as_ref().clone().with_nullable(nullable);
-        self.field = f.into();
-        self
-    }
-
-    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
-        let f = self.field().as_ref().clone().with_metadata(metadata);
-        self.field = f.into();
-        self
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct StreamSchema {
-    pub schema: SchemaRef,
-    pub timestamp_index: usize,
-    pub key_indices: Option<Vec<usize>>,
-}
-
-impl StreamSchema {
-    pub fn new(schema: SchemaRef, timestamp_index: usize, key_indices: Option<Vec<usize>>) -> Self {
-        Self {
-            schema,
-            timestamp_index,
-            key_indices,
-        }
-    }
-
-    pub fn new_unkeyed(schema: SchemaRef, timestamp_index: usize) -> Self {
-        Self {
-            schema,
-            timestamp_index,
-            key_indices: None,
-        }
-    }
-
-    pub fn from_fields(fields: Vec<Field>) -> Self {
-        let schema = Arc::new(Schema::new(fields));
-        let timestamp_index = schema
-            .column_with_name(TIMESTAMP_FIELD)
-            .map(|(i, _)| i)
-            .unwrap_or(0);
-        Self {
-            schema,
-            timestamp_index,
-            key_indices: None,
-        }
-    }
-
-    pub fn from_schema_keys(schema: SchemaRef, key_indices: Vec<usize>) -> Result<Self> {
-        let timestamp_index = schema
-            .column_with_name(TIMESTAMP_FIELD)
-            .ok_or_else(|| {
-                datafusion::error::DataFusionError::Plan(format!(
-                    "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}"
-                ))
-            })?
-            .0;
-        Ok(Self {
-            schema,
-            timestamp_index,
-            key_indices: Some(key_indices),
-        })
-    }
-
-    pub fn from_schema_unkeyed(schema: SchemaRef) -> Result<Self> {
-        let timestamp_index = schema
-            .column_with_name(TIMESTAMP_FIELD)
-            .ok_or_else(|| {
-                datafusion::error::DataFusionError::Plan(format!(
-                    "no {TIMESTAMP_FIELD} field in schema"
-                ))
-            })?
-            .0;
-        Ok(Self {
-            schema,
-            timestamp_index,
-            key_indices: None,
-        })
-    }
-}
-
-#[allow(clippy::type_complexity)]
-pub(crate) struct PlaceholderUdf {
-    name: String,
-    signature: Signature,
-    return_type: Arc<dyn Fn(&[DataType]) -> Result<DataType> + Send + Sync + 'static>,
-}
-
-impl Debug for PlaceholderUdf {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "PlaceholderUDF<{}>", self.name)
-    }
-}
-
-impl ScalarUDFImpl for PlaceholderUdf {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn name(&self) -> &str {
-        &self.name
-    }
-
-    fn signature(&self) -> &Signature {
-        &self.signature
-    }
-
-    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
-        (self.return_type)(args)
-    }
-
-    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
-        unimplemented!("PlaceholderUdf should never be called at execution time");
-    }
-}
-
-impl PlaceholderUdf {
-    pub fn with_return(
-        name: impl Into<String>,
-        args: Vec<DataType>,
-        ret: DataType,
-    ) -> Arc<ScalarUDF> {
-        Arc::new(ScalarUDF::new_from_impl(PlaceholderUdf {
-            name: name.into(),
-            signature: Signature::exact(args, Volatility::Volatile),
-            return_type: Arc::new(move |_| Ok(ret.clone())),
-        }))
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct SqlConfig {
-    pub default_parallelism: usize,
-}
-
-impl Default for SqlConfig {
-    fn default() -> Self {
-        Self {
-            default_parallelism: 4,
-        }
-    }
-}
-
-#[derive(Clone)]
-pub struct PlanningOptions {
-    pub ttl: Duration,
-}
-
-impl Default for PlanningOptions {
-    fn default() -> Self {
-        Self {
-            ttl: Duration::from_secs(24 * 60 * 60),
-        }
-    }
-}
-
-pub fn convert_data_type(sql_type: &datafusion::sql::sqlparser::ast::DataType) -> Result<DataType> {
-    use datafusion::sql::sqlparser::ast::ArrayElemTypeDef;
-    use datafusion::sql::sqlparser::ast::DataType as SQLDataType;
-
-    match sql_type {
-        SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type))
-        | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type, _)) => {
-            let data_type = convert_data_type(inner_sql_type)?;
-            Ok(DataType::List(Arc::new(Field::new(
-                "field", data_type, true,
-            ))))
-        }
-        SQLDataType::Array(ArrayElemTypeDef::None) => {
-            plan_err!("Arrays with unspecified type is not supported")
-        }
-        other => convert_simple_data_type(other),
-    }
-}
-
-fn convert_simple_data_type(
-    sql_type: &datafusion::sql::sqlparser::ast::DataType,
-) -> Result<DataType> {
-    use datafusion::sql::sqlparser::ast::DataType as SQLDataType;
-    use datafusion::sql::sqlparser::ast::{ExactNumberInfo, TimezoneInfo};
-
-    match sql_type {
-        SQLDataType::Boolean | SQLDataType::Bool => Ok(DataType::Boolean),
-        SQLDataType::TinyInt(_) => Ok(DataType::Int8),
-        SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16),
-        SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => Ok(DataType::Int32),
-        SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64),
-        SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8),
-        SQLDataType::SmallIntUnsigned(_) | SQLDataType::Int2Unsigned(_) => Ok(DataType::UInt16),
-        SQLDataType::IntUnsigned(_)
-        | SQLDataType::UnsignedInteger
-        | SQLDataType::Int4Unsigned(_) => Ok(DataType::UInt32),
-        SQLDataType::BigIntUnsigned(_) | SQLDataType::Int8Unsigned(_) => Ok(DataType::UInt64),
-        SQLDataType::Float(_) | SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
-        SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
-            Ok(DataType::Float64)
-        }
-        SQLDataType::Char(_)
-        | SQLDataType::Varchar(_)
-        | SQLDataType::Text
-        | SQLDataType::String(_) => Ok(DataType::Utf8),
-        SQLDataType::Timestamp(None, TimezoneInfo::None) | SQLDataType::Datetime(_) => {
-            Ok(DataType::Timestamp(TimeUnit::Nanosecond, None))
-        }
-        SQLDataType::Timestamp(Some(precision), TimezoneInfo::None) => match *precision {
-            0 => Ok(DataType::Timestamp(TimeUnit::Second, None)),
-            3 => Ok(DataType::Timestamp(TimeUnit::Millisecond, None)),
-            6 => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)),
-            9 => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
-            _ => {
-                plan_err!(
-                    "unsupported precision {} -- supported: 0 (seconds), 3 (ms), 6 (us), 9 (ns)",
-                    precision
-                )
-            }
-        },
-        SQLDataType::Date => Ok(DataType::Date32),
-        SQLDataType::Time(None, tz_info) => {
-            if matches!(tz_info, TimezoneInfo::None)
-                || matches!(tz_info, TimezoneInfo::WithoutTimeZone)
-            {
-                Ok(DataType::Time64(TimeUnit::Nanosecond))
-            } else {
-                plan_err!("Unsupported SQL type {sql_type:?}")
-            }
-        }
-        SQLDataType::Numeric(exact_number_info) | SQLDataType::Decimal(exact_number_info) => {
-            let (precision, scale) = match *exact_number_info {
-                ExactNumberInfo::None => (None, None),
-                ExactNumberInfo::Precision(precision) => (Some(precision), None),
-                ExactNumberInfo::PrecisionAndScale(precision, scale) => {
-                    (Some(precision), Some(scale))
-                }
-            };
-            make_decimal_type(precision, scale)
-        }
-        SQLDataType::Bytea => Ok(DataType::Binary),
-        SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
-        SQLDataType::Struct(fields, _) => {
-            let fields: Vec<_> = fields
-                .iter()
-                .map(|f| {
-                    Ok::<_, datafusion::error::DataFusionError>(Arc::new(Field::new(
-                        f.field_name
-                            .as_ref()
-                            .ok_or_else(|| {
-                                plan_datafusion_err!("anonymous struct fields are not allowed")
-                            })?
-                            .to_string(),
-                        convert_data_type(&f.field_type)?,
-                        true,
-                    )))
-                })
-                .collect::<Result<_>>()?;
-            Ok(DataType::Struct(fields.into()))
-        }
-        _ => plan_err!("Unsupported SQL type {sql_type:?}"),
-    }
-}
-
-fn make_decimal_type(precision: Option<u64>, scale: Option<u64>) -> Result<DataType> {
-    let (precision, scale) = match (precision, scale) {
-        (Some(p), Some(s)) => (p as u8, s as i8),
-        (Some(p), None) => (p as u8, 0),
-        (None, Some(_)) => return plan_err!("Cannot specify only scale for decimal data type"),
-        (None, None) => (DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE),
-    };
-
-    if precision == 0 || precision > DECIMAL128_MAX_PRECISION || scale.unsigned_abs() > precision {
-        plan_err!(
-            "Decimal(precision = {precision}, scale = {scale}) should satisfy `0 < precision <= 38`, and `scale <= precision`."
-        )
-    } else {
-        Ok(DataType::Decimal128(precision, scale))
-    }
-}
-
-pub fn fields_with_qualifiers(schema: &DFSchema) -> Vec<DFField> {
-    schema
-        .fields()
-        .iter()
-        .enumerate()
-        .map(|(i, f)| (schema.qualified_field(i).0.cloned(), f.clone()).into())
-        .collect()
-}
-
-pub fn schema_from_df_fields(fields: &[DFField]) -> Result<DFSchema> {
-    schema_from_df_fields_with_metadata(fields, HashMap::new())
-}
-
-pub fn schema_from_df_fields_with_metadata(
-    fields: &[DFField],
-    metadata: HashMap<String, String>,
-) -> Result<DFSchema> {
-    DFSchema::new_with_metadata(fields.iter().map(|t| t.clone().into()).collect(), metadata)
-}
-
-pub fn get_duration(expression: &Expr) -> Result<Duration> {
-    use datafusion::common::ScalarValue;
-
-    match expression {
-        Expr::Literal(ScalarValue::IntervalDayTime(Some(val)), _) => {
-            Ok(Duration::from_secs((val.days as u64) * 24 * 60 * 60)
-                + Duration::from_millis(val.milliseconds as u64))
-        }
-        Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(val)), _) => {
-            if val.months != 0 {
-                return datafusion::common::not_impl_err!(
-                    "Windows do not support durations specified as months"
-                );
-            }
-            Ok(Duration::from_secs((val.days as u64) * 24 * 60 * 60)
-                + Duration::from_nanos(val.nanoseconds as u64))
-        }
-        _ => plan_err!(
-            "unsupported Duration expression, expect duration literal, not {}",
-            expression
-        ),
-    }
-}
-
-pub fn find_window(expression: &Expr) -> Result<Option<WindowType>> {
-    use datafusion::logical_expr::expr::Alias;
-    use datafusion::logical_expr::expr::ScalarFunction;
-
-    match expression {
-        Expr::ScalarFunction(ScalarFunction { func: fun, args }) => match fun.name() {
-            "hop" => {
-                if args.len() != 2 {
-                    unreachable!();
-                }
-                let slide = get_duration(&args[0])?;
-                let width = get_duration(&args[1])?;
-                if width.as_nanos() % slide.as_nanos() != 0 {
-                    return plan_err!(
-                        "hop() width {:?} must be a multiple of slide {:?}",
-                        width,
-                        slide
-                    );
-                }
-                if slide == width {
-                    Ok(Some(WindowType::Tumbling { width }))
-                } else {
-                    Ok(Some(WindowType::Sliding { width, slide }))
-                }
-            }
-            "tumble" => {
-                if args.len() != 1 {
-                    unreachable!("wrong number of arguments for tumble(), expect one");
-                }
-                let width = get_duration(&args[0])?;
-                Ok(Some(WindowType::Tumbling { width }))
-            }
-            "session" => {
-                if args.len() != 1 {
-                    unreachable!("wrong number of arguments for session(), expected one");
-                }
-                let gap = get_duration(&args[0])?;
-                Ok(Some(WindowType::Session { gap }))
-            }
-            _ => Ok(None),
-        },
-        Expr::Alias(Alias { expr, .. }) => find_window(expr),
-        _ => Ok(None),
-    }
-}
diff --git a/src/sql/planner/udafs.rs b/src/sql/planner/udafs.rs
new file mode 100644
index 00000000..9685c2d4
--- /dev/null
+++ b/src/sql/planner/udafs.rs
@@ -0,0 +1,31 @@
+use datafusion::arrow::array::ArrayRef;
+use datafusion::error::Result;
+use datafusion::physical_plan::Accumulator;
+use datafusion::scalar::ScalarValue;
+use std::fmt::Debug;
+
+/// Plan-time placeholder accumulator; every `Accumulator` method is `unreachable!()`.
+#[derive(Debug)]
+pub struct EmptyUdaf {}
+
+impl Accumulator for EmptyUdaf {
+    fn update_batch(&mut self, _: &[ArrayRef]) -> Result<()> {
+        unreachable!() // panics if reached: this accumulator is a plan-time placeholder
+    }
+
+    fn evaluate(&self) -> Result<ScalarValue> {
+        unreachable!()
+    }
+
+    fn size(&self) -> usize {
+        unreachable!() // NOTE(review): panics even for a size query; confirm nothing calls it
+    }
+
+    fn state(&self) -> Result<Vec<ScalarValue>> {
+        unreachable!()
+    }
+
+    fn merge_batch(&mut self, _: &[ArrayRef]) -> Result<()> {
+        unreachable!()
+    }
+}
diff --git a/src/sql/types/data_type.rs b/src/sql/types/data_type.rs
new file mode 100644
index 00000000..57edc3c9
--- /dev/null
+++ b/src/sql/types/data_type.rs
@@ -0,0 +1,144 @@
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{
+    DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DataType, Field, IntervalUnit, TimeUnit,
+};
+use datafusion::common::{Result, plan_datafusion_err, plan_err};
+
+use crate::types::FsExtensionType;
+
+pub fn convert_data_type( // Maps a SQL AST type to an Arrow `DataType` plus an optional extension marker.
+    sql_type: &datafusion::sql::sqlparser::ast::DataType,
+) -> Result<(DataType, Option<FsExtensionType>)> {
+    use datafusion::sql::sqlparser::ast::ArrayElemTypeDef;
+    use datafusion::sql::sqlparser::ast::DataType as SQLDataType;
+    // Arrays are handled here; every other type is delegated to `convert_simple_data_type`.
+    match sql_type {
+        SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type))
+        | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type, _)) => {
+            let (data_type, extension) = convert_simple_data_type(inner_sql_type)?; // element type only; nested arrays hit its unsupported-type arm
+            // The element's extension marker is attached to the list's inner field, not to the list itself.
+            Ok((
+                DataType::List(Arc::new(FsExtensionType::add_metadata(
+                    extension,
+                    Field::new("field", data_type, true), // element field is named "field" and nullable
+                ))),
+                None,
+            ))
+        }
+        SQLDataType::Array(ArrayElemTypeDef::None) => {
+            plan_err!("Arrays with unspecified type is not supported")
+        }
+        other => convert_simple_data_type(other),
+    }
+}
+
+fn convert_simple_data_type( // Maps a non-array SQL type to an Arrow type; only JSON sets the extension marker.
+    sql_type: &datafusion::sql::sqlparser::ast::DataType,
+) -> Result<(DataType, Option<FsExtensionType>)> {
+    use datafusion::sql::sqlparser::ast::DataType as SQLDataType;
+    use datafusion::sql::sqlparser::ast::{ExactNumberInfo, TimezoneInfo};
+
+    if matches!(sql_type, SQLDataType::JSON) {
+        return Ok((DataType::Utf8, Some(FsExtensionType::JSON))); // JSON is carried as Utf8 plus the marker
+    }
+    // Every arm below yields a plain Arrow type with no extension marker.
+    let dt = match sql_type {
+        SQLDataType::Boolean | SQLDataType::Bool => Ok(DataType::Boolean),
+        SQLDataType::TinyInt(_) => Ok(DataType::Int8),
+        SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16),
+        SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => Ok(DataType::Int32),
+        SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64),
+        SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8),
+        SQLDataType::SmallIntUnsigned(_) | SQLDataType::Int2Unsigned(_) => Ok(DataType::UInt16),
+        SQLDataType::IntUnsigned(_)
+        | SQLDataType::UnsignedInteger
+        | SQLDataType::Int4Unsigned(_) => Ok(DataType::UInt32),
+        SQLDataType::BigIntUnsigned(_) | SQLDataType::Int8Unsigned(_) => Ok(DataType::UInt64),
+        SQLDataType::Float(_) => Ok(DataType::Float32), // NOTE(review): the FLOAT(p) argument is ignored — confirm intended
+        SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
+        SQLDataType::Double(_) | SQLDataType::DoublePrecision | SQLDataType::Float8 => {
+            Ok(DataType::Float64)
+        }
+        SQLDataType::Char(_)
+        | SQLDataType::Varchar(_)
+        | SQLDataType::Text
+        | SQLDataType::String(_) => Ok(DataType::Utf8), // declared lengths are not carried over
+        SQLDataType::Timestamp(None, TimezoneInfo::None) | SQLDataType::Datetime(_) => {
+            Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) // no precision given: nanoseconds
+        }
+        SQLDataType::Timestamp(Some(precision), TimezoneInfo::None) => match *precision {
+            0 => Ok(DataType::Timestamp(TimeUnit::Second, None)),
+            3 => Ok(DataType::Timestamp(TimeUnit::Millisecond, None)),
+            6 => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)),
+            9 => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
+            _ => {
+                return plan_err!(
+                    "unsupported precision {} -- supported precisions are 0 (seconds), \
+            3 (milliseconds), 6 (microseconds), and 9 (nanoseconds)",
+                    precision
+                );
+            }
+        }, // timestamps with any other `TimezoneInfo` fall through to the unsupported-type arm
+        SQLDataType::Date => Ok(DataType::Date32),
+        SQLDataType::Time(None, tz_info) => {
+            if matches!(tz_info, TimezoneInfo::None)
+                || matches!(tz_info, TimezoneInfo::WithoutTimeZone)
+            {
+                Ok(DataType::Time64(TimeUnit::Nanosecond))
+            } else {
+                return plan_err!("Unsupported SQL type {sql_type:?}"); // TIME with a time zone
+            }
+        }
+        SQLDataType::Numeric(exact_number_info) | SQLDataType::Decimal(exact_number_info) => {
+            let (precision, scale) = match *exact_number_info {
+                ExactNumberInfo::None => (None, None),
+                ExactNumberInfo::Precision(precision) => (Some(precision), None),
+                ExactNumberInfo::PrecisionAndScale(precision, scale) => {
+                    (Some(precision), Some(scale))
+                }
+            };
+            make_decimal_type(precision, scale) // range validation happens in the helper
+        }
+        SQLDataType::Bytea => Ok(DataType::Binary),
+        SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
+        SQLDataType::Struct(fields, _) => {
+            let fields: Vec<_> = fields
+                .iter()
+                .map(|f| {
+                    Ok::<_, datafusion::error::DataFusionError>(Arc::new(Field::new(
+                        f.field_name
+                            .as_ref()
+                            .ok_or_else(|| {
+                                plan_datafusion_err!("anonymous struct fields are not allowed")
+                            })?
+                            .to_string(),
+                        convert_data_type(&f.field_type)?.0, // NOTE(review): `.0` drops the field's extension marker
+                        true, // struct members are always nullable
+                    )))
+                })
+                .collect::<Result<_>>()?;
+            Ok(DataType::Struct(fields.into()))
+        }
+        _ => return plan_err!("Unsupported SQL type {sql_type:?}"),
+    };
+    // `dt` is still a `Result` here; `?` surfaces errors such as a failed decimal validation.
+    Ok((dt?, None))
+}
+
+/// Validates SQL DECIMAL/NUMERIC precision and scale and builds the Arrow `Decimal128` type.
+fn make_decimal_type(precision: Option<u64>, scale: Option<u64>) -> Result<DataType> {
+    let (precision, scale) = match (precision, scale) {
+        (Some(p), s) => (p, s.unwrap_or(0)),
+        (None, Some(_)) => return plan_err!("Cannot specify only scale for decimal data type"),
+        (None, None) => (DECIMAL128_MAX_PRECISION as u64, DECIMAL_DEFAULT_SCALE as u64),
+    };
+    // Range-check in u64 before narrowing: `257 as u8` would wrap to a "valid" precision of 1.
+    if precision == 0 || precision > DECIMAL128_MAX_PRECISION as u64 || scale > precision {
+        plan_err!(
+            "Decimal(precision = {precision}, scale = {scale}) should satisfy `0 < precision <= 38`, and `scale <= precision`."
+        )
+    } else {
+        Ok(DataType::Decimal128(precision as u8, scale as i8))
+    }
+}
diff --git a/src/sql/types/df_field.rs b/src/sql/types/df_field.rs
new file mode 100644
index 00000000..3797adb2
--- /dev/null
+++ b/src/sql/types/df_field.rs
@@ -0,0 +1,141 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{DataType, Field, FieldRef};
+use datafusion::common::{Column, DFSchema, Result, TableReference};
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct DFField { // An Arrow field paired with an optional table qualifier.
+    qualifier: Option<TableReference>, // `None` means the field is unqualified
+    field: FieldRef,
+}
+
+impl From<(Option<TableReference>, FieldRef)> for DFField { // builds a DFField from owned parts without copying
+    fn from(value: (Option<TableReference>, FieldRef)) -> Self {
+        Self {
+            qualifier: value.0,
+            field: value.1,
+        }
+    }
+}
+
+impl From<(Option<&TableReference>, &Field)> for DFField { // builds a DFField from borrowed parts
+    fn from(value: (Option<&TableReference>, &Field)) -> Self {
+        Self {
+            qualifier: value.0.cloned(),
+            field: Arc::new(value.1.clone()), // the borrowed field is cloned into a fresh Arc
+        }
+    }
+}
+
+impl From<DFField> for (Option<TableReference>, FieldRef) { // splits a DFField back into its two parts
+    fn from(value: DFField) -> Self {
+        (value.qualifier, value.field)
+    }
+}
+
+impl DFField { // constructors, accessors and by-value modifiers
+    pub fn new( // builds a field with an optional qualifier
+        qualifier: Option<TableReference>,
+        name: impl Into<String>,
+        data_type: DataType,
+        nullable: bool,
+    ) -> Self {
+        Self {
+            qualifier,
+            field: Arc::new(Field::new(name, data_type, nullable)),
+        }
+    }
+
+    pub fn new_unqualified(name: &str, data_type: DataType, nullable: bool) -> Self { // same as `new` with no qualifier
+        DFField {
+            qualifier: None,
+            field: Arc::new(Field::new(name, data_type, nullable)),
+        }
+    }
+
+    pub fn name(&self) -> &String { // unqualified field name
+        self.field.name()
+    }
+
+    pub fn data_type(&self) -> &DataType { // Arrow type of the wrapped field
+        self.field.data_type()
+    }
+
+    pub fn is_nullable(&self) -> bool { // nullability of the wrapped field
+        self.field.is_nullable()
+    }
+
+    pub fn metadata(&self) -> &HashMap<String, String> { // metadata of the wrapped field
+        self.field.metadata()
+    }
+
+    pub fn qualified_name(&self) -> String { // "<qualifier>.<name>", or just the name when unqualified
+        if let Some(qualifier) = &self.qualifier {
+            format!("{}.{}", qualifier, self.field.name())
+        } else {
+            self.field.name().to_owned()
+        }
+    }
+
+    pub fn qualified_column(&self) -> Column { // column reference that keeps the qualifier
+        Column {
+            relation: self.qualifier.clone(),
+            name: self.field.name().to_string(),
+            spans: Default::default(),
+        }
+    }
+
+    pub fn unqualified_column(&self) -> Column { // column reference with the qualifier dropped
+        Column {
+            relation: None,
+            name: self.field.name().to_string(),
+            spans: Default::default(),
+        }
+    }
+
+    pub fn qualifier(&self) -> Option<&TableReference> { // borrowed qualifier, if any
+        self.qualifier.as_ref()
+    }
+
+    pub fn field(&self) -> &FieldRef { // the shared Arrow field
+        &self.field
+    }
+
+    pub fn strip_qualifier(mut self) -> Self { // consumes self and returns it unqualified
+        self.qualifier = None;
+        self
+    }
+
+    pub fn with_nullable(mut self, nullable: bool) -> Self { // clones the inner field with the new nullability
+        let f = self.field().as_ref().clone().with_nullable(nullable);
+        self.field = f.into(); // re-wrap in a new Arc; other holders of the old Arc are unaffected
+        self
+    }
+
+    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self { // clones the inner field with the given metadata
+        let f = self.field().as_ref().clone().with_metadata(metadata);
+        self.field = f.into();
+        self
+    }
+}
+
+pub fn fields_with_qualifiers(schema: &DFSchema) -> Vec<DFField> { // one DFField per schema field, in schema order
+    schema
+        .fields()
+        .iter()
+        .enumerate()
+        .map(|(i, f)| (schema.qualified_field(i).0.cloned(), f.clone()).into()) // qualifier is looked up by position
+        .collect()
+}
+
+pub fn schema_from_df_fields(fields: &[DFField]) -> Result<DFSchema> { // builds a DFSchema with empty metadata
+    schema_from_df_fields_with_metadata(fields, HashMap::new())
+}
+
+pub fn schema_from_df_fields_with_metadata( // builds a DFSchema from the fields and schema-level metadata
+    fields: &[DFField],
+    metadata: HashMap<String, String>,
+) -> Result<DFSchema> {
+    DFSchema::new_with_metadata(fields.iter().map(|t| t.clone().into()).collect(), metadata) // each field is cloned into a (qualifier, field) pair
+}
diff --git a/src/sql/types/mod.rs b/src/sql/types/mod.rs
new file mode 100644
index 00000000..25c67574
--- /dev/null
+++ b/src/sql/types/mod.rs
@@ -0,0 +1,50 @@
+mod data_type;
+mod df_field;
+pub(crate) mod placeholder_udf;
+mod stream_schema;
+mod window;
+
+use std::time::Duration;
+
+pub use data_type::convert_data_type;
+pub use df_field::{
+    DFField, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata,
+};
+pub(crate) use placeholder_udf::PlaceholderUdf;
+pub use stream_schema::StreamSchema;
+pub(crate) use window::WindowBehavior;
+pub use window::{WindowType, find_window, get_duration};
+
+pub const TIMESTAMP_FIELD: &str = "_timestamp";
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ProcessingMode { // NOTE(review): presumably append-only vs. updating output — confirm against the planner
+    Append,
+    Update,
+}
+
+#[derive(Clone, Debug)]
+pub struct SqlConfig { // planner configuration
+    pub default_parallelism: usize,
+}
+
+impl Default for SqlConfig {
+    fn default() -> Self {
+        Self {
+            default_parallelism: 4, // default parallelism when none is configured
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct PlanningOptions { // per-plan options
+    pub ttl: Duration, // NOTE(review): what the TTL applies to is not visible in this file — confirm
+}
+
+impl Default for PlanningOptions {
+    fn default() -> Self {
+        Self {
+            ttl: Duration::from_secs(24 * 60 * 60), // 24 hours
+        }
+    }
+}
diff --git a/src/sql/types/placeholder_udf.rs b/src/sql/types/placeholder_udf.rs
new file mode 100644
index 00000000..5cf96d28
--- /dev/null
+++ b/src/sql/types/placeholder_udf.rs
@@ -0,0 +1,58 @@
+use std::any::Any;
+use std::fmt::{Debug, Formatter};
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::DataType;
+use datafusion::common::Result;
+use datafusion::logical_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
+};
+
+#[allow(clippy::type_complexity)] // the boxed return-type closure below trips this lint
+pub(crate) struct PlaceholderUdf { // scalar UDF stub: has a name, signature and return type but no implementation
+    name: String,
+    signature: Signature,
+    return_type: Arc<dyn Fn(&[DataType]) -> Result<DataType> + Send + Sync + 'static>, // maps argument types to the return type
+}
+
+impl Debug for PlaceholderUdf { // manual impl: only the name is printed
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "PlaceholderUDF<{}>", self.name)
+    }
+}
+
+impl ScalarUDFImpl for PlaceholderUdf { // exposes the stored name/signature/return type; invocation panics
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
+        (self.return_type)(args) // delegate to the stored closure
+    }
+
+    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        unimplemented!("PlaceholderUdf should never be called at execution time"); // panics rather than returning Err
+    }
+}
+
+impl PlaceholderUdf {
+    pub fn with_return( // builds a placeholder UDF with exact argument types and a fixed return type
+        name: impl Into<String>,
+        args: Vec<DataType>,
+        ret: DataType,
+    ) -> Arc<ScalarUDF> {
+        Arc::new(ScalarUDF::new_from_impl(PlaceholderUdf {
+            name: name.into(),
+            signature: Signature::exact(args, Volatility::Volatile), // exact arity and types; marked volatile
+            return_type: Arc::new(move |_| Ok(ret.clone())), // the actual argument types are ignored
+        }))
+    }
+}
diff --git a/src/sql/types/stream_schema.rs b/src/sql/types/stream_schema.rs
new file mode 100644
index 00000000..e981111b
--- /dev/null
+++ b/src/sql/types/stream_schema.rs
@@ -0,0 +1,76 @@
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{Field, Schema, SchemaRef};
+use datafusion::common::Result;
+
+use super::TIMESTAMP_FIELD;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct StreamSchema { // an Arrow schema plus the positions of its timestamp and key columns
+    pub schema: SchemaRef,
+    pub timestamp_index: usize, // column index; the `from_*` constructors resolve it from `TIMESTAMP_FIELD`
+    pub key_indices: Option<Vec<usize>>, // `None` for an unkeyed stream
+}
+
+impl StreamSchema { // constructors only
+    pub fn new(schema: SchemaRef, timestamp_index: usize, key_indices: Option<Vec<usize>>) -> Self { // stores all parts as given, unvalidated
+        Self {
+            schema,
+            timestamp_index,
+            key_indices,
+        }
+    }
+
+    pub fn new_unkeyed(schema: SchemaRef, timestamp_index: usize) -> Self { // like `new` with no key columns
+        Self {
+            schema,
+            timestamp_index,
+            key_indices: None,
+        }
+    }
+
+    pub fn from_fields(fields: Vec<Field>) -> Self { // infallible: builds the schema and looks up the timestamp column
+        let schema = Arc::new(Schema::new(fields));
+        let timestamp_index = schema
+            .column_with_name(TIMESTAMP_FIELD)
+            .map(|(i, _)| i)
+            .unwrap_or(0); // NOTE(review): silently falls back to column 0 when the timestamp field is missing
+        Self {
+            schema,
+            timestamp_index,
+            key_indices: None,
+        }
+    }
+
+    pub fn from_schema_keys(schema: SchemaRef, key_indices: Vec<usize>) -> Result<Self> { // keyed; plan error if the timestamp field is missing
+        let timestamp_index = schema
+            .column_with_name(TIMESTAMP_FIELD)
+            .ok_or_else(|| {
+                datafusion::error::DataFusionError::Plan(format!(
+                    "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}"
+                ))
+            })?
+            .0; // keep only the column index
+        Ok(Self {
+            schema,
+            timestamp_index,
+            key_indices: Some(key_indices), // key indices are stored unvalidated
+        })
+    }
+
+    pub fn from_schema_unkeyed(schema: SchemaRef) -> Result<Self> { // unkeyed; plan error if the timestamp field is missing
+        let timestamp_index = schema
+            .column_with_name(TIMESTAMP_FIELD)
+            .ok_or_else(|| {
+                datafusion::error::DataFusionError::Plan(format!(
+                    "no {TIMESTAMP_FIELD} field in schema"
+                ))
+            })?
+            .0; // keep only the column index
+        Ok(Self {
+            schema,
+            timestamp_index,
+            key_indices: None,
+        })
+    }
+}
diff --git a/src/sql/types/window.rs b/src/sql/types/window.rs
new file mode 100644
index 00000000..9687974a
--- /dev/null
+++ b/src/sql/types/window.rs
@@ -0,0 +1,95 @@
+use std::time::Duration;
+
+use datafusion::common::{Result, plan_err};
+use datafusion::logical_expr::Expr;
+
+use super::DFField;
+
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum WindowType { // window shapes recognised by the planner
+    Tumbling { width: Duration }, // built by `find_window` for tumble(), and for hop() when slide == width
+    Sliding { width: Duration, slide: Duration }, // built by `find_window` for hop() when slide != width
+    Session { gap: Duration }, // built by `find_window` for session()
+    Instant, // never produced by `find_window` in this file
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) enum WindowBehavior { // NOTE(review): usage is outside this file; the field notes below are hedged
+    FromOperator {
+        window: WindowType,
+        window_field: DFField,
+        window_index: usize, // presumably the position of `window_field` in a schema — confirm
+        is_nested: bool,
+    },
+    InData, // carries no payload
+}
+
+/// Converts an interval literal to a `Duration`; month-based or negative intervals are rejected.
+pub fn get_duration(expression: &Expr) -> Result<Duration> {
+    use datafusion::common::ScalarValue;
+    // Sum signed parts as i128 nanoseconds: a bare `as u64` would wrap negatives to huge values.
+    let total = match expression {
+        Expr::Literal(ScalarValue::IntervalDayTime(Some(val)), _) => {
+            val.days as i128 * 86_400_000_000_000 + val.milliseconds as i128 * 1_000_000
+        }
+        Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(val)), _) => {
+            if val.months != 0 {
+                return datafusion::common::not_impl_err!(
+                    "Windows do not support durations specified as months"
+                );
+            }
+            val.days as i128 * 86_400_000_000_000 + val.nanoseconds as i128
+        }
+        _ => return plan_err!("unsupported Duration expression, expect duration literal, not {expression}"),
+    };
+    if total < 0 {
+        return plan_err!("window durations must not be negative, got {expression}");
+    }
+    Ok(Duration::new((total / 1_000_000_000) as u64, (total % 1_000_000_000) as u32))
+}
+
+/// Returns the window implied by a `hop`/`tumble`/`session` call (looking through aliases), if any.
+pub fn find_window(expression: &Expr) -> Result<Option<WindowType>> {
+    use datafusion::logical_expr::expr::{Alias, ScalarFunction};
+    match expression {
+        Expr::ScalarFunction(ScalarFunction { func: fun, args }) => match fun.name() {
+            "hop" => {
+                if args.len() != 2 {
+                    unreachable!();
+                }
+                let slide = get_duration(&args[0])?;
+                let width = get_duration(&args[1])?;
+                // A zero slide would make the modulo below panic (division by zero); reject it.
+                if slide.is_zero() || width.as_nanos() % slide.as_nanos() != 0 {
+                    return plan_err!(
+                        "hop() width {:?} must be a multiple of slide {:?}",
+                        width,
+                        slide
+                    );
+                }
+                if slide == width {
+                    Ok(Some(WindowType::Tumbling { width }))
+                } else {
+                    Ok(Some(WindowType::Sliding { width, slide }))
+                }
+            }
+            "tumble" => {
+                if args.len() != 1 {
+                    unreachable!("wrong number of arguments for tumble(), expect one");
+                }
+                let width = get_duration(&args[0])?;
+                Ok(Some(WindowType::Tumbling { width }))
+            }
+            "session" => {
+                if args.len() != 1 {
+                    unreachable!("wrong number of arguments for session(), expected one");
+                }
+                let gap = get_duration(&args[0])?;
+                Ok(Some(WindowType::Session { gap }))
+            }
+            _ => Ok(None),
+        },
+        Expr::Alias(Alias { expr, .. }) => find_window(expr),
+        _ => Ok(None),
+    }
+}
diff --git a/src/storage/task/rocksdb_storage.rs b/src/storage/task/rocksdb_storage.rs
index 31709a51..714a9143 100644
--- a/src/storage/task/rocksdb_storage.rs
+++ b/src/storage/task/rocksdb_storage.rs
@@ -103,11 +103,19 @@ impl TaskStorage for RocksDBTaskStorage {
         };
 
         let mut batch = WriteBatch::default();
-        batch.put_cf(&cf_meta, key, bincode::serialize(&meta)?);
+        batch.put_cf(
+            &cf_meta,
+            key,
+            bincode::serde::encode_to_vec(&meta, bincode::config::standard())?,
+        );
         batch.put_cf(&cf_conf, key, &task_info.config_bytes);
 
         if let Some(ref module) = task_info.module_bytes {
-            batch.put_cf(&cf_payl, key, bincode::serialize(module)?);
+            batch.put_cf(
+                &cf_payl,
+                key,
+                bincode::serde::encode_to_vec(module, bincode::config::standard())?,
+            );
         }
 
         self.db
@@ -124,10 +132,15 @@ impl TaskStorage for RocksDBTaskStorage {
             .get_cf(&cf, key)?
             .ok_or_else(|| anyhow!("Task {} not found", task_name))?;
 
-        let mut meta: TaskMetadata = bincode::deserialize(&raw)?;
+        let (mut meta, _): (TaskMetadata, _) =
+            bincode::serde::decode_from_slice(&raw, bincode::config::standard())?;
         meta.state = new_state;
 
-        self.db.put_cf(&cf, key, bincode::serialize(&meta)?)?;
+        self.db.put_cf(
+            &cf,
+            key,
+            bincode::serde::encode_to_vec(&meta, bincode::config::standard())?,
+        )?;
         Ok(())
     }
 
@@ -140,10 +153,15 @@ impl TaskStorage for RocksDBTaskStorage {
             .get_cf(&cf, key)?
             .ok_or_else(|| anyhow!("Task {} not found", task_name))?;
 
-        let mut meta: TaskMetadata = bincode::deserialize(&raw)?;
+        let (mut meta, _): (TaskMetadata, _) =
+            bincode::serde::decode_from_slice(&raw, bincode::config::standard())?;
         meta.checkpoint_id = checkpoint_id;
 
-        self.db.put_cf(&cf, key, bincode::serialize(&meta)?)?;
+        self.db.put_cf(
+            &cf,
+            key,
+            bincode::serde::encode_to_vec(&meta, bincode::config::standard())?,
+        )?;
         Ok(())
     }
 
@@ -174,9 +192,17 @@ impl TaskStorage for RocksDBTaskStorage {
         let module_bytes = self
             .db
             .get_cf(&self.get_cf(CF_PAYLOAD)?, key)?
-            .and_then(|b| bincode::deserialize::<TaskModuleBytes>(&b).ok());
-
-        let meta: TaskMetadata = bincode::deserialize(&meta_raw)?;
+            .and_then(|b| {
+                bincode::serde::decode_from_slice::<TaskModuleBytes, _>(
+                    &b,
+                    bincode::config::standard(),
+                )
+                .ok()
+                .map(|(v, _)| v)
+            });
+
+        let (meta, _): (TaskMetadata, _) =
+            bincode::serde::decode_from_slice(&meta_raw, bincode::config::standard())?;
 
         Ok(StoredTaskInfo {
             name: task_name.to_string(),
diff --git a/src/types/arrow_ext.rs b/src/types/arrow_ext.rs
new file mode 100644
index 00000000..701bf8e4
--- /dev/null
+++ b/src/types/arrow_ext.rs
@@ -0,0 +1,169 @@
+use std::collections::HashMap;
+use std::fmt::{Display, Formatter};
+use std::time::SystemTime;
+
+use datafusion::arrow::datatypes::{DataType, Field, TimeUnit};
+
+pub struct DisplayAsSql<'a>(pub &'a DataType); // wrapper whose `Display` prints an Arrow type as a SQL type name
+
+impl Display for DisplayAsSql<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self.0 {
+            DataType::Boolean => write!(f, "BOOLEAN"),
+            DataType::Int8 | DataType::Int16 | DataType::Int32 => write!(f, "INT"), // narrower ints all print as INT
+            DataType::Int64 => write!(f, "BIGINT"),
+            DataType::UInt8 | DataType::UInt16 | DataType::UInt32 => write!(f, "INT UNSIGNED"),
+            DataType::UInt64 => write!(f, "BIGINT UNSIGNED"),
+            DataType::Float16 | DataType::Float32 => write!(f, "FLOAT"),
+            DataType::Float64 => write!(f, "DOUBLE"),
+            DataType::Timestamp(_, _) => write!(f, "TIMESTAMP"), // unit and time zone are not printed
+            DataType::Date32 => write!(f, "DATE"),
+            DataType::Date64 => write!(f, "DATETIME"),
+            DataType::Time32(_) => write!(f, "TIME"),
+            DataType::Time64(_) => write!(f, "TIME"),
+            DataType::Duration(_) => write!(f, "INTERVAL"),
+            DataType::Interval(_) => write!(f, "INTERVAL"),
+            DataType::Binary | DataType::FixedSizeBinary(_) | DataType::LargeBinary => {
+                write!(f, "BYTEA")
+            }
+            DataType::Utf8 | DataType::LargeUtf8 => write!(f, "TEXT"),
+            DataType::List(inner) => {
+                write!(f, "{}[]", DisplayAsSql(inner.data_type())) // recurse into the element type
+            }
+            dt => write!(f, "{dt}"), // anything else uses Arrow's own Display
+        }
+    }
+}
+
+/// Arrow extension type markers for FunctionStream-specific semantics, stored in field metadata.
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub enum FsExtensionType {
+    JSON, // stored under the metadata value "functionstream.json"
+}
+
+impl FsExtensionType {
+    pub fn from_map(map: &HashMap<String, String>) -> Option<Self> { // reads the marker back from field metadata
+        match map.get("ARROW:extension:name")?.as_str() { // `None` when the key is absent
+            "functionstream.json" => Some(Self::JSON),
+            _ => None, // an unrecognised extension name is treated as no marker
+        }
+    }
+
+    pub fn add_metadata(v: Option<Self>, field: Field) -> Field { // tags `field` when a marker is given, else returns it unchanged
+        if let Some(v) = v {
+            let mut m = HashMap::new();
+            match v {
+                FsExtensionType::JSON => {
+                    m.insert(
+                        "ARROW:extension:name".to_string(),
+                        "functionstream.json".to_string(),
+                    );
+                }
+            }
+            field.with_metadata(m) // NOTE(review): presumably replaces any metadata already on `field` — confirm
+        } else {
+            field
+        }
+    }
+}
+
+pub trait GetArrowType { // associates a Rust type with an Arrow `DataType`
+    fn arrow_type() -> DataType;
+}
+
+pub trait GetArrowSchema { // associates a Rust type with an Arrow `Schema`
+    fn arrow_schema() -> datafusion::arrow::datatypes::Schema;
+}
+
+impl<T> GetArrowType for T // blanket impl: a type with a schema maps to a struct type
+where
+    T: GetArrowSchema,
+{
+    fn arrow_type() -> DataType {
+        DataType::Struct(Self::arrow_schema().fields.clone()) // struct made of the schema's fields
+    }
+}
+
+impl GetArrowType for bool { // first of the one-to-one primitive mappings that follow
+    fn arrow_type() -> DataType {
+        DataType::Boolean
+    }
+}
+
+impl GetArrowType for i8 {
+    fn arrow_type() -> DataType {
+        DataType::Int8
+    }
+}
+
+impl GetArrowType for i16 {
+    fn arrow_type() -> DataType {
+        DataType::Int16
+    }
+}
+
+impl GetArrowType for i32 {
+    fn arrow_type() -> DataType {
+        DataType::Int32
+    }
+}
+
+impl GetArrowType for i64 {
+    fn arrow_type() -> DataType {
+        DataType::Int64
+    }
+}
+
+impl GetArrowType for u8 {
+    fn arrow_type() -> DataType {
+        DataType::UInt8
+    }
+}
+
+impl GetArrowType for u16 {
+    fn arrow_type() -> DataType {
+        DataType::UInt16
+    }
+}
+
+impl GetArrowType for u32 {
+    fn arrow_type() -> DataType {
+        DataType::UInt32
+    }
+}
+
+impl GetArrowType for u64 {
+    fn arrow_type() -> DataType {
+        DataType::UInt64
+    }
+}
+
+impl GetArrowType for f32 {
+    fn arrow_type() -> DataType {
+        DataType::Float32
+    }
+}
+
+impl GetArrowType for f64 {
+    fn arrow_type() -> DataType {
+        DataType::Float64
+    }
+}
+
+impl GetArrowType for String {
+    fn arrow_type() -> DataType {
+        DataType::Utf8
+    }
+}
+
+impl GetArrowType for Vec<u8> { // byte vectors map to variable-length binary
+    fn arrow_type() -> DataType {
+        DataType::Binary
+    }
+}
+
+impl GetArrowType for SystemTime { // nanosecond timestamp with no time zone
+    fn arrow_type() -> DataType {
+        DataType::Timestamp(TimeUnit::Nanosecond, None)
+    }
+}
diff --git a/src/types/control.rs b/src/types/control.rs
new file mode 100644
index 00000000..efdc754e
--- /dev/null
+++ b/src/types/control.rs
@@ -0,0 +1,152 @@
+use std::collections::HashMap;
+use std::time::SystemTime;
+
+use super::message::CheckpointBarrier;
+
+/// Control messages sent from the controller to worker tasks.
+#[derive(Debug, Clone)]
+pub enum ControlMessage {
+    Checkpoint(CheckpointBarrier), // barrier type comes from `super::message`
+    Stop {
+        mode: StopMode, // graceful or immediate
+    },
+    Commit {
+        epoch: u32,
+        commit_data: HashMap<String, HashMap<u32, Vec<u8>>>, // NOTE(review): key meanings are not visible here — confirm
+    },
+    LoadCompacted {
+        compacted: CompactionResult,
+    },
+    NoOp, // carries no payload
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum StopMode { // payload of `ControlMessage::Stop`
+    Graceful,
+    Immediate,
+}
+
+#[derive(Debug, Clone)]
+pub struct CompactionResult { // payload of `ControlMessage::LoadCompacted`
+    pub operator_id: String,
+    pub compacted_tables: HashMap<String, TableCheckpointMetadata>, // presumably keyed by table name — confirm
+}
+
+#[derive(Debug, Clone)]
+pub struct TableCheckpointMetadata { // a table type plus an opaque byte payload
+    pub table_type: TableType,
+    pub data: Vec<u8>, // NOTE(review): encoding is not defined in this file — confirm
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TableType { // tag used by the table metadata/config structs in this file
+    GlobalKeyValue,
+    ExpiringKeyedTimeTable,
+}
+
+/// Responses sent from worker tasks back to the controller.
+#[derive(Debug, Clone)]
+pub enum ControlResp {
+    CheckpointEvent(CheckpointEvent),
+    CheckpointCompleted(CheckpointCompleted),
+    TaskStarted {
+        node_id: u32,
+        task_index: usize,
+        start_time: SystemTime,
+    },
+    TaskFinished {
+        node_id: u32,
+        task_index: usize,
+    },
+    TaskFailed {
+        node_id: u32,
+        task_index: usize,
+        error: TaskError, // structured error, including domain and retry hint
+    },
+    Error { // unlike `TaskFailed`, carries plain strings rather than a `TaskError`
+        node_id: u32,
+        operator_id: String,
+        task_index: usize,
+        message: String,
+        details: String,
+    },
+}
+
+#[derive(Debug, Clone)]
+pub struct CheckpointCompleted { // payload of `ControlResp::CheckpointCompleted`
+    pub checkpoint_epoch: u32,
+    pub node_id: u32,
+    pub operator_id: String,
+    pub subtask_metadata: SubtaskCheckpointMetadata,
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtaskCheckpointMetadata { // per-subtask checkpoint summary carried by `CheckpointCompleted`
+    pub subtask_index: u32,
+    pub start_time: u64, // NOTE(review): time unit/epoch is not stated in this file — confirm
+    pub finish_time: u64, // NOTE(review): same unit as `start_time`, presumably — confirm
+    pub watermark: Option<u64>,
+    pub bytes: u64,
+    pub table_metadata: HashMap<String, TableSubtaskCheckpointMetadata>, // presumably keyed by table name — confirm
+    pub table_configs: HashMap<String, TableConfig>, // presumably keyed by table name — confirm
+}
+
+#[derive(Debug, Clone)]
+pub struct TableSubtaskCheckpointMetadata { // value type of `SubtaskCheckpointMetadata::table_metadata`
+    pub subtask_index: u32,
+    pub table_type: TableType,
+    pub data: Vec<u8>, // NOTE(review): encoding is not defined in this file — confirm
+}
+
+#[derive(Debug, Clone)]
+pub struct TableConfig { // value type of `SubtaskCheckpointMetadata::table_configs`
+    pub table_type: TableType,
+    pub config: Vec<u8>, // NOTE(review): encoding is not defined in this file — confirm
+    pub state_version: u32,
+}
+
+#[derive(Debug, Clone)]
+pub struct CheckpointEvent { // payload of `ControlResp::CheckpointEvent`
+    pub checkpoint_epoch: u32,
+    pub node_id: u32,
+    pub operator_id: String,
+    pub subtask_index: u32,
+    pub time: SystemTime,
+    pub event_type: TaskCheckpointEventType, // which checkpoint phase this event reports
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TaskCheckpointEventType { // NOTE(review): variants look ordered by checkpoint phase — confirm
+    StartedAlignment,
+    StartedCheckpointing,
+    FinishedOperatorSetup,
+    FinishedSync,
+    FinishedCommit,
+}
+
+#[derive(Debug, Clone)]
+pub struct TaskError { // structured failure report carried by `ControlResp::TaskFailed`
+    pub job_id: String,
+    pub node_id: u32,
+    pub operator_id: String,
+    pub operator_subtask: u64, // note: u64 here, while other structs in this file use u32/usize for subtask indices
+    pub error: String,
+    pub error_domain: ErrorDomain,
+    pub retry_hint: RetryHint,
+    pub details: String,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "snake_case")] // serialized as "user", "internal", "external"
+pub enum ErrorDomain {
+    User,
+    Internal,
+    External,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "snake_case")] // serialized as "no_retry", "with_backoff"
+pub enum RetryHint {
+    NoRetry,
+    WithBackoff,
+}
diff --git a/src/types/date.rs b/src/types/date.rs
new file mode 100644
index 00000000..c18e31a7
--- /dev/null
+++ b/src/types/date.rs
@@ -0,0 +1,70 @@
+use serde::Serialize;
+use std::convert::TryFrom;
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Hash, Serialize)]
+pub enum DatePart { // date/time component names; parsed from text by the `TryFrom<&str>` impl
+    Year,
+    Month,
+    Week,
+    Day,
+    Hour,
+    Minute,
+    Second,
+    Millisecond,
+    Microsecond,
+    Nanosecond,
+    DayOfWeek, // parsed from "dow"
+    DayOfYear, // parsed from "doy"
+}
+
+impl TryFrom<&str> for DatePart { // case-insensitive parse; unknown names yield a descriptive `Err(String)`
+    type Error = String;
+
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        match value.to_lowercase().as_str() { // lowercase once so matching ignores case
+            "year" => Ok(DatePart::Year),
+            "month" => Ok(DatePart::Month),
+            "week" => Ok(DatePart::Week),
+            "day" => Ok(DatePart::Day),
+            "hour" => Ok(DatePart::Hour),
+            "minute" => Ok(DatePart::Minute),
+            "second" => Ok(DatePart::Second),
+            "millisecond" => Ok(DatePart::Millisecond),
+            "microsecond" => Ok(DatePart::Microsecond),
+            "nanosecond" => Ok(DatePart::Nanosecond),
+            "dow" => Ok(DatePart::DayOfWeek),
+            "doy" => Ok(DatePart::DayOfYear),
+            _ => Err(format!("'{value}' is not a valid DatePart")), // the message echoes the input as given
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, PartialOrd, Serialize)]
+pub enum DateTruncPrecision { // truncation granularities; parsed from text by the `TryFrom<&str>` impl
+    Year,
+    Quarter,
+    Month,
+    Week,
+    Day,
+    Hour,
+    Minute,
+    Second,
+}
+
+impl TryFrom<&str> for DateTruncPrecision { // case-insensitive parse; unknown names yield a descriptive `Err(String)`
+    type Error = String;
+
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        match value.to_lowercase().as_str() { // lowercase once so matching ignores case
+            "year" => Ok(DateTruncPrecision::Year),
+            "quarter" => Ok(DateTruncPrecision::Quarter),
+            "month" => Ok(DateTruncPrecision::Month),
+            "week" => Ok(DateTruncPrecision::Week),
+            "day" => Ok(DateTruncPrecision::Day),
+            "hour" => Ok(DateTruncPrecision::Hour),
+            "minute" => Ok(DateTruncPrecision::Minute),
+            "second" => Ok(DateTruncPrecision::Second),
+            _ => Err(format!("'{value}' is not a valid DateTruncPrecision")), // the message echoes the input as given
+        }
+    }
+}
diff --git a/src/types/debezium.rs b/src/types/debezium.rs
new file mode 100644
index 00000000..3c9f4747
--- /dev/null
+++ b/src/types/debezium.rs
@@ -0,0 +1,136 @@
+use bincode::{Decode, Encode};
+use serde::{Deserialize, Serialize};
+use std::convert::TryFrom;
+use std::fmt::Debug;
+
+pub trait Key:
+    Debug + Clone + Encode + Decode<()> + std::hash::Hash + PartialEq + Eq + Send + 'static
+{
+} // marker trait: declares no methods, only the bounds above
+impl<T: Debug + Clone + Encode + Decode<()> + std::hash::Hash + PartialEq + Eq + Send + 'static> Key
+    for T
+{
+} // blanket impl: every type satisfying the bounds is a `Key`
+
+pub trait Data: Debug + Clone + Encode + Decode<()> + Send + 'static {} // marker trait
+impl<T: Debug + Clone + Encode + Decode<()> + Send + 'static> Data for T {} // blanket impl
+
+#[derive(Debug, Clone, PartialEq, Encode, Decode, Serialize, Deserialize)]
+pub enum UpdatingData<T: Data> {
+    Retract(T), // `lower()` panics on this variant
+    Update { old: T, new: T }, // `lower()` returns a clone of `new`
+    Append(T), // the only variant `unwrap_append()` accepts
+}
+
+impl<T: Data> UpdatingData<T> {
+    /// Clones the current value out of an update or append. Panics on `Retract`.
+    pub fn lower(&self) -> T {
+        match self {
+            Self::Update { new: current, .. } | Self::Append(current) => current.clone(),
+            Self::Retract(_) => panic!("cannot lower retractions"),
+        }
+    }
+    /// Borrows the payload of an `Append`. Panics on any other variant.
+    pub fn unwrap_append(&self) -> &T {
+        let Self::Append(value) = self else {
+            panic!("UpdatingData is not an append");
+        };
+        value
+    }
+}
+
+#[derive(Clone, Encode, Decode, Debug, Serialize, Deserialize, PartialEq)]
+#[serde(try_from = "DebeziumShadow<T>")] // deserialized via the shadow, then validated
+pub struct Debezium<T: Data> {
+    pub before: Option<T>, // must be set for update and delete ops when deserialized
+    pub after: Option<T>, // must be set for create and update ops when deserialized
+    pub op: DebeziumOp,
+}
+
+#[derive(Clone, Encode, Decode, Debug, Serialize, Deserialize, PartialEq)]
+struct DebeziumShadow<T: Data> { // same fields as `Debezium`, without the validation
+    before: Option<T>,
+    after: Option<T>,
+    op: DebeziumOp,
+}
+
+impl<T: Data> TryFrom<DebeziumShadow<T>> for Debezium<T> {
+    type Error = &'static str;
+
+    /// Checks that the row images required by `op` are present: create needs
+    /// `after`, update needs `before` and `after`, delete needs `before`.
+    fn try_from(value: DebeziumShadow<T>) -> Result<Self, Self::Error> {
+        let DebeziumShadow { before, after, op } = value;
+        let (no_before, no_after) = (before.is_none(), after.is_none());
+        // For updates, a missing `before` is reported ahead of a missing `after`.
+        if op == DebeziumOp::Create && no_after {
+            return Err("`after` must be set for Debezium create messages");
+        }
+        if op == DebeziumOp::Update && no_before {
+            return Err("`before` must be set for Debezium update messages");
+        }
+        if op == DebeziumOp::Update && no_after {
+            return Err("`after` must be set for Debezium update messages");
+        }
+        if op == DebeziumOp::Delete && no_before {
+            return Err("`before` must be set for Debezium delete messages");
+        }
+
+        Ok(Debezium { before, after, op })
+    }
+}
+
+#[derive(Copy, Clone, Encode, Decode, Debug, PartialEq)]
+pub enum DebeziumOp {
+    Create, // written as "c"; read from "c" or "r"
+    Update, // written and read as "u"
+    Delete, // written and read as "d"
+}
+
+/// Formats the op as its single-letter code: `c`, `u` or `d`.
+/// Implemented as `Display`, so `to_string()` keeps working through the blanket impl.
+impl std::fmt::Display for DebeziumOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            DebeziumOp::Create => "c",
+            DebeziumOp::Update => "u",
+            DebeziumOp::Delete => "d",
+        })
+    }
+}
+
+impl<'de> Deserialize<'de> for DebeziumOp {
+    /// Reads a string code: "c" or "r" becomes `Create`, "u" `Update`, "d" `Delete`.
+    /// Any other string is rejected with a custom deserialization error.
+    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+        let code = String::deserialize(deserializer)?;
+        // Codes are compared exactly, so matching is case-sensitive.
+        Ok(match code.as_str() {
+            "c" | "r" => DebeziumOp::Create,
+            "u" => DebeziumOp::Update,
+            "d" => DebeziumOp::Delete,
+            _ => return Err(serde::de::Error::custom(format!("Invalid DebeziumOp {code}"))),
+        })
+    }
+}
+
+impl Serialize for DebeziumOp {
+    /// Writes the single-letter code: "c" for create, "u" for update, "d" for delete.
+    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        let code = match self {
+            DebeziumOp::Create => "c",
+            DebeziumOp::Update => "u",
+            DebeziumOp::Delete => "d",
+        };
+        // An "r" accepted by `Deserialize` maps to `Create`, so it is written back as "c".
+        serializer.serialize_str(code)
+    }
+}
+
+#[derive(Copy, Clone, Encode, Decode, Debug, PartialEq, Serialize, Deserialize)]
+pub enum JoinType { // unit variants only; no serde rename attribute is applied
+    Inner,
+    Left,
+    Right,
+    Full,
+}
diff --git a/src/types/df.rs b/src/types/df.rs
new file mode 100644
index 00000000..30b4eb9c
--- /dev/null
+++ b/src/types/df.rs
@@ -0,0 +1,394 @@
+use datafusion::arrow::array::builder::{ArrayBuilder, make_builder};
+use datafusion::arrow::array::{RecordBatch, TimestampNanosecondArray};
+use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit};
+use datafusion::arrow::error::ArrowError;
+use datafusion::common::{DataFusionError, Result as DFResult};
+use std::sync::Arc;
+
+use super::TIMESTAMP_FIELD;
+use crate::sql::types::StreamSchema;
+
+pub type FsSchemaRef = Arc<FsSchema>;
+
+/// Core streaming schema with timestamp and key tracking.
+/// Analogous to Arroyo's `ArroyoSchema`.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub struct FsSchema {
+    pub schema: Arc<Schema>,
+    pub timestamp_index: usize, // position of the `TIMESTAMP_FIELD` column in `schema`
+    key_indices: Option<Vec<usize>>, // storage keys; also the fallback for `routing_keys()`
+    routing_key_indices: Option<Vec<usize>>, // preferred by `routing_keys()` when set
+}
+
+impl FsSchema {
+    pub fn new(
+        schema: Arc<Schema>,
+        timestamp_index: usize,
+        key_indices: Option<Vec<usize>>,
+        routing_key_indices: Option<Vec<usize>>,
+    ) -> Self {
+        Self { // stores the arguments as given; nothing is checked against `schema`
+            schema,
+            timestamp_index,
+            key_indices,
+            routing_key_indices,
+        }
+    }
+
+    /// Builds a schema with neither storage keys nor routing keys.
+    ///
+    /// `timestamp_index` is stored as given; it is not checked against `schema`.
+    pub fn new_unkeyed(schema: Arc<Schema>, timestamp_index: usize) -> Self {
+        // Both key lists start out unset.
+        let (key_indices, routing_key_indices) = (None, None);
+        Self::new(schema, timestamp_index, key_indices, routing_key_indices)
+    }
+
+    /// Builds a schema whose storage keys are `key_indices`.
+    ///
+    /// No dedicated routing keys are set, so `routing_keys()` falls back to the
+    /// storage keys. Neither index argument is checked against `schema`.
+    pub fn new_keyed(schema: Arc<Schema>, timestamp_index: usize, key_indices: Vec<usize>) -> Self {
+        let storage_keys = Some(key_indices);
+        Self::new(schema, timestamp_index, storage_keys, None)
+    }
+
+    /// Builds a schema with an empty key list from `fields`, appending a non-null
+    /// nanosecond `TIMESTAMP_FIELD` column when no field already has that name.
+    pub fn from_fields(mut fields: Vec<Field>) -> Self {
+        let has_timestamp = fields.iter().any(|f| f.name() == TIMESTAMP_FIELD);
+        if !has_timestamp {
+            let unit = DataType::Timestamp(TimeUnit::Nanosecond, None);
+            fields.push(Field::new(TIMESTAMP_FIELD, unit, false));
+        }
+        // `from_schema_keys` only fails without a timestamp column, which now exists.
+        Self::from_schema_keys(Arc::new(Schema::new(fields)), vec![]).unwrap()
+    }
+
+    /// Wraps `schema` without keys, locating the timestamp column by name.
+    ///
+    /// # Errors
+    /// Returns a plan error when `schema` has no `TIMESTAMP_FIELD` column.
+    pub fn from_schema_unkeyed(schema: Arc<Schema>) -> DFResult<Self> {
+        let Some((timestamp_index, _)) = schema.column_with_name(TIMESTAMP_FIELD) else {
+            return Err(DataFusionError::Plan(format!(
+                "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}"
+            )));
+        };
+        Ok(Self {
+            schema,
+            timestamp_index,
+            key_indices: None,
+            routing_key_indices: None,
+        })
+    }
+
+    /// Wraps `schema` with `key_indices` as storage keys; the key indices are not validated.
+    ///
+    /// # Errors
+    /// Returns a plan error when `schema` has no `TIMESTAMP_FIELD` column.
+    pub fn from_schema_keys(schema: Arc<Schema>, key_indices: Vec<usize>) -> DFResult<Self> {
+        let Some((timestamp_index, _)) = schema.column_with_name(TIMESTAMP_FIELD) else {
+            return Err(DataFusionError::Plan(format!(
+                "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}"
+            )));
+        };
+        Ok(Self {
+            schema,
+            timestamp_index,
+            key_indices: Some(key_indices),
+            routing_key_indices: None,
+        })
+    }
+
+    pub fn schema_without_timestamp(&self) -> Schema {
+        let mut builder = SchemaBuilder::from(self.schema.fields()); // starts from the field list
+        builder.remove(self.timestamp_index); // drops only the timestamp column
+        builder.finish()
+    }
+
+    pub fn remove_timestamp_column(&self, batch: &mut RecordBatch) {
+        batch.remove_column(self.timestamp_index); // edits `batch` in place
+    }
+
+    pub fn builders(&self) -> Vec<Box<dyn ArrayBuilder>> {
+        self.schema
+            .fields
+            .iter()
+            .map(|f| make_builder(f.data_type(), 8)) // one builder per field, capacity 8
+            .collect()
+    }
+
+    /// Borrows the timestamp column of `batch` as a nanosecond timestamp array.
+    ///
+    /// Panics when the column at `timestamp_index` is not a `TimestampNanosecondArray`.
+    pub fn timestamp_column<'a>(&self, batch: &'a RecordBatch) -> &'a TimestampNanosecondArray {
+        let column = batch.column(self.timestamp_index).as_any();
+        column.downcast_ref::<TimestampNanosecondArray>().unwrap()
+    }
+
+    pub fn has_routing_keys(&self) -> bool {
+        self.routing_keys().is_some_and(|keys| !keys.is_empty()) // unset or empty => false
+    }
+
+    /// Dedicated routing keys when set, otherwise the storage keys (which may be unset too).
+    pub fn routing_keys(&self) -> Option<&Vec<usize>> {
+        let fallback = self.key_indices.as_ref();
+        self.routing_key_indices.as_ref().or(fallback)
+    }
+
+    pub fn storage_keys(&self) -> Option<&Vec<usize>> {
+        self.key_indices.as_ref() // borrowed; never falls back to the routing keys
+    }
+
+    /// Column indices to sort by: the storage keys (if any) in stored order,
+    /// followed by the timestamp column when `with_timestamp` is true.
+    pub fn sort_field_indices(&self, with_timestamp: bool) -> Vec<usize> {
+        // Iterating the `Option` and flattening walks the keys, or nothing when unset.
+        let keys = self.key_indices.iter().flatten().copied();
+        // `then_some` yields the timestamp index only when it was requested.
+        let timestamp = with_timestamp.then_some(self.timestamp_index);
+
+        keys.chain(timestamp).collect()
+    }
+
+    /// Indices of the non-key columns, in schema order. The timestamp column is
+    /// included only when `with_timestamp` is true. Without keys, excluding it
+    /// panics if `timestamp_index` is out of range.
+    pub fn value_indices(&self, with_timestamp: bool) -> Vec<usize> {
+        let field_count = self.schema.fields().len();
+        let Some(keys) = &self.key_indices else {
+            let mut all: Vec<usize> = (0..field_count).collect();
+            if !with_timestamp {
+                all.remove(self.timestamp_index);
+            }
+            return all;
+        };
+        let keep = |index: &usize| {
+            !keys.contains(index) && (with_timestamp || *index != self.timestamp_index)
+        };
+        (0..field_count).filter(keep).collect()
+    }
+
+    /// Projects away the key columns of `batch`; with no keys set, returns a clone.
+    pub fn unkeyed_batch(&self, batch: &RecordBatch) -> Result<RecordBatch, ArrowError> {
+        let Some(keys) = self.key_indices.as_ref() else {
+            return Ok(batch.clone());
+        };
+        let column_count = batch.num_columns();
+        let kept: Vec<usize> = (0..column_count).filter(|i| !keys.contains(i)).collect();
+        batch.project(&kept)
+    }
+
+    /// Returns this schema with the key columns removed and both key lists unset.
+    /// When no storage keys are set, the result is a plain clone of `self`.
+    /// Fails if no `TIMESTAMP_FIELD` column remains once the key columns are gone.
+    pub fn schema_without_keys(&self) -> Result<Self, ArrowError> {
+        let Some(key_indices) = self.key_indices.as_ref() else {
+            return Ok(self.clone());
+        };
+        let mut remaining: Vec<Field> = Vec::new();
+        for (index, field) in self.schema.fields().iter().enumerate() {
+            if !key_indices.contains(&index) {
+                remaining.push(field.as_ref().clone());
+            }
+        }
+        let unkeyed_schema = Schema::new(remaining);
+        let timestamp_index = unkeyed_schema.index_of(TIMESTAMP_FIELD)?;
+        Ok(Self {
+            schema: Arc::new(unkeyed_schema),
+            timestamp_index,
+            key_indices: None,
+            routing_key_indices: None,
+        })
+    }
+
+    /// Rebuilds the schema around `fields`, keeping the metadata and both key lists. Fails when
+    /// `TIMESTAMP_FIELD` is missing or `fields` is too short for the largest key index.
+    pub fn with_fields(&self, fields: Vec<FieldRef>) -> Result<Self, ArrowError> {
+        let metadata = self.schema.metadata.clone();
+        let schema = Arc::new(Schema::new_with_metadata(fields, metadata));
+        let timestamp_index = schema.index_of(TIMESTAMP_FIELD)?;
+        // Largest index across storage and routing keys; 0 when there are none.
+        let max_index = self
+            .key_indices
+            .iter()
+            .chain(self.routing_key_indices.iter())
+            .flatten()
+            .copied()
+            .max()
+            .unwrap_or(0);
+        if schema.fields.len() - 1 < max_index {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "expected at least {} fields, but were only {}",
+                max_index + 1,
+                schema.fields.len()
+            )));
+        }
+        Ok(Self {
+            schema,
+            timestamp_index,
+            key_indices: self.key_indices.clone(),
+            routing_key_indices: self.routing_key_indices.clone(),
+        })
+    }
+
+    /// Appends `new_fields` after the existing columns, then defers to `with_fields`.
+    pub fn with_additional_fields(
+        &self,
+        new_fields: impl Iterator<Item = Field>,
+    ) -> Result<Self, ArrowError> {
+        let existing = self.schema.fields.iter().cloned();
+        self.with_fields(existing.chain(new_fields.map(Arc::new)).collect())
+    }
+}
+
+/// Proto serialization: convert between FsSchema and the proto `FsSchema` message.
+///
+/// The schema is encoded as a JSON array of `{name, data_type, nullable}` objects (`data_type`
+/// is the `Debug` form), which avoids depending on serde for `arrow_schema::Schema` directly.
+impl FsSchema {
+    /// Converts this schema into its proto message.
+    ///
+    /// The Arrow schema travels as a JSON string. `has_keys` / `has_routing_keys`
+    /// record whether each key list is set, because an unset list is sent as an
+    /// empty vector. All indices are cast to `u32`.
+    pub fn to_proto(&self) -> protocol::grpc::api::FsSchema {
+        // Unset and empty lists both become an empty vector here.
+        let to_u32 = |indices: &Option<Vec<usize>>| -> Vec<u32> {
+            indices.iter().flatten().map(|i| *i as u32).collect()
+        };
+
+        let arrow_schema = schema_to_json_string(&self.schema);
+        let timestamp_index = self.timestamp_index as u32;
+        let has_keys = self.key_indices.is_some();
+        let has_routing_keys = self.routing_key_indices.is_some();
+        let key_indices = to_u32(&self.key_indices);
+        let routing_key_indices = to_u32(&self.routing_key_indices);
+
+        protocol::grpc::api::FsSchema {
+            arrow_schema,
+            timestamp_index,
+            key_indices,
+            has_keys,
+            routing_key_indices,
+            has_routing_keys,
+        }
+    }
+
+    /// Rebuilds a schema from its proto message.
+    ///
+    /// A key list is restored only when its `has_*` flag is set; otherwise it is
+    /// `None`. The indices are taken as sent and not checked against the schema.
+    ///
+    /// # Errors
+    /// Fails when `arrow_schema` is not valid JSON or names an unsupported type.
+    pub fn from_proto(proto: protocol::grpc::api::FsSchema) -> Result<Self, DataFusionError> {
+        let schema = Arc::new(schema_from_json_string(&proto.arrow_schema)?);
+        // Widens the wire-format indices back to `usize`.
+        let to_usize = |indices: Vec<u32>| -> Vec<usize> {
+            indices.into_iter().map(|i| i as usize).collect()
+        };
+
+        Ok(Self {
+            schema,
+            timestamp_index: proto.timestamp_index as usize,
+            key_indices: proto.has_keys.then(|| to_usize(proto.key_indices)),
+            routing_key_indices: proto
+                .has_routing_keys
+                .then(|| to_usize(proto.routing_key_indices)),
+        })
+    }
+}
+
+/// Renders the schema as a JSON array with one `{name, data_type, nullable}` object
+/// per field; `data_type` is the `Debug` text of the Arrow type. Metadata is not written.
+fn schema_to_json_string(schema: &Schema) -> String {
+    let mut json_fields: Vec<serde_json::Value> = Vec::with_capacity(schema.fields().len());
+    for field in schema.fields().iter() {
+        json_fields.push(serde_json::json!({
+            "name": field.name(),
+            "data_type": format!("{:?}", field.data_type()),
+            "nullable": field.is_nullable(),
+        }));
+    }
+    // The values above are plain strings and booleans.
+    serde_json::to_string(&json_fields).unwrap()
+}
+
+/// Parses the JSON written by `schema_to_json_string` back into a `Schema`.
+///
+/// A missing or non-boolean `nullable` defaults to `true`. Malformed JSON, a missing
+/// `name` or `data_type`, or an unsupported type name yields a plan error.
+fn schema_from_json_string(s: &str) -> Result<Schema, DataFusionError> {
+    let entries: Vec<serde_json::Value> = serde_json::from_str(s)
+        .map_err(|e| DataFusionError::Plan(format!("Invalid schema JSON: {e}")))?;
+
+    let mut fields = Vec::with_capacity(entries.len());
+    for entry in &entries {
+        let Some(name) = entry["name"].as_str() else {
+            return Err(DataFusionError::Plan("missing field name".into()));
+        };
+        let nullable = entry["nullable"].as_bool().unwrap_or(true);
+        let Some(type_name) = entry["data_type"].as_str() else {
+            return Err(DataFusionError::Plan("missing data_type".into()));
+        };
+        let data_type = parse_debug_data_type(type_name)?;
+        fields.push(Field::new(name, data_type, nullable));
+    }
+    Ok(Schema::new(fields))
+}
+
+/// Maps the `Debug` name of a supported Arrow type back to its `DataType`.
+/// Timestamps are matched by unit prefix only, so any time zone in the text is
+/// dropped and the result always carries `None` as its zone.
+fn parse_debug_data_type(s: &str) -> Result<DataType, DataFusionError> {
+    let timestamp = |unit| DataType::Timestamp(unit, None);
+    let data_type = match s {
+        "Boolean" => DataType::Boolean,
+        "Int8" => DataType::Int8,
+        "Int16" => DataType::Int16,
+        "Int32" => DataType::Int32,
+        "Int64" => DataType::Int64,
+        "UInt8" => DataType::UInt8,
+        "UInt16" => DataType::UInt16,
+        "UInt32" => DataType::UInt32,
+        "UInt64" => DataType::UInt64,
+        "Float16" => DataType::Float16,
+        "Float32" => DataType::Float32,
+        "Float64" => DataType::Float64,
+        "Utf8" => DataType::Utf8,
+        "LargeUtf8" => DataType::LargeUtf8,
+        "Binary" => DataType::Binary,
+        "LargeBinary" => DataType::LargeBinary,
+        "Date32" => DataType::Date32,
+        "Date64" => DataType::Date64,
+        "Null" => DataType::Null,
+        _ if s.starts_with("Timestamp(Nanosecond") => timestamp(TimeUnit::Nanosecond),
+        _ if s.starts_with("Timestamp(Microsecond") => timestamp(TimeUnit::Microsecond),
+        _ if s.starts_with("Timestamp(Millisecond") => timestamp(TimeUnit::Millisecond),
+        _ if s.starts_with("Timestamp(Second") => timestamp(TimeUnit::Second),
+        _ => {
+            let message = format!("Unsupported data type in schema JSON: {s}");
+            return Err(DataFusionError::Plan(message));
+        }
+    };
+    Ok(data_type)
+}
+
+/// Converts a `StreamSchema` into an `FsSchema`.
+///
+/// The Arrow schema, timestamp index and key indices are carried over as they
+/// are; the routing keys are left unset.
+impl From<StreamSchema> for FsSchema {
+    fn from(s: StreamSchema) -> Self {
+        // `None` = no dedicated routing keys.
+        FsSchema::new(s.schema, s.timestamp_index, s.key_indices, None)
+    }
+}
+
+impl From<StreamSchema> for Arc<FsSchema> {
+    fn from(s: StreamSchema) -> Self {
+        Arc::new(s.into()) // StreamSchema -> FsSchema, then wrapped in an `Arc`
+    }
+}
diff --git a/src/types/errors.rs b/src/types/errors.rs
new file mode 100644
index 00000000..2c425c93
--- /dev/null
+++ b/src/types/errors.rs
@@ -0,0 +1,67 @@
+use std::fmt;
+
+/// Unified error type for streaming dataflow operations.
+#[derive(Debug)]
+pub enum DataflowError {
+    Arrow(arrow_schema::ArrowError), // has a `From` impl, so `?` converts Arrow errors
+    DataFusion(datafusion::error::DataFusionError), // likewise converted through `From`
+    Operator(String),
+    State(String),
+    Connector(String), // what the `connector_err!` macro builds
+    Internal(String),
+}
+
+impl fmt::Display for DataflowError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Arrow(inner) => write!(f, "Arrow error: {inner}"), // inner error's own text
+            Self::DataFusion(inner) => write!(f, "DataFusion error: {inner}"),
+            Self::Operator(text) => write!(f, "Operator error: {text}"),
+            Self::State(text) => write!(f, "State error: {text}"),
+            Self::Connector(text) => write!(f, "Connector error: {text}"),
+            Self::Internal(text) => write!(f, "Internal error: {text}"),
+        }
+    }
+}
+
+impl std::error::Error for DataflowError {} // defaults only: `source()` is `None` even for wrapped errors
+
+impl From<arrow_schema::ArrowError> for DataflowError {
+    fn from(e: arrow_schema::ArrowError) -> Self {
+        DataflowError::Arrow(e) // wraps the error unchanged
+    }
+}
+
+impl From<datafusion::error::DataFusionError> for DataflowError {
+    fn from(e: datafusion::error::DataFusionError) -> Self {
+        DataflowError::DataFusion(e) // wraps the error unchanged
+    }
+}
+
+/// Builds a `DataflowError::Connector` from `format!`-style arguments.
+#[macro_export]
+macro_rules! connector_err {
+    ($($arg:tt)*) => {
+        $crate::types::errors::DataflowError::Connector(format!($($arg)*))
+    };
+}
+
+/// State-related errors.
+#[derive(Debug)]
+pub enum StateError {
+    KeyNotFound(String), // payload is printed as the key by `Display`
+    SerializationError(String), // payload is printed as the message
+    BackendError(String), // payload is printed as the message
+}
+
+impl fmt::Display for StateError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::KeyNotFound(missing) => write!(f, "Key not found: {missing}"),
+            Self::SerializationError(text) => write!(f, "Serialization error: {text}"),
+            Self::BackendError(text) => write!(f, "State backend error: {text}"),
+        }
+    }
+}
+
+impl std::error::Error for StateError {} // trait defaults only; no `source()` override
diff --git a/src/types/formats.rs b/src/types/formats.rs
new file mode 100644
index 00000000..25d09a74
--- /dev/null
+++ b/src/types/formats.rs
@@ -0,0 +1,234 @@
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+use std::str::FromStr;
+
+/// Timestamp encodings; serde uses the names "rfc3339" (the default) and "unix_millis".
+#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub enum TimestampFormat {
+    #[default]
+    #[serde(rename = "rfc3339")]
+    RFC3339,
+    UnixMillis,
+}
+
+impl TryFrom<&str> for TimestampFormat {
+    type Error = ();
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        Ok(match value {
+            "RFC3339" | "rfc3339" => Self::RFC3339,
+            "UnixMillis" | "unix_millis" => Self::UnixMillis,
+            _ => return Err(()),
+        })
+    }
+}
+
+/// Decimal encodings; parsed from "number" (the default), "string" or "bytes".
+#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub enum DecimalEncoding {
+    #[default]
+    Number,
+    String,
+    Bytes,
+}
+
+impl TryFrom<&str> for DecimalEncoding {
+    type Error = ();
+    fn try_from(s: &str) -> Result<Self, Self::Error> {
+        Ok(match s {
+            "number" => Self::Number,
+            "string" => Self::String,
+            "bytes" => Self::Bytes,
+            _ => return Err(()),
+        })
+    }
+}
+
+/// JSON compression modes; parsed from "uncompressed" (the default) or "gzip".
+#[derive(Serialize, Deserialize, Default, Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub enum JsonCompression {
+    #[default]
+    Uncompressed,
+    Gzip,
+}
+
+impl FromStr for JsonCompression {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "uncompressed" => Self::Uncompressed,
+            "gzip" => Self::Gzip,
+            _ => return Err(format!("invalid json compression '{s}'")),
+        })
+    }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub struct JsonFormat {
+    #[serde(default)]
+    pub confluent_schema_registry: bool,
+    #[serde(default, alias = "confluent_schema_version")]
+    pub schema_id: Option<u32>, // also read from the key `confluent_schema_version`
+    #[serde(default)]
+    pub include_schema: bool,
+    #[serde(default)]
+    pub debezium: bool, // when true, `Format::is_updating()` returns true
+    #[serde(default)]
+    pub unstructured: bool,
+    #[serde(default)]
+    pub timestamp_format: TimestampFormat, // defaults to `RFC3339`
+    #[serde(default)]
+    pub decimal_encoding: DecimalEncoding, // defaults to `Number`
+    #[serde(default)]
+    pub compression: JsonCompression, // defaults to `Uncompressed`
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub struct RawStringFormat {} // no options
+
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub struct RawBytesFormat {} // no options
+
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub struct AvroFormat {
+    #[serde(default)]
+    pub confluent_schema_registry: bool,
+    #[serde(default)]
+    pub raw_datums: bool,
+    #[serde(default)]
+    pub into_unstructured_json: bool,
+    #[serde(default)]
+    pub schema_id: Option<u32>, // `AvroFormat::new` always leaves this as `None`
+}
+
+impl AvroFormat {
+    pub fn new(
+        confluent_schema_registry: bool,
+        raw_datums: bool,
+        into_unstructured_json: bool,
+    ) -> Self {
+        Self {
+            confluent_schema_registry,
+            raw_datums,
+            into_unstructured_json,
+            schema_id: None, // not a parameter of `new`; the field is public
+        }
+    }
+}
+
+/// Parquet compression codecs; `FromStr` accepts the snake_case names, e.g. "lz4_raw".
+#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum ParquetCompression {
+    Uncompressed,
+    Snappy,
+    Gzip,
+    #[default]
+    Zstd,
+    Lz4,
+    Lz4Raw,
+}
+
+impl FromStr for ParquetCompression {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "uncompressed" => Self::Uncompressed,
+            "snappy" => Self::Snappy,
+            "gzip" => Self::Gzip,
+            "zstd" => Self::Zstd,
+            "lz4" => Self::Lz4,
+            "lz4_raw" => Self::Lz4Raw,
+            _ => return Err(format!("invalid parquet compression '{s}'")),
+        })
+    }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Default)]
+#[serde(rename_all = "snake_case")]
+pub struct ParquetFormat {
+    #[serde(default)]
+    pub compression: ParquetCompression, // defaults to `Zstd`
+    #[serde(default)]
+    pub row_group_bytes: Option<u64>, // defaults to `None`
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub struct ProtobufFormat {
+    #[serde(default)]
+    pub into_unstructured_json: bool,
+    #[serde(default)]
+    pub message_name: Option<String>, // `None` when the key is absent
+    #[serde(default)]
+    pub compiled_schema: Option<Vec<u8>>, // raw bytes; their encoding is not visible here
+    #[serde(default)]
+    pub confluent_schema_registry: bool,
+    #[serde(default)]
+    pub length_delimited: bool,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case", tag = "type")] // variant is selected by a "type" key
+pub enum Format {
+    Json(JsonFormat), // the only variant `is_updating()` can report true for
+    Avro(AvroFormat),
+    Protobuf(ProtobufFormat),
+    Parquet(ParquetFormat),
+    RawString(RawStringFormat),
+    RawBytes(RawBytesFormat),
+}
+
+impl Display for Format {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.name()) // prints exactly what `name()` returns
+    }
+}
+
+impl Format {
+    pub fn name(&self) -> &'static str { // snake_case form of each variant name
+        match self {
+            Format::Json(_) => "json",
+            Format::Avro(_) => "avro",
+            Format::Protobuf(_) => "protobuf",
+            Format::Parquet(_) => "parquet",
+            Format::RawString(_) => "raw_string",
+            Format::RawBytes(_) => "raw_bytes",
+        }
+    }
+
+    pub fn is_updating(&self) -> bool { // true only for JSON with `debezium` set
+        matches!(self, Format::Json(JsonFormat { debezium: true, .. }))
+    }
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case", tag = "behavior")] // variant is selected by a "behavior" key
+pub enum BadData {
+    Fail {}, // the default, see the `Default` impl below
+    Drop {},
+}
+
+impl Default for BadData {
+    fn default() -> Self {
+        BadData::Fail {} // hand-written: `#[default]` is only allowed on unit variants
+    }
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case", tag = "method")] // variant is selected by a "method" key
+pub enum Framing {
+    Newline(NewlineDelimitedFraming), // currently the only framing method
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd)]
+#[serde(rename_all = "snake_case")]
+pub struct NewlineDelimitedFraming {
+    pub max_line_length: Option<u64>, // NOTE(review): unit (bytes vs chars) not visible here
+}
diff --git a/src/types/hash.rs b/src/types/hash.rs
new file mode 100644
index 00000000..8f47a8fa
--- /dev/null
+++ b/src/types/hash.rs
@@ -0,0 +1,88 @@
+use std::ops::RangeInclusive;
+
+/// Randomly generated seeds for consistent hashing. Changing these breaks existing state.
+pub const HASH_SEEDS: [u64; 4] = [
+    5093852630788334730,
+    1843948808084437226,
+    8049205638242432149,
+    17942305062735447798,
+];
+
+/// Returns the server index (0-based) responsible for the given hash value
+/// when distributing across `n` servers. Panics (division by zero) if `n` is 0.
+pub fn server_for_hash(x: u64, n: usize) -> usize {
+    // One server owns the whole space; this also keeps `u64::MAX + 1` from overflowing.
+    if n == 1 {
+        return 0;
+    }
+    let range_size = (u64::MAX / (n as u64)) + 1;
+    (x / range_size) as usize
+}
+
+/// Returns the key range assigned to server `i` out of `n` total servers.
+///
+/// The last server's range always ends at `u64::MAX`. Panics if `n` is 0.
+pub fn range_for_server(i: usize, n: usize) -> RangeInclusive<u64> {
+    if n == 1 {
+        return 0..=u64::MAX;
+    }
+    let range_size = (u64::MAX / (n as u64)) + 1;
+    let start = range_size * (i as u64);
+    if i + 1 == n {
+        return start..=u64::MAX;
+    }
+    start..=(start + range_size - 1)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_range_for_server() {
+        let n = 6; // 6 does not divide the u64 space evenly
+
+        for i in 0..(n - 1) {
+            let range1 = range_for_server(i, n);
+            let range2 = range_for_server(i + 1, n);
+
+            assert_eq!(*range1.end() + 1, *range2.start(), "Ranges not adjacent");
+            assert_eq!(
+                i,
+                server_for_hash(*range1.start(), n), // both ends must map back to server `i`
+                "start not assigned to range"
+            );
+            assert_eq!(
+                i,
+                server_for_hash(*range1.end(), n),
+                "end not assigned to range"
+            );
+        }
+
+        let last_range = range_for_server(n - 1, n);
+        assert_eq!(
+            *last_range.end(),
+            u64::MAX,
+            "Last range does not contain u64::MAX"
+        );
+        assert_eq!(
+            n - 1,
+            server_for_hash(u64::MAX, n),
+            "u64::MAX not in last range"
+        );
+    }
+
+    #[test]
+    fn test_server_for_hash() {
+        let n = 2;
+        let x = u64::MAX; // largest hash value must land inside its server's range
+
+        let server_index = server_for_hash(x, n);
+        let server_range = range_for_server(server_index, n);
+
+        assert!(
+            server_range.contains(&x),
+            "u64::MAX is not in the correct range"
+        );
+    }
+}
diff --git a/src/types/message.rs b/src/types/message.rs
new file mode 100644
index 00000000..29b7f3a5
--- /dev/null
+++ b/src/types/message.rs
@@ -0,0 +1,42 @@
+use bincode::{Decode, Encode};
+use datafusion::arrow::array::RecordBatch;
+use serde::{Deserialize, Serialize};
+use std::time::SystemTime;
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Encode, Decode, Serialize, Deserialize)]
+pub enum Watermark {
+    EventTime(SystemTime), // carries a `SystemTime`
+    Idle, // carries no timestamp
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum ArrowMessage {
+    Data(RecordBatch), // never an end message
+    Signal(SignalMessage), // `is_end()` is true only for `Stop` and `EndOfData`
+}
+
+impl ArrowMessage {
+    /// True for the `Stop` and `EndOfData` signals; false for data and all other signals.
+    pub fn is_end(&self) -> bool {
+        let Self::Signal(signal) = self else {
+            return false;
+        };
+        matches!(signal, SignalMessage::Stop | SignalMessage::EndOfData)
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Encode, Decode)]
+pub enum SignalMessage {
+    Barrier(CheckpointBarrier),
+    Watermark(Watermark),
+    Stop, // counted as an end message by `ArrowMessage::is_end`
+    EndOfData, // counted as an end message by `ArrowMessage::is_end`
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Encode, Decode, Serialize, Deserialize)]
+pub struct CheckpointBarrier {
+    pub epoch: u32,
+    pub min_epoch: u32, // NOTE(review): relation to `epoch` is not visible here
+    pub timestamp: SystemTime,
+    pub then_stop: bool, // presumably: stop after this checkpoint (TODO confirm)
+}
diff --git a/src/types/mod.rs b/src/types/mod.rs
new file mode 100644
index 00000000..ddf7baca
--- /dev/null
+++ b/src/types/mod.rs
@@ -0,0 +1,71 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Core types shared across the FunctionStream system.
+//!
+//! This module provides fundamental types used by the runtime, SQL planner,
+//! coordinator, and other subsystems — analogous to `arroyo-types` + `arroyo-rpc` in Arroyo.
+
+pub mod arrow_ext;
+pub mod control;
+pub mod date;
+pub mod debezium;
+pub mod df;
+pub mod errors;
+pub mod formats;
+pub mod hash;
+pub mod message;
+pub mod operator_config;
+pub mod task_info;
+pub mod time_utils;
+pub mod worker;
+
+// ── Re-exports from existing modules ──
+pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType};
+pub use date::{DatePart, DateTruncPrecision};
+pub use debezium::{Debezium, DebeziumOp, UpdatingData};
+pub use hash::{HASH_SEEDS, range_for_server, server_for_hash};
+pub use message::{ArrowMessage, CheckpointBarrier, SignalMessage, Watermark};
+pub use task_info::{ChainInfo, TaskInfo};
+pub use time_utils::{from_micros, from_millis, from_nanos, to_micros, to_millis, to_nanos};
+pub use worker::{MachineId, WorkerId};
+
+// ── Re-exports from new modules ──
+pub use control::{
+    CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp,
+    ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError,
+};
+pub use df::{FsSchema, FsSchemaRef};
+pub use errors::DataflowError;
+pub use formats::{BadData, Format, Framing, JsonFormat};
+pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
+
+// ── Well-known column names ──
+pub const TIMESTAMP_FIELD: &str = "_timestamp";
+pub const UPDATING_META_FIELD: &str = "_updating_meta";
+
+// ── Environment variables ──
+pub const JOB_ID_ENV: &str = "JOB_ID";
+pub const RUN_ID_ENV: &str = "RUN_ID";
+
+// ── Metric names ──
+pub const MESSAGES_RECV: &str = "fs_worker_messages_recv";
+pub const MESSAGES_SENT: &str = "fs_worker_messages_sent";
+pub const BYTES_RECV: &str = "fs_worker_bytes_recv";
+pub const BYTES_SENT: &str = "fs_worker_bytes_sent";
+pub const BATCHES_RECV: &str = "fs_worker_batches_recv";
+pub const BATCHES_SENT: &str = "fs_worker_batches_sent";
+pub const TX_QUEUE_SIZE: &str = "fs_worker_tx_queue_size";
+pub const TX_QUEUE_REM: &str = "fs_worker_tx_queue_rem";
+pub const DESERIALIZATION_ERRORS: &str = "fs_worker_deserialization_errors";
+
+pub const LOOKUP_KEY_INDEX_FIELD: &str = "__lookup_key_index";
diff --git a/src/types/operator_config.rs b/src/types/operator_config.rs
new file mode 100644
index 00000000..744dbd85
--- /dev/null
+++ b/src/types/operator_config.rs
@@ -0,0 +1,39 @@
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+use super::formats::{BadData, Format, Framing};
+
+/// Rate-limit setting expressed as a number of messages per second.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct RateLimit {
+    pub messages_per_second: u32,
+}
+
+/// Describes one metadata field by its field name and key.
+// NOTE(review): presumably maps a source metadata `key` onto the output column
+// `field_name` — confirm against the connectors that consume this.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct MetadataField {
+    pub field_name: String,
+    pub key: String,
+    /// JSON-encoded Arrow DataType string, e.g. `"Utf8"`, `"Int64"`.
+    ///
+    /// Deserializes to `None` when the field is absent.
+    #[serde(default)]
+    pub data_type: Option<String>,
+}
+
+/// Serializable operator configuration: untyped JSON `connection` and `table`
+/// values plus optional format, bad-data, framing and rate-limit settings.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct OperatorConfig {
+    pub connection: Value,
+    pub table: Value,
+    pub format: Option<Format>,
+    pub bad_data: Option<BadData>,
+    pub framing: Option<Framing>,
+    pub rate_limit: Option<RateLimit>,
+    /// Deserializes to an empty list when the field is absent.
+    #[serde(default)]
+    pub metadata_fields: Vec<MetadataField>,
+}
diff --git a/src/types/task_info.rs b/src/types/task_info.rs
new file mode 100644
index 00000000..5a31511b
--- /dev/null
+++ b/src/types/task_info.rs
@@ -0,0 +1,87 @@
+use bincode::{Decode, Encode};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::fmt::{Display, Formatter};
+use std::ops::RangeInclusive;
+
+/// Describes a task by job, node, operator, task index, parallelism and an
+/// inclusive `u64` key range.
+#[derive(Eq, PartialEq, Hash, Debug, Clone, Encode, Decode, Serialize, Deserialize)]
+pub struct TaskInfo {
+    pub job_id: String,
+    pub node_id: u32,
+    pub operator_name: String,
+    pub operator_id: String,
+    pub task_index: u32,
+    pub parallelism: u32,
+    pub key_range: RangeInclusive<u64>,
+}
+
+impl Display for TaskInfo {
+    /// Renders the task as `Task_<operator_id>-<task_index>/<parallelism>`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "Task_{}-{}/{}",
+            self.operator_id, self.task_index, self.parallelism
+        ))
+    }
+}
+
+impl TaskInfo {
+    /// Builds a `TaskInfo` for tests from the given job and operator ids.
+    ///
+    /// Every other field is a fixed placeholder: node `1`, operator name
+    /// `"op"`, task index `0`, parallelism `1` and the full `u64` key range.
+    pub fn for_test(job_id: &str, operator_id: &str) -> Self {
+        Self {
+            job_id: job_id.to_owned(),
+            node_id: 1,
+            operator_name: String::from("op"),
+            operator_id: operator_id.to_owned(),
+            task_index: 0,
+            parallelism: 1,
+            key_range: 0..=u64::MAX,
+        }
+    }
+}
+
+/// Returns a fixed `TaskInfo` for tests: job `instance-1`, operator
+/// `test-operator` with id `test-operator-1`, node `1`, task index `0`,
+/// parallelism `1` and the full `u64` key range.
+pub fn get_test_task_info() -> TaskInfo {
+    TaskInfo {
+        operator_name: String::from("test-operator"),
+        ..TaskInfo::for_test("instance-1", "test-operator-1")
+    }
+}
+
+/// Describes a task chain by job, node, description and task index.
+#[derive(Eq, PartialEq, Hash, Debug, Clone, Encode, Decode, Serialize, Deserialize)]
+pub struct ChainInfo {
+    pub job_id: String,
+    pub node_id: u32,
+    pub description: String,
+    pub task_index: u32,
+}
+
+impl Display for ChainInfo {
+    /// Renders the chain as `TaskChain<node_id>-<task_index> (<description>)`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "TaskChain{}-{} ({})",
+            self.node_id, self.task_index, self.description
+        ))
+    }
+}
+
+impl ChainInfo {
+    /// Returns the labels `node_id`, `subtask_idx` and `node_description`
+    /// populated from this chain's node id, task index and description.
+    pub fn metric_label_map(&self) -> HashMap<String, String> {
+        HashMap::from([
+            ("node_id".to_string(), self.node_id.to_string()),
+            ("subtask_idx".to_string(), self.task_index.to_string()),
+            ("node_description".to_string(), self.description.clone()),
+        ])
+    }
+}
diff --git a/src/types/time_utils.rs b/src/types/time_utils.rs
new file mode 100644
index 00000000..2ee5a126
--- /dev/null
+++ b/src/types/time_utils.rs
@@ -0,0 +1,97 @@
+use std::collections::HashMap;
+use std::hash::Hash;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+/// Returns the whole milliseconds elapsed from the UNIX epoch to `time`.
+///
+/// # Panics
+///
+/// Panics if `time` is earlier than the UNIX epoch.
+pub fn to_millis(time: SystemTime) -> u64 {
+    time.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64
+}
+
+/// Returns the whole microseconds elapsed from the UNIX epoch to `time`.
+///
+/// # Panics
+///
+/// Panics if `time` is earlier than the UNIX epoch.
+pub fn to_micros(time: SystemTime) -> u64 {
+    time.duration_since(UNIX_EPOCH).unwrap().as_micros() as u64
+}
+
+/// Builds the `SystemTime` that lies `ts` milliseconds after the UNIX epoch.
+pub fn from_millis(ts: u64) -> SystemTime {
+    UNIX_EPOCH + Duration::from_millis(ts)
+}
+
+/// Builds the `SystemTime` that lies `ts` microseconds after the UNIX epoch.
+pub fn from_micros(ts: u64) -> SystemTime {
+    UNIX_EPOCH + Duration::from_micros(ts)
+}
+
+/// Returns the nanoseconds elapsed from the UNIX epoch to `time`.
+///
+/// # Panics
+///
+/// Panics if `time` is earlier than the UNIX epoch.
+pub fn to_nanos(time: SystemTime) -> u128 {
+    time.duration_since(UNIX_EPOCH).unwrap().as_nanos()
+}
+
+/// Builds the `SystemTime` that lies `ts` nanoseconds after the UNIX epoch.
+///
+/// The value is split into whole seconds and a sub-second remainder because
+/// `Duration::from_nanos` only accepts a `u64`.
+pub fn from_nanos(ts: u128) -> SystemTime {
+    UNIX_EPOCH
+        + Duration::from_secs((ts / 1_000_000_000) as u64)
+        + Duration::from_nanos((ts % 1_000_000_000) as u64)
+}
+
+/// Formats `time` in UTC as `YYYY-MM-DD HH:MM:SS.mmm`.
+pub fn print_time(time: SystemTime) -> String {
+    chrono::DateTime::<chrono::Utc>::from(time)
+        .format("%Y-%m-%d %H:%M:%S%.3f")
+        .to_string()
+}
+
+/// Returns the number of days since the UNIX epoch (for Avro serialization).
+///
+/// # Panics
+///
+/// Panics if `time` is earlier than the UNIX epoch.
+pub fn days_since_epoch(time: SystemTime) -> i32 {
+    time.duration_since(UNIX_EPOCH)
+        .unwrap()
+        .as_secs()
+        .div_euclid(86400) as i32
+}
+
+/// Creates a map holding exactly one entry, converting `key` into the map's key type.
+pub fn single_item_hash_map<I: Into<K>, K: Hash + Eq, V>(key: I, value: V) -> HashMap<K, V> {
+    let mut map = HashMap::new();
+    map.insert(key.into(), value);
+    map
+}
+
+/// Parses a comma-separated list of `key<pair_delimiter>value` entries into a map.
+///
+/// Keys and values are trimmed of surrounding whitespace. Each entry is split at
+/// the first `pair_delimiter` only, so a value may itself contain the delimiter
+/// (for example `token=abc==` with `'='`).
+///
+/// Returns an empty map when `s` is blank and `None` when any entry lacks the
+/// delimiter.
+pub fn string_to_map(s: &str, pair_delimiter: char) -> Option<HashMap<String, String>> {
+    if s.trim().is_empty() {
+        return Some(HashMap::new());
+    }
+
+    s.split(',')
+        .map(|entry| {
+            let (key, value) = entry.trim().split_once(pair_delimiter)?;
+            Some((key.trim().to_string(), value.trim().to_string()))
+        })
+        .collect()
+}
diff --git a/src/types/worker.rs b/src/types/worker.rs
new file mode 100644
index 00000000..c12163ba
--- /dev/null
+++ b/src/types/worker.rs
@@ -0,0 +1,17 @@
+use std::fmt::{Display, Formatter};
+use std::sync::Arc;
+
+/// Numeric identifier of a worker.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct WorkerId(pub u64);
+
+/// Identifier of a machine, stored as a shared `Arc<String>`.
+#[derive(Clone, Debug, Eq, Hash, PartialEq)]
+pub struct MachineId(pub Arc<String>);
+
+impl Display for MachineId {
+    /// Writes the wrapped identifier string verbatim.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.0.as_str())
+    }
+}

From a42f5a305cf8326bc731d6c8924f61997bd132ee Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Mon, 16 Mar 2026 23:19:50 +0800
Subject: [PATCH 03/44] feat(coordinator): add CreateTable and InsertStatement statements and plans

---
 src/coordinator/analyze/analyzer.rs           | 21 ++++++++-
 src/coordinator/execution/executor.rs         | 32 +++++++++++--
 src/coordinator/mod.rs                        |  4 +-
 src/coordinator/plan/create_table_plan.rs     | 32 +++++++++++++
 src/coordinator/plan/insert_statement_plan.rs | 32 +++++++++++++
 src/coordinator/plan/logical_plan_visitor.rs  | 46 +++++++++++++++++--
 src/coordinator/plan/mod.rs                   |  4 ++
 src/coordinator/plan/visitor.rs               | 16 ++++++-
 src/coordinator/statement/create_table.rs     | 40 ++++++++++++++++
 src/coordinator/statement/insert_statement.rs | 41 +++++++++++++++++
 src/coordinator/statement/mod.rs              |  4 ++
 src/coordinator/statement/visitor.rs          | 16 ++++++-
 src/sql/planner/parse.rs                      | 16 +++++--
 13 files changed, 284 insertions(+), 20 deletions(-)
 create mode 100644 src/coordinator/plan/create_table_plan.rs
 create mode 100644 src/coordinator/plan/insert_statement_plan.rs
 create mode 100644 src/coordinator/statement/create_table.rs
 create mode 100644 src/coordinator/statement/insert_statement.rs

diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs
index 58056b67..297d0de2 100644
--- a/src/coordinator/analyze/analyzer.rs
+++ b/src/coordinator/analyze/analyzer.rs
@@ -13,8 +13,9 @@
 use super::Analysis;
 use crate::coordinator::execution_context::ExecutionContext;
 use crate::coordinator::statement::{
-    CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, Statement,
-    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingSql,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
+    ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext,
+    StatementVisitorResult, StopFunction, StreamingSql,
 };
 use std::fmt;
 
@@ -116,6 +117,22 @@ impl StatementVisitor for Analyzer<'_> {
         StatementVisitorResult::Analyze(Box::new(stmt.clone()))
     }
 
+    fn visit_create_table(
+        &self,
+        stmt: &CreateTable,
+        _context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Analyze(Box::new(CreateTable::new(stmt.statement.clone())))
+    }
+
+    fn visit_insert_statement(
+        &self,
+        stmt: &InsertStatement,
+        _context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Analyze(Box::new(InsertStatement::new(stmt.statement.clone())))
+    }
+
     fn visit_streaming_sql(
         &self,
         stmt: &StreamingSql,
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 5d96bf45..dbc76923 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -12,9 +12,9 @@
 
 use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_record_batch};
 use crate::coordinator::plan::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, PlanNode, PlanVisitor,
-    PlanVisitorContext, PlanVisitorResult, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
-    StreamingSqlPlan,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
+    InsertStatementPlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult,
+    ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan,
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::taskexecutor::TaskManager;
@@ -202,6 +202,32 @@ impl PlanVisitor for Executor {
         PlanVisitorResult::Execute(result)
     }
 
+    fn visit_create_table_plan(
+        &self,
+        plan: &CreateTablePlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        // TODO: register table in catalog and execute DDL
+        let result = Err(ExecuteError::Internal(format!(
+            "CREATE TABLE execution not yet implemented. LogicalPlan:\n{}",
+            plan.logical_plan.display_indent()
+        )));
+        PlanVisitorResult::Execute(result)
+    }
+
+    fn visit_insert_statement_plan(
+        &self,
+        plan: &InsertStatementPlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        // TODO: start streaming pipeline for INSERT / anonymous query
+        let result = Err(ExecuteError::Internal(format!(
+            "INSERT statement execution not yet implemented. LogicalPlan:\n{}",
+            plan.logical_plan.display_indent()
+        )));
+        PlanVisitorResult::Execute(result)
+    }
+
     fn visit_streaming_sql_plan(
         &self,
         plan: &StreamingSqlPlan,
diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs
index 26627a8b..51b93ca0 100644
--- a/src/coordinator/mod.rs
+++ b/src/coordinator/mod.rs
@@ -22,6 +22,6 @@ mod statement;
 pub use coordinator::Coordinator;
 pub use dataset::{DataSet, ShowFunctionsResult};
 pub use statement::{
-    CreateFunction, CreatePythonFunction, DropFunction, PythonModule, ShowFunctions, StartFunction,
-    Statement, StopFunction, StreamingSql,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, PythonModule,
+    ShowFunctions, StartFunction, Statement, StopFunction, StreamingSql,
 };
diff --git a/src/coordinator/plan/create_table_plan.rs b/src/coordinator/plan/create_table_plan.rs
new file mode 100644
index 00000000..450c8813
--- /dev/null
+++ b/src/coordinator/plan/create_table_plan.rs
@@ -0,0 +1,35 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::logical_expr::LogicalPlan;
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+/// Plan node wrapping the DataFusion `LogicalPlan` of a table-creation statement.
+#[derive(Debug)]
+pub struct CreateTablePlan {
+    pub logical_plan: LogicalPlan,
+}
+
+impl CreateTablePlan {
+    /// Wraps `logical_plan` in a new plan node.
+    pub fn new(logical_plan: LogicalPlan) -> Self {
+        CreateTablePlan { logical_plan }
+    }
+}
+
+impl PlanNode for CreateTablePlan {
+    /// Dispatches to [`PlanVisitor::visit_create_table_plan`].
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_create_table_plan(self, context)
+    }
+}
diff --git a/src/coordinator/plan/insert_statement_plan.rs b/src/coordinator/plan/insert_statement_plan.rs
new file mode 100644
index 00000000..e96a2772
--- /dev/null
+++ b/src/coordinator/plan/insert_statement_plan.rs
@@ -0,0 +1,32 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::logical_expr::LogicalPlan;
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+#[derive(Debug)]
+pub struct InsertStatementPlan {
+    pub logical_plan: LogicalPlan,
+}
+
+impl InsertStatementPlan {
+    pub fn new(logical_plan: LogicalPlan) -> Self {
+        Self { logical_plan }
+    }
+}
+
+impl PlanNode for InsertStatementPlan {
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_insert_statement_plan(self, context)
+    }
+}
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 3462d033..fde7f35a 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -14,12 +14,14 @@ use tracing::debug;
 
 use crate::coordinator::analyze::analysis::Analysis;
 use crate::coordinator::plan::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, PlanNode, ShowFunctionsPlan,
-    StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
+    InsertStatementPlan, PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
+    StreamingSqlPlan,
 };
 use crate::coordinator::statement::{
-    CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction,
-    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingSql,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
+    ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext,
+    StatementVisitorResult, StopFunction, StreamingSql,
 };
 use crate::sql::planner::StreamSchemaProvider;
 
@@ -110,6 +112,42 @@ impl StatementVisitor for LogicalPlanVisitor {
         )))
     }
 
+    fn visit_create_table(
+        &self,
+        stmt: &CreateTable,
+        _context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider);
+
+        match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) {
+            Ok(plan) => {
+                debug!("Create table plan:\n{}", plan.display_graphviz());
+                StatementVisitorResult::Plan(Box::new(CreateTablePlan::new(plan)))
+            }
+            Err(e) => {
+                panic!("Failed to convert CREATE TABLE to logical plan: {e}");
+            }
+        }
+    }
+
+    fn visit_insert_statement(
+        &self,
+        stmt: &InsertStatement,
+        _context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider);
+
+        match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) {
+            Ok(plan) => {
+                debug!("Insert statement plan:\n{}", plan.display_graphviz());
+                StatementVisitorResult::Plan(Box::new(InsertStatementPlan::new(plan)))
+            }
+            Err(e) => {
+                panic!("Failed to convert INSERT statement to logical plan: {e}");
+            }
+        }
+    }
+
     fn visit_streaming_sql(
         &self,
         stmt: &StreamingSql,
diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs
index 744410e1..3d36ec16 100644
--- a/src/coordinator/plan/mod.rs
+++ b/src/coordinator/plan/mod.rs
@@ -12,7 +12,9 @@
 
 mod create_function_plan;
 mod create_python_function_plan;
+mod create_table_plan;
 mod drop_function_plan;
+mod insert_statement_plan;
 mod logical_plan_visitor;
 mod optimizer;
 mod show_functions_plan;
@@ -23,7 +25,9 @@ mod visitor;
 
 pub use create_function_plan::CreateFunctionPlan;
 pub use create_python_function_plan::CreatePythonFunctionPlan;
+pub use create_table_plan::CreateTablePlan;
 pub use drop_function_plan::DropFunctionPlan;
+pub use insert_statement_plan::InsertStatementPlan;
 pub use logical_plan_visitor::LogicalPlanVisitor;
 pub use optimizer::LogicalPlanner;
 pub use show_functions_plan::ShowFunctionsPlan;
diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs
index e8bd0ffc..e3911a8b 100644
--- a/src/coordinator/plan/visitor.rs
+++ b/src/coordinator/plan/visitor.rs
@@ -11,8 +11,8 @@
 // limitations under the License.
 
 use super::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, DropFunctionPlan, ShowFunctionsPlan,
-    StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
+    InsertStatementPlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan,
 };
 
 /// Context passed to PlanVisitor methods
@@ -85,6 +85,18 @@ pub trait PlanVisitor {
         context: &PlanVisitorContext,
     ) -> PlanVisitorResult;
 
+    fn visit_create_table_plan(
+        &self,
+        plan: &CreateTablePlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
+
+    fn visit_insert_statement_plan(
+        &self,
+        plan: &InsertStatementPlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
+
     fn visit_streaming_sql_plan(
         &self,
         plan: &StreamingSqlPlan,
diff --git a/src/coordinator/statement/create_table.rs b/src/coordinator/statement/create_table.rs
new file mode 100644
index 00000000..8aa16bf0
--- /dev/null
+++ b/src/coordinator/statement/create_table.rs
@@ -0,0 +1,42 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::sql::sqlparser::ast::Statement as DFStatement;
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// A parsed `CREATE TABLE` or `CREATE VIEW` statement.
+///
+/// Keeps the raw SQL AST node so that the coordinator pipeline can tell
+/// table/view creation apart from other streaming SQL operations.
+#[derive(Debug)]
+pub struct CreateTable {
+    pub statement: DFStatement,
+}
+
+impl CreateTable {
+    /// Wraps the parsed `statement`.
+    pub fn new(statement: DFStatement) -> Self {
+        CreateTable { statement }
+    }
+}
+
+impl Statement for CreateTable {
+    /// Dispatches to [`StatementVisitor::visit_create_table`].
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_create_table(self, context)
+    }
+}
diff --git a/src/coordinator/statement/insert_statement.rs b/src/coordinator/statement/insert_statement.rs
new file mode 100644
index 00000000..45785251
--- /dev/null
+++ b/src/coordinator/statement/insert_statement.rs
@@ -0,0 +1,43 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::sql::sqlparser::ast::Statement as DFStatement;
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// An `INSERT INTO` statement or a standalone `SELECT`/query.
+///
+/// Streaming SQL treats both forms as data-producing operations that feed the
+/// streaming pipeline: `INSERT INTO` writes to a sink, while a standalone
+/// `SELECT` is an anonymous computation.
+#[derive(Debug)]
+pub struct InsertStatement {
+    pub statement: DFStatement,
+}
+
+impl InsertStatement {
+    /// Wraps the parsed `statement`.
+    pub fn new(statement: DFStatement) -> Self {
+        InsertStatement { statement }
+    }
+}
+
+impl Statement for InsertStatement {
+    /// Dispatches to [`StatementVisitor::visit_insert_statement`].
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_insert_statement(self, context)
+    }
+}
diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs
index a115af91..7628b94b 100644
--- a/src/coordinator/statement/mod.rs
+++ b/src/coordinator/statement/mod.rs
@@ -12,7 +12,9 @@
 
 mod create_function;
 mod create_python_function;
+mod create_table;
 mod drop_function;
+mod insert_statement;
 mod show_functions;
 mod start_function;
 mod stop_function;
@@ -21,7 +23,9 @@ mod visitor;
 
 pub use create_function::{ConfigSource, CreateFunction, FunctionSource};
 pub use create_python_function::{CreatePythonFunction, PythonModule};
+pub use create_table::CreateTable;
 pub use drop_function::DropFunction;
+pub use insert_statement::InsertStatement;
 pub use show_functions::ShowFunctions;
 pub use start_function::StartFunction;
 pub use stop_function::StopFunction;
diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs
index c9a63831..8de6ffe2 100644
--- a/src/coordinator/statement/visitor.rs
+++ b/src/coordinator/statement/visitor.rs
@@ -11,8 +11,8 @@
 // limitations under the License.
 
 use super::{
-    CreateFunction, CreatePythonFunction, DropFunction, ShowFunctions, StartFunction, StopFunction,
-    StreamingSql,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
+    ShowFunctions, StartFunction, StopFunction, StreamingSql,
 };
 use crate::coordinator::plan::PlanNode;
 use crate::coordinator::statement::Statement;
@@ -89,6 +89,18 @@ pub trait StatementVisitor {
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
 
+    fn visit_create_table(
+        &self,
+        stmt: &CreateTable,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
+
+    fn visit_insert_statement(
+        &self,
+        stmt: &InsertStatement,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
+
     fn visit_streaming_sql(
         &self,
         stmt: &StreamingSql,
diff --git a/src/sql/planner/parse.rs b/src/sql/planner/parse.rs
index dfaec9a6..4bd8f30e 100644
--- a/src/sql/planner/parse.rs
+++ b/src/sql/planner/parse.rs
@@ -19,8 +19,8 @@ use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
 use datafusion::sql::sqlparser::parser::Parser;
 
 use crate::coordinator::{
-    CreateFunction, DropFunction, ShowFunctions, StartFunction, Statement as CoordinatorStatement,
-    StopFunction, StreamingSql,
+    CreateFunction, CreateTable, DropFunction, InsertStatement, ShowFunctions, StartFunction,
+    Statement as CoordinatorStatement, StopFunction, StreamingSql,
 };
 
 /// Stage 1: String → Box<dyn Statement>
@@ -48,9 +48,11 @@ pub fn parse_sql(query: &str) -> Result<Box<dyn CoordinatorStatement>> {
 
 /// Classify a parsed DataFusion Statement into the coordinator's Statement type.
 ///
-/// FunctionStream DDL (CREATE/DROP/START/STOP FUNCTION, SHOW FUNCTIONS)
-/// is converted to concrete coordinator types; everything else is wrapped
-/// in StreamingSql.
+/// Statement classification mirrors the analysis flow from `parse_and_get_arrow_program`:
+///   - FunctionStream DDL → concrete coordinator types (CreateFunction, DropFunction, etc.)
+///   - CREATE TABLE / CREATE VIEW → CreateTable (catalog registration)
+///   - INSERT INTO / standalone SELECT → InsertStatement (streaming pipeline)
+///   - Everything else → StreamingSql (catch-all)
 fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>> {
     match stmt {
         DFStatement::CreateFunctionWith { options } => {
@@ -69,6 +71,10 @@ fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>
             Ok(Box::new(DropFunction::new(name)))
         }
         DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())),
+        s @ DFStatement::CreateTable(_) | s @ DFStatement::CreateView { .. } => {
+            Ok(Box::new(CreateTable::new(s)))
+        }
+        s @ DFStatement::Insert(_) => Ok(Box::new(InsertStatement::new(s))),
         other => Ok(Box::new(StreamingSql::new(other))),
     }
 }

From 29b19d9ddad473cdc5f62a2dafd80bf1b788f5ea Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Tue, 17 Mar 2026 20:27:48 +0800
Subject: [PATCH 04/44] refactor(coordinator): remove StreamingSql and plan INSERT as a LogicalProgram

---
 src/coordinator/analyze/analyzer.rs           |  11 +-
 src/coordinator/execution/executor.rs         |  20 +--
 src/coordinator/mod.rs                        |   2 +-
 src/coordinator/plan/insert_statement_plan.rs |  14 +-
 src/coordinator/plan/logical_plan_visitor.rs  | 169 ++++++++++++++----
 src/coordinator/plan/mod.rs                   |   2 -
 src/coordinator/plan/streaming_sql_plan.rs    |  32 ----
 src/coordinator/plan/visitor.rs               |   8 +-
 src/coordinator/statement/mod.rs              |   2 -
 src/coordinator/statement/streaming_sql.rs    |  39 ----
 src/coordinator/statement/visitor.rs          |   8 +-
 src/server/handler.rs                         |  21 ++-
 src/sql/planner/mod.rs                        |   2 +-
 src/sql/planner/parse.rs                      | 111 +++++++-----
 14 files changed, 239 insertions(+), 202 deletions(-)
 delete mode 100644 src/coordinator/plan/streaming_sql_plan.rs
 delete mode 100644 src/coordinator/statement/streaming_sql.rs

diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs
index 297d0de2..cd469f55 100644
--- a/src/coordinator/analyze/analyzer.rs
+++ b/src/coordinator/analyze/analyzer.rs
@@ -15,7 +15,7 @@ use crate::coordinator::execution_context::ExecutionContext;
 use crate::coordinator::statement::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
     ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext,
-    StatementVisitorResult, StopFunction, StreamingSql,
+    StatementVisitorResult, StopFunction,
 };
 use std::fmt;
 
@@ -132,13 +132,4 @@ impl StatementVisitor for Analyzer<'_> {
     ) -> StatementVisitorResult {
         StatementVisitorResult::Analyze(Box::new(InsertStatement::new(stmt.statement.clone())))
     }
-
-    fn visit_streaming_sql(
-        &self,
-        stmt: &StreamingSql,
-        _context: &StatementVisitorContext,
-    ) -> StatementVisitorResult {
-        // TODO: add semantic analysis for streaming SQL (schema validation, etc.)
-        StatementVisitorResult::Analyze(Box::new(StreamingSql::new(stmt.statement.clone())))
-    }
 }
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index dbc76923..1a8e042a 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -14,7 +14,7 @@ use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_reco
 use crate::coordinator::plan::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
     InsertStatementPlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult,
-    ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan,
+    ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::taskexecutor::TaskManager;
@@ -222,21 +222,9 @@ impl PlanVisitor for Executor {
     ) -> PlanVisitorResult {
         // TODO: start streaming pipeline for INSERT / anonymous query
         let result = Err(ExecuteError::Internal(format!(
-            "INSERT statement execution not yet implemented. LogicalPlan:\n{}",
-            plan.logical_plan.display_indent()
-        )));
-        PlanVisitorResult::Execute(result)
-    }
-
-    fn visit_streaming_sql_plan(
-        &self,
-        plan: &StreamingSqlPlan,
-        _context: &PlanVisitorContext,
-    ) -> PlanVisitorResult {
-        // TODO: apply rewrite_plan for streaming transformations, then execute
-        let result = Err(ExecuteError::Internal(format!(
-            "Streaming SQL execution not yet implemented. LogicalPlan:\n{}",
-            plan.logical_plan.display_indent()
+            "INSERT statement execution not yet implemented. Program graph has {} node(s), {} connection(s)",
+            plan.program.graph.node_count(),
+            plan.connection_ids.len(),
         )));
         PlanVisitorResult::Execute(result)
     }
diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs
index 51b93ca0..500e8164 100644
--- a/src/coordinator/mod.rs
+++ b/src/coordinator/mod.rs
@@ -23,5 +23,5 @@ pub use coordinator::Coordinator;
 pub use dataset::{DataSet, ShowFunctionsResult};
 pub use statement::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, PythonModule,
-    ShowFunctions, StartFunction, Statement, StopFunction, StreamingSql,
+    ShowFunctions, StartFunction, Statement, StopFunction,
 };
diff --git a/src/coordinator/plan/insert_statement_plan.rs b/src/coordinator/plan/insert_statement_plan.rs
index e96a2772..9c7e4b76 100644
--- a/src/coordinator/plan/insert_statement_plan.rs
+++ b/src/coordinator/plan/insert_statement_plan.rs
@@ -10,18 +10,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use datafusion::logical_expr::LogicalPlan;
+use std::collections::HashSet;
+
+use crate::datastream::logical::LogicalProgram;
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
 #[derive(Debug)]
 pub struct InsertStatementPlan {
-    pub logical_plan: LogicalPlan,
+    pub program: LogicalProgram,
+    pub connection_ids: HashSet<i64>,
 }
 
 impl InsertStatementPlan {
-    pub fn new(logical_plan: LogicalPlan) -> Self {
-        Self { logical_plan }
+    pub fn new(program: LogicalProgram, connection_ids: HashSet<i64>) -> Self {
+        Self {
+            program,
+            connection_ids,
+        }
     }
 }
 
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index fde7f35a..818d830f 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -10,20 +10,38 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use datafusion::common::tree_node::TreeNode;
+use datafusion::common::{Result, plan_err};
+use datafusion::error::DataFusionError;
+use datafusion::execution::SessionStateBuilder;
+use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::prelude::SessionConfig;
+use datafusion::sql::TableReference;
 use tracing::debug;
 
 use crate::coordinator::analyze::analysis::Analysis;
 use crate::coordinator::plan::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
     InsertStatementPlan, PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
-    StreamingSqlPlan,
 };
 use crate::coordinator::statement::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
     ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext,
-    StatementVisitorResult, StopFunction, StreamingSql,
+    StatementVisitorResult, StopFunction,
 };
+use crate::datastream::logical::{LogicalProgram, ProgramConfig};
+use crate::datastream::optimizers::ChainingOptimizer;
+use crate::sql::catalog::insert::Insert;
+use crate::sql::catalog::table::Table as CatalogTable;
+use crate::sql::functions::{is_json_union, serialize_outgoing_json};
 use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::extension::sink::SinkExtension;
+use crate::sql::planner::plan::rewrite_plan;
+use crate::sql::planner::rewrite::SourceMetadataVisitor;
+use crate::sql::planner::{physical_planner, rewrite_sinks};
 
 pub struct LogicalPlanVisitor {
     schema_provider: StreamSchemaProvider,
@@ -45,6 +63,122 @@ impl LogicalPlanVisitor {
             _ => panic!("LogicalPlanVisitor should return Plan"),
         }
     }
+
+    /// Builds the [`LogicalProgram`] for an INSERT statement or an anonymous (sink-less) query.
+    ///
+    /// The returned [`InsertStatementPlan`] also carries the connection ids collected from the
+    /// rewritten plan, plus the id of the target connector table when it has one.
+    fn build_insert_plan(&self, stmt: &InsertStatement) -> Result<Box<dyn PlanNode>> {
+        let insert = Insert::try_from_statement(&stmt.statement, &self.schema_provider)?;
+
+        let (plan, sink_name) = match insert {
+            Insert::InsertQuery {
+                sink_name,
+                logical_plan,
+            } => (logical_plan, Some(sink_name)),
+            Insert::Anonymous { logical_plan } => (logical_plan, None),
+        };
+
+        let mut plan_rewrite = rewrite_plan(plan, &self.schema_provider)?;
+
+        if plan_rewrite
+            .schema()
+            .fields()
+            .iter()
+            .any(|f| is_json_union(f.data_type()))
+        {
+            plan_rewrite = serialize_outgoing_json(&self.schema_provider, Arc::new(plan_rewrite));
+        }
+
+        debug!("Plan = {}", plan_rewrite.display_graphviz());
+
+        let mut used_connections = HashSet::new();
+        let mut metadata = SourceMetadataVisitor::new(&self.schema_provider);
+        plan_rewrite.visit_with_subqueries(&mut metadata)?;
+        used_connections.extend(metadata.connection_ids.iter());
+
+        // Only connector tables can be inserted into; no sink name means the preview sink.
+        let sink = match sink_name {
+            Some(sink_name) => {
+                let table = self
+                    .schema_provider
+                    .get_catalog_table(&sink_name)
+                    .ok_or_else(|| {
+                        DataFusionError::Plan(format!("Connection {sink_name} not found"))
+                    })?;
+                match &table {
+                    CatalogTable::ConnectorTable(c) => {
+                        if let Some(id) = c.id {
+                            used_connections.insert(id);
+                        }
+                        SinkExtension::new(
+                            TableReference::bare(sink_name),
+                            table.clone(),
+                            plan_rewrite.schema().clone(),
+                            Arc::new(plan_rewrite),
+                        )
+                    }
+                    CatalogTable::MemoryTable { .. } => {
+                        plan_err!(
+                            "INSERT into memory tables is not supported in single-statement mode"
+                        )
+                    }
+                    CatalogTable::LookupTable(_) => {
+                        plan_err!("lookup (temporary) tables cannot be inserted into")
+                    }
+                    CatalogTable::TableFromQuery { .. } => {
+                        plan_err!(
+                            "shouldn't be inserting more data into a table made with CREATE TABLE AS"
+                        )
+                    }
+                    CatalogTable::PreviewSink { .. } => {
+                        plan_err!("queries shouldn't be able to insert into preview sink.")
+                    }
+                }
+            }
+            None => SinkExtension::new(
+                TableReference::parse_str("preview"),
+                CatalogTable::PreviewSink {
+                    logical_plan: plan_rewrite.clone(),
+                },
+                plan_rewrite.schema().clone(),
+                Arc::new(plan_rewrite),
+            ),
+        };
+
+        let extension = LogicalPlan::Extension(Extension {
+            node: Arc::new(sink?),
+        });
+
+        let extensions = rewrite_sinks(vec![extension])?;
+
+        let mut config = SessionConfig::new();
+        let options = config.options_mut();
+        options.optimizer.enable_round_robin_repartition = false;
+        options.optimizer.repartition_aggregations = false;
+        options.optimizer.repartition_windows = false;
+        options.optimizer.repartition_sorts = false;
+        options.optimizer.repartition_joins = false;
+        options.execution.target_partitions = 1;
+
+        let session_state = SessionStateBuilder::new()
+            .with_config(config)
+            .with_default_features()
+            .with_physical_optimizer_rules(vec![])
+            .build();
+
+        let mut plan_to_graph_visitor =
+            physical_planner::PlanToGraphVisitor::new(&self.schema_provider, &session_state);
+        for ext in extensions {
+            plan_to_graph_visitor.add_plan(ext)?;
+        }
+        let graph = plan_to_graph_visitor.into_graph();
+
+        let mut program = LogicalProgram::new(graph, ProgramConfig::default());
+        program.optimize(&ChainingOptimizer {});
+
+        Ok(Box::new(InsertStatementPlan::new(program, used_connections)))
+    }
 }
 
 impl StatementVisitor for LogicalPlanVisitor {
@@ -135,34 +269,9 @@ impl StatementVisitor for LogicalPlanVisitor {
         stmt: &InsertStatement,
         _context: &StatementVisitorContext,
     ) -> StatementVisitorResult {
-        let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider);
-
-        match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) {
-            Ok(plan) => {
-                debug!("Insert statement plan:\n{}", plan.display_graphviz());
-                StatementVisitorResult::Plan(Box::new(InsertStatementPlan::new(plan)))
-            }
-            Err(e) => {
-                panic!("Failed to convert INSERT statement to logical plan: {e}");
-            }
-        }
-    }
-
-    fn visit_streaming_sql(
-        &self,
-        stmt: &StreamingSql,
-        _context: &StatementVisitorContext,
-    ) -> StatementVisitorResult {
-        let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider);
-
-        match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) {
-            Ok(plan) => {
-                debug!("Logical plan:\n{}", plan.display_graphviz());
-                StatementVisitorResult::Plan(Box::new(StreamingSqlPlan::new(plan)))
-            }
-            Err(e) => {
-                panic!("Failed to convert SQL statement to logical plan: {e}");
-            }
+        match self.build_insert_plan(stmt) {
+            Ok(plan) => StatementVisitorResult::Plan(plan),
+            Err(e) => panic!("Failed to build INSERT plan: {e}"),
         }
     }
 }
diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs
index 3d36ec16..1dbd75f6 100644
--- a/src/coordinator/plan/mod.rs
+++ b/src/coordinator/plan/mod.rs
@@ -20,7 +20,6 @@ mod optimizer;
 mod show_functions_plan;
 mod start_function_plan;
 mod stop_function_plan;
-mod streaming_sql_plan;
 mod visitor;
 
 pub use create_function_plan::CreateFunctionPlan;
@@ -33,7 +32,6 @@ pub use optimizer::LogicalPlanner;
 pub use show_functions_plan::ShowFunctionsPlan;
 pub use start_function_plan::StartFunctionPlan;
 pub use stop_function_plan::StopFunctionPlan;
-pub use streaming_sql_plan::StreamingSqlPlan;
 pub use visitor::{PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
 use std::fmt;
diff --git a/src/coordinator/plan/streaming_sql_plan.rs b/src/coordinator/plan/streaming_sql_plan.rs
deleted file mode 100644
index 607420a8..00000000
--- a/src/coordinator/plan/streaming_sql_plan.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use datafusion::logical_expr::LogicalPlan;
-
-use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
-
-#[derive(Debug)]
-pub struct StreamingSqlPlan {
-    pub logical_plan: LogicalPlan,
-}
-
-impl StreamingSqlPlan {
-    pub fn new(logical_plan: LogicalPlan) -> Self {
-        Self { logical_plan }
-    }
-}
-
-impl PlanNode for StreamingSqlPlan {
-    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
-        visitor.visit_streaming_sql_plan(self, context)
-    }
-}
diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs
index e3911a8b..3964d645 100644
--- a/src/coordinator/plan/visitor.rs
+++ b/src/coordinator/plan/visitor.rs
@@ -12,7 +12,7 @@
 
 use super::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
-    InsertStatementPlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingSqlPlan,
+    InsertStatementPlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
 };
 
 /// Context passed to PlanVisitor methods
@@ -96,10 +96,4 @@ pub trait PlanVisitor {
         plan: &InsertStatementPlan,
         context: &PlanVisitorContext,
     ) -> PlanVisitorResult;
-
-    fn visit_streaming_sql_plan(
-        &self,
-        plan: &StreamingSqlPlan,
-        context: &PlanVisitorContext,
-    ) -> PlanVisitorResult;
 }
diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs
index 7628b94b..627ebbaa 100644
--- a/src/coordinator/statement/mod.rs
+++ b/src/coordinator/statement/mod.rs
@@ -18,7 +18,6 @@ mod insert_statement;
 mod show_functions;
 mod start_function;
 mod stop_function;
-mod streaming_sql;
 mod visitor;
 
 pub use create_function::{ConfigSource, CreateFunction, FunctionSource};
@@ -29,7 +28,6 @@ pub use insert_statement::InsertStatement;
 pub use show_functions::ShowFunctions;
 pub use start_function::StartFunction;
 pub use stop_function::StopFunction;
-pub use streaming_sql::StreamingSql;
 pub use visitor::{StatementVisitor, StatementVisitorContext, StatementVisitorResult};
 
 use std::fmt;
diff --git a/src/coordinator/statement/streaming_sql.rs b/src/coordinator/statement/streaming_sql.rs
deleted file mode 100644
index 1aa49205..00000000
--- a/src/coordinator/statement/streaming_sql.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use datafusion::sql::sqlparser::ast::Statement as DFStatement;
-
-use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
-
-/// Wraps a DataFusion SQL statement (SELECT, INSERT, CREATE TABLE, etc.)
-/// so it can flow through the same Statement → StatementVisitor pipeline
-/// as FunctionStream DDL commands.
-#[derive(Debug)]
-pub struct StreamingSql {
-    pub statement: DFStatement,
-}
-
-impl StreamingSql {
-    pub fn new(statement: DFStatement) -> Self {
-        Self { statement }
-    }
-}
-
-impl Statement for StreamingSql {
-    fn accept(
-        &self,
-        visitor: &dyn StatementVisitor,
-        context: &StatementVisitorContext,
-    ) -> StatementVisitorResult {
-        visitor.visit_streaming_sql(self, context)
-    }
-}
diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs
index 8de6ffe2..27e5a512 100644
--- a/src/coordinator/statement/visitor.rs
+++ b/src/coordinator/statement/visitor.rs
@@ -12,7 +12,7 @@
 
 use super::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
-    ShowFunctions, StartFunction, StopFunction, StreamingSql,
+    ShowFunctions, StartFunction, StopFunction,
 };
 use crate::coordinator::plan::PlanNode;
 use crate::coordinator::statement::Statement;
@@ -100,10 +100,4 @@ pub trait StatementVisitor {
         stmt: &InsertStatement,
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
-
-    fn visit_streaming_sql(
-        &self,
-        stmt: &StreamingSql,
-        context: &StatementVisitorContext,
-    ) -> StatementVisitorResult;
 }
diff --git a/src/server/handler.rs b/src/server/handler.rs
index 45b0cd07..bf9350e6 100644
--- a/src/server/handler.rs
+++ b/src/server/handler.rs
@@ -70,10 +70,14 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
         let req = request.into_inner();
 
         let parse_start = Instant::now();
-        let parsed = match parse_sql(&req.sql) {
-            Ok(parsed) => {
-                log::debug!("SQL parsed in {}ms", parse_start.elapsed().as_millis());
-                parsed
+        let statements = match parse_sql(&req.sql) {
+            Ok(stmts) => {
+                log::debug!(
+                    "SQL parsed {} statement(s) in {}ms",
+                    stmts.len(),
+                    parse_start.elapsed().as_millis()
+                );
+                stmts
             }
             Err(e) => {
                 return Ok(TonicResponse::new(Self::build_response(
@@ -85,7 +89,14 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
         };
 
         let exec_start = Instant::now();
-        let result = self.coordinator.execute(parsed.as_ref());
+        let mut last_result = self.coordinator.execute(statements[0].as_ref());
+        for stmt in &statements[1..] {
+            if !last_result.success {
+                break;
+            }
+            last_result = self.coordinator.execute(stmt.as_ref());
+        }
+        let result = last_result;
         log::debug!(
             "Coordinator execution finished in {}ms",
             exec_start.elapsed().as_millis()
diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs
index d80d3a8d..6bf8d357 100644
--- a/src/sql/planner/mod.rs
+++ b/src/sql/planner/mod.rs
@@ -202,7 +202,7 @@ fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<LogicalPlan> {
     }))
 }
 
-fn rewrite_sinks(extensions: Vec<LogicalPlan>) -> Result<Vec<LogicalPlan>> {
+pub fn rewrite_sinks(extensions: Vec<LogicalPlan>) -> Result<Vec<LogicalPlan>> {
     let mut sink_inputs = build_sink_inputs(&extensions);
     let mut new_extensions = vec![];
     for extension in extensions {
diff --git a/src/sql/planner/parse.rs b/src/sql/planner/parse.rs
index 4bd8f30e..a3af2e89 100644
--- a/src/sql/planner/parse.rs
+++ b/src/sql/planner/parse.rs
@@ -20,30 +20,29 @@ use datafusion::sql::sqlparser::parser::Parser;
 
 use crate::coordinator::{
     CreateFunction, CreateTable, DropFunction, InsertStatement, ShowFunctions, StartFunction,
-    Statement as CoordinatorStatement, StopFunction, StreamingSql,
+    Statement as CoordinatorStatement, StopFunction,
 };
 
-/// Stage 1: String → Box<dyn Statement>
+/// Stage 1: String → Vec<Box<dyn Statement>>
 ///
 /// Parses SQL using FunctionStreamDialect (from sqlparser-rs), then classifies
-/// the result into either a FunctionStream DDL statement or a StreamingSql,
-/// both unified under the coordinator's Statement trait.
-pub fn parse_sql(query: &str) -> Result<Box<dyn CoordinatorStatement>> {
+/// each statement into a concrete coordinator Statement type.
+/// A single SQL input may contain multiple statements (separated by `;`).
+pub fn parse_sql(query: &str) -> Result<Vec<Box<dyn CoordinatorStatement>>> {
     let trimmed = query.trim();
     if trimmed.is_empty() {
         return plan_err!("Query is empty");
     }
 
     let dialect = FunctionStreamDialect {};
-    let mut statements = Parser::parse_sql(&dialect, trimmed)
+    let statements = Parser::parse_sql(&dialect, trimmed)
         .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?;
 
     if statements.is_empty() {
         return plan_err!("No SQL statements found");
     }
 
-    let stmt = statements.remove(0);
-    classify_statement(stmt)
+    statements.into_iter().map(classify_statement).collect()
 }
 
 /// Classify a parsed DataFusion Statement into the coordinator's Statement type.
@@ -51,8 +50,8 @@ pub fn parse_sql(query: &str) -> Result<Box<dyn CoordinatorStatement>> {
 /// Statement classification mirrors the analysis flow from `parse_and_get_arrow_program`:
 ///   - FunctionStream DDL → concrete coordinator types (CreateFunction, DropFunction, etc.)
 ///   - CREATE TABLE / CREATE VIEW → CreateTable (catalog registration)
-///   - INSERT INTO / standalone SELECT → InsertStatement (streaming pipeline)
-///   - Everything else → StreamingSql (catch-all)
+///   - INSERT INTO → InsertStatement (streaming pipeline)
+///   - Everything else → error (unsupported)
 fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>> {
     match stmt {
         DFStatement::CreateFunctionWith { options } => {
@@ -75,7 +74,7 @@ fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>
             Ok(Box::new(CreateTable::new(s)))
         }
         s @ DFStatement::Insert(_) => Ok(Box::new(InsertStatement::new(s))),
-        other => Ok(Box::new(StreamingSql::new(other))),
+        other => plan_err!("Unsupported SQL statement: {other}"),
     }
 }
 
@@ -97,76 +96,90 @@ fn sql_options_to_map(options: &[SqlOption]) -> HashMap<String, String> {
 mod tests {
     use super::*;
 
-    fn is_streaming_sql(stmt: &dyn CoordinatorStatement) -> bool {
-        let debug = format!("{:?}", stmt);
-        debug.starts_with("StreamingSql")
+    fn first_stmt(sql: &str) -> Box<dyn CoordinatorStatement> {
+        let mut stmts = parse_sql(sql).unwrap();
+        assert!(!stmts.is_empty());
+        stmts.remove(0)
     }
 
-    fn is_ddl(stmt: &dyn CoordinatorStatement) -> bool {
-        !is_streaming_sql(stmt)
+    fn is_type(stmt: &dyn CoordinatorStatement, prefix: &str) -> bool {
+        format!("{:?}", stmt).starts_with(prefix)
     }
 
     #[test]
     fn test_parse_create_function() {
         let sql =
             "CREATE FUNCTION WITH ('function_path'='./test.wasm', 'config_path'='./config.yml')";
-        let stmt = parse_sql(sql).unwrap();
-        assert!(is_ddl(stmt.as_ref()));
+        let stmt = first_stmt(sql);
+        assert!(is_type(stmt.as_ref(), "CreateFunction"));
     }
 
     #[test]
     fn test_parse_create_function_minimal() {
         let sql = "CREATE FUNCTION WITH ('function_path'='./processor.wasm')";
-        let stmt = parse_sql(sql).unwrap();
-        assert!(is_ddl(stmt.as_ref()));
+        let stmt = first_stmt(sql);
+        assert!(is_type(stmt.as_ref(), "CreateFunction"));
     }
 
     #[test]
     fn test_parse_drop_function() {
-        let sql = "DROP FUNCTION my_task";
-        let stmt = parse_sql(sql).unwrap();
-        assert!(is_ddl(stmt.as_ref()));
+        let stmt = first_stmt("DROP FUNCTION my_task");
+        assert!(is_type(stmt.as_ref(), "DropFunction"));
     }
 
     #[test]
     fn test_parse_start_function() {
-        let sql = "START FUNCTION my_task";
-        let stmt = parse_sql(sql).unwrap();
-        assert!(is_ddl(stmt.as_ref()));
+        let stmt = first_stmt("START FUNCTION my_task");
+        assert!(is_type(stmt.as_ref(), "StartFunction"));
     }
 
     #[test]
     fn test_parse_stop_function() {
-        let sql = "STOP FUNCTION my_task";
-        let stmt = parse_sql(sql).unwrap();
-        assert!(is_ddl(stmt.as_ref()));
+        let stmt = first_stmt("STOP FUNCTION my_task");
+        assert!(is_type(stmt.as_ref(), "StopFunction"));
     }
 
     #[test]
     fn test_parse_show_functions() {
-        let sql = "SHOW FUNCTIONS";
-        let stmt = parse_sql(sql).unwrap();
-        assert!(is_ddl(stmt.as_ref()));
+        let stmt = first_stmt("SHOW FUNCTIONS");
+        assert!(is_type(stmt.as_ref(), "ShowFunctions"));
     }
 
     #[test]
-    fn test_parse_case_insensitive() {
-        let sql1 = "create function with ('function_path'='./test.wasm')";
-        assert!(is_ddl(parse_sql(sql1).unwrap().as_ref()));
+    fn test_parse_create_table() {
+        let stmt = first_stmt("CREATE TABLE foo (id INT, name VARCHAR)");
+        assert!(is_type(stmt.as_ref(), "CreateTable"));
+    }
 
-        let sql2 = "show functions";
-        assert!(is_ddl(parse_sql(sql2).unwrap().as_ref()));
+    #[test]
+    fn test_parse_insert_statement() {
+        let stmt = first_stmt("INSERT INTO sink SELECT * FROM source");
+        assert!(is_type(stmt.as_ref(), "InsertStatement"));
+    }
 
-        let sql3 = "start function my_task";
-        assert!(is_ddl(parse_sql(sql3).unwrap().as_ref()));
+    #[test]
+    fn test_parse_case_insensitive() {
+        assert!(is_type(
+            first_stmt("create function with ('function_path'='./test.wasm')").as_ref(),
+            "CreateFunction"
+        ));
+        assert!(is_type(
+            first_stmt("show functions").as_ref(),
+            "ShowFunctions"
+        ));
+        assert!(is_type(
+            first_stmt("start function my_task").as_ref(),
+            "StartFunction"
+        ));
     }
 
     #[test]
-    fn test_parse_streaming_sql() {
-        let sql =
-            "SELECT count(*), tumble(interval '1 minute') as window FROM events GROUP BY window";
-        let stmt = parse_sql(sql).unwrap();
-        assert!(is_streaming_sql(stmt.as_ref()));
+    fn test_parse_multiple_statements() {
+        let sql = "CREATE TABLE t1 (id INT); INSERT INTO sink SELECT * FROM t1";
+        let stmts = parse_sql(sql).unwrap();
+        assert_eq!(stmts.len(), 2);
+        assert!(is_type(stmts[0].as_ref(), "CreateTable"));
+        assert!(is_type(stmts[1].as_ref(), "InsertStatement"));
     }
 
     #[test]
@@ -175,6 +188,12 @@ mod tests {
         assert!(parse_sql("  ").is_err());
     }
 
+    #[test]
+    fn test_parse_unsupported_statement() {
+        let err = parse_sql("SELECT 1").err().expect("SELECT must be rejected");
+        assert!(err.to_string().contains("Unsupported SQL statement"));
+    }
+
     #[test]
     fn test_parse_with_extra_properties() {
         let sql = r#"CREATE FUNCTION WITH (
@@ -183,7 +202,7 @@ mod tests {
             'parallelism'='4',
             'memory-limit'='256mb'
         )"#;
-        let stmt = parse_sql(sql).unwrap();
-        assert!(is_ddl(stmt.as_ref()));
+        let stmt = first_stmt(sql);
+        assert!(is_type(stmt.as_ref(), "CreateFunction"));
     }
 }

From 67b65a9de49193759169a69377dc7142d3a95eb0 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 18 Mar 2026 22:41:13 +0800
Subject: [PATCH 05/44] refactor(coordinator): replace InsertStatement with StreamingTable statement and plans

---
 Cargo.lock                                    |   4 +-
 src/coordinator/analyze/analyzer.rs           |  14 +-
 src/coordinator/execution/executor.rs         |  40 +-
 src/coordinator/mod.rs                        |   5 +-
 src/coordinator/plan/logical_plan_visitor.rs  | 266 ++++++-----
 ...statement_plan.rs => lookup_table_plan.rs} |  23 +-
 src/coordinator/plan/mod.rs                   |   8 +-
 .../plan/streaming_table_connector_plan.rs    |  27 ++
 src/coordinator/plan/streaming_table_plan.rs  |  30 ++
 src/coordinator/plan/visitor.rs               |  19 +-
 src/coordinator/statement/mod.rs              |   4 +-
 ...insert_statement.rs => streaming_table.rs} |  12 +-
 src/coordinator/statement/visitor.rs          |   8 +-
 src/coordinator/tool/connector_options.rs     | 360 +++++++++++++++
 src/coordinator/tool/mod.rs                   |   3 +
 src/sql/catalog/table.rs                      |  59 +--
 src/sql/planner/extension/sink.rs             |   9 +-
 src/sql/planner/mod.rs                        |  25 +-
 src/sql/planner/parse.rs                      |  21 +-
 src/sql/planner/plan/mod.rs                   | 429 +-----------------
 src/sql/planner/plan/stream_rewriter.rs       | 148 ++++++
 .../planner/plan/window_detecting_visitor.rs  | 215 +++++++++
 src/sql/planner/rewrite/source_rewriter.rs    |  23 +-
 23 files changed, 1024 insertions(+), 728 deletions(-)
 rename src/coordinator/plan/{insert_statement_plan.rs => lookup_table_plan.rs} (61%)
 create mode 100644 src/coordinator/plan/streaming_table_connector_plan.rs
 create mode 100644 src/coordinator/plan/streaming_table_plan.rs
 rename src/coordinator/statement/{insert_statement.rs => streaming_table.rs} (76%)
 create mode 100644 src/coordinator/tool/connector_options.rs
 create mode 100644 src/coordinator/tool/mod.rs
 create mode 100644 src/sql/planner/plan/stream_rewriter.rs
 create mode 100644 src/sql/planner/plan/window_detecting_visitor.rs

diff --git a/Cargo.lock b/Cargo.lock
index cb19233d..f39d5d3e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4964,7 +4964,7 @@ dependencies = [
 [[package]]
 name = "sqlparser"
 version = "0.55.0"
-source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#7e7cfb6145a426a26a7db12ae5874fed8b9c6b95"
+source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#9783cf9e3e6b61c763f78bcdd460e85edec22250"
 dependencies = [
  "log",
  "recursive",
@@ -4974,7 +4974,7 @@ dependencies = [
 [[package]]
 name = "sqlparser_derive"
 version = "0.3.0"
-source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#7e7cfb6145a426a26a7db12ae5874fed8b9c6b95"
+source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser#9783cf9e3e6b61c763f78bcdd460e85edec22250"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs
index cd469f55..c351f3ae 100644
--- a/src/coordinator/analyze/analyzer.rs
+++ b/src/coordinator/analyze/analyzer.rs
@@ -13,9 +13,9 @@
 use super::Analysis;
 use crate::coordinator::execution_context::ExecutionContext;
 use crate::coordinator::statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
-    ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext,
-    StatementVisitorResult, StopFunction,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction,
+    Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction,
+    StreamingTableStatement,
 };
 use std::fmt;
 
@@ -125,11 +125,13 @@ impl StatementVisitor for Analyzer<'_> {
         StatementVisitorResult::Analyze(Box::new(CreateTable::new(stmt.statement.clone())))
     }
 
-    fn visit_insert_statement(
+    fn visit_streaming_table_statement(
         &self,
-        stmt: &InsertStatement,
+        stmt: &StreamingTableStatement,
         _context: &StatementVisitorContext,
     ) -> StatementVisitorResult {
-        StatementVisitorResult::Analyze(Box::new(InsertStatement::new(stmt.statement.clone())))
+        StatementVisitorResult::Analyze(Box::new(StreamingTableStatement::new(
+            stmt.statement.clone(),
+        )))
     }
 }
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 1a8e042a..056f0236 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -13,8 +13,9 @@
 use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_record_batch};
 use crate::coordinator::plan::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
-    InsertStatementPlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult,
-    ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
+    LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult,
+    ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
+    StreamingTableConnectorPlan,
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::taskexecutor::TaskManager;
@@ -215,17 +216,36 @@ impl PlanVisitor for Executor {
         PlanVisitorResult::Execute(result)
     }
 
-    fn visit_insert_statement_plan(
+    fn visit_streaming_table(
         &self,
-        plan: &InsertStatementPlan,
+        _plan: &StreamingTable,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        // TODO: start streaming pipeline for INSERT / anonymous query
-        let result = Err(ExecuteError::Internal(format!(
-            "INSERT statement execution not yet implemented. Program graph has {} node(s), {} connection(s)",
-            plan.program.graph.node_count(),
-            plan.connection_ids.len(),
-        )));
+        let result = Err(ExecuteError::Internal(
+            "StreamingTable execution not yet implemented".to_string(),
+        ));
+        PlanVisitorResult::Execute(result)
+    }
+
+    fn visit_lookup_table(
+        &self,
+        _plan: &LookupTablePlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        let result = Err(ExecuteError::Internal(
+            "LookupTable execution not yet implemented".to_string(),
+        ));
+        PlanVisitorResult::Execute(result)
+    }
+
+    fn visit_streaming_connector_table(
+        &self,
+        _plan: &StreamingTableConnectorPlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        let result = Err(ExecuteError::Internal(
+            "StreamingTableConnector execution not yet implemented".to_string(),
+        ));
         PlanVisitorResult::Execute(result)
     }
 }
diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs
index 500e8164..7791e8a8 100644
--- a/src/coordinator/mod.rs
+++ b/src/coordinator/mod.rs
@@ -18,10 +18,11 @@ mod execution;
 mod execution_context;
 mod plan;
 mod statement;
+mod tool;
 
 pub use coordinator::Coordinator;
 pub use dataset::{DataSet, ShowFunctionsResult};
 pub use statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement, PythonModule,
-    ShowFunctions, StartFunction, Statement, StopFunction,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, PythonModule, ShowFunctions,
+    StartFunction, Statement, StopFunction, StreamingTableStatement,
 };
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 818d830f..fb8c8c82 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -10,38 +10,53 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::HashSet;
 use std::sync::Arc;
 
-use datafusion::common::tree_node::TreeNode;
-use datafusion::common::{Result, plan_err};
-use datafusion::error::DataFusionError;
-use datafusion::execution::SessionStateBuilder;
-use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
-use datafusion::prelude::SessionConfig;
-use datafusion::sql::TableReference;
+use datafusion::common::{Result, plan_datafusion_err, plan_err};
+use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement};
+use datafusion_common::TableReference;
+use datafusion_expr::{Expr, Extension, LogicalPlan, col};
+use sqlparser::ast::Statement;
 use tracing::debug;
 
 use crate::coordinator::analyze::analysis::Analysis;
 use crate::coordinator::plan::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
-    InsertStatementPlan, PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, PlanNode,
+    ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
 };
 use crate::coordinator::statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
-    ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext,
-    StatementVisitorResult, StopFunction,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction,
+    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction,
+    StreamingTableStatement,
 };
-use crate::datastream::logical::{LogicalProgram, ProgramConfig};
-use crate::datastream::optimizers::ChainingOptimizer;
-use crate::sql::catalog::insert::Insert;
-use crate::sql::catalog::table::Table as CatalogTable;
+use crate::coordinator::tool::ConnectorOptions;
+use crate::sql::catalog::Table;
+use crate::sql::catalog::connector::ConnectionType;
+use crate::sql::catalog::connector_table::ConnectorTable;
+use crate::sql::catalog::field_spec::FieldSpec;
+use crate::sql::catalog::optimizer::produce_optimized_plan;
 use crate::sql::functions::{is_json_union, serialize_outgoing_json};
-use crate::sql::planner::StreamSchemaProvider;
 use crate::sql::planner::extension::sink::SinkExtension;
-use crate::sql::planner::plan::rewrite_plan;
-use crate::sql::planner::rewrite::SourceMetadataVisitor;
-use crate::sql::planner::{physical_planner, rewrite_sinks};
+use crate::sql::planner::{StreamSchemaProvider, maybe_add_key_extension_to_sink};
+use crate::sql::rewrite_plan;
+
+const CONNECTOR: &str = "connector";
+const PARTITION_BY: &str = "partition_by";
+const IDLE_MICROS: &str = "idle_time";
+
+/// Converts the WITH option list into a key-value map for reading settings such as `connector`.
+fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap<String, String> {
+    options
+        .iter()
+        .filter_map(|opt| match opt {
+            SqlOption::KeyValue { key, value } => Some((
+                key.value.clone(),
+                value.to_string().trim_matches('\'').to_string(),
+            )),
+            _ => None,
+        })
+        .collect()
+}
 
 pub struct LogicalPlanVisitor {
     schema_provider: StreamSchemaProvider,
@@ -63,121 +78,98 @@ impl LogicalPlanVisitor {
             _ => panic!("LogicalPlanVisitor should return Plan"),
         }
     }
-
-    fn build_insert_plan(&self, stmt: &InsertStatement) -> Result<Box<dyn PlanNode>> {
-        let insert = Insert::try_from_statement(&stmt.statement, &self.schema_provider)?;
-
-        let (plan, sink_name) = match insert {
-            Insert::InsertQuery {
-                sink_name,
-                logical_plan,
-            } => (logical_plan, Some(sink_name)),
-            Insert::Anonymous { logical_plan } => (logical_plan, None),
-        };
-
-        let mut plan_rewrite = rewrite_plan(plan, &self.schema_provider)?;
-
-        if plan_rewrite
-            .schema()
-            .fields()
-            .iter()
-            .any(|f| is_json_union(f.data_type()))
-        {
-            plan_rewrite = serialize_outgoing_json(&self.schema_provider, Arc::new(plan_rewrite));
-        }
-
-        debug!("Plan = {}", plan_rewrite.display_graphviz());
-
-        let mut used_connections = HashSet::new();
-        let mut metadata = SourceMetadataVisitor::new(&self.schema_provider);
-        plan_rewrite.visit_with_subqueries(&mut metadata)?;
-        used_connections.extend(metadata.connection_ids.iter());
-
-        let sink = match sink_name {
-            Some(sink_name) => {
-                let table = self
-                    .schema_provider
-                    .get_catalog_table(&sink_name)
-                    .ok_or_else(|| {
-                        DataFusionError::Plan(format!("Connection {sink_name} not found"))
-                    })?;
-                match &table {
-                    CatalogTable::ConnectorTable(c) => {
-                        if let Some(id) = c.id {
-                            used_connections.insert(id);
-                        }
-                        SinkExtension::new(
-                            TableReference::bare(sink_name),
-                            table.clone(),
-                            plan_rewrite.schema().clone(),
-                            Arc::new(plan_rewrite),
-                        )
-                    }
-                    CatalogTable::MemoryTable { .. } => {
-                        return plan_err!(
-                            "INSERT into memory tables is not supported in single-statement mode"
-                        );
-                    }
-                    CatalogTable::LookupTable(_) => {
-                        plan_err!("lookup (temporary) tables cannot be inserted into")
-                    }
-                    CatalogTable::TableFromQuery { .. } => {
-                        plan_err!(
-                            "shouldn't be inserting more data into a table made with CREATE TABLE AS"
-                        )
-                    }
-                    CatalogTable::PreviewSink { .. } => {
-                        plan_err!("queries shouldn't be able insert into preview sink.")
-                    }
+    fn build_create_streaming_table_plan(
+        &self,
+        stmt: &StreamingTableStatement,
+    ) -> Result<Box<dyn PlanNode>> {
+        let statement = &stmt.statement;
+        match statement {
+            DFStatement::CreateStreamingTable {
+                name,
+                with_options,
+                comment,
+                query,
+            } => {
+                let name_str = name.to_string();
+
+                let mut connector_opts = ConnectorOptions::new(with_options, &None)?;
+                let connector_type = connector_opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| {
+                    plan_datafusion_err!(
+                        "Streaming Table '{}' must specify '{}' option",
+                        name_str,
+                        CONNECTOR
+                    )
+                })?;
+
+                let synthetic_statement = Statement::Query(query.clone());
+                let base_plan =
+                    produce_optimized_plan(&synthetic_statement, &self.schema_provider)?;
+
+                let mut plan_rewrite = rewrite_plan(base_plan, &self.schema_provider)?;
+
+                if plan_rewrite
+                    .schema()
+                    .fields()
+                    .iter()
+                    .any(|f| is_json_union(f.data_type()))
+                {
+                    plan_rewrite =
+                        serialize_outgoing_json(&self.schema_provider, Arc::new(plan_rewrite));
                 }
-            }
-            None => SinkExtension::new(
-                TableReference::parse_str("preview"),
-                CatalogTable::PreviewSink {
-                    logical_plan: plan_rewrite.clone(),
-                },
-                plan_rewrite.schema().clone(),
-                Arc::new(plan_rewrite),
-            ),
-        };
-
-        let extension = LogicalPlan::Extension(Extension {
-            node: Arc::new(sink?),
-        });
-
-        let extensions = rewrite_sinks(vec![extension])?;
-
-        let mut config = SessionConfig::new();
-        config
-            .options_mut()
-            .optimizer
-            .enable_round_robin_repartition = false;
-        config.options_mut().optimizer.repartition_aggregations = false;
-        config.options_mut().optimizer.repartition_windows = false;
-        config.options_mut().optimizer.repartition_sorts = false;
-        config.options_mut().optimizer.repartition_joins = false;
-        config.options_mut().execution.target_partitions = 1;
-
-        let session_state = SessionStateBuilder::new()
-            .with_config(config)
-            .with_default_features()
-            .with_physical_optimizer_rules(vec![])
-            .build();
 
-        let mut plan_to_graph_visitor =
-            physical_planner::PlanToGraphVisitor::new(&self.schema_provider, &session_state);
-        for ext in extensions {
-            plan_to_graph_visitor.add_plan(ext)?;
+                let fields: Vec<FieldSpec> = plan_rewrite
+                    .schema()
+                    .fields()
+                    .iter()
+                    .map(|f| FieldSpec::Struct((**f).clone()))
+                    .collect();
+
+                let partition_exprs =
+                    if let Some(partition_cols) = connector_opts.pull_opt_str(PARTITION_BY)? {
+                        let cols: Vec<Expr> =
+                            partition_cols.split(',').map(|c| col(c.trim())).collect();
+                        Some(cols)
+                    } else {
+                        None
+                    };
+
+                let connector_table = ConnectorTable {
+                    id: None,
+                    connector: connector_type,
+                    name: name_str.clone(),
+                    connection_type: ConnectionType::Sink,
+                    fields,
+                    config: "".to_string(),
+                    description: comment.clone().unwrap_or_default(),
+                    event_time_field: None,
+                    watermark_field: None,
+                    idle_time: connector_opts.pull_opt_duration(IDLE_MICROS)?,
+                    primary_keys: Arc::new(vec![]),
+                    inferred_fields: None,
+                    partition_exprs: Arc::new(partition_exprs),
+                };
+
+                let sink_extension = SinkExtension::new(
+                    TableReference::bare(name_str.clone()),
+                    Table::ConnectorTable(connector_table.clone()),
+                    plan_rewrite.schema().clone(),
+                    Arc::new(plan_rewrite),
+                )?;
+
+                let final_plan =
+                    maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension {
+                        node: Arc::new(sink_extension),
+                    }))?;
+
+                Ok(Box::new(StreamingTable {
+                    name: name_str,
+                    comment: comment.clone(),
+                    connector_table,
+                    logical_plan: final_plan,
+                }))
+            }
+            _ => plan_err!("Only CREATE STREAMING TABLE supported"),
         }
-        let graph = plan_to_graph_visitor.into_graph();
-
-        let mut program = LogicalProgram::new(graph, ProgramConfig::default());
-        program.optimize(&ChainingOptimizer {});
-
-        Ok(Box::new(InsertStatementPlan::new(
-            program,
-            used_connections,
-        )))
     }
 }
 
@@ -264,14 +256,14 @@ impl StatementVisitor for LogicalPlanVisitor {
         }
     }
 
-    fn visit_insert_statement(
+    fn visit_streaming_table_statement(
         &self,
-        stmt: &InsertStatement,
+        stmt: &StreamingTableStatement,
         _context: &StatementVisitorContext,
     ) -> StatementVisitorResult {
-        match self.build_insert_plan(stmt) {
+        match self.build_create_streaming_table_plan(stmt) {
             Ok(plan) => StatementVisitorResult::Plan(plan),
-            Err(e) => panic!("Failed to build INSERT plan: {e}"),
+            Err(e) => panic!("Failed to build CreateStreamingTable plan: {e}"),
         }
     }
 }
diff --git a/src/coordinator/plan/insert_statement_plan.rs b/src/coordinator/plan/lookup_table_plan.rs
similarity index 61%
rename from src/coordinator/plan/insert_statement_plan.rs
rename to src/coordinator/plan/lookup_table_plan.rs
index 9c7e4b76..889f57e1 100644
--- a/src/coordinator/plan/insert_statement_plan.rs
+++ b/src/coordinator/plan/lookup_table_plan.rs
@@ -10,29 +10,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::HashSet;
-
-use crate::datastream::logical::LogicalProgram;
+use crate::sql::catalog::connector_table::ConnectorTable;
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
+/// Plan node that exposes a lookup table config as a logical plan input.
 #[derive(Debug)]
-pub struct InsertStatementPlan {
-    pub program: LogicalProgram,
-    pub connection_ids: HashSet<i64>,
-}
-
-impl InsertStatementPlan {
-    pub fn new(program: LogicalProgram, connection_ids: HashSet<i64>) -> Self {
-        Self {
-            program,
-            connection_ids,
-        }
-    }
+pub struct LookupTablePlan {
+    pub table: ConnectorTable,
 }
 
-impl PlanNode for InsertStatementPlan {
+impl PlanNode for LookupTablePlan {
     fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
-        visitor.visit_insert_statement_plan(self, context)
+        visitor.visit_lookup_table(self, context)
     }
 }
diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs
index 1dbd75f6..d68320d8 100644
--- a/src/coordinator/plan/mod.rs
+++ b/src/coordinator/plan/mod.rs
@@ -14,24 +14,28 @@ mod create_function_plan;
 mod create_python_function_plan;
 mod create_table_plan;
 mod drop_function_plan;
-mod insert_statement_plan;
 mod logical_plan_visitor;
+mod lookup_table_plan;
 mod optimizer;
 mod show_functions_plan;
 mod start_function_plan;
 mod stop_function_plan;
+mod streaming_table_connector_plan;
+mod streaming_table_plan;
 mod visitor;
 
 pub use create_function_plan::CreateFunctionPlan;
 pub use create_python_function_plan::CreatePythonFunctionPlan;
 pub use create_table_plan::CreateTablePlan;
 pub use drop_function_plan::DropFunctionPlan;
-pub use insert_statement_plan::InsertStatementPlan;
 pub use logical_plan_visitor::LogicalPlanVisitor;
+pub use lookup_table_plan::LookupTablePlan;
 pub use optimizer::LogicalPlanner;
 pub use show_functions_plan::ShowFunctionsPlan;
 pub use start_function_plan::StartFunctionPlan;
 pub use stop_function_plan::StopFunctionPlan;
+pub use streaming_table_connector_plan::StreamingTableConnectorPlan;
+pub use streaming_table_plan::StreamingTable;
 pub use visitor::{PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
 use std::fmt;
diff --git a/src/coordinator/plan/streaming_table_connector_plan.rs b/src/coordinator/plan/streaming_table_connector_plan.rs
new file mode 100644
index 00000000..be1cda31
--- /dev/null
+++ b/src/coordinator/plan/streaming_table_connector_plan.rs
@@ -0,0 +1,27 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::sql::catalog::connector_table::ConnectorTable;
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+/// Plan node that exposes a connector table config as a logical plan input.
+#[derive(Debug)]
+pub struct StreamingTableConnectorPlan {
+    pub table: ConnectorTable,
+}
+
+impl PlanNode for StreamingTableConnectorPlan {
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_streaming_connector_table(self, context)
+    }
+}
diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs
new file mode 100644
index 00000000..577e6494
--- /dev/null
+++ b/src/coordinator/plan/streaming_table_plan.rs
@@ -0,0 +1,30 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+use crate::sql::catalog::connector_table::ConnectorTable;
+use datafusion::logical_expr::LogicalPlan;
+
+/// Plan node representing a fully resolved streaming table (DDL).
+#[derive(Debug)]
+pub struct StreamingTable {
+    pub name: String,
+    pub comment: Option<String>,
+    pub connector_table: ConnectorTable,
+    pub logical_plan: LogicalPlan,
+}
+
+impl PlanNode for StreamingTable {
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_streaming_table(self, context)
+    }
+}
diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs
index 3964d645..fc764b2b 100644
--- a/src/coordinator/plan/visitor.rs
+++ b/src/coordinator/plan/visitor.rs
@@ -12,7 +12,8 @@
 
 use super::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
-    InsertStatementPlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan,
+    LookupTablePlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
+    StreamingTableConnectorPlan,
 };
 
 /// Context passed to PlanVisitor methods
@@ -91,9 +92,21 @@ pub trait PlanVisitor {
         context: &PlanVisitorContext,
     ) -> PlanVisitorResult;
 
-    fn visit_insert_statement_plan(
+    fn visit_streaming_table(
         &self,
-        plan: &InsertStatementPlan,
+        plan: &StreamingTable,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
+
+    fn visit_lookup_table(
+        &self,
+        plan: &LookupTablePlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
+
+    fn visit_streaming_connector_table(
+        &self,
+        plan: &StreamingTableConnectorPlan,
         context: &PlanVisitorContext,
     ) -> PlanVisitorResult;
 }
diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs
index 627ebbaa..15880284 100644
--- a/src/coordinator/statement/mod.rs
+++ b/src/coordinator/statement/mod.rs
@@ -14,20 +14,20 @@ mod create_function;
 mod create_python_function;
 mod create_table;
 mod drop_function;
-mod insert_statement;
 mod show_functions;
 mod start_function;
 mod stop_function;
+mod streaming_table;
 mod visitor;
 
 pub use create_function::{ConfigSource, CreateFunction, FunctionSource};
 pub use create_python_function::{CreatePythonFunction, PythonModule};
 pub use create_table::CreateTable;
 pub use drop_function::DropFunction;
-pub use insert_statement::InsertStatement;
 pub use show_functions::ShowFunctions;
 pub use start_function::StartFunction;
 pub use stop_function::StopFunction;
+pub use streaming_table::StreamingTableStatement;
 pub use visitor::{StatementVisitor, StatementVisitorContext, StatementVisitorResult};
 
 use std::fmt;
diff --git a/src/coordinator/statement/insert_statement.rs b/src/coordinator/statement/streaming_table.rs
similarity index 76%
rename from src/coordinator/statement/insert_statement.rs
rename to src/coordinator/statement/streaming_table.rs
index 45785251..48fd25e9 100644
--- a/src/coordinator/statement/insert_statement.rs
+++ b/src/coordinator/statement/streaming_table.rs
@@ -14,28 +14,28 @@ use datafusion::sql::sqlparser::ast::Statement as DFStatement;
 
 use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
 
-/// Represents an INSERT INTO or standalone SELECT/query statement.
+/// Represents an INSERT INTO or standalone SELECT/query that creates a streaming table/pipeline.
 ///
 /// In the streaming SQL context, both INSERT INTO (writing to a sink)
 /// and standalone SELECT (anonymous computation) are treated as
-/// data-producing operations that feed into the streaming pipeline.
+/// data-producing operations that create/feed into the streaming pipeline.
 #[derive(Debug)]
-pub struct InsertStatement {
+pub struct StreamingTableStatement {
     pub statement: DFStatement,
 }
 
-impl InsertStatement {
+impl StreamingTableStatement {
     pub fn new(statement: DFStatement) -> Self {
         Self { statement }
     }
 }
 
-impl Statement for InsertStatement {
+impl Statement for StreamingTableStatement {
     fn accept(
         &self,
         visitor: &dyn StatementVisitor,
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult {
-        visitor.visit_insert_statement(self, context)
+        visitor.visit_streaming_table_statement(self, context)
     }
 }
diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs
index 27e5a512..1867b603 100644
--- a/src/coordinator/statement/visitor.rs
+++ b/src/coordinator/statement/visitor.rs
@@ -11,8 +11,8 @@
 // limitations under the License.
 
 use super::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, InsertStatement,
-    ShowFunctions, StartFunction, StopFunction,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction,
+    StopFunction, StreamingTableStatement,
 };
 use crate::coordinator::plan::PlanNode;
 use crate::coordinator::statement::Statement;
@@ -95,9 +95,9 @@ pub trait StatementVisitor {
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
 
-    fn visit_insert_statement(
+    fn visit_streaming_table_statement(
         &self,
-        stmt: &InsertStatement,
+        stmt: &StreamingTableStatement,
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
 }
diff --git a/src/coordinator/tool/connector_options.rs b/src/coordinator/tool/connector_options.rs
new file mode 100644
index 00000000..de39872f
--- /dev/null
+++ b/src/coordinator/tool/connector_options.rs
@@ -0,0 +1,360 @@
+use std::collections::HashMap;
+use std::num::{NonZero, NonZeroU64};
+use std::str::FromStr;
+use std::time::Duration;
+
+use datafusion::common::{Result as DFResult, plan_datafusion_err};
+use datafusion::error::DataFusionError;
+use datafusion::sql::sqlparser::ast::{Expr, Ident, SqlOption, Value as SqlValue, ValueWithSpan};
+use tracing::warn;
+
+pub trait FromOpts: Sized {
+    fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self>;
+}
+
+pub struct ConnectorOptions {
+    options: HashMap<String, Expr>,
+    partitions: Vec<Expr>,
+}
+
+impl ConnectorOptions {
+    /// Builds the option set from the key-value pairs of a `WITH` clause plus optional
+    /// partition expressions. Any option that is not a `key = value` pair is a plan error.
+    pub fn new(sql_opts: &[SqlOption], partition_by: &Option<Vec<Expr>>) -> DFResult<Self> {
+        let options = sql_opts
+            .iter()
+            .map(|option| match option {
+                SqlOption::KeyValue { key, value } => Ok((key.value.clone(), value.clone())),
+                other => Err(plan_datafusion_err!(
+                    "invalid with option: '{}'; expected an `=` delimited key-value pair",
+                    other
+                )),
+            })
+            .collect::<DFResult<HashMap<_, _>>>()?;
+
+        Ok(Self {
+            options,
+            partitions: partition_by.clone().unwrap_or_default(),
+        })
+    }
+
+    pub fn partitions(&self) -> &[Expr] {
+        &self.partitions
+    }
+
+    pub fn pull_struct<T: FromOpts>(&mut self) -> DFResult<T> {
+        T::from_opts(self)
+    }
+
+    pub fn pull_opt_str(&mut self, name: &str) -> DFResult<Option<String>> {
+        match self.options.remove(name) {
+            Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::SingleQuotedString(s),
+                span: _,
+            })) => Ok(Some(s)),
+            Some(e) => Err(plan_datafusion_err!(
+                "expected with option '{}' to be a single-quoted string, but it was `{:?}`",
+                name,
+                e
+            )),
+            None => Ok(None),
+        }
+    }
+
+    pub fn pull_str(&mut self, name: &str) -> DFResult<String> {
+        self.pull_opt_str(name)?
+            .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name))
+    }
+
+    pub fn pull_opt_bool(&mut self, name: &str) -> DFResult<Option<bool>> {
+        match self.options.remove(name) {
+            Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::Boolean(b),
+                span: _,
+            })) => Ok(Some(b)),
+            Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::SingleQuotedString(s),
+                span: _,
+            })) => match s.as_str() {
+                "true" | "yes" => Ok(Some(true)),
+                "false" | "no" => Ok(Some(false)),
+                _ => Err(plan_datafusion_err!(
+                    "expected with option '{}' to be a boolean, but it was `'{}'`",
+                    name,
+                    s
+                )),
+            },
+            Some(e) => Err(plan_datafusion_err!(
+                "expected with option '{}' to be a boolean, but it was `{:?}`",
+                name,
+                e
+            )),
+            None => Ok(None),
+        }
+    }
+
+    pub fn pull_opt_u64(&mut self, name: &str) -> DFResult<Option<u64>> {
+        match self.options.remove(name) {
+            Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::Number(s, _),
+                span: _,
+            }))
+            | Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::SingleQuotedString(s),
+                span: _,
+            })) => s.parse::<u64>().map(Some).map_err(|_| {
+                plan_datafusion_err!(
+                    "expected with option '{}' to be an unsigned integer, but it was `{}`",
+                    name,
+                    s
+                )
+            }),
+            Some(e) => Err(plan_datafusion_err!(
+                "expected with option '{}' to be an unsigned integer, but it was `{:?}`",
+                name,
+                e
+            )),
+            None => Ok(None),
+        }
+    }
+
+    pub fn pull_opt_nonzero_u64(&mut self, name: &str) -> DFResult<Option<NonZero<u64>>> {
+        match self.pull_opt_u64(name)? {
+            Some(0) => Err(plan_datafusion_err!(
+                "expected with option '{name}' to be greater than 0, but it was 0"
+            )),
+            Some(i) => Ok(Some(NonZeroU64::new(i).unwrap())),
+            None => Ok(None),
+        }
+    }
+
+    pub fn pull_opt_data_size_bytes(&mut self, name: &str) -> DFResult<Option<u64>> {
+        self.pull_opt_str(name)?
+            .map(|s| {
+                s.parse::<u64>().map_err(|_| {
+                    plan_datafusion_err!(
+                        "expected with option '{}' to be a size in bytes (unsigned integer), but it was `{}`",
+                        name,
+                        s
+                    )
+                })
+            })
+            .transpose()
+    }
+
+    pub fn pull_opt_i64(&mut self, name: &str) -> DFResult<Option<i64>> {
+        match self.options.remove(name) {
+            Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::Number(s, _),
+                span: _,
+            }))
+            | Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::SingleQuotedString(s),
+                span: _,
+            })) => s.parse::<i64>().map(Some).map_err(|_| {
+                plan_datafusion_err!(
+                    "expected with option '{}' to be an integer, but it was `{}`",
+                    name,
+                    s
+                )
+            }),
+            Some(e) => Err(plan_datafusion_err!(
+                "expected with option '{}' to be an integer, but it was `{:?}`",
+                name,
+                e
+            )),
+            None => Ok(None),
+        }
+    }
+
+    pub fn pull_i64(&mut self, name: &str) -> DFResult<i64> {
+        self.pull_opt_i64(name)?
+            .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name))
+    }
+
+    pub fn pull_u64(&mut self, name: &str) -> DFResult<u64> {
+        self.pull_opt_u64(name)?
+            .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name))
+    }
+
+    pub fn pull_opt_f64(&mut self, name: &str) -> DFResult<Option<f64>> {
+        match self.options.remove(name) {
+            Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::Number(s, _),
+                span: _,
+            }))
+            | Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::SingleQuotedString(s),
+                span: _,
+            })) => s.parse::<f64>().map(Some).map_err(|_| {
+                plan_datafusion_err!(
+                    "expected with option '{}' to be a double, but it was `{}`",
+                    name,
+                    s
+                )
+            }),
+            Some(e) => Err(plan_datafusion_err!(
+                "expected with option '{}' to be a double, but it was `{:?}`",
+                name,
+                e
+            )),
+            None => Ok(None),
+        }
+    }
+
+    pub fn pull_f64(&mut self, name: &str) -> DFResult<f64> {
+        self.pull_opt_f64(name)?
+            .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name))
+    }
+
+    pub fn pull_bool(&mut self, name: &str) -> DFResult<bool> {
+        self.pull_opt_bool(name)?
+            .ok_or_else(|| plan_datafusion_err!("required option '{}' not set", name))
+    }
+
+    /// Removes option `name` and parses it as a [`Duration`]; yields `Ok(None)` when it is unset.
+    pub fn pull_opt_duration(&mut self, name: &str) -> DFResult<Option<Duration>> {
+        self.options
+            .remove(name)
+            .map(|expr| duration_from_sql_expr(&expr))
+            .transpose()
+            .map_err(|e| plan_datafusion_err!("in with clause '{name}': {}", e))
+    }
+
+    pub fn pull_opt_field(&mut self, name: &str) -> DFResult<Option<String>> {
+        match self.options.remove(name) {
+            Some(Expr::Value(ValueWithSpan {
+                value: SqlValue::SingleQuotedString(s),
+                span: _,
+            })) => {
+                warn!(
+                    "Referred to a field in `{name}` with a string—this is deprecated and will be unsupported after Arroyo 0.14"
+                );
+                Ok(Some(s))
+            }
+            Some(Expr::Identifier(Ident { value, .. })) => Ok(Some(value)),
+            Some(e) => Err(plan_datafusion_err!(
+                "expected with option '{}' to be a field, but it was `{:?}`",
+                name,
+                e
+            )),
+            None => Ok(None),
+        }
+    }
+
+    pub fn pull_opt_array(&mut self, name: &str) -> Option<Vec<Expr>> {
+        Some(match self.options.remove(name)? {
+            Expr::Value(ValueWithSpan {
+                value: SqlValue::SingleQuotedString(s),
+                span,
+            }) => s
+                .split(',')
+                .map(|p| {
+                    Expr::Value(ValueWithSpan {
+                        value: SqlValue::SingleQuotedString(p.to_string()),
+                        span: span.clone(),
+                    })
+                })
+                .collect(),
+            Expr::Array(a) => a.elem,
+            e => vec![e],
+        })
+    }
+
+    /// Pulls the quoted-string option `name` and parses it via `FromStr`; `Ok(None)` when unset.
+    pub fn pull_opt_parsed<T: FromStr>(&mut self, name: &str) -> DFResult<Option<T>> {
+        self.pull_opt_str(name)?
+            .map(|s| {
+                s.parse()
+                    .map_err(|_| plan_datafusion_err!("invalid value '{s}' for {name}"))
+            })
+            .transpose()
+    }
+
+    pub fn keys(&self) -> impl Iterator<Item = &String> {
+        self.options.keys()
+    }
+
+    pub fn keys_with_prefix<'a, 'b>(
+        &'a self,
+        prefix: &'b str,
+    ) -> impl Iterator<Item = &'a String> + 'b
+    where
+        'a: 'b,
+    {
+        self.options.keys().filter(move |k| k.starts_with(prefix))
+    }
+
+    pub fn insert_str(
+        &mut self,
+        name: impl Into<String>,
+        value: impl Into<String>,
+    ) -> DFResult<Option<String>> {
+        let name = name.into();
+        let value = value.into();
+        let existing = self.pull_opt_str(&name)?;
+        self.options.insert(
+            name,
+            Expr::Value(SqlValue::SingleQuotedString(value).with_empty_span()),
+        );
+        Ok(existing)
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.options.is_empty()
+    }
+
+    pub fn contains_key(&self, key: &str) -> bool {
+        self.options.contains_key(key)
+    }
+}
+
+fn duration_from_sql_expr(expr: &Expr) -> Result<Duration, DataFusionError> {
+    match expr {
+        Expr::Interval(interval) => {
+            let s = match interval.value.as_ref() {
+                Expr::Value(ValueWithSpan {
+                    value: SqlValue::SingleQuotedString(s),
+                    ..
+                }) => s.clone(),
+                other => {
+                    return Err(DataFusionError::Plan(format!(
+                        "expected interval string literal, found {other}"
+                    )));
+                }
+            };
+            parse_interval_to_duration(&s)
+        }
+        Expr::Value(ValueWithSpan {
+            value: SqlValue::SingleQuotedString(s),
+            ..
+        }) => parse_interval_to_duration(s),
+        other => Err(DataFusionError::Plan(format!(
+            "expected an interval expression, found {other}"
+        ))),
+    }
+}
+
+/// Parses `<value> <unit>` (second, minute, hour or day units) into a [`Duration`]. Malformed
+/// text, an unsupported unit, or a value overflowing `u64` seconds yields a plan error.
+fn parse_interval_to_duration(s: &str) -> Result<Duration, DataFusionError> {
+    let parts: Vec<&str> = s.split_whitespace().collect();
+    if parts.len() != 2 {
+        return Err(DataFusionError::Plan(format!(
+            "invalid interval string '{s}'; expected '<value> <unit>'"
+        )));
+    }
+    let value: u64 = parts[0]
+        .parse()
+        .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {}", parts[0])))?;
+    let seconds_per_unit: u64 = match parts[1].to_lowercase().as_str() {
+        "second" | "seconds" | "s" => 1,
+        "minute" | "minutes" | "min" => 60,
+        "hour" | "hours" | "h" => 3600,
+        "day" | "days" | "d" => 86400,
+        unit => return Err(plan_datafusion_err!("unsupported interval unit '{unit}'")),
+    };
+    let too_large = || plan_datafusion_err!("interval '{s}' is too large");
+    let seconds = value.checked_mul(seconds_per_unit).ok_or_else(too_large)?;
+    Ok(Duration::from_secs(seconds))
+}
diff --git a/src/coordinator/tool/mod.rs b/src/coordinator/tool/mod.rs
new file mode 100644
index 00000000..95d6a7ed
--- /dev/null
+++ b/src/coordinator/tool/mod.rs
@@ -0,0 +1,3 @@
+mod connector_options;
+
+pub use connector_options::{ConnectorOptions, FromOpts};
diff --git a/src/sql/catalog/table.rs b/src/sql/catalog/table.rs
index b1d60028..a997680b 100644
--- a/src/sql/catalog/table.rs
+++ b/src/sql/catalog/table.rs
@@ -32,19 +32,11 @@ pub enum Table {
     LookupTable(ConnectorTable),
     /// A source/sink table backed by an external connector.
     ConnectorTable(ConnectorTable),
-    /// An in-memory table with an optional logical plan (for views).
-    MemoryTable {
-        name: String,
-        fields: Vec<FieldRef>,
-        logical_plan: Option<LogicalPlan>,
-    },
     /// A table defined by a query (CREATE VIEW / CREATE TABLE AS SELECT).
     TableFromQuery {
         name: String,
         logical_plan: LogicalPlan,
     },
-    /// A preview sink for debugging/inspection.
-    PreviewSink { logical_plan: LogicalPlan },
 }
 
 impl Table {
@@ -56,44 +48,10 @@ impl Table {
         use datafusion::logical_expr::{CreateMemoryTable, CreateView, DdlStatement};
         use datafusion::sql::sqlparser::ast::CreateTable;
 
-        if let Statement::CreateTable(CreateTable {
-            name,
-            columns,
-            query: None,
-            ..
-        }) = statement
-        {
-            let name = name.to_string();
-
-            if columns.is_empty() {
-                return plan_err!("CREATE TABLE requires at least one column");
-            }
-
-            let fields: Vec<FieldRef> = columns
-                .iter()
-                .map(|col| {
-                    let data_type = crate::sql::types::convert_data_type(&col.data_type)
-                        .map(|(dt, _)| dt)
-                        .unwrap_or(datafusion::arrow::datatypes::DataType::Utf8);
-                    let nullable = !col.options.iter().any(|opt| {
-                        matches!(
-                            opt.option,
-                            datafusion::sql::sqlparser::ast::ColumnOption::NotNull
-                        )
-                    });
-                    Arc::new(datafusion::arrow::datatypes::Field::new(
-                        col.name.value.clone(),
-                        data_type,
-                        nullable,
-                    ))
-                })
-                .collect();
-
-            return Ok(Some(Table::MemoryTable {
-                name,
-                fields,
-                logical_plan: None,
-            }));
+        if let Statement::CreateTable(CreateTable { query: None, .. }) = statement {
+            return plan_err!(
+                "CREATE TABLE without AS SELECT is not supported; use CREATE TABLE ... AS SELECT or a connector table"
+            );
         }
 
         match produce_optimized_plan(statement, schema_provider) {
@@ -124,15 +82,13 @@ impl Table {
 
     pub fn name(&self) -> &str {
         match self {
-            Table::MemoryTable { name, .. } | Table::TableFromQuery { name, .. } => name.as_str(),
+            Table::TableFromQuery { name, .. } => name.as_str(),
             Table::ConnectorTable(c) | Table::LookupTable(c) => c.name.as_str(),
-            Table::PreviewSink { .. } => "preview",
         }
     }
 
     pub fn get_fields(&self) -> Vec<FieldRef> {
         match self {
-            Table::MemoryTable { fields, .. } => fields.clone(),
             Table::ConnectorTable(ConnectorTable {
                 fields,
                 inferred_fields,
@@ -151,9 +107,6 @@ impl Table {
             Table::TableFromQuery { logical_plan, .. } => {
                 logical_plan.schema().fields().iter().cloned().collect()
             }
-            Table::PreviewSink { logical_plan } => {
-                logical_plan.schema().fields().iter().cloned().collect()
-            }
         }
     }
 
@@ -187,9 +140,7 @@ impl Table {
     pub fn connector_op(&self) -> Result<super::connector::ConnectorOp> {
         match self {
             Table::ConnectorTable(c) | Table::LookupTable(c) => Ok(c.connector_op()),
-            Table::MemoryTable { .. } => plan_err!("can't write to a memory table"),
             Table::TableFromQuery { .. } => plan_err!("can't write to a query-defined table"),
-            Table::PreviewSink { .. } => Ok(super::connector::ConnectorOp::new("preview", "")),
         }
     }
 
diff --git a/src/sql/planner/extension/sink.rs b/src/sql/planner/extension/sink.rs
index 7820925f..e73a8383 100644
--- a/src/sql/planner/extension/sink.rs
+++ b/src/sql/planner/extension/sink.rs
@@ -41,11 +41,7 @@ impl SinkExtension {
                 }
             }
             Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"),
-            Table::MemoryTable { .. } => return plan_err!("memory tables not supported as sinks"),
             Table::TableFromQuery { .. } => {}
-            Table::PreviewSink { .. } => {
-                // preview sinks may also need debezium wrapping for updating inputs
-            }
         }
 
         Self::add_remote_if_necessary(&schema, &mut input);
@@ -123,10 +119,7 @@ impl UserDefinedLogicalNodeCore for SinkExtension {
 
 impl StreamExtension for SinkExtension {
     fn node_name(&self) -> Option<NamedNode> {
-        match &self.table {
-            Table::PreviewSink { .. } => None,
-            _ => Some(NamedNode::Sink(self.name.clone())),
-        }
+        Some(NamedNode::Sink(self.name.clone()))
     }
 
     fn output_schema(&self) -> StreamSchema {
diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs
index 6bf8d357..c85c0fb2 100644
--- a/src/sql/planner/mod.rs
+++ b/src/sql/planner/mod.rs
@@ -152,7 +152,7 @@ fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap<NamedNode, Vec<Logic
     sink_inputs
 }
 
-fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<LogicalPlan> {
+pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<LogicalPlan> {
     let LogicalPlan::Extension(ref ext) = plan else {
         return Ok(plan);
     };
@@ -307,13 +307,6 @@ pub async fn parse_and_get_arrow_program(
                             Arc::new(plan_rewrite),
                         )
                     }
-                    CatalogTable::MemoryTable { logical_plan, .. } => {
-                        if logical_plan.is_some() {
-                            return plan_err!("Can only insert into a memory table once");
-                        }
-                        logical_plan.replace(plan_rewrite);
-                        continue;
-                    }
                     CatalogTable::LookupTable(_) => {
                         plan_err!("lookup (temporary) tables cannot be inserted into")
                     }
@@ -322,19 +315,13 @@ pub async fn parse_and_get_arrow_program(
                             "shouldn't be inserting more data into a table made with CREATE TABLE AS"
                         )
                     }
-                    CatalogTable::PreviewSink { .. } => {
-                        plan_err!("queries shouldn't be able insert into preview sink.")
-                    }
                 }
             }
-            None => SinkExtension::new(
-                TableReference::parse_str("preview"),
-                CatalogTable::PreviewSink {
-                    logical_plan: plan_rewrite.clone(),
-                },
-                plan_rewrite.schema().clone(),
-                Arc::new(plan_rewrite),
-            ),
+            None => {
+                return plan_err!(
+                    "Anonymous query is not supported; use INSERT INTO <sink> SELECT ..."
+                );
+            }
         };
         extensions.push(LogicalPlan::Extension(Extension {
             node: Arc::new(sink?),
diff --git a/src/sql/planner/parse.rs b/src/sql/planner/parse.rs
index a3af2e89..bdb4d481 100644
--- a/src/sql/planner/parse.rs
+++ b/src/sql/planner/parse.rs
@@ -19,8 +19,8 @@ use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
 use datafusion::sql::sqlparser::parser::Parser;
 
 use crate::coordinator::{
-    CreateFunction, CreateTable, DropFunction, InsertStatement, ShowFunctions, StartFunction,
-    Statement as CoordinatorStatement, StopFunction,
+    CreateFunction, CreateTable, DropFunction, ShowFunctions, StartFunction,
+    Statement as CoordinatorStatement, StopFunction, StreamingTableStatement,
 };
 
 /// Stage 1: String → Vec<Box<dyn Statement>>
@@ -45,13 +45,6 @@ pub fn parse_sql(query: &str) -> Result<Vec<Box<dyn CoordinatorStatement>>> {
     statements.into_iter().map(classify_statement).collect()
 }
 
-/// Classify a parsed DataFusion Statement into the coordinator's Statement type.
-///
-/// Statement classification mirrors the analysis flow from `parse_and_get_arrow_program`:
-///   - FunctionStream DDL → concrete coordinator types (CreateFunction, DropFunction, etc.)
-///   - CREATE TABLE / CREATE VIEW → CreateTable (catalog registration)
-///   - INSERT INTO → InsertStatement (streaming pipeline)
-///   - Everything else → error (unsupported)
 fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>> {
     match stmt {
         DFStatement::CreateFunctionWith { options } => {
@@ -70,10 +63,10 @@ fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>
             Ok(Box::new(DropFunction::new(name)))
         }
         DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())),
-        s @ DFStatement::CreateTable(_) | s @ DFStatement::CreateView { .. } => {
-            Ok(Box::new(CreateTable::new(s)))
+        s @ DFStatement::CreateTable(_) => Ok(Box::new(CreateTable::new(s))),
+        s @ DFStatement::CreateStreamingTable { .. } => {
+            Ok(Box::new(StreamingTableStatement::new(s)))
         }
-        s @ DFStatement::Insert(_) => Ok(Box::new(InsertStatement::new(s))),
         other => plan_err!("Unsupported SQL statement: {other}"),
     }
 }
@@ -154,7 +147,7 @@ mod tests {
     #[test]
     fn test_parse_insert_statement() {
         let stmt = first_stmt("INSERT INTO sink SELECT * FROM source");
-        assert!(is_type(stmt.as_ref(), "InsertStatement"));
+        assert!(is_type(stmt.as_ref(), "CreateStreamingTableStatement"));
     }
 
     #[test]
@@ -179,7 +172,7 @@ mod tests {
         let stmts = parse_sql(sql).unwrap();
         assert_eq!(stmts.len(), 2);
         assert!(is_type(stmts[0].as_ref(), "CreateTable"));
-        assert!(is_type(stmts[1].as_ref(), "InsertStatement"));
+        assert!(is_type(stmts[1].as_ref(), "CreateStreamingTableStatement"));
     }
 
     #[test]
diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs
index d497ca65..83891731 100644
--- a/src/sql/planner/plan/mod.rs
+++ b/src/sql/planner/plan/mod.rs
@@ -1,39 +1,24 @@
-use std::collections::HashSet;
-use std::sync::Arc;
+use datafusion::common::Result;
+use datafusion::common::tree_node::TreeNode;
+use datafusion::logical_expr::LogicalPlan;
 
-use datafusion::common::tree_node::{Transformed, TreeNodeRecursion};
-use datafusion::common::{
-    Column, DataFusionError, Result, Spans, TableReference, plan_err,
-    tree_node::{TreeNode, TreeNodeRewriter, TreeNodeVisitor},
-};
-use datafusion::logical_expr::{
-    Aggregate, Expr, Extension, Filter, LogicalPlan, SubqueryAlias, expr::Alias,
-};
-
-use crate::sql::planner::extension::StreamExtension;
-use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension};
-use crate::sql::planner::extension::join::JOIN_NODE_NAME;
-use crate::sql::planner::extension::remote_table::RemoteTableExtension;
-use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
-use crate::sql::types::{
-    DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
-};
+use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::rewrite::TimeWindowUdfChecker;
 
 use self::aggregate::AggregateRewriter;
 use self::join::JoinRewriter;
+use self::stream_rewriter::StreamRewriter;
+use self::window_detecting_visitor::{WindowDetectingVisitor, extract_column};
 use self::window_fn::WindowFunctionRewriter;
 
 pub(crate) mod aggregate;
 pub(crate) mod join;
+pub(crate) mod stream_rewriter;
+pub(crate) mod window_detecting_visitor;
 pub(crate) mod window_fn;
 
-use super::StreamSchemaProvider;
 use tracing::debug;
 
-/// Stage 3: LogicalPlan → Streaming LogicalPlan
-///
-/// Rewrites a standard DataFusion logical plan into one that supports
-/// streaming semantics (timestamps, windows, watermarks).
 pub fn rewrite_plan(
     plan: LogicalPlan,
     schema_provider: &StreamSchemaProvider,
@@ -51,399 +36,3 @@ pub fn rewrite_plan(
 
     Ok(rewritten_plan.data)
 }
-
-/// Visitor that detects window types in a logical plan
-#[derive(Debug, Default)]
-pub(crate) struct WindowDetectingVisitor {
-    pub(crate) window: Option<WindowType>,
-    pub(crate) fields: HashSet<DFField>,
-}
-
-impl WindowDetectingVisitor {
-    pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result<Option<WindowType>> {
-        let mut visitor = WindowDetectingVisitor {
-            window: None,
-            fields: HashSet::new(),
-        };
-        logical_plan.visit_with_subqueries(&mut visitor)?;
-        Ok(visitor.window.take())
-    }
-}
-
-fn extract_column(expr: &Expr) -> Option<&Column> {
-    match expr {
-        Expr::Column(column) => Some(column),
-        Expr::Alias(Alias { expr, .. }) => extract_column(expr),
-        _ => None,
-    }
-}
-
-impl TreeNodeVisitor<'_> for WindowDetectingVisitor {
-    type Node = LogicalPlan;
-
-    fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
-        let LogicalPlan::Extension(Extension { node }) = node else {
-            return Ok(TreeNodeRecursion::Continue);
-        };
-
-        if node.name() == JOIN_NODE_NAME {
-            let input_windows: HashSet<_> = node
-                .inputs()
-                .iter()
-                .map(|input| Self::get_window(input))
-                .collect::<Result<HashSet<_>>>()?;
-            if input_windows.len() > 1 {
-                return Err(DataFusionError::Plan(
-                    "can't handle mixed windowing between left and right".to_string(),
-                ));
-            }
-            self.window = input_windows
-                .into_iter()
-                .next()
-                .expect("join has at least one input");
-            return Ok(TreeNodeRecursion::Jump);
-        }
-        Ok(TreeNodeRecursion::Continue)
-    }
-
-    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
-        match node {
-            LogicalPlan::Projection(projection) => {
-                let window_expressions = projection
-                    .expr
-                    .iter()
-                    .enumerate()
-                    .filter_map(|(index, expr)| {
-                        if let Some(column) = extract_column(expr) {
-                            let input_field = projection
-                                .input
-                                .schema()
-                                .field_with_name(column.relation.as_ref(), &column.name);
-                            let input_field = match input_field {
-                                Ok(field) => field,
-                                Err(err) => return Some(Err(err)),
-                            };
-                            if self.fields.contains(
-                                &(column.relation.clone(), Arc::new(input_field.clone())).into(),
-                            ) {
-                                return self.window.clone().map(|window| Ok((index, window)));
-                            }
-                        }
-                        find_window(expr)
-                            .map(|option| option.map(|inner| (index, inner)))
-                            .transpose()
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                self.fields.clear();
-                for (index, window) in window_expressions {
-                    if let Some(existing_window) = &self.window {
-                        if *existing_window != window {
-                            return plan_err!(
-                                "can't window by both {:?} and {:?}",
-                                existing_window,
-                                window
-                            );
-                        }
-                        self.fields
-                            .insert(projection.schema.qualified_field(index).into());
-                    } else {
-                        return plan_err!(
-                            "can't call a windowing function without grouping by it in an aggregate"
-                        );
-                    }
-                }
-            }
-            LogicalPlan::SubqueryAlias(subquery_alias) => {
-                self.fields = self
-                    .fields
-                    .drain()
-                    .map(|field| {
-                        Ok(subquery_alias
-                            .schema
-                            .qualified_field(
-                                subquery_alias
-                                    .input
-                                    .schema()
-                                    .index_of_column(&field.qualified_column())?,
-                            )
-                            .into())
-                    })
-                    .collect::<Result<HashSet<_>>>()?;
-            }
-            LogicalPlan::Aggregate(Aggregate {
-                input,
-                group_expr,
-                aggr_expr: _,
-                schema,
-                ..
-            }) => {
-                let window_expressions = group_expr
-                    .iter()
-                    .enumerate()
-                    .filter_map(|(index, expr)| {
-                        if let Some(column) = extract_column(expr) {
-                            let input_field = input
-                                .schema()
-                                .field_with_name(column.relation.as_ref(), &column.name);
-                            let input_field = match input_field {
-                                Ok(field) => field,
-                                Err(err) => return Some(Err(err)),
-                            };
-                            if self
-                                .fields
-                                .contains(&(column.relation.as_ref(), input_field).into())
-                            {
-                                return self.window.clone().map(|window| Ok((index, window)));
-                            }
-                        }
-                        find_window(expr)
-                            .map(|option| option.map(|inner| (index, inner)))
-                            .transpose()
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                self.fields.clear();
-                for (index, window) in window_expressions {
-                    if let Some(existing_window) = &self.window {
-                        if *existing_window != window {
-                            return Err(DataFusionError::Plan(
-                                "window expressions do not match".to_string(),
-                            ));
-                        }
-                    } else {
-                        self.window = Some(window);
-                    }
-                    self.fields.insert(schema.qualified_field(index).into());
-                }
-            }
-            LogicalPlan::Extension(Extension { node }) => {
-                if node.name() == AGGREGATE_EXTENSION_NAME {
-                    let aggregate_extension = node
-                        .as_any()
-                        .downcast_ref::<AggregateExtension>()
-                        .expect("should be aggregate extension");
-
-                    match &aggregate_extension.window_behavior {
-                        WindowBehavior::FromOperator {
-                            window,
-                            window_field,
-                            window_index: _,
-                            is_nested,
-                        } => {
-                            if self.window.is_some() && !*is_nested {
-                                return Err(DataFusionError::Plan(
-                                    "aggregate node should not be recalculating window, as input is windowed.".to_string(),
-                                ));
-                            }
-                            self.window = Some(window.clone());
-                            self.fields.insert(window_field.clone());
-                        }
-                        WindowBehavior::InData => {
-                            let input_fields = self.fields.clone();
-                            self.fields.clear();
-                            for field in fields_with_qualifiers(node.schema()) {
-                                if input_fields.contains(&field) {
-                                    self.fields.insert(field);
-                                }
-                            }
-                            if self.fields.is_empty() {
-                                return Err(DataFusionError::Plan(
-                                    "must have window in aggregate. Make sure you are calling one of the windowing functions (hop, tumble, session) or using the window field of the input".to_string(),
-                                ));
-                            }
-                        }
-                    }
-                }
-            }
-            _ => {}
-        }
-        Ok(TreeNodeRecursion::Continue)
-    }
-}
-
-/// Main rewriter for streaming SQL plans.
-/// Rewrites standard logical plans into streaming-aware plans with
-/// timestamp propagation, window detection, and streaming operator insertion.
-pub struct StreamRewriter<'a> {
-    pub(crate) schema_provider: &'a StreamSchemaProvider,
-}
-
-impl TreeNodeRewriter for StreamRewriter<'_> {
-    type Node = LogicalPlan;
-
-    fn f_up(&mut self, mut node: Self::Node) -> Result<Transformed<Self::Node>> {
-        match node {
-            LogicalPlan::Projection(ref mut projection) => {
-                if !has_timestamp_field(&projection.schema) {
-                    let timestamp_field: DFField = projection
-                        .input
-                        .schema()
-                        .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
-                        .map_err(|_| {
-                            DataFusionError::Plan(format!(
-                                "No timestamp field found in projection input ({})",
-                                projection.input.display()
-                            ))
-                        })?
-                        .into();
-                    projection.schema = add_timestamp_field(
-                        projection.schema.clone(),
-                        timestamp_field.qualifier().cloned(),
-                    )
-                    .expect("in projection");
-                    projection.expr.push(Expr::Column(Column {
-                        relation: timestamp_field.qualifier().cloned(),
-                        name: TIMESTAMP_FIELD.to_string(),
-                        spans: Spans::default(),
-                    }));
-                }
-
-                // Rewrite row_time() calls to _timestamp column references
-                let rewritten = projection
-                    .expr
-                    .iter()
-                    .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {}))
-                    .collect::<Result<Vec<_>>>()?;
-                if rewritten.iter().any(|r| r.transformed) {
-                    projection.expr = rewritten.into_iter().map(|r| r.data).collect();
-                }
-                return Ok(Transformed::yes(node));
-            }
-            LogicalPlan::Aggregate(aggregate) => {
-                return AggregateRewriter {
-                    schema_provider: self.schema_provider,
-                }
-                .f_up(LogicalPlan::Aggregate(aggregate));
-            }
-            LogicalPlan::Join(join) => {
-                return JoinRewriter {
-                    schema_provider: self.schema_provider,
-                }
-                .f_up(LogicalPlan::Join(join));
-            }
-            LogicalPlan::Filter(f) => {
-                let expr = f
-                    .predicate
-                    .clone()
-                    .rewrite(&mut TimeWindowNullCheckRemover {})?;
-                return Ok(if expr.transformed {
-                    Transformed::yes(LogicalPlan::Filter(Filter::try_new(expr.data, f.input)?))
-                } else {
-                    Transformed::no(LogicalPlan::Filter(f))
-                });
-            }
-            LogicalPlan::Window(_) => {
-                return WindowFunctionRewriter {}.f_up(node);
-            }
-            LogicalPlan::Sort(_) => {
-                return plan_err!(
-                    "ORDER BY is not currently supported in streaming SQL ({})",
-                    node.display()
-                );
-            }
-            LogicalPlan::Repartition(_) => {
-                return plan_err!(
-                    "Repartitions are not currently supported ({})",
-                    node.display()
-                );
-            }
-            LogicalPlan::Union(mut union) => {
-                union.schema = union.inputs[0].schema().clone();
-                for input in union.inputs.iter_mut() {
-                    if let LogicalPlan::Extension(Extension { node }) = input.as_ref() {
-                        let stream_extension: &dyn StreamExtension = node.try_into().unwrap();
-                        if !stream_extension.transparent() {
-                            continue;
-                        }
-                    }
-                    let remote_table_extension = Arc::new(RemoteTableExtension {
-                        input: input.as_ref().clone(),
-                        name: TableReference::bare("union_input"),
-                        schema: union.schema.clone(),
-                        materialize: false,
-                    });
-                    *input = Arc::new(LogicalPlan::Extension(Extension {
-                        node: remote_table_extension,
-                    }));
-                }
-                return Ok(Transformed::yes(LogicalPlan::Union(union)));
-            }
-            LogicalPlan::SubqueryAlias(sa) => {
-                return Ok(Transformed::yes(LogicalPlan::SubqueryAlias(
-                    SubqueryAlias::try_new(sa.input, sa.alias)?,
-                )));
-            }
-            LogicalPlan::Limit(_) => {
-                return plan_err!(
-                    "LIMIT is not currently supported in streaming SQL ({})",
-                    node.display()
-                );
-            }
-            LogicalPlan::Explain(_) => {
-                return plan_err!("EXPLAIN is not supported ({})", node.display());
-            }
-            LogicalPlan::Analyze(_) => {
-                return plan_err!("ANALYZE is not supported ({})", node.display());
-            }
-            _ => {}
-        }
-        Ok(Transformed::no(node))
-    }
-}
-
-/// Rewrites row_time() function calls to _timestamp column references
-struct RowTimeRewriter;
-
-impl TreeNodeRewriter for RowTimeRewriter {
-    type Node = Expr;
-
-    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
-        if let Expr::ScalarFunction(ref func) = node {
-            if func.func.name() == "row_time" {
-                return Ok(Transformed::yes(Expr::Column(Column::new_unqualified(
-                    TIMESTAMP_FIELD.to_string(),
-                ))));
-            }
-        }
-        Ok(Transformed::no(node))
-    }
-}
-
-/// Removes IS NOT NULL checks on window expressions that get pushed down incorrectly
-pub(crate) struct TimeWindowNullCheckRemover;
-
-impl TreeNodeRewriter for TimeWindowNullCheckRemover {
-    type Node = Expr;
-
-    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
-        if let Expr::IsNotNull(ref inner) = node {
-            if find_window(inner)?.is_some() {
-                return Ok(Transformed::yes(Expr::Literal(
-                    datafusion::common::ScalarValue::Boolean(Some(true)),
-                    None,
-                )));
-            }
-        }
-        Ok(Transformed::no(node))
-    }
-}
-
-/// Checks that window UDFs (tumble/hop/session) are not used outside aggregates
-pub(crate) struct TimeWindowUdfChecker;
-
-impl TreeNodeVisitor<'_> for TimeWindowUdfChecker {
-    type Node = LogicalPlan;
-
-    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
-        if let LogicalPlan::Projection(projection) = node {
-            for expr in &projection.expr {
-                if let Some(window) = find_window(expr)? {
-                    return plan_err!(
-                        "Window function {:?} can only be used as a GROUP BY expression in an aggregate",
-                        window
-                    );
-                }
-            }
-        }
-        Ok(TreeNodeRecursion::Continue)
-    }
-}
diff --git a/src/sql/planner/plan/stream_rewriter.rs b/src/sql/planner/plan/stream_rewriter.rs
new file mode 100644
index 00000000..53549af4
--- /dev/null
+++ b/src/sql/planner/plan/stream_rewriter.rs
@@ -0,0 +1,148 @@
+use std::sync::Arc;
+
+use crate::sql::planner::extension::StreamExtension;
+use crate::sql::planner::extension::remote_table::RemoteTableExtension;
+use crate::sql::planner::plan::{
+    aggregate::AggregateRewriter, join::JoinRewriter, window_fn::WindowFunctionRewriter,
+};
+use crate::sql::planner::rewrite::{RowTimeRewriter, TimeWindowNullCheckRemover};
+use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
+use crate::sql::types::{DFField, TIMESTAMP_FIELD};
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{Column, DataFusionError, Result, Spans, TableReference, plan_err};
+use datafusion::logical_expr::{Expr, Extension, Filter, LogicalPlan, SubqueryAlias};
+use datafusion_common::tree_node::TreeNode;
+
+use super::StreamSchemaProvider;
+
+/// Bottom-up logical-plan rewriter for streaming SQL.
+///
+/// Holds a borrowed [`StreamSchemaProvider`], which it passes on to the
+/// aggregate and join rewriters it delegates to.
+pub struct StreamRewriter<'a> {
+    pub(crate) schema_provider: &'a StreamSchemaProvider,
+}
+
+impl<'a> StreamRewriter<'a> {
+    /// Creates a rewriter that borrows `schema_provider` for its whole lifetime.
+    pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self {
+        Self { schema_provider }
+    }
+}
+
+impl TreeNodeRewriter for StreamRewriter<'_> {
+    type Node = LogicalPlan;
+
+    /// Rewrites a single plan node on the way back up the tree.
+    ///
+    /// - `Projection`: when the output schema has no timestamp field, the
+    ///   `TIMESTAMP_FIELD` column of the input is appended to both the schema
+    ///   and the expression list; every expression is then run through
+    ///   `RowTimeRewriter`.
+    /// - `Aggregate`, `Join`, `Window`: delegated to `AggregateRewriter`,
+    ///   `JoinRewriter` and `WindowFunctionRewriter` respectively.
+    /// - `Filter`: the predicate is run through `TimeWindowNullCheckRemover`
+    ///   and the filter is rebuilt only if that changed it.
+    /// - `Union`: takes the schema of its first input and wraps every input
+    ///   in a non-materialized `RemoteTableExtension`, except extension
+    ///   inputs that report themselves as non-transparent.
+    /// - `SubqueryAlias`: rebuilt with `SubqueryAlias::try_new` from its
+    ///   input and alias.
+    /// - Every other node is returned unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Returns a plan error for `Sort`, `Repartition`, `Limit`, `Explain` and
+    /// `Analyze` nodes, for a projection whose input has no timestamp field,
+    /// for a union without inputs, and for a union input that is an extension
+    /// node which cannot be viewed as a `StreamExtension`. Errors from the
+    /// delegated rewriters are propagated as-is.
+    fn f_up(&mut self, mut node: Self::Node) -> Result<Transformed<Self::Node>> {
+        match node {
+            LogicalPlan::Projection(ref mut projection) => {
+                if !has_timestamp_field(&projection.schema) {
+                    let timestamp_field: DFField = projection
+                        .input
+                        .schema()
+                        .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
+                        .map_err(|_| {
+                            DataFusionError::Plan(format!(
+                                "No timestamp field found in projection input ({})",
+                                projection.input.display()
+                            ))
+                        })?;
+                        .into();
+                    // Propagate schema-construction failures to the caller
+                    // instead of panicking inside the planner.
+                    projection.schema = add_timestamp_field(
+                        projection.schema.clone(),
+                        timestamp_field.qualifier().cloned(),
+                    )?;
+                    projection.expr.push(Expr::Column(Column {
+                        relation: timestamp_field.qualifier().cloned(),
+                        name: TIMESTAMP_FIELD.to_string(),
+                        spans: Spans::default(),
+                    }));
+                }
+
+                let rewritten = projection
+                    .expr
+                    .iter()
+                    .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {}))
+                    .collect::<Result<Vec<_>>>()?;
+                // Only replace the expression list when a rewrite actually fired.
+                if rewritten.iter().any(|r| r.transformed) {
+                    projection.expr = rewritten.into_iter().map(|r| r.data).collect();
+                }
+                return Ok(Transformed::yes(node));
+            }
+            LogicalPlan::Aggregate(aggregate) => {
+                return AggregateRewriter {
+                    schema_provider: self.schema_provider,
+                }
+                .f_up(LogicalPlan::Aggregate(aggregate));
+            }
+            LogicalPlan::Join(join) => {
+                return JoinRewriter {
+                    schema_provider: self.schema_provider,
+                }
+                .f_up(LogicalPlan::Join(join));
+            }
+            LogicalPlan::Filter(f) => {
+                let expr = f
+                    .predicate
+                    .clone()
+                    .rewrite(&mut TimeWindowNullCheckRemover {})?;
+                return Ok(if expr.transformed {
+                    Transformed::yes(LogicalPlan::Filter(Filter::try_new(expr.data, f.input)?))
+                } else {
+                    Transformed::no(LogicalPlan::Filter(f))
+                });
+            }
+            LogicalPlan::Window(_) => {
+                return WindowFunctionRewriter {}.f_up(node);
+            }
+            LogicalPlan::Sort(_) => {
+                return plan_err!(
+                    "ORDER BY is not currently supported in streaming SQL ({})",
+                    node.display()
+                );
+            }
+            LogicalPlan::Repartition(_) => {
+                return plan_err!(
+                    "Repartitions are not currently supported ({})",
+                    node.display()
+                );
+            }
+            LogicalPlan::Union(mut union) => {
+                // Checked access: an empty input list becomes a plan error
+                // rather than an index-out-of-bounds panic.
+                let Some(first_input) = union.inputs.first() else {
+                    return plan_err!("UNION must have at least one input");
+                };
+                union.schema = first_input.schema().clone();
+                for input in union.inputs.iter_mut() {
+                    if let LogicalPlan::Extension(Extension { node }) = input.as_ref() {
+                        // Pin the target type first so the error type is known
+                        // before it is formatted into the plan error.
+                        let converted: std::result::Result<&dyn StreamExtension, _> =
+                            node.try_into();
+                        let stream_extension = converted.map_err(|err| {
+                            DataFusionError::Plan(format!(
+                                "union input is not a streaming extension node: {err:?}"
+                            ))
+                        })?;
+                        // Non-transparent extensions are left as they are.
+                        if !stream_extension.transparent() {
+                            continue;
+                        }
+                    }
+                    let remote_table_extension = Arc::new(RemoteTableExtension {
+                        input: input.as_ref().clone(),
+                        name: TableReference::bare("union_input"),
+                        schema: union.schema.clone(),
+                        materialize: false,
+                    });
+                    *input = Arc::new(LogicalPlan::Extension(Extension {
+                        node: remote_table_extension,
+                    }));
+                }
+                return Ok(Transformed::yes(LogicalPlan::Union(union)));
+            }
+            LogicalPlan::SubqueryAlias(sa) => {
+                return Ok(Transformed::yes(LogicalPlan::SubqueryAlias(
+                    SubqueryAlias::try_new(sa.input, sa.alias)?,
+                )));
+            }
+            LogicalPlan::Limit(_) => {
+                return plan_err!(
+                    "LIMIT is not currently supported in streaming SQL ({})",
+                    node.display()
+                );
+            }
+            LogicalPlan::Explain(_) => {
+                return plan_err!("EXPLAIN is not supported ({})", node.display());
+            }
+            LogicalPlan::Analyze(_) => {
+                return plan_err!("ANALYZE is not supported ({})", node.display());
+            }
+            _ => {}
+        }
+        Ok(Transformed::no(node))
+    }
+}
diff --git a/src/sql/planner/plan/window_detecting_visitor.rs b/src/sql/planner/plan/window_detecting_visitor.rs
new file mode 100644
index 00000000..0a0a0323
--- /dev/null
+++ b/src/sql/planner/plan/window_detecting_visitor.rs
@@ -0,0 +1,215 @@
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use datafusion::common::{
+    Column, DataFusionError, Result,
+    tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
+};
+use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias};
+
+use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension};
+use crate::sql::planner::extension::join::JOIN_NODE_NAME;
+use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window};
+
+/// Plan visitor that tracks which window, if any, a logical plan is windowed by.
+#[derive(Debug, Default)]
+pub(crate) struct WindowDetectingVisitor {
+    /// The window detected so far; `None` until one has been seen.
+    pub(crate) window: Option<WindowType>,
+    /// Fields of the most recently processed node that correspond to the
+    /// detected window (rebuilt as the visitor moves up the plan).
+    pub(crate) fields: HashSet<DFField>,
+}
+
+impl WindowDetectingVisitor {
+    /// Visits `logical_plan` (including subqueries) with a fresh visitor and
+    /// returns the window it detected, or `None` if the plan is not windowed.
+    pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result<Option<WindowType>> {
+        let mut detector = Self::default();
+        logical_plan.visit_with_subqueries(&mut detector)?;
+        let Self { window, .. } = detector;
+        Ok(window)
+    }
+}
+
+/// Returns the column an expression refers to, looking through any number of
+/// nested aliases; yields `None` for every other kind of expression.
+pub(crate) fn extract_column(expr: &Expr) -> Option<&Column> {
+    let mut current = expr;
+    loop {
+        match current {
+            Expr::Alias(Alias { expr: inner, .. }) => current = &**inner,
+            Expr::Column(column) => return Some(column),
+            _ => return None,
+        }
+    }
+}
+
+impl TreeNodeVisitor<'_> for WindowDetectingVisitor {
+    type Node = LogicalPlan;
+
+    /// Pre-order hook; only extension nodes named `JOIN_NODE_NAME` are handled.
+    ///
+    /// For such a node the window of every input is computed with a fresh
+    /// visitor. More than one distinct result is a plan error; otherwise the
+    /// single shared result becomes `self.window` and
+    /// `TreeNodeRecursion::Jump` is returned. All other nodes continue the
+    /// traversal unchanged.
+    fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        let LogicalPlan::Extension(Extension { node }) = node else {
+            return Ok(TreeNodeRecursion::Continue);
+        };
+
+        if node.name() == JOIN_NODE_NAME {
+            // Collecting into a set de-duplicates equal windows, so its size
+            // tells whether the inputs disagree.
+            let input_windows: HashSet<_> = node
+                .inputs()
+                .iter()
+                .map(|input| Self::get_window(input))
+                .collect::<Result<HashSet<_>>>()?;
+            if input_windows.len() > 1 {
+                return Err(DataFusionError::Plan(
+                    "can't handle mixed windowing between left and right".to_string(),
+                ));
+            }
+            // NOTE(review): panics if the join node reports no inputs.
+            self.window = input_windows
+                .into_iter()
+                .next()
+                .expect("join has at least one input");
+            return Ok(TreeNodeRecursion::Jump);
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+
+    /// Post-order hook that carries the detected window through projections,
+    /// subquery aliases, aggregates and aggregate extension nodes.
+    ///
+    /// Returns a plan error when two different windows are found, when an
+    /// aggregate extension would recompute a window over already-windowed
+    /// input, or when an in-data aggregate extension has no window field.
+    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        match node {
+            LogicalPlan::Projection(projection) => {
+                // Pair each projected expression that is windowed with its
+                // output index.
+                let window_expressions = projection
+                    .expr
+                    .iter()
+                    .enumerate()
+                    .filter_map(|(index, expr)| {
+                        // A (possibly aliased) column that refers to a tracked
+                        // field takes the window currently held by the visitor.
+                        if let Some(column) = extract_column(expr) {
+                            let input_field = projection
+                                .input
+                                .schema()
+                                .field_with_name(column.relation.as_ref(), &column.name);
+                            let input_field = match input_field {
+                                Ok(field) => field,
+                                Err(err) => return Some(Err(err)),
+                            };
+                            if self.fields.contains(
+                                &(column.relation.clone(), Arc::new(input_field.clone())).into(),
+                            ) {
+                                return self.window.clone().map(|window| Ok((index, window)));
+                            }
+                        }
+                        // Otherwise ask `find_window` whether the expression
+                        // itself yields a window.
+                        find_window(expr)
+                            .map(|option| option.map(|inner| (index, inner)))
+                            .transpose()
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+                // Tracked fields are rebuilt from this node's output schema.
+                self.fields.clear();
+                for (index, window) in window_expressions {
+                    // Any window found must equal the one already recorded.
+                    if let Some(existing_window) = &self.window {
+                        if *existing_window != window {
+                            return Err(DataFusionError::Plan(
+                                "window expressions do not match".to_string(),
+                            ));
+                        }
+                    } else {
+                        self.window = Some(window);
+                    }
+                    self.fields
+                        .insert(projection.schema.qualified_field(index).into());
+                }
+            }
+            LogicalPlan::SubqueryAlias(subquery_alias) => {
+                // Map each tracked field, via its column index in the input
+                // schema, to the field at the same index in the alias schema.
+                self.fields = self
+                    .fields
+                    .drain()
+                    .map(|field| {
+                        Ok(subquery_alias
+                            .schema
+                            .qualified_field(
+                                subquery_alias
+                                    .input
+                                    .schema()
+                                    .index_of_column(&field.qualified_column())?,
+                            )
+                            .into())
+                    })
+                    .collect::<Result<HashSet<_>>>()?;
+            }
+            LogicalPlan::Aggregate(Aggregate {
+                input,
+                group_expr,
+                aggr_expr: _,
+                schema,
+                ..
+            }) => {
+                // Same detection as for projections, applied to the GROUP BY
+                // expressions only.
+                let window_expressions = group_expr
+                    .iter()
+                    .enumerate()
+                    .filter_map(|(index, expr)| {
+                        if let Some(column) = extract_column(expr) {
+                            let input_field = input
+                                .schema()
+                                .field_with_name(column.relation.as_ref(), &column.name);
+                            let input_field = match input_field {
+                                Ok(field) => field,
+                                Err(err) => return Some(Err(err)),
+                            };
+                            if self
+                                .fields
+                                .contains(&(column.relation.as_ref(), input_field).into())
+                            {
+                                return self.window.clone().map(|window| Ok((index, window)));
+                            }
+                        }
+                        find_window(expr)
+                            .map(|option| option.map(|inner| (index, inner)))
+                            .transpose()
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+                self.fields.clear();
+                for (index, window) in window_expressions {
+                    if let Some(existing_window) = &self.window {
+                        if *existing_window != window {
+                            return Err(DataFusionError::Plan(
+                                "window expressions do not match".to_string(),
+                            ));
+                        }
+                    } else {
+                        self.window = Some(window);
+                    }
+                    self.fields.insert(schema.qualified_field(index).into());
+                }
+            }
+            LogicalPlan::Extension(Extension { node }) => {
+                if node.name() == AGGREGATE_EXTENSION_NAME {
+                    // NOTE(review): panics if a node with this name is not an
+                    // `AggregateExtension`.
+                    let aggregate_extension = node
+                        .as_any()
+                        .downcast_ref::<AggregateExtension>()
+                        .expect("should be aggregate extension");
+
+                    match &aggregate_extension.window_behavior {
+                        WindowBehavior::FromOperator {
+                            window,
+                            window_field,
+                            window_index: _,
+                            is_nested,
+                        } => {
+                            // A window already recorded from the input is only
+                            // accepted when the operator is flagged as nested.
+                            if self.window.is_some() && !*is_nested {
+                                return Err(DataFusionError::Plan(
+                                    "aggregate node should not be recalculating window, as input is windowed.".to_string(),
+                                ));
+                            }
+                            self.window = Some(window.clone());
+                            self.fields.insert(window_field.clone());
+                        }
+                        WindowBehavior::InData => {
+                            // Keep only the tracked fields that also appear in
+                            // the extension's schema; at least one must remain.
+                            let input_fields = self.fields.clone();
+                            self.fields.clear();
+                            for field in fields_with_qualifiers(node.schema()) {
+                                if input_fields.contains(&field) {
+                                    self.fields.insert(field);
+                                }
+                            }
+                            if self.fields.is_empty() {
+                                return Err(DataFusionError::Plan(
+                                    "must have window in aggregate. Make sure you are calling one of the windowing functions (hop, tumble, session) or using the window field of the input".to_string(),
+                                ));
+                            }
+                        }
+                    }
+                }
+            }
+            _ => {}
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
diff --git a/src/sql/planner/rewrite/source_rewriter.rs b/src/sql/planner/rewrite/source_rewriter.rs
index 209c3288..27281b41 100644
--- a/src/sql/planner/rewrite/source_rewriter.rs
+++ b/src/sql/planner/rewrite/source_rewriter.rs
@@ -23,7 +23,6 @@ use datafusion::logical_expr::{
 use crate::sql::catalog::connector_table::ConnectorTable;
 use crate::sql::catalog::field_spec::FieldSpec;
 use crate::sql::catalog::table::Table;
-use crate::sql::catalog::utils::add_timestamp_field;
 use crate::sql::planner::StreamSchemaProvider;
 use crate::sql::planner::extension::remote_table::RemoteTableExtension;
 use crate::sql::planner::extension::watermark_node::WatermarkNode;
@@ -227,7 +226,7 @@ impl TreeNodeRewriter for SourceRewriter<'_> {
     type Node = LogicalPlan;
 
     fn f_up(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
-        let LogicalPlan::TableScan(mut table_scan) = node else {
+        let LogicalPlan::TableScan(table_scan) = node else {
             return Ok(Transformed::no(node));
         };
 
@@ -243,30 +242,10 @@ impl TreeNodeRewriter for SourceRewriter<'_> {
                 // TODO: implement LookupSource extension
                 plan_err!("Lookup tables are not yet supported")
             }
-            Table::MemoryTable {
-                name,
-                fields: _,
-                logical_plan,
-            } => {
-                let Some(logical_plan) = logical_plan else {
-                    return plan_err!(
-                        "Can't query from memory table {} without first inserting into it",
-                        name
-                    );
-                };
-                table_scan.projected_schema = add_timestamp_field(
-                    table_scan.projected_schema.clone(),
-                    Some(table_scan.table_name.clone()),
-                )?;
-                self.mutate_table_from_query(&table_scan, logical_plan)
-            }
             Table::TableFromQuery {
                 name: _,
                 logical_plan,
             } => self.mutate_table_from_query(&table_scan, logical_plan),
-            Table::PreviewSink { .. } => Err(DataFusionError::Plan(
-                "can't select from a preview sink".to_string(),
-            )),
         }
     }
 }

From 1821c0f50cd3686e88e41fec6b8d0d295cd8e5c4 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Thu, 19 Mar 2026 00:33:57 +0800
Subject: [PATCH 06/44] update

---
 src/coordinator/execution/executor.rs         |   4 +
 src/coordinator/plan/logical_plan_visitor.rs  | 199 ++++++-----
 src/sql/planner/plan/aggregate.rs             | 275 ----------------
 src/sql/planner/plan/aggregate_rewriter.rs    | 262 +++++++++++++++
 src/sql/planner/plan/join.rs                  | 240 --------------
 src/sql/planner/plan/join_rewriter.rs         | 224 +++++++++++++
 src/sql/planner/plan/mod.rs                   |  68 ++--
 src/sql/planner/plan/row_time_rewriter.rs     |  36 ++
 src/sql/planner/plan/stream_rewriter.rs       | 311 +++++++++++-------
 .../planner/plan/streaming_window_analzer.rs  | 203 ++++++++++++
 .../planner/plan/window_detecting_visitor.rs  | 215 ------------
 src/sql/planner/plan/window_fn.rs             | 178 ----------
 .../planner/plan/window_function_rewriter.rs  | 191 +++++++++++
 src/sql/planner/rewrite/mod.rs                |   2 -
 src/sql/planner/rewrite/row_time.rs           |  39 ---
 15 files changed, 1264 insertions(+), 1183 deletions(-)
 delete mode 100644 src/sql/planner/plan/aggregate.rs
 create mode 100644 src/sql/planner/plan/aggregate_rewriter.rs
 delete mode 100644 src/sql/planner/plan/join.rs
 create mode 100644 src/sql/planner/plan/join_rewriter.rs
 create mode 100644 src/sql/planner/plan/row_time_rewriter.rs
 create mode 100644 src/sql/planner/plan/streaming_window_analzer.rs
 delete mode 100644 src/sql/planner/plan/window_detecting_visitor.rs
 delete mode 100644 src/sql/planner/plan/window_fn.rs
 create mode 100644 src/sql/planner/plan/window_function_rewriter.rs
 delete mode 100644 src/sql/planner/rewrite/row_time.rs

diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 056f0236..2dfb6326 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -22,6 +22,10 @@ use crate::runtime::taskexecutor::TaskManager;
 use std::sync::Arc;
 use thiserror::Error;
 use tracing::{debug, info};
+use crate::datastream::logical::{LogicalProgram, ProgramConfig};
+use crate::datastream::optimizers::ChainingOptimizer;
+use crate::sql::CompiledSql;
+use crate::sql::planner::{physical_planner, rewrite_sinks};
 
 #[derive(Error, Debug)]
 pub enum ExecuteError {
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index fb8c8c82..dfcf2e10 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -37,7 +37,7 @@ use crate::sql::catalog::field_spec::FieldSpec;
 use crate::sql::catalog::optimizer::produce_optimized_plan;
 use crate::sql::functions::{is_json_union, serialize_outgoing_json};
 use crate::sql::planner::extension::sink::SinkExtension;
-use crate::sql::planner::{StreamSchemaProvider, maybe_add_key_extension_to_sink};
+use crate::sql::planner::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks};
 use crate::sql::rewrite_plan;
 
 const CONNECTOR: &str = "connector";
@@ -78,98 +78,121 @@ impl LogicalPlanVisitor {
             _ => panic!("LogicalPlanVisitor should return Plan"),
         }
     }
+    /// Builds the logical plan for 'CREATE STREAMING TABLE'.
+    /// This orchestrates the transformation from a SQL Query to a stateful Sink.
     fn build_create_streaming_table_plan(
         &self,
         stmt: &StreamingTableStatement,
     ) -> Result<Box<dyn PlanNode>> {
-        let statement = &stmt.statement;
-        match statement {
-            DFStatement::CreateStreamingTable {
-                name,
-                with_options,
-                comment,
-                query,
-            } => {
-                let name_str = name.to_string();
-
-                let mut connector_opts = ConnectorOptions::new(with_options, &None)?;
-                let connector_type = connector_opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| {
-                    plan_datafusion_err!(
-                        "Streaming Table '{}' must specify '{}' option",
-                        name_str,
-                        CONNECTOR
-                    )
-                })?;
-
-                let synthetic_statement = Statement::Query(query.clone());
-                let base_plan =
-                    produce_optimized_plan(&synthetic_statement, &self.schema_provider)?;
-
-                let mut plan_rewrite = rewrite_plan(base_plan, &self.schema_provider)?;
-
-                if plan_rewrite
-                    .schema()
-                    .fields()
-                    .iter()
-                    .any(|f| is_json_union(f.data_type()))
-                {
-                    plan_rewrite =
-                        serialize_outgoing_json(&self.schema_provider, Arc::new(plan_rewrite));
-                }
-
-                let fields: Vec<FieldSpec> = plan_rewrite
-                    .schema()
-                    .fields()
-                    .iter()
-                    .map(|f| FieldSpec::Struct((**f).clone()))
-                    .collect();
-
-                let partition_exprs =
-                    if let Some(partition_cols) = connector_opts.pull_opt_str(PARTITION_BY)? {
-                        let cols: Vec<Expr> =
-                            partition_cols.split(',').map(|c| col(c.trim())).collect();
-                        Some(cols)
-                    } else {
-                        None
-                    };
-
-                let connector_table = ConnectorTable {
-                    id: None,
-                    connector: connector_type,
-                    name: name_str.clone(),
-                    connection_type: ConnectionType::Sink,
-                    fields,
-                    config: "".to_string(),
-                    description: comment.clone().unwrap_or_default(),
-                    event_time_field: None,
-                    watermark_field: None,
-                    idle_time: connector_opts.pull_opt_duration(IDLE_MICROS)?,
-                    primary_keys: Arc::new(vec![]),
-                    inferred_fields: None,
-                    partition_exprs: Arc::new(partition_exprs),
-                };
-
-                let sink_extension = SinkExtension::new(
-                    TableReference::bare(name_str.clone()),
-                    Table::ConnectorTable(connector_table.clone()),
-                    plan_rewrite.schema().clone(),
-                    Arc::new(plan_rewrite),
-                )?;
-
-                let final_plan =
-                    maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension {
-                        node: Arc::new(sink_extension),
-                    }))?;
-
-                Ok(Box::new(StreamingTable {
-                    name: name_str,
-                    comment: comment.clone(),
-                    connector_table,
-                    logical_plan: final_plan,
-                }))
-            }
-            _ => plan_err!("Only CREATE STREAMING TABLE supported"),
+        let DFStatement::CreateStreamingTable {
+            name,
+            with_options,
+            comment,
+            query,
+        } = &stmt.statement
+        else {
+            return plan_err!("Only CREATE STREAMING TABLE is supported in this context");
+        };
+
+        let table_name = name.to_string();
+        debug!("Compiling Streaming Table Sink for: {}", table_name);
+
+        // 1. Connector Options Extraction
+        // Extract 'connector' (Kafka, Postgres, etc.) and other physical properties.
+        let mut opts = ConnectorOptions::new(with_options, &None)?;
+        let connector = opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| {
+            plan_datafusion_err!(
+                "Streaming Table '{}' must specify the '{}' option",
+                table_name,
+                CONNECTOR
+            )
+        })?;
+
+        // 2. Query Optimization & Streaming Rewrite
+        // Convert the standard SQL query into a streaming-aware logical plan.
+        let base_plan =
+            produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?;
+        let mut plan = rewrite_plan(base_plan, &self.schema_provider)?;
+
+        // 3. Outgoing Data Serialization
+        // If the query produces internal types (like JSON Union), inject a serialization layer.
+        if plan
+            .schema()
+            .fields()
+            .iter()
+            .any(|f| is_json_union(f.data_type()))
+        {
+            plan = serialize_outgoing_json(&self.schema_provider, Arc::new(plan));
         }
+
+        // 4. Sink Metadata & Partitioning Logic
+        // Determine how data should be partitioned before hitting the external system.
+        let partition_exprs = self.resolve_partition_expressions(&mut opts)?;
+
+        // Map DataFusion fields to Arroyo FieldSpecs for the connector.
+        let fields: Vec<FieldSpec> = plan
+            .schema()
+            .fields()
+            .iter()
+            .map(|f| FieldSpec::Struct((**f).clone()))
+            .collect();
+
+        // 5. Connector Table Construction
+        // This object acts as the 'Identity Card' for the Sink in the physical cluster.
+        let connector_table = ConnectorTable {
+            id: None,
+            connector,
+            name: table_name.clone(),
+            connection_type: ConnectionType::Sink,
+            fields,
+            config: "".to_string(), // Filled by the coordinator later
+            description: comment.clone().unwrap_or_default(),
+            event_time_field: None,
+            watermark_field: None,
+            idle_time: opts.pull_opt_duration(IDLE_MICROS)?,
+            primary_keys: Arc::new(vec![]), // PKs are inferred or explicitly set here
+            inferred_fields: None,
+            partition_exprs: Arc::new(partition_exprs),
+        };
+
+        // 6. Sink Extension & Final Rewrites
+        // Wrap the plan in a SinkExtension and ensure Key/Partition alignment.
+        let sink_extension = SinkExtension::new(
+            TableReference::bare(table_name.clone()),
+            Table::ConnectorTable(connector_table.clone()),
+            plan.schema().clone(),
+            Arc::new(plan),
+        )?;
+
+        // Ensure the data distribution matches the Sink's requirements (e.g., Shuffle by Partition Key)
+        let plan_with_keys = maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension {
+            node: Arc::new(sink_extension),
+        }))?;
+
+        // Global pass to wire inputs and handle shared sub-plans
+        let final_extensions = rewrite_sinks(vec![plan_with_keys])?;
+        let final_plan = final_extensions.into_iter().next().unwrap();
+
+        Ok(Box::new(StreamingTable {
+            name: table_name,
+            comment: comment.clone(),
+            connector_table,
+            logical_plan: final_plan,
+        }))
+    }
+
+    /// Pulls the `PARTITION_BY` option out of `opts` and converts its
+    /// comma-separated column names into column expressions.
+    ///
+    /// Returns `Ok(None)` when the option is absent. Each name is trimmed
+    /// before being passed to `col`; errors from reading the option are
+    /// propagated.
+    fn resolve_partition_expressions(
+        &self,
+        opts: &mut ConnectorOptions,
+    ) -> Result<Option<Vec<Expr>>> {
+        let Some(partition_cols) = opts.pull_opt_str(PARTITION_BY)? else {
+            return Ok(None);
+        };
+        let partition_exprs: Vec<Expr> = partition_cols
+            .split(',')
+            .map(|name| col(name.trim()))
+            .collect();
+        Ok(Some(partition_exprs))
+    }
 }
 
diff --git a/src/sql/planner/plan/aggregate.rs b/src/sql/planner/plan/aggregate.rs
deleted file mode 100644
index aad17edb..00000000
--- a/src/sql/planner/plan/aggregate.rs
+++ /dev/null
@@ -1,275 +0,0 @@
-use std::sync::Arc;
-
-use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
-use datafusion::common::{DFSchema, Result, not_impl_err, plan_err};
-use datafusion::functions_aggregate::expr_fn::max;
-use datafusion::logical_expr;
-use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan};
-use datafusion::prelude::col;
-use tracing::debug;
-
-use crate::sql::planner::StreamSchemaProvider;
-use crate::sql::planner::extension::aggregate::AggregateExtension;
-use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
-use crate::sql::planner::plan::WindowDetectingVisitor;
-use crate::sql::types::{
-    DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
-    schema_from_df_fields_with_metadata,
-};
-
-pub(crate) struct AggregateRewriter<'a> {
-    pub schema_provider: &'a StreamSchemaProvider,
-}
-
-impl AggregateRewriter<'_> {
-    /// Rewrite a non-windowed aggregate into an updating aggregate with key calculation
-    pub fn rewrite_non_windowed_aggregate(
-        input: Arc<LogicalPlan>,
-        mut key_fields: Vec<DFField>,
-        group_expr: Vec<Expr>,
-        mut aggr_expr: Vec<Expr>,
-        schema: Arc<DFSchema>,
-        _schema_provider: &StreamSchemaProvider,
-    ) -> Result<Transformed<LogicalPlan>> {
-        let key_count = key_fields.len();
-        key_fields.extend(fields_with_qualifiers(input.schema()));
-
-        let key_schema = Arc::new(schema_from_df_fields_with_metadata(
-            &key_fields,
-            schema.metadata().clone(),
-        )?);
-
-        let mut key_projection_expressions: Vec<_> = group_expr
-            .iter()
-            .zip(key_fields.iter())
-            .map(|(expr, f)| expr.clone().alias(f.name().to_string()))
-            .collect();
-
-        key_projection_expressions.extend(
-            fields_with_qualifiers(input.schema())
-                .iter()
-                .map(|field| Expr::Column(field.qualified_column())),
-        );
-
-        let key_projection =
-            LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema(
-                key_projection_expressions,
-                input.clone(),
-                key_schema,
-            )?);
-
-        let key_plan = LogicalPlan::Extension(Extension {
-            node: Arc::new(KeyCalculationExtension::new(
-                key_projection,
-                KeysOrExprs::Keys((0..key_count).collect()),
-            )),
-        });
-
-        let Ok(timestamp_field) = key_plan
-            .schema()
-            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
-        else {
-            return plan_err!("no timestamp field found in schema");
-        };
-
-        let timestamp_field: DFField = timestamp_field.into();
-        let column = timestamp_field.qualified_column();
-        aggr_expr.push(max(col(column.clone())).alias(TIMESTAMP_FIELD));
-
-        let mut output_schema_fields = fields_with_qualifiers(&schema);
-        output_schema_fields.push(timestamp_field.clone());
-        let output_schema = Arc::new(schema_from_df_fields_with_metadata(
-            &output_schema_fields,
-            schema.metadata().clone(),
-        )?);
-
-        let aggregate = Aggregate::try_new_with_schema(
-            Arc::new(key_plan),
-            group_expr,
-            aggr_expr,
-            output_schema,
-        )?;
-
-        debug!(
-            "non-windowed aggregate field names: {:?}",
-            aggregate
-                .schema
-                .fields()
-                .iter()
-                .map(|f| f.name())
-                .collect::<Vec<_>>()
-        );
-
-        let final_plan = LogicalPlan::Aggregate(aggregate);
-        Ok(Transformed::yes(final_plan))
-    }
-}
-
-impl TreeNodeRewriter for AggregateRewriter<'_> {
-    type Node = LogicalPlan;
-
-    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
-        let LogicalPlan::Aggregate(Aggregate {
-            input,
-            mut group_expr,
-            aggr_expr,
-            schema,
-            ..
-        }) = node
-        else {
-            return Ok(Transformed::no(node));
-        };
-
-        let mut window_group_expr: Vec<_> = group_expr
-            .iter()
-            .enumerate()
-            .filter_map(|(i, expr)| {
-                find_window(expr)
-                    .map(|option| option.map(|inner| (i, inner)))
-                    .transpose()
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        if window_group_expr.len() > 1 {
-            return not_impl_err!(
-                "do not support {} window expressions in group by",
-                window_group_expr.len()
-            );
-        }
-
-        let mut key_fields: Vec<DFField> = fields_with_qualifiers(&schema)
-            .iter()
-            .take(group_expr.len())
-            .map(|field| {
-                DFField::new(
-                    field.qualifier().cloned(),
-                    format!("_key_{}", field.name()),
-                    field.data_type().clone(),
-                    field.is_nullable(),
-                )
-            })
-            .collect();
-
-        let mut window_detecting_visitor = WindowDetectingVisitor::default();
-        input.visit_with_subqueries(&mut window_detecting_visitor)?;
-
-        let window = window_detecting_visitor.window;
-        let window_behavior = match (window.is_some(), !window_group_expr.is_empty()) {
-            (true, true) => {
-                let input_window = window.unwrap();
-                let (window_index, group_by_window_type) = window_group_expr.pop().unwrap();
-                if group_by_window_type != input_window {
-                    return Err(datafusion::error::DataFusionError::NotImplemented(
-                        "window in group by does not match input window".to_string(),
-                    ));
-                }
-                let matching_field = window_detecting_visitor.fields.iter().next();
-                match matching_field {
-                    Some(field) => {
-                        group_expr[window_index] = Expr::Column(field.qualified_column());
-                        WindowBehavior::InData
-                    }
-                    None => {
-                        if matches!(input_window, WindowType::Session { .. }) {
-                            return plan_err!("can't reinvoke session window in nested aggregates");
-                        }
-                        group_expr.remove(window_index);
-                        key_fields.remove(window_index);
-                        let window_field = schema.qualified_field(window_index).into();
-                        WindowBehavior::FromOperator {
-                            window: input_window,
-                            window_field,
-                            window_index,
-                            is_nested: true,
-                        }
-                    }
-                }
-            }
-            (true, false) => WindowBehavior::InData,
-            (false, true) => {
-                let (window_index, window_type) = window_group_expr.pop().unwrap();
-                group_expr.remove(window_index);
-                key_fields.remove(window_index);
-                let window_field = schema.qualified_field(window_index).into();
-                WindowBehavior::FromOperator {
-                    window: window_type,
-                    window_field,
-                    window_index,
-                    is_nested: false,
-                }
-            }
-            (false, false) => {
-                return Self::rewrite_non_windowed_aggregate(
-                    input,
-                    key_fields,
-                    group_expr,
-                    aggr_expr,
-                    schema,
-                    self.schema_provider,
-                );
-            }
-        };
-
-        let key_count = key_fields.len();
-        key_fields.extend(fields_with_qualifiers(input.schema()));
-
-        let key_schema = Arc::new(schema_from_df_fields_with_metadata(
-            &key_fields,
-            schema.metadata().clone(),
-        )?);
-
-        let mut key_projection_expressions: Vec<_> = group_expr
-            .iter()
-            .zip(key_fields.iter())
-            .map(|(expr, f)| expr.clone().alias(f.name().to_string()))
-            .collect();
-
-        key_projection_expressions.extend(
-            fields_with_qualifiers(input.schema())
-                .iter()
-                .map(|field| Expr::Column(field.qualified_column())),
-        );
-
-        let key_projection =
-            LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema(
-                key_projection_expressions,
-                input.clone(),
-                key_schema,
-            )?);
-
-        let key_plan = LogicalPlan::Extension(Extension {
-            node: Arc::new(KeyCalculationExtension::new(
-                key_projection,
-                KeysOrExprs::Keys((0..key_count).collect()),
-            )),
-        });
-
-        let mut aggregate_schema_fields = fields_with_qualifiers(&schema);
-        if let WindowBehavior::FromOperator { window_index, .. } = &window_behavior {
-            aggregate_schema_fields.remove(*window_index);
-        }
-        let internal_schema = Arc::new(schema_from_df_fields_with_metadata(
-            &aggregate_schema_fields,
-            schema.metadata().clone(),
-        )?);
-
-        let rewritten_aggregate = Aggregate::try_new_with_schema(
-            Arc::new(key_plan),
-            group_expr,
-            aggr_expr,
-            internal_schema,
-        )?;
-
-        let aggregate_extension = AggregateExtension::new(
-            window_behavior,
-            LogicalPlan::Aggregate(rewritten_aggregate),
-            (0..key_count).collect(),
-        );
-        let final_plan = LogicalPlan::Extension(Extension {
-            node: Arc::new(aggregate_extension),
-        });
-
-        WindowDetectingVisitor::get_window(&final_plan)?;
-        Ok(Transformed::yes(final_plan))
-    }
-}
diff --git a/src/sql/planner/plan/aggregate_rewriter.rs b/src/sql/planner/plan/aggregate_rewriter.rs
new file mode 100644
index 00000000..802fa180
--- /dev/null
+++ b/src/sql/planner/plan/aggregate_rewriter.rs
@@ -0,0 +1,262 @@
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err};
+use datafusion::functions_aggregate::expr_fn::max;
+use datafusion::logical_expr::{self, Aggregate, Expr, Extension, LogicalPlan, Projection};
+use datafusion::prelude::col;
+use std::sync::Arc;
+
+use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::extension::aggregate::AggregateExtension;
+use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::planner::plan::streaming_window_analzer::StreamingWindowAnalzer;
+use crate::sql::types::{
+    DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
+    schema_from_df_fields_with_metadata,
+};
+
+/// Plan rewriter that replaces `LogicalPlan::Aggregate` nodes with keyed streaming forms:
+/// a windowed `AggregateExtension`, or an updating `Aggregate` carrying a max timestamp.
+pub(crate) struct AggregateRewriter<'a> {
+    pub schema_provider: &'a StreamSchemaProvider,
+}
+
+impl TreeNodeRewriter for AggregateRewriter<'_> {
+    type Node = LogicalPlan;
+
+    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
+        let LogicalPlan::Aggregate(mut agg) = node else { // Only Aggregate nodes are rewritten.
+            return Ok(Transformed::no(node));
+        };
+
+        // 1. Collect (index, window) for each GROUP BY expression that `find_window` recognizes.
+        let mut window_exprs: Vec<_> = agg
+            .group_expr
+            .iter()
+            .enumerate()
+            .filter_map(|(i, e)| find_window(e).map(|opt| opt.map(|w| (i, w))).transpose())
+            .collect::<Result<Vec<_>>>()?;
+
+        if window_exprs.len() > 1 {
+            return not_impl_err!("Streaming aggregates support at most one window expression");
+        }
+
+        // 2. Derive one `_key_`-prefixed field per GROUP BY expression from the output schema.
+        let mut key_fields: Vec<DFField> = fields_with_qualifiers(&agg.schema)
+            .iter()
+            .take(agg.group_expr.len())
+            .map(|f| {
+                DFField::new(
+                    f.qualifier().cloned(),
+                    format!("_key_{}", f.name()),
+                    f.data_type().clone(),
+                    f.is_nullable(),
+                )
+            })
+            .collect();
+
+        // 3. No window in GROUP BY and none on the input: rewrite as an updating aggregate.
+        let input_window = StreamingWindowAnalzer::get_window(&agg.input)?;
+        if window_exprs.is_empty() && input_window.is_none() {
+            return self.rewrite_as_updating_aggregate(
+                agg.input,
+                key_fields,
+                agg.group_expr,
+                agg.aggr_expr,
+                agg.schema,
+            );
+        }
+
+        // 4. Decide the window behavior; this may replace or remove the window GROUP BY entry.
+        let behavior = self.resolve_window_context(
+            &agg.input,
+            &mut agg.group_expr,
+            &agg.schema,
+            &mut window_exprs,
+        )?;
+
+        // The window expression was dropped from GROUP BY, so drop its key field as well.
+        if let WindowBehavior::FromOperator { window_index, .. } = &behavior {
+            key_fields.remove(*window_index);
+        }
+
+        let key_count = key_fields.len();
+        let keyed_input =
+            self.build_keyed_input(agg.input.clone(), &agg.group_expr, &key_fields)?;
+
+        // 5. Rebuild the aggregate over the keyed input and wrap it in an AggregateExtension.
+        let mut internal_fields = fields_with_qualifiers(&agg.schema);
+        if let WindowBehavior::FromOperator { window_index, .. } = &behavior {
+            internal_fields.remove(*window_index); // inner schema omits the window column
+        }
+        let internal_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &internal_fields,
+            agg.schema.metadata().clone(),
+        )?);
+
+        let rewritten_agg = Aggregate::try_new_with_schema(
+            Arc::new(keyed_input),
+            agg.group_expr,
+            agg.aggr_expr,
+            internal_schema,
+        )?;
+
+        let extension = AggregateExtension::new(
+            behavior,
+            LogicalPlan::Aggregate(rewritten_agg),
+            (0..key_count).collect(),
+        );
+
+        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+            node: Arc::new(extension),
+        })))
+    }
+}
+
+impl<'a> AggregateRewriter<'a> {
+    pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self {
+        Self { schema_provider } // Only stores the borrowed provider.
+    }
+
+    /// Builds the keyed input for an aggregate: a projection emitting one aliased key column
+    /// per GROUP BY expression ahead of all input columns, inside a `KeyCalculationExtension`.
+    fn build_keyed_input(
+        &self,
+        input: Arc<LogicalPlan>,
+        group_expr: &[Expr],
+        key_fields: &[DFField],
+    ) -> Result<LogicalPlan> {
+        // Projection schema: the key fields first, then the input fields unchanged.
+        let schema_fields: Vec<DFField> = key_fields
+            .iter()
+            .cloned()
+            .chain(fields_with_qualifiers(input.schema()))
+            .collect();
+        let projected_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &schema_fields,
+            input.schema().metadata().clone(),
+        )?);
+
+        // Projection expressions in the same order: aliased keys, then input columns.
+        let aliased_keys = group_expr
+            .iter()
+            .zip(key_fields)
+            .map(|(expr, key)| expr.clone().alias(key.name().to_string()));
+        let input_columns = fields_with_qualifiers(input.schema())
+            .into_iter()
+            .map(|field| Expr::Column(field.qualified_column()));
+        let projection_exprs: Vec<Expr> = aliased_keys.chain(input_columns).collect();
+
+        let keyed_projection =
+            Projection::try_new_with_schema(projection_exprs, input, projected_schema)?;
+        // The first `group_expr.len()` projected columns are declared as the key columns.
+        let key_node = KeyCalculationExtension::new(
+            LogicalPlan::Projection(keyed_projection),
+            KeysOrExprs::Keys((0..group_expr.len()).collect()),
+        );
+        Ok(LogicalPlan::Extension(Extension {
+            node: Arc::new(key_node),
+        }))
+    }
+
+    /// Rewrites a GROUP BY without any window into a plain `Aggregate` over a keyed input,
+    /// adding `max(<timestamp column>)` aliased as `TIMESTAMP_FIELD` to the aggregate output.
+    fn rewrite_as_updating_aggregate(
+        &self,
+        input: Arc<LogicalPlan>,
+        key_fields: Vec<DFField>,
+        group_expr: Vec<Expr>,
+        mut aggr_expr: Vec<Expr>,
+        schema: Arc<DFSchema>,
+    ) -> Result<Transformed<LogicalPlan>> {
+        let keyed_plan = self.build_keyed_input(input, &group_expr, &key_fields)?;
+
+        // The keyed input must expose a timestamp column; planning fails without one.
+        let Ok(ts_lookup) = keyed_plan
+            .schema()
+            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
+        else {
+            return Err(DataFusionError::Plan(
+                "Required _timestamp field missing for updating aggregate".to_string(),
+            ));
+        };
+        let ts_field: DFField = ts_lookup.into();
+
+        // Track the latest timestamp seen per group as an extra aggregate expression.
+        aggr_expr.push(max(col(ts_field.qualified_column())).alias(TIMESTAMP_FIELD));
+
+        // Output schema: the original aggregate fields followed by the timestamp field.
+        let mut result_fields = fields_with_qualifiers(&schema);
+        result_fields.push(ts_field);
+        let result_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &result_fields,
+            schema.metadata().clone(),
+        )?);
+
+        let updating_aggregate = Aggregate::try_new_with_schema(
+            Arc::new(keyed_plan),
+            group_expr,
+            aggr_expr,
+            result_schema,
+        )?;
+        Ok(Transformed::yes(LogicalPlan::Aggregate(updating_aggregate)))
+    }
+
+    /// Reconciles the window found on `input` with the window expression (if any) recorded in
+    /// `window_expr_info`, replacing or removing that GROUP BY entry in `group_expr` as needed.
+    /// Errors on mismatched windows, nested session windows, or when no window exists at all.
+    fn resolve_window_context(
+        &self,
+        input: &LogicalPlan,
+        group_expr: &mut Vec<Expr>,
+        schema: &DFSchema,
+        window_expr_info: &mut Vec<(usize, WindowType)>,
+    ) -> Result<WindowBehavior> {
+        let mut visitor = StreamingWindowAnalzer::default();
+        input.visit_with_subqueries(&mut visitor)?;
+
+        match (visitor.window, window_expr_info.pop()) {
+            // Input is windowed and GROUP BY names a window: the two must match.
+            (Some(i_win), Some((idx, g_win))) => {
+                if i_win != g_win {
+                    return plan_err!(
+                        "Inconsistent windowing: input is {:?}, but group by is {:?}",
+                        i_win,
+                        g_win
+                    );
+                }
+                if let Some(field) = visitor.fields.iter().next() {
+                    // The input exposes the window as a column; group by that column.
+                    group_expr[idx] = Expr::Column(field.qualified_column());
+                    Ok(WindowBehavior::InData)
+                } else {
+                    if matches!(i_win, WindowType::Session { .. }) {
+                        return plan_err!("Nested session windows are not supported");
+                    }
+                    group_expr.remove(idx);
+                    Ok(WindowBehavior::FromOperator {
+                        window: i_win,
+                        window_field: schema.qualified_field(idx).into(),
+                        window_index: idx,
+                        is_nested: true,
+                    })
+                }
+            }
+            // The window is introduced by this aggregate's GROUP BY.
+            (None, Some((idx, g_win))) => {
+                group_expr.remove(idx);
+                Ok(WindowBehavior::FromOperator {
+                    window: g_win,
+                    window_field: schema.qualified_field(idx).into(),
+                    window_index: idx,
+                    is_nested: false,
+                })
+            }
+            // Input is already windowed and GROUP BY adds no window of its own.
+            (Some(_), None) => Ok(WindowBehavior::InData),
+            // Callers are expected to route this case elsewhere; report it instead of panicking.
+            (None, None) => Err(DataFusionError::Internal(
+                "resolve_window_context called without any window".to_string(),
+            )),
+        }
+    }
+}
diff --git a/src/sql/planner/plan/join.rs b/src/sql/planner/plan/join.rs
deleted file mode 100644
index 04a27e9b..00000000
--- a/src/sql/planner/plan/join.rs
+++ /dev/null
@@ -1,240 +0,0 @@
-use std::sync::Arc;
-
-use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
-use datafusion::common::{
-    Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference,
-    not_impl_err,
-};
-use datafusion::logical_expr;
-use datafusion::logical_expr::expr::Alias;
-use datafusion::logical_expr::{
-    BinaryExpr, Case, Expr, Extension, Join, LogicalPlan, Projection, build_join_schema,
-};
-use datafusion::prelude::coalesce;
-
-use crate::sql::planner::StreamSchemaProvider;
-use crate::sql::planner::extension::join::JoinExtension;
-use crate::sql::planner::extension::key_calculation::KeyCalculationExtension;
-use crate::sql::planner::plan::WindowDetectingVisitor;
-use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata};
-
-pub(crate) struct JoinRewriter<'a> {
-    pub schema_provider: &'a StreamSchemaProvider,
-}
-
-impl JoinRewriter<'_> {
-    fn check_join_windowing(join: &Join) -> Result<bool> {
-        let left_window = WindowDetectingVisitor::get_window(&join.left)?;
-        let right_window = WindowDetectingVisitor::get_window(&join.right)?;
-        match (left_window, right_window) {
-            (None, None) => {
-                if join.join_type == JoinType::Inner {
-                    Ok(false)
-                } else {
-                    Err(DataFusionError::NotImplemented(
-                        "can't handle non-inner joins without windows".into(),
-                    ))
-                }
-            }
-            (None, Some(_)) => Err(DataFusionError::NotImplemented(
-                "can't handle mixed windowing between left (non-windowed) and right (windowed)"
-                    .into(),
-            )),
-            (Some(_), None) => Err(DataFusionError::NotImplemented(
-                "can't handle mixed windowing between left (windowed) and right (non-windowed)"
-                    .into(),
-            )),
-            (Some(left_window), Some(right_window)) => {
-                if left_window != right_window {
-                    return Err(DataFusionError::NotImplemented(
-                        "can't handle mixed windowing between left and right".into(),
-                    ));
-                }
-                if let WindowType::Session { .. } = left_window {
-                    return Err(DataFusionError::NotImplemented(
-                        "can't handle session windows in joins".into(),
-                    ));
-                }
-                Ok(true)
-            }
-        }
-    }
-
-    fn create_join_key_plan(
-        input: Arc<LogicalPlan>,
-        join_expressions: Vec<Expr>,
-        name: &'static str,
-    ) -> Result<LogicalPlan> {
-        let key_count = join_expressions.len();
-
-        let join_expressions: Vec<_> = join_expressions
-            .into_iter()
-            .enumerate()
-            .map(|(index, expr)| {
-                expr.alias_qualified(
-                    Some(TableReference::bare("_stream")),
-                    format!("_key_{index}"),
-                )
-            })
-            .chain(
-                fields_with_qualifiers(input.schema())
-                    .iter()
-                    .map(|field| Expr::Column(field.qualified_column())),
-            )
-            .collect();
-
-        let projection = Projection::try_new(join_expressions, input)?;
-        let key_calculation_extension = KeyCalculationExtension::new_named_and_trimmed(
-            LogicalPlan::Projection(projection),
-            (0..key_count).collect(),
-            name.to_string(),
-        );
-        Ok(LogicalPlan::Extension(Extension {
-            node: Arc::new(key_calculation_extension),
-        }))
-    }
-
-    fn post_join_timestamp_projection(&mut self, input: LogicalPlan) -> Result<LogicalPlan> {
-        let schema = input.schema().clone();
-        let mut schema_with_timestamp = fields_with_qualifiers(&schema);
-        let timestamp_fields = schema_with_timestamp
-            .iter()
-            .filter(|field| field.name() == "_timestamp")
-            .cloned()
-            .collect::<Vec<_>>();
-
-        if timestamp_fields.len() != 2 {
-            return not_impl_err!("join must have two timestamp fields");
-        }
-
-        schema_with_timestamp.retain(|field| field.name() != "_timestamp");
-        let mut projection_expr = schema_with_timestamp
-            .iter()
-            .map(|field| {
-                Expr::Column(Column {
-                    relation: field.qualifier().cloned(),
-                    name: field.name().to_string(),
-                    spans: Spans::default(),
-                })
-            })
-            .collect::<Vec<_>>();
-
-        schema_with_timestamp.push(timestamp_fields[0].clone());
-
-        let output_schema = Arc::new(schema_from_df_fields_with_metadata(
-            &schema_with_timestamp,
-            schema.metadata().clone(),
-        )?);
-
-        let left_field = &timestamp_fields[0];
-        let left_column = Expr::Column(Column {
-            relation: left_field.qualifier().cloned(),
-            name: left_field.name().to_string(),
-            spans: Spans::default(),
-        });
-        let right_field = &timestamp_fields[1];
-        let right_column = Expr::Column(Column {
-            relation: right_field.qualifier().cloned(),
-            name: right_field.name().to_string(),
-            spans: Spans::default(),
-        });
-
-        let max_timestamp = Expr::Case(Case {
-            expr: Some(Box::new(Expr::BinaryExpr(BinaryExpr {
-                left: Box::new(left_column.clone()),
-                op: logical_expr::Operator::GtEq,
-                right: Box::new(right_column.clone()),
-            }))),
-            when_then_expr: vec![
-                (
-                    Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)), None)),
-                    Box::new(left_column.clone()),
-                ),
-                (
-                    Box::new(Expr::Literal(ScalarValue::Boolean(Some(false)), None)),
-                    Box::new(right_column.clone()),
-                ),
-            ],
-            else_expr: Some(Box::new(coalesce(vec![
-                left_column.clone(),
-                right_column.clone(),
-            ]))),
-        });
-
-        projection_expr.push(Expr::Alias(Alias {
-            expr: Box::new(max_timestamp),
-            relation: timestamp_fields[0].qualifier().cloned(),
-            name: timestamp_fields[0].name().to_string(),
-            metadata: None,
-        }));
-
-        Ok(LogicalPlan::Projection(Projection::try_new_with_schema(
-            projection_expr,
-            Arc::new(input),
-            output_schema,
-        )?))
-    }
-}
-
-impl TreeNodeRewriter for JoinRewriter<'_> {
-    type Node = LogicalPlan;
-
-    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
-        let LogicalPlan::Join(join) = node else {
-            return Ok(Transformed::no(node));
-        };
-
-        let is_instant = Self::check_join_windowing(&join)?;
-
-        let Join {
-            left,
-            right,
-            on,
-            filter,
-            join_type,
-            join_constraint: JoinConstraint::On,
-            schema: _,
-            null_equals_null: false,
-        } = join
-        else {
-            return not_impl_err!("can't handle join constraint other than ON");
-        };
-
-        if on.is_empty() && !is_instant {
-            return not_impl_err!("Updating joins must include an equijoin condition");
-        }
-
-        let (left_expressions, right_expressions): (Vec<_>, Vec<_>) =
-            on.clone().into_iter().unzip();
-
-        let left_input = Self::create_join_key_plan(left, left_expressions, "left")?;
-        let right_input = Self::create_join_key_plan(right, right_expressions, "right")?;
-
-        let rewritten_join = LogicalPlan::Join(Join {
-            schema: Arc::new(build_join_schema(
-                left_input.schema(),
-                right_input.schema(),
-                &join_type,
-            )?),
-            left: Arc::new(left_input),
-            right: Arc::new(right_input),
-            on,
-            join_type,
-            join_constraint: JoinConstraint::On,
-            null_equals_null: false,
-            filter,
-        });
-
-        let final_logical_plan = self.post_join_timestamp_projection(rewritten_join)?;
-
-        let join_extension = JoinExtension {
-            rewritten_join: final_logical_plan,
-            is_instant,
-            ttl: (!is_instant).then_some(self.schema_provider.planning_options.ttl),
-        };
-
-        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
-            node: Arc::new(join_extension),
-        })))
-    }
-}
diff --git a/src/sql/planner/plan/join_rewriter.rs b/src/sql/planner/plan/join_rewriter.rs
new file mode 100644
index 00000000..f6031183
--- /dev/null
+++ b/src/sql/planner/plan/join_rewriter.rs
@@ -0,0 +1,224 @@
+use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::extension::join::JoinExtension;
+use crate::sql::planner::extension::key_calculation::KeyCalculationExtension;
+use crate::sql::planner::plan::streaming_window_analzer::StreamingWindowAnalzer;
+use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata};
+use crate::types::TIMESTAMP_FIELD;
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{
+    Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference,
+    not_impl_err, plan_err,
+};
+use datafusion::logical_expr::{
+    self, BinaryExpr, Case, Expr, Extension, Join, LogicalPlan, Projection, build_join_schema,
+};
+use datafusion::prelude::coalesce;
+use std::sync::Arc;
+
+/// JoinRewriter handles the transformation of standard SQL joins into streaming-capable joins.
+/// It manages stateful "Updating Joins" and time-aligned "Instant Joins".
+pub(crate) struct JoinRewriter<'a> {
+    pub schema_provider: &'a StreamSchemaProvider,
+}
+
+impl<'a> JoinRewriter<'a> {
+    pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self {
+        Self { schema_provider }
+    }
+
+    /// [Validation] Ensures left and right streams have compatible windowing strategies.
+    fn validate_join_windows(&self, join: &Join) -> Result<bool> {
+        let left_win = StreamingWindowAnalzer::get_window(&join.left)?;
+        let right_win = StreamingWindowAnalzer::get_window(&join.right)?;
+
+        match (left_win, right_win) {
+            (None, None) => {
+                if join.join_type == JoinType::Inner {
+                    Ok(false) // Standard Updating Join (Inner)
+                } else {
+                    plan_err!(
+                        "Non-inner joins (e.g., LEFT/RIGHT) require windowing to bound state."
+                    )
+                }
+            }
+            (Some(l), Some(r)) => {
+                if l != r {
+                    return plan_err!(
+                        "Join window mismatch: left={:?}, right={:?}. Windows must match exactly.",
+                        l,
+                        r
+                    );
+                }
+                if let WindowType::Session { .. } = l {
+                    return plan_err!(
+                        "Session windows are currently not supported in streaming joins."
+                    );
+                }
+                Ok(true) // Instant Windowed Join
+            }
+            _ => plan_err!(
+                "Mixed windowing detected. Both sides of a join must be either windowed or non-windowed."
+            ),
+        }
+    }
+
+    /// [Internal] Wraps a join input in a KeyCalculation layer to facilitate Shuffle/KeyBy distribution.
+    fn build_keyed_side(
+        &self,
+        input: Arc<LogicalPlan>,
+        keys: Vec<Expr>,
+        side: &str,
+    ) -> Result<LogicalPlan> {
+        let key_count = keys.len();
+
+        let projection_exprs = keys
+            .into_iter()
+            .enumerate()
+            .map(|(i, e)| {
+                e.alias_qualified(Some(TableReference::bare("_stream")), format!("_key_{i}"))
+            })
+            .chain(
+                fields_with_qualifiers(input.schema())
+                    .iter()
+                    .map(|f| Expr::Column(f.qualified_column())),
+            )
+            .collect();
+
+        let projection = Projection::try_new(projection_exprs, input)?;
+        let key_ext = KeyCalculationExtension::new_named_and_trimmed(
+            LogicalPlan::Projection(projection),
+            (0..key_count).collect(),
+            side.to_string(),
+        );
+
+        Ok(LogicalPlan::Extension(Extension {
+            node: Arc::new(key_ext),
+        }))
+    }
+
+    /// [Strategy] Replaces the two input timestamp columns of a join with a single output column.
+    /// The output is the later of the two, aliased to `TIMESTAMP_FIELD`; errors unless exactly two exist.
+    fn apply_timestamp_resolution(&self, join_plan: LogicalPlan) -> Result<LogicalPlan> {
+        let schema = join_plan.schema();
+        let all_fields = fields_with_qualifiers(schema);
+
+        let timestamp_fields: Vec<_> = all_fields
+            .iter()
+            .filter(|f| f.name() == TIMESTAMP_FIELD)
+            .cloned()
+            .collect();
+
+        if timestamp_fields.len() != 2 {
+            return plan_err!(
+                "Streaming join requires exactly two input timestamp fields to resolve output time."
+            );
+        }
+
+        // Project all fields except the two raw timestamps (matched by the shared constant)
+        let mut exprs: Vec<_> = all_fields
+            .iter()
+            .filter(|f| f.name() != TIMESTAMP_FIELD)
+            .map(|f| Expr::Column(f.qualified_column()))
+            .collect();
+
+        // CASE (l >= r) WHEN true THEN l WHEN false THEN r; a NULL comparison hits ELSE COALESCE(l, r)
+        let left_ts = Expr::Column(timestamp_fields[0].qualified_column());
+        let right_ts = Expr::Column(timestamp_fields[1].qualified_column());
+
+        let max_ts_expr = Expr::Case(Case {
+            expr: Some(Box::new(Expr::BinaryExpr(BinaryExpr {
+                left: Box::new(left_ts.clone()),
+                op: logical_expr::Operator::GtEq,
+                right: Box::new(right_ts.clone()),
+            }))),
+            when_then_expr: vec![
+                (
+                    Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)), None)),
+                    Box::new(left_ts.clone()),
+                ),
+                (
+                    Box::new(Expr::Literal(ScalarValue::Boolean(Some(false)), None)),
+                    Box::new(right_ts.clone()),
+                ),
+            ],
+            else_expr: Some(Box::new(coalesce(vec![left_ts, right_ts]))),
+        })
+        .alias(TIMESTAMP_FIELD);
+
+        exprs.push(max_ts_expr);
+
+        let out_fields: Vec<_> = all_fields
+            .iter()
+            .filter(|f| f.name() != TIMESTAMP_FIELD)
+            .cloned()
+            .chain(std::iter::once(timestamp_fields[0].clone()))
+            .collect();
+
+        let out_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &out_fields,
+            schema.metadata().clone(),
+        )?);
+
+        Ok(LogicalPlan::Projection(Projection::try_new_with_schema(
+            exprs,
+            Arc::new(join_plan),
+            out_schema,
+        )?))
+    }
+}
+
+impl TreeNodeRewriter for JoinRewriter<'_> {
+    type Node = LogicalPlan;
+
+    /// Rewrites `Join` nodes into a `JoinExtension` over keyed inputs; other nodes pass through.
+    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
+        let LogicalPlan::Join(join) = node else {
+            return Ok(Transformed::no(node));
+        };
+
+        // 1. Validate Streaming Context
+        let is_instant = self.validate_join_windows(&join)?;
+        if join.join_constraint != JoinConstraint::On {
+            return not_impl_err!("Only 'ON' join constraints are supported in streaming SQL.");
+        } else if join.null_equals_null {
+            return not_impl_err!("Null-equal join keys are not supported in streaming SQL.");
+        }
+        if join.on.is_empty() && !is_instant {
+            return plan_err!("Updating joins require at least one equality condition (Equijoin).");
+        }
+
+        // 2. Prepare Keyed Inputs for Shuffle
+        let (left_on, right_on): (Vec<_>, Vec<_>) = join.on.clone().into_iter().unzip();
+        let keyed_left = self.build_keyed_side(join.left, left_on, "left")?;
+        let keyed_right = self.build_keyed_side(join.right, right_on, "right")?;
+
+        // 3. Assemble Rewritten Join Node (`null_equals_null` was verified to be false in step 1)
+        let rewritten_join = LogicalPlan::Join(Join {
+            schema: Arc::new(build_join_schema(
+                keyed_left.schema(),
+                keyed_right.schema(),
+                &join.join_type,
+            )?),
+            left: Arc::new(keyed_left),
+            right: Arc::new(keyed_right),
+            on: join.on,
+            filter: join.filter,
+            join_type: join.join_type,
+            join_constraint: JoinConstraint::On,
+            null_equals_null: false,
+        });
+
+        // 4. Resolve Output Watermark (Timestamp Projection)
+        let plan_with_timestamp = self.apply_timestamp_resolution(rewritten_join)?;
+
+        // 5. Wrap in JoinExtension for Physical Planning (a TTL is set only for updating joins)
+        let extension = JoinExtension {
+            rewritten_join: plan_with_timestamp,
+            is_instant,
+            ttl: (!is_instant).then_some(self.schema_provider.planning_options.ttl),
+        };
+        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+            node: Arc::new(extension),
+        })))
+    }
+}
diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs
index 83891731..c734a88b 100644
--- a/src/sql/planner/plan/mod.rs
+++ b/src/sql/planner/plan/mod.rs
@@ -1,38 +1,54 @@
 use datafusion::common::Result;
-use datafusion::common::tree_node::TreeNode;
+use datafusion::common::tree_node::{Transformed, TreeNode};
 use datafusion::logical_expr::LogicalPlan;
+use tracing::{debug, info, instrument};
 
 use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::planner::plan::stream_rewriter::StreamRewriter;
 use crate::sql::planner::rewrite::TimeWindowUdfChecker;
 
-use self::aggregate::AggregateRewriter;
-use self::join::JoinRewriter;
-use self::stream_rewriter::StreamRewriter;
-use self::window_detecting_visitor::{WindowDetectingVisitor, extract_column};
-use self::window_fn::WindowFunctionRewriter;
-
-pub(crate) mod aggregate;
-pub(crate) mod join;
+// Module declarations
+pub(crate) mod aggregate_rewriter;
+pub(crate) mod join_rewriter;
+pub(crate) mod row_time_rewriter;
 pub(crate) mod stream_rewriter;
-pub(crate) mod window_detecting_visitor;
-pub(crate) mod window_fn;
-
-use tracing::debug;
-
+pub(crate) mod streaming_window_analzer;
+pub(crate) mod window_function_rewriter;
+
+/// Entry point for transforming a standard DataFusion LogicalPlan into a
+/// Streaming-aware LogicalPlan.
+///
+/// This function coordinates multiple rewriting passes and ensures the
+/// resulting plan satisfies streaming constraints.
+#[instrument(skip_all, level = "debug")]
 pub fn rewrite_plan(
     plan: LogicalPlan,
     schema_provider: &StreamSchemaProvider,
 ) -> Result<LogicalPlan> {
-    let rewritten_plan = plan.rewrite_with_subqueries(&mut StreamRewriter { schema_provider })?;
-
-    rewritten_plan
-        .data
-        .visit_with_subqueries(&mut TimeWindowUdfChecker {})?;
-
-    debug!(
-        "Streaming logical plan:\n{}",
-        rewritten_plan.data.display_graphviz()
-    );
-
-    Ok(rewritten_plan.data)
+    info!("Starting streaming plan rewrite pipeline");
+
+    // Phase 1: Core Transformation
+    // This pass handles the structural changes (Aggregates, Joins, Windows)
+    // using a Bottom-Up traversal.
+    let mut rewriter = StreamRewriter::new(schema_provider);
+    let Transformed {
+        data: rewritten_plan,
+        ..
+    } = plan.rewrite_with_subqueries(&mut rewriter)?;
+
+    // Phase 2: Post-rewrite Validation
+    // Ensure that the rewritten plan doesn't violate specific streaming UDF rules.
+    rewritten_plan.visit_with_subqueries(&mut TimeWindowUdfChecker {})?;
+
+    // Phase 3: Observability & Debugging
+    // `cfg!(debug_assertions)` is a compile-time gate: builds without it never log this graph.
+    if cfg!(debug_assertions) {
+        debug!(
+            "Streaming logical plan graphviz:\n{}",
+            rewritten_plan.display_graphviz()
+        );
+    }
+
+    info!("Streaming plan rewrite completed successfully");
+    Ok(rewritten_plan)
 }
diff --git a/src/sql/planner/plan/row_time_rewriter.rs b/src/sql/planner/plan/row_time_rewriter.rs
new file mode 100644
index 00000000..0a31d9f8
--- /dev/null
+++ b/src/sql/planner/plan/row_time_rewriter.rs
@@ -0,0 +1,36 @@
+use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
+use datafusion::common::{Column, Result as DFResult};
+use datafusion::logical_expr::Expr;
+
+use crate::sql::types::TIMESTAMP_FIELD;
+
+/// Replaces the virtual `row_time()` scalar function with a physical reference to `_timestamp`.
+///
+/// This is a critical mapping step that allows users to use a friendly SQL function
+/// while the engine operates on the mandatory internal streaming timestamp.
+pub struct RowTimeRewriter;
+
+impl TreeNodeRewriter for RowTimeRewriter {
+    type Node = Expr;
+
+    fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        // Use pattern matching to identify the 'row_time' scalar function.
+        if let Expr::ScalarFunction(func) = &node
+            && func.name() == "row_time"
+        {
+            // Map the virtual function to the physical internal timestamp column.
+            // We use .alias() to preserve the original name "row_time()" in the output schema,
+            // ensuring that user-facing column names do not change unexpectedly.
+            let physical_col = Expr::Column(Column {
+                relation: None,
+                name: TIMESTAMP_FIELD.to_string(),
+                spans: Default::default(),
+            })
+            .alias("row_time()");
+
+            return Ok(Transformed::yes(physical_col));
+        }
+
+        Ok(Transformed::no(node))
+    }
+}
diff --git a/src/sql/planner/plan/stream_rewriter.rs b/src/sql/planner/plan/stream_rewriter.rs
index 53549af4..c3caed0e 100644
--- a/src/sql/planner/plan/stream_rewriter.rs
+++ b/src/sql/planner/plan/stream_rewriter.rs
@@ -1,148 +1,219 @@
 use std::sync::Arc;
 
+use super::StreamSchemaProvider;
 use crate::sql::planner::extension::StreamExtension;
 use crate::sql::planner::extension::remote_table::RemoteTableExtension;
+use crate::sql::planner::plan::row_time_rewriter::RowTimeRewriter;
 use crate::sql::planner::plan::{
-    aggregate::AggregateRewriter, join::JoinRewriter, window_fn::WindowFunctionRewriter,
+    aggregate_rewriter::AggregateRewriter, join_rewriter::JoinRewriter,
+    window_function_rewriter::WindowFunctionRewriter,
 };
-use crate::sql::planner::rewrite::{RowTimeRewriter, TimeWindowNullCheckRemover};
+use crate::sql::planner::rewrite::TimeWindowNullCheckRemover;
 use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
 use crate::sql::types::{DFField, TIMESTAMP_FIELD};
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{Column, DataFusionError, Result, Spans, TableReference, plan_err};
-use datafusion::logical_expr::{Expr, Extension, Filter, LogicalPlan, SubqueryAlias};
+use datafusion::logical_expr::{
+    Expr, Extension, Filter, LogicalPlan, Projection, SubqueryAlias, Union,
+};
 use datafusion_common::tree_node::TreeNode;
-
-use super::StreamSchemaProvider;
+use datafusion_expr::{Aggregate, Join};
 
 pub struct StreamRewriter<'a> {
     pub(crate) schema_provider: &'a StreamSchemaProvider,
 }
 
+impl TreeNodeRewriter for StreamRewriter<'_> {
+    type Node = LogicalPlan;
+
+    fn f_up(&mut self, node: Self::Node) -> Result<Transformed<Self::Node>> {
+        match node {
+            // Logic Delegation
+            LogicalPlan::Projection(p) => self.rewrite_projection(p),
+            LogicalPlan::Filter(f) => self.rewrite_filter(f),
+            LogicalPlan::Union(u) => self.rewrite_union(u),
+
+            // Delegation to specialized sub-rewriters
+            LogicalPlan::Aggregate(agg) => self.rewrite_aggregate(agg),
+            LogicalPlan::Join(join) => self.rewrite_join(join),
+            LogicalPlan::Window(_) => self.rewrite_window(node),
+            LogicalPlan::SubqueryAlias(sa) => self.rewrite_subquery_alias(sa),
+
+            // Explicitly Unsupported Operations
+            LogicalPlan::Sort(_) => self.unsupported_error("ORDER BY", &node),
+            LogicalPlan::Limit(_) => self.unsupported_error("LIMIT", &node),
+            LogicalPlan::Repartition(_) => self.unsupported_error("Repartitions", &node),
+            LogicalPlan::Explain(_) => self.unsupported_error("EXPLAIN", &node),
+            LogicalPlan::Analyze(_) => self.unsupported_error("ANALYZE", &node),
+
+            _ => Ok(Transformed::no(node)),
+        }
+    }
+}
+
 impl<'a> StreamRewriter<'a> {
     pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self {
         Self { schema_provider }
     }
-}
 
-impl TreeNodeRewriter for StreamRewriter<'_> {
-    type Node = LogicalPlan;
+    /// Delegates to AggregateRewriter to transform batch aggregates into streaming stateful operators.
+    fn rewrite_aggregate(&self, agg: Aggregate) -> Result<Transformed<LogicalPlan>> {
+        AggregateRewriter {
+            schema_provider: self.schema_provider,
+        }
+        .f_up(LogicalPlan::Aggregate(agg))
+    }
 
-    fn f_up(&mut self, mut node: Self::Node) -> Result<Transformed<Self::Node>> {
-        match node {
-            LogicalPlan::Projection(ref mut projection) => {
-                if !has_timestamp_field(&projection.schema) {
-                    let timestamp_field: DFField = projection
-                        .input
-                        .schema()
-                        .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
-                        .map_err(|_| {
-                            DataFusionError::Plan(format!(
-                                "No timestamp field found in projection input ({})",
-                                projection.input.display()
-                            ))
-                        })?
-                        .into();
-                    projection.schema = add_timestamp_field(
-                        projection.schema.clone(),
-                        timestamp_field.qualifier().cloned(),
-                    )
-                    .expect("in projection");
-                    projection.expr.push(Expr::Column(Column {
-                        relation: timestamp_field.qualifier().cloned(),
-                        name: TIMESTAMP_FIELD.to_string(),
-                        spans: Spans::default(),
-                    }));
-                }
+    /// Delegates to JoinRewriter to handle streaming join semantics (e.g., TTL, state management).
+    fn rewrite_join(&self, join: Join) -> Result<Transformed<LogicalPlan>> {
+        JoinRewriter {
+            schema_provider: self.schema_provider,
+        }
+        .f_up(LogicalPlan::Join(join))
+    }
 
-                let rewritten = projection
-                    .expr
-                    .iter()
-                    .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {}))
-                    .collect::<Result<Vec<_>>>()?;
-                if rewritten.iter().any(|r| r.transformed) {
-                    projection.expr = rewritten.into_iter().map(|r| r.data).collect();
-                }
-                return Ok(Transformed::yes(node));
-            }
-            LogicalPlan::Aggregate(aggregate) => {
-                return AggregateRewriter {
-                    schema_provider: self.schema_provider,
-                }
-                .f_up(LogicalPlan::Aggregate(aggregate));
-            }
-            LogicalPlan::Join(join) => {
-                return JoinRewriter {
-                    schema_provider: self.schema_provider,
-                }
-                .f_up(LogicalPlan::Join(join));
-            }
-            LogicalPlan::Filter(f) => {
-                let expr = f
-                    .predicate
-                    .clone()
-                    .rewrite(&mut TimeWindowNullCheckRemover {})?;
-                return Ok(if expr.transformed {
-                    Transformed::yes(LogicalPlan::Filter(Filter::try_new(expr.data, f.input)?))
-                } else {
-                    Transformed::no(LogicalPlan::Filter(f))
-                });
-            }
-            LogicalPlan::Window(_) => {
-                return WindowFunctionRewriter {}.f_up(node);
-            }
-            LogicalPlan::Sort(_) => {
-                return plan_err!(
-                    "ORDER BY is not currently supported in streaming SQL ({})",
-                    node.display()
-                );
-            }
-            LogicalPlan::Repartition(_) => {
-                return plan_err!(
-                    "Repartitions are not currently supported ({})",
-                    node.display()
-                );
-            }
-            LogicalPlan::Union(mut union) => {
-                union.schema = union.inputs[0].schema().clone();
-                for input in union.inputs.iter_mut() {
-                    if let LogicalPlan::Extension(Extension { node }) = input.as_ref() {
-                        let stream_extension: &dyn StreamExtension = node.try_into().unwrap();
-                        if !stream_extension.transparent() {
-                            continue;
-                        }
-                    }
-                    let remote_table_extension = Arc::new(RemoteTableExtension {
-                        input: input.as_ref().clone(),
-                        name: TableReference::bare("union_input"),
-                        schema: union.schema.clone(),
-                        materialize: false,
-                    });
-                    *input = Arc::new(LogicalPlan::Extension(Extension {
-                        node: remote_table_extension,
-                    }));
+    /// Delegates to WindowFunctionRewriter for stream-aware windowing logic.
+    fn rewrite_window(&self, node: LogicalPlan) -> Result<Transformed<LogicalPlan>> {
+        WindowFunctionRewriter {}.f_up(node)
+    }
+
+    /// Refreshes SubqueryAlias metadata to align with potentially rewritten internal schemas.
+    fn rewrite_subquery_alias(&self, sa: SubqueryAlias) -> Result<Transformed<LogicalPlan>> {
+        // Since the inner 'sa.input' has been rewritten (bottom-up), we must re-create
+        // the alias node to ensure the outer schema correctly reflects internal changes.
+        let new_sa = SubqueryAlias::try_new(sa.input, sa.alias).map_err(|e| {
+            DataFusionError::Internal(format!("Failed to re-alias subquery: {}", e))
+        })?;
+
+        Ok(Transformed::yes(LogicalPlan::SubqueryAlias(new_sa)))
+    }
+
+    /// Ensures a Projection carries the timestamp column and maps row_time() calls onto it.
+    fn rewrite_projection(&self, mut projection: Projection) -> Result<Transformed<LogicalPlan>> {
+        // Check if the current projection already has a timestamp field;
+        // if not, carry the input's timestamp column through this projection.
+        if !has_timestamp_field(&projection.schema) {
+            let input_schema = projection.input.schema();
+
+            // Resolve the timestamp field from the input schema using the global constant.
+            let timestamp_field: DFField = input_schema
+                .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
+                .map_err(|_| {
+                    DataFusionError::Plan(format!(
+                        "No timestamp field found in projection input ({})",
+                        projection.input.display()
+                    ))
+                })?
+                .into();
+
+            // Extend the schema with the timestamp; a failure becomes a plan error, not a panic.
+            projection.schema = add_timestamp_field(
+                projection.schema.clone(),
+                timestamp_field.qualifier().cloned(),
+            )
+            .map_err(|e| DataFusionError::Plan(format!("Failed to add timestamp field: {e}")))?;
+
+            // Append the matching column expression so the expression list and schema stay aligned.
+            projection.expr.push(Expr::Column(Column {
+                relation: timestamp_field.qualifier().cloned(),
+                name: TIMESTAMP_FIELD.to_string(),
+                spans: Spans::default(),
+            }));
+        }
+
+        // Map user-friendly row_time() function calls to internal _timestamp column references.
+        let rewritten = projection
+            .expr
+            .iter()
+            .map(|expr| expr.clone().rewrite(&mut RowTimeRewriter {}))
+            .collect::<Result<Vec<_>>>()?;
+
+        // If any expressions were modified (e.g., row_time() was replaced), update the projection.
+        if rewritten.iter().any(|r| r.transformed) {
+            projection.expr = rewritten.into_iter().map(|r| r.data).collect();
+        }
+
+        // Always reported as transformed, even when neither step above changed anything.
+        Ok(Transformed::yes(LogicalPlan::Projection(projection)))
+    }
+
+    /// Harmonizes schemas across Union branches and wraps them in RemoteTableExtensions.
+    ///
+    /// This ensures that all inputs to a UNION operation share the exact same schema metadata,
+    /// preventing "Schema Drift" where different branches have different field qualifiers.
+    fn rewrite_union(&self, mut union: Union) -> Result<Transformed<LogicalPlan>> {
+        // The first branch's schema becomes the schema of the Union and of every wrapper below.
+        // NOTE: `inputs[0]` panics on an empty input list; this relies on Union being non-empty.
+        let master_schema = union.inputs[0].schema().clone();
+        union.schema = master_schema.clone();
+
+        for input in union.inputs.iter_mut() {
+            // Optimization: If the node is already a non-transparent Extension,
+            // we skip wrapping to avoid unnecessary nesting of logical nodes.
+            if let LogicalPlan::Extension(Extension { node }) = input.as_ref() {
+                let stream_ext: &dyn StreamExtension = node.try_into().map_err(|e| {
+                    DataFusionError::Internal(format!("Failed to resolve StreamExtension: {}", e))
+                })?;
+
+                if !stream_ext.transparent() {
+                    continue;
                 }
-                return Ok(Transformed::yes(LogicalPlan::Union(union)));
-            }
-            LogicalPlan::SubqueryAlias(sa) => {
-                return Ok(Transformed::yes(LogicalPlan::SubqueryAlias(
-                    SubqueryAlias::try_new(sa.input, sa.alias)?,
-                )));
-            }
-            LogicalPlan::Limit(_) => {
-                return plan_err!(
-                    "LIMIT is not currently supported in streaming SQL ({})",
-                    node.display()
-                );
-            }
-            LogicalPlan::Explain(_) => {
-                return plan_err!("EXPLAIN is not supported ({})", node.display());
             }
-            LogicalPlan::Analyze(_) => {
-                return plan_err!("ANALYZE is not supported ({})", node.display());
-            }
-            _ => {}
+
+            // Wrap each branch in a RemoteTableExtension.
+            // This acts as a logical "bridge" that forces the input to adopt the master_schema,
+            // effectively stripping away branch-specific qualifiers (e.g., table aliases).
+            let remote_ext = Arc::new(RemoteTableExtension {
+                input: input.as_ref().clone(),
+                name: TableReference::bare("union_input"),
+                schema: master_schema.clone(),
+                materialize: false, // Internal logical boundary only; does not require physical sink.
+            });
+
+            // Replace the branch in place with its wrapped version.
+            *input = Arc::new(LogicalPlan::Extension(Extension { node: remote_ext }));
+        }
+
+        Ok(Transformed::yes(LogicalPlan::Union(union)))
+    }
+
+    /// Optimizes Filter nodes by stripping redundant NULL checks on time window expressions.
+    ///
+    /// The predicate is run through `TimeWindowNullCheckRemover`; the Filter is rebuilt only when
+    /// that rewriter reports a change, otherwise the node is returned as-is and marked as not
+    /// transformed.
+    fn rewrite_filter(&self, filter: Filter) -> Result<Transformed<LogicalPlan>> {
+        // Rewrite a clone of the predicate so the original Filter can be returned unchanged.
+        // NOTE(review): presumably the rewriter drops `<window fn> IS NOT NULL` guards — confirm
+        // against `TimeWindowNullCheckRemover` in `planner::rewrite`.
+        let rewritten_expr = filter
+            .predicate
+            .clone()
+            .rewrite(&mut TimeWindowNullCheckRemover {})?;
+
+        if !rewritten_expr.transformed {
+            return Ok(Transformed::no(LogicalPlan::Filter(filter)));
         }
-        Ok(Transformed::no(node))
+
+        // Rebuild the node through `Filter::try_new` instead of mutating it in place.
+        // A failure here means the rewriter produced a predicate the constructor rejects, so it
+        // is reported as an internal error rather than a user-facing plan error.
+        let new_filter = Filter::try_new(rewritten_expr.data, filter.input).map_err(|e| {
+            DataFusionError::Internal(format!(
+                "Failed to re-validate filtered predicate after NULL-check removal: {}",
+                e
+            ))
+        })?;
+
+        Ok(Transformed::yes(LogicalPlan::Filter(new_filter)))
+    }
+
+    /// Centralized error handler for unsupported streaming operations
+    fn unsupported_error(&self, op: &str, node: &LogicalPlan) -> Result<Transformed<LogicalPlan>> {
+        plan_err!(
+            "{} is not currently supported in streaming SQL ({})",
+            op,
+            node.display()
+        )
     }
 }
diff --git a/src/sql/planner/plan/streaming_window_analzer.rs b/src/sql/planner/plan/streaming_window_analzer.rs
new file mode 100644
index 00000000..db3506b7
--- /dev/null
+++ b/src/sql/planner/plan/streaming_window_analzer.rs
@@ -0,0 +1,203 @@
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor};
+use datafusion::common::{Column, DFSchema, DataFusionError, Result};
+use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias};
+
+use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension};
+use crate::sql::planner::extension::join::JOIN_NODE_NAME;
+use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window};
+
+/// StreamingWindowAnalzer identifies windowing strategies and tracks window-carrying fields
+/// as they propagate upward through the logical plan tree.
+#[derive(Debug, Default)]
+pub(crate) struct StreamingWindowAnalzer {
+    /// The specific window type discovered (Tumble, Hop, etc.)
+    pub(crate) window: Option<WindowType>,
+    /// Set of fields in the current plan node that carry window semantics.
+    pub(crate) fields: HashSet<DFField>,
+}
+
+impl StreamingWindowAnalzer {
+    /// Entry point to resolve the WindowType of a given plan branch.
+    pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result<Option<WindowType>> {
+        let mut visitor = Self::default();
+        logical_plan.visit_with_subqueries(&mut visitor)?;
+        Ok(visitor.window)
+    }
+
+    /// Resolves whether an expression is a reference to an existing window field
+    /// or a definition of a new window function.
+    fn resolve_window_from_expr(
+        &self,
+        expr: &Expr,
+        input_schema: &DFSchema,
+    ) -> Result<Option<WindowType>> {
+        // 1. Check if the expression directly references a known window field.
+        if let Some(col) = extract_column(expr) {
+            let field = input_schema.field_with_name(col.relation.as_ref(), &col.name)?;
+            let df_field: DFField = (col.relation.clone(), Arc::new(field.clone())).into();
+
+            if self.fields.contains(&df_field) {
+                return Ok(self.window.clone());
+            }
+        }
+
+        // 2. Otherwise, check if it's a new window function call (e.g., tumble(), hop()).
+        find_window(expr)
+    }
+
+    /// Updates the internal state with new window findings and maps them to the output schema.
+    fn update_state(
+        &mut self,
+        matched_windows: Vec<(usize, WindowType)>,
+        schema: &DFSchema,
+    ) -> Result<()> {
+        // Drop fields recorded at lower levels so the set describes only this node's schema.
+        self.fields.clear();
+
+        for (index, window) in matched_windows {
+            if let Some(existing) = &self.window {
+                if existing != &window {
+                    return Err(DataFusionError::Plan(format!(
+                        "Conflicting windows in the same operator: expected {:?}, found {:?}",
+                        existing, window
+                    )));
+                }
+            } else {
+                self.window = Some(window);
+            }
+            // Record this specific index in the schema as a window carrier.
+            self.fields.insert(schema.qualified_field(index).into());
+        }
+        Ok(())
+    }
+}
+
+pub(crate) fn extract_column(expr: &Expr) -> Option<&Column> {
+    match expr {
+        Expr::Column(column) => Some(column),
+        Expr::Alias(Alias { expr, .. }) => extract_column(expr),
+        _ => None,
+    }
+}
+
+impl TreeNodeVisitor<'_> for StreamingWindowAnalzer {
+    type Node = LogicalPlan;
+
+    fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        // Joins require cross-branch validation to ensure left and right sides align on time.
+        if let LogicalPlan::Extension(Extension { node }) = node
+            && node.name() == JOIN_NODE_NAME
+        {
+            let mut branch_windows = HashSet::new();
+            for input in node.inputs() {
+                if let Some(w) = Self::get_window(input)? {
+                    branch_windows.insert(w);
+                }
+            }
+
+            if branch_windows.len() > 1 {
+                return Err(DataFusionError::Plan(
+                    "Join inputs have mismatched windowing strategies.".into(),
+                ));
+            }
+            self.window = branch_windows.into_iter().next();
+
+            // Inputs were already analyzed via get_window above, so skip descending into them.
+            return Ok(TreeNodeRecursion::Jump);
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+
+    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        match node {
+            LogicalPlan::Projection(p) => {
+                let windows = p
+                    .expr
+                    .iter()
+                    .enumerate()
+                    .filter_map(|(i, e)| {
+                        self.resolve_window_from_expr(e, p.input.schema())
+                            .transpose()
+                            .map(|res| res.map(|w| (i, w)))
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+
+                self.update_state(windows, &p.schema)?;
+            }
+
+            LogicalPlan::Aggregate(agg) => {
+                let windows = agg
+                    .group_expr
+                    .iter()
+                    .enumerate()
+                    .filter_map(|(i, e)| {
+                        self.resolve_window_from_expr(e, agg.input.schema())
+                            .transpose()
+                            .map(|res| res.map(|w| (i, w)))
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+
+                self.update_state(windows, &agg.schema)?;
+            }
+
+            LogicalPlan::SubqueryAlias(sa) => {
+                // Map fields through the alias layer by resolving column indices.
+                let input_schema = sa.input.schema();
+                let mapped = self
+                    .fields
+                    .drain()
+                    .map(|f| {
+                        let idx = input_schema.index_of_column(&f.qualified_column())?;
+                        Ok(sa.schema.qualified_field(idx).into())
+                    })
+                    .collect::<Result<HashSet<_>>>()?;
+
+                self.fields = mapped;
+            }
+
+            LogicalPlan::Extension(Extension { node })
+                if node.name() == AGGREGATE_EXTENSION_NAME =>
+            {
+                let ext = node
+                    .as_any()
+                    .downcast_ref::<AggregateExtension>()
+                    .ok_or_else(|| {
+                        DataFusionError::Internal("AggregateExtension node is malformed".into())
+                    })?;
+
+                match &ext.window_behavior {
+                    WindowBehavior::FromOperator {
+                        window,
+                        window_field,
+                        is_nested,
+                        ..
+                    } => {
+                        if self.window.is_some() && !*is_nested {
+                            return Err(DataFusionError::Plan(
+                                "Redundant window definition on an already windowed stream.".into(),
+                            ));
+                        }
+                        self.window = Some(window.clone());
+                        self.fields.insert(window_field.clone());
+                    }
+                    WindowBehavior::InData => {
+                        let current_schema_fields: HashSet<_> =
+                            fields_with_qualifiers(node.schema()).into_iter().collect();
+                        self.fields.retain(|f| current_schema_fields.contains(f));
+
+                        if self.fields.is_empty() {
+                            return Err(DataFusionError::Plan(
+                                "Windowed aggregate missing window metadata from its input.".into(),
+                            ));
+                        }
+                    }
+                }
+            }
+            _ => {}
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
diff --git a/src/sql/planner/plan/window_detecting_visitor.rs b/src/sql/planner/plan/window_detecting_visitor.rs
deleted file mode 100644
index 0a0a0323..00000000
--- a/src/sql/planner/plan/window_detecting_visitor.rs
+++ /dev/null
@@ -1,215 +0,0 @@
-use std::collections::HashSet;
-use std::sync::Arc;
-
-use datafusion::common::{
-    Column, DataFusionError, Result,
-    tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
-};
-use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias};
-
-use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension};
-use crate::sql::planner::extension::join::JOIN_NODE_NAME;
-use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window};
-
-#[derive(Debug, Default)]
-pub(crate) struct WindowDetectingVisitor {
-    pub(crate) window: Option<WindowType>,
-    pub(crate) fields: HashSet<DFField>,
-}
-
-impl WindowDetectingVisitor {
-    pub(crate) fn get_window(logical_plan: &LogicalPlan) -> Result<Option<WindowType>> {
-        let mut visitor = WindowDetectingVisitor {
-            window: None,
-            fields: HashSet::new(),
-        };
-        logical_plan.visit_with_subqueries(&mut visitor)?;
-        Ok(visitor.window.take())
-    }
-}
-
-pub(crate) fn extract_column(expr: &Expr) -> Option<&Column> {
-    match expr {
-        Expr::Column(column) => Some(column),
-        Expr::Alias(Alias { expr, .. }) => extract_column(expr),
-        _ => None,
-    }
-}
-
-impl TreeNodeVisitor<'_> for WindowDetectingVisitor {
-    type Node = LogicalPlan;
-
-    fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
-        let LogicalPlan::Extension(Extension { node }) = node else {
-            return Ok(TreeNodeRecursion::Continue);
-        };
-
-        if node.name() == JOIN_NODE_NAME {
-            let input_windows: HashSet<_> = node
-                .inputs()
-                .iter()
-                .map(|input| Self::get_window(input))
-                .collect::<Result<HashSet<_>>>()?;
-            if input_windows.len() > 1 {
-                return Err(DataFusionError::Plan(
-                    "can't handle mixed windowing between left and right".to_string(),
-                ));
-            }
-            self.window = input_windows
-                .into_iter()
-                .next()
-                .expect("join has at least one input");
-            return Ok(TreeNodeRecursion::Jump);
-        }
-        Ok(TreeNodeRecursion::Continue)
-    }
-
-    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
-        match node {
-            LogicalPlan::Projection(projection) => {
-                let window_expressions = projection
-                    .expr
-                    .iter()
-                    .enumerate()
-                    .filter_map(|(index, expr)| {
-                        if let Some(column) = extract_column(expr) {
-                            let input_field = projection
-                                .input
-                                .schema()
-                                .field_with_name(column.relation.as_ref(), &column.name);
-                            let input_field = match input_field {
-                                Ok(field) => field,
-                                Err(err) => return Some(Err(err)),
-                            };
-                            if self.fields.contains(
-                                &(column.relation.clone(), Arc::new(input_field.clone())).into(),
-                            ) {
-                                return self.window.clone().map(|window| Ok((index, window)));
-                            }
-                        }
-                        find_window(expr)
-                            .map(|option| option.map(|inner| (index, inner)))
-                            .transpose()
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                self.fields.clear();
-                for (index, window) in window_expressions {
-                    if let Some(existing_window) = &self.window {
-                        if *existing_window != window {
-                            return Err(DataFusionError::Plan(
-                                "window expressions do not match".to_string(),
-                            ));
-                        }
-                    } else {
-                        self.window = Some(window);
-                    }
-                    self.fields
-                        .insert(projection.schema.qualified_field(index).into());
-                }
-            }
-            LogicalPlan::SubqueryAlias(subquery_alias) => {
-                self.fields = self
-                    .fields
-                    .drain()
-                    .map(|field| {
-                        Ok(subquery_alias
-                            .schema
-                            .qualified_field(
-                                subquery_alias
-                                    .input
-                                    .schema()
-                                    .index_of_column(&field.qualified_column())?,
-                            )
-                            .into())
-                    })
-                    .collect::<Result<HashSet<_>>>()?;
-            }
-            LogicalPlan::Aggregate(Aggregate {
-                input,
-                group_expr,
-                aggr_expr: _,
-                schema,
-                ..
-            }) => {
-                let window_expressions = group_expr
-                    .iter()
-                    .enumerate()
-                    .filter_map(|(index, expr)| {
-                        if let Some(column) = extract_column(expr) {
-                            let input_field = input
-                                .schema()
-                                .field_with_name(column.relation.as_ref(), &column.name);
-                            let input_field = match input_field {
-                                Ok(field) => field,
-                                Err(err) => return Some(Err(err)),
-                            };
-                            if self
-                                .fields
-                                .contains(&(column.relation.as_ref(), input_field).into())
-                            {
-                                return self.window.clone().map(|window| Ok((index, window)));
-                            }
-                        }
-                        find_window(expr)
-                            .map(|option| option.map(|inner| (index, inner)))
-                            .transpose()
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                self.fields.clear();
-                for (index, window) in window_expressions {
-                    if let Some(existing_window) = &self.window {
-                        if *existing_window != window {
-                            return Err(DataFusionError::Plan(
-                                "window expressions do not match".to_string(),
-                            ));
-                        }
-                    } else {
-                        self.window = Some(window);
-                    }
-                    self.fields.insert(schema.qualified_field(index).into());
-                }
-            }
-            LogicalPlan::Extension(Extension { node }) => {
-                if node.name() == AGGREGATE_EXTENSION_NAME {
-                    let aggregate_extension = node
-                        .as_any()
-                        .downcast_ref::<AggregateExtension>()
-                        .expect("should be aggregate extension");
-
-                    match &aggregate_extension.window_behavior {
-                        WindowBehavior::FromOperator {
-                            window,
-                            window_field,
-                            window_index: _,
-                            is_nested,
-                        } => {
-                            if self.window.is_some() && !*is_nested {
-                                return Err(DataFusionError::Plan(
-                                    "aggregate node should not be recalculating window, as input is windowed.".to_string(),
-                                ));
-                            }
-                            self.window = Some(window.clone());
-                            self.fields.insert(window_field.clone());
-                        }
-                        WindowBehavior::InData => {
-                            let input_fields = self.fields.clone();
-                            self.fields.clear();
-                            for field in fields_with_qualifiers(node.schema()) {
-                                if input_fields.contains(&field) {
-                                    self.fields.insert(field);
-                                }
-                            }
-                            if self.fields.is_empty() {
-                                return Err(DataFusionError::Plan(
-                                    "must have window in aggregate. Make sure you are calling one of the windowing functions (hop, tumble, session) or using the window field of the input".to_string(),
-                                ));
-                            }
-                        }
-                    }
-                }
-            }
-            _ => {}
-        }
-        Ok(TreeNodeRecursion::Continue)
-    }
-}
diff --git a/src/sql/planner/plan/window_fn.rs b/src/sql/planner/plan/window_fn.rs
deleted file mode 100644
index 66f673d1..00000000
--- a/src/sql/planner/plan/window_fn.rs
+++ /dev/null
@@ -1,178 +0,0 @@
-use std::sync::Arc;
-
-use datafusion::common::tree_node::Transformed;
-use datafusion::common::{Result as DFResult, plan_err, tree_node::TreeNodeRewriter};
-use datafusion::logical_expr;
-use datafusion::logical_expr::expr::WindowFunctionParams;
-use datafusion::logical_expr::{
-    Expr, Extension, LogicalPlan, Projection, Sort, Window, expr::WindowFunction,
-};
-use tracing::debug;
-
-use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
-use crate::sql::planner::extension::window_fn::WindowFunctionExtension;
-use crate::sql::planner::plan::{WindowDetectingVisitor, extract_column};
-use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields};
-
-pub(crate) struct WindowFunctionRewriter;
-
-fn get_window_and_name(expr: &Expr) -> DFResult<(WindowFunction, String)> {
-    match expr {
-        Expr::Alias(alias) => {
-            let (window, _) = get_window_and_name(&alias.expr)?;
-            Ok((window, alias.name.clone()))
-        }
-        Expr::WindowFunction(window_function) => {
-            Ok((*window_function.clone(), expr.name_for_alias()?))
-        }
-        _ => plan_err!("Expect a column or alias expression, not {:?}", expr),
-    }
-}
-
-impl TreeNodeRewriter for WindowFunctionRewriter {
-    type Node = LogicalPlan;
-
-    fn f_up(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
-        let LogicalPlan::Window(window) = node else {
-            return Ok(Transformed::no(node));
-        };
-
-        debug!(
-            "Rewriting window function: {:?}",
-            LogicalPlan::Window(window.clone())
-        );
-
-        let mut window_detecting_visitor = WindowDetectingVisitor::default();
-        window
-            .input
-            .visit_with_subqueries(&mut window_detecting_visitor)?;
-
-        let Some(input_window) = window_detecting_visitor.window else {
-            return plan_err!("Window functions require already windowed input");
-        };
-        if matches!(input_window, WindowType::Session { .. }) {
-            return plan_err!("Window functions do not support session windows");
-        }
-
-        let input_window_fields = window_detecting_visitor.fields;
-
-        let Window {
-            input, window_expr, ..
-        } = window;
-
-        if window_expr.len() != 1 {
-            return plan_err!("Window functions require exactly one window expression");
-        }
-
-        let (WindowFunction { fun, params }, original_name) = get_window_and_name(&window_expr[0])?;
-
-        let mut window_field: Vec<_> = params
-            .partition_by
-            .iter()
-            .enumerate()
-            .filter_map(|(index, expr)| {
-                if let Some(column) = extract_column(expr) {
-                    let Ok(input_field) = input
-                        .schema()
-                        .field_with_name(column.relation.as_ref(), &column.name)
-                    else {
-                        return Some(plan_err!(
-                            "Column {} not found in input schema",
-                            column.name
-                        ));
-                    };
-                    if input_window_fields.contains(&(column.relation.as_ref(), input_field).into())
-                    {
-                        return Some(Ok((input_field.clone(), index)));
-                    }
-                }
-                None
-            })
-            .collect::<DFResult<_>>()?;
-
-        if window_field.len() != 1 {
-            return plan_err!(
-                "Window function requires exactly one window expression in partition_by"
-            );
-        }
-
-        let (_window_field, index) = window_field.pop().unwrap();
-        let mut additional_keys = params.partition_by.clone();
-        additional_keys.remove(index);
-        let key_count = additional_keys.len();
-
-        let params = WindowFunctionParams {
-            args: params.args,
-            partition_by: additional_keys.clone(),
-            order_by: params.order_by,
-            window_frame: params.window_frame,
-            null_treatment: params.null_treatment,
-        };
-
-        let new_window_func = WindowFunction { fun, params };
-
-        let mut key_projection_expressions: Vec<_> = additional_keys
-            .iter()
-            .enumerate()
-            .map(|(index, expression)| expression.clone().alias(format!("_key_{index}")))
-            .collect();
-
-        key_projection_expressions.extend(
-            fields_with_qualifiers(input.schema())
-                .iter()
-                .map(|field| Expr::Column(field.qualified_column())),
-        );
-
-        let auto_schema =
-            Projection::try_new(key_projection_expressions.clone(), input.clone())?.schema;
-        let mut key_fields = fields_with_qualifiers(&auto_schema)
-            .iter()
-            .take(additional_keys.len())
-            .cloned()
-            .collect::<Vec<_>>();
-        key_fields.extend(fields_with_qualifiers(input.schema()));
-        let key_schema = Arc::new(schema_from_df_fields(&key_fields)?);
-
-        let key_projection = LogicalPlan::Projection(Projection::try_new_with_schema(
-            key_projection_expressions,
-            input.clone(),
-            key_schema,
-        )?);
-
-        let key_plan = LogicalPlan::Extension(Extension {
-            node: Arc::new(KeyCalculationExtension::new(
-                key_projection,
-                KeysOrExprs::Keys((0..key_count).collect()),
-            )),
-        });
-
-        let mut sort_expressions: Vec<_> = additional_keys
-            .iter()
-            .map(|partition| logical_expr::expr::Sort {
-                expr: partition.clone(),
-                asc: true,
-                nulls_first: false,
-            })
-            .collect();
-        sort_expressions.extend(new_window_func.params.order_by.clone());
-
-        let shuffle = LogicalPlan::Sort(Sort {
-            expr: sort_expressions,
-            input: Arc::new(key_plan),
-            fetch: None,
-        });
-
-        let window_expr =
-            Expr::WindowFunction(Box::new(new_window_func)).alias_if_changed(original_name)?;
-
-        let rewritten_window_plan =
-            LogicalPlan::Window(Window::try_new(vec![window_expr], Arc::new(shuffle))?);
-
-        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
-            node: Arc::new(WindowFunctionExtension::new(
-                rewritten_window_plan,
-                (0..key_count).collect(),
-            )),
-        })))
-    }
-}
diff --git a/src/sql/planner/plan/window_function_rewriter.rs b/src/sql/planner/plan/window_function_rewriter.rs
new file mode 100644
index 00000000..5c8e511b
--- /dev/null
+++ b/src/sql/planner/plan/window_function_rewriter.rs
@@ -0,0 +1,191 @@
+use datafusion::common::tree_node::Transformed;
+use datafusion::common::{Column, Result as DFResult, plan_err, tree_node::TreeNodeRewriter};
+use datafusion::logical_expr::{
+    self, Expr, Extension, LogicalPlan, Projection, Sort, Window, expr::WindowFunction,
+    expr::WindowFunctionParams,
+};
+use datafusion_common::DataFusionError;
+use std::sync::Arc;
+use tracing::debug;
+
+use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::planner::extension::window_fn::WindowFunctionExtension;
+use crate::sql::planner::plan::streaming_window_analzer::{StreamingWindowAnalzer, extract_column};
+use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields};
+
+/// WindowFunctionRewriter transforms standard SQL Window functions into streaming-compatible
+/// stateful operators, ensuring proper data partitioning and sorting for distributed execution.
+pub(crate) struct WindowFunctionRewriter;
+
+impl WindowFunctionRewriter {
+    /// Recursively unwraps Aliases to find the underlying WindowFunction.
+    fn resolve_window_function(&self, expr: &Expr) -> DFResult<(WindowFunction, String)> {
+        match expr {
+            Expr::Alias(alias) => {
+                let (func, _) = self.resolve_window_function(&alias.expr)?;
+                Ok((func, alias.name.clone()))
+            }
+            Expr::WindowFunction(wf) => Ok((wf.as_ref().clone(), expr.name_for_alias()?)),
+            _ => plan_err!("Expected WindowFunction or Alias, found: {:?}", expr),
+        }
+    }
+
+    /// Identifies which field in the PARTITION BY clause corresponds to the streaming window.
+    fn identify_window_partition(
+        &self,
+        params: &WindowFunctionParams,
+        input: &LogicalPlan,
+        input_window_fields: &std::collections::HashSet<crate::sql::types::DFField>,
+    ) -> DFResult<usize> {
+        let matched: Vec<_> = params
+            .partition_by
+            .iter()
+            .enumerate()
+            .filter_map(|(i, e)| {
+                let col = extract_column(e)?;
+                let field = input
+                    .schema()
+                    .field_with_name(col.relation.as_ref(), &col.name)
+                    .ok()?;
+                let df_field = (col.relation.clone(), Arc::new(field.clone())).into();
+
+                if input_window_fields.contains(&df_field) {
+                    Some(i)
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        if matched.len() != 1 {
+            return plan_err!(
+                "Streaming window functions require exactly one window column in PARTITION BY. Found: {}",
+                matched.len()
+            );
+        }
+        Ok(matched[0])
+    }
+
+    /// Wraps the input in a Projection and KeyCalculationExtension to handle data distribution.
+    fn build_keyed_input(
+        &self,
+        input: Arc<LogicalPlan>,
+        partition_keys: &[Expr],
+    ) -> DFResult<LogicalPlan> {
+        let key_count = partition_keys.len();
+
+        // 1. Build projection: [_key_0, _key_1, ..., original_columns]
+        let mut exprs: Vec<_> = partition_keys
+            .iter()
+            .enumerate()
+            .map(|(i, e)| e.clone().alias(format!("_key_{i}")))
+            .collect();
+
+        exprs.extend(
+            fields_with_qualifiers(input.schema())
+                .iter()
+                .map(|f| Expr::Column(f.qualified_column())),
+        );
+
+        // 2. Derive the keyed schema
+        let mut keyed_fields =
+            fields_with_qualifiers(&Projection::try_new(exprs.clone(), input.clone())?.schema)
+                .iter()
+                .take(key_count)
+                .cloned()
+                .collect::<Vec<_>>();
+        keyed_fields.extend(fields_with_qualifiers(input.schema()));
+
+        let keyed_schema = Arc::new(schema_from_df_fields(&keyed_fields)?);
+
+        let projection =
+            LogicalPlan::Projection(Projection::try_new_with_schema(exprs, input, keyed_schema)?);
+
+        // 3. Wrap in KeyCalculationExtension for the physical planner
+        Ok(LogicalPlan::Extension(Extension {
+            node: Arc::new(KeyCalculationExtension::new(
+                projection,
+                KeysOrExprs::Keys((0..key_count).collect()),
+            )),
+        }))
+    }
+}
+
+impl TreeNodeRewriter for WindowFunctionRewriter {
+    type Node = LogicalPlan;
+
+    fn f_up(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
+        let LogicalPlan::Window(window) = node else {
+            return Ok(Transformed::no(node));
+        };
+
+        debug!("Rewriting window function for streaming: {:?}", window);
+
+        // 1. Analyze input windowing context
+        let mut analyzer = StreamingWindowAnalzer::default();
+        window.input.visit_with_subqueries(&mut analyzer)?;
+
+        let input_window = analyzer.window.ok_or_else(|| {
+            DataFusionError::Plan(
+                "Window functions require a windowed input stream (e.g., TUMBLE/HOP)".into(),
+            )
+        })?;
+
+        if matches!(input_window, WindowType::Session { .. }) {
+            return plan_err!(
+                "Streaming window functions (OVER) are not supported on Session windows."
+            );
+        }
+
+        // 2. Validate window expression constraints
+        if window.window_expr.len() != 1 {
+            return plan_err!(
+                "Arroyo currently supports exactly one window expression per OVER clause."
+            );
+        }
+
+        let (mut wf, original_name) = self.resolve_window_function(&window.window_expr[0])?;
+
+        // 3. Identify and extract the window column from PARTITION BY
+        let window_part_idx =
+            self.identify_window_partition(&wf.params, &window.input, &analyzer.fields)?;
+        let mut partition_keys = wf.params.partition_by.clone();
+        partition_keys.remove(window_part_idx);
+
+        // Update function params to exclude the window column from internal partitioning
+        // as the streaming engine handles window boundaries natively.
+        wf.params.partition_by = partition_keys.clone();
+        let key_count = partition_keys.len();
+
+        // 4. Build the data-shuffling pipeline (Projection -> KeyCalc -> Sort)
+        let keyed_plan = self.build_keyed_input(window.input.clone(), &partition_keys)?;
+
+        let mut sort_exprs: Vec<_> = partition_keys
+            .iter()
+            .map(|e| logical_expr::expr::Sort {
+                expr: e.clone(),
+                asc: true,
+                nulls_first: false,
+            })
+            .collect();
+        sort_exprs.extend(wf.params.order_by.clone());
+
+        let sorted_plan = LogicalPlan::Sort(Sort {
+            expr: sort_exprs,
+            input: Arc::new(keyed_plan),
+            fetch: None,
+        });
+
+        // 5. Final Assembly
+        let final_wf_expr = Expr::WindowFunction(Box::new(wf)).alias_if_changed(original_name)?;
+        let rewritten_window =
+            LogicalPlan::Window(Window::try_new(vec![final_wf_expr], Arc::new(sorted_plan))?);
+
+        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+            node: Arc::new(WindowFunctionExtension::new(
+                rewritten_window,
+                (0..key_count).collect(),
+            )),
+        })))
+    }
+}
diff --git a/src/sql/planner/rewrite/mod.rs b/src/sql/planner/rewrite/mod.rs
index 20b2e9bb..bfebae4c 100644
--- a/src/sql/planner/rewrite/mod.rs
+++ b/src/sql/planner/rewrite/mod.rs
@@ -11,7 +11,6 @@
 // limitations under the License.
 
 pub mod async_udf_rewriter;
-pub mod row_time;
 pub mod sink_input_rewriter;
 pub mod source_metadata_visitor;
 pub mod source_rewriter;
@@ -19,7 +18,6 @@ pub mod time_window;
 pub mod unnest_rewriter;
 
 pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter};
-pub use row_time::RowTimeRewriter;
 pub use sink_input_rewriter::SinkInputRewriter;
 pub use source_metadata_visitor::SourceMetadataVisitor;
 pub use source_rewriter::SourceRewriter;
diff --git a/src/sql/planner/rewrite/row_time.rs b/src/sql/planner/rewrite/row_time.rs
deleted file mode 100644
index 51309feb..00000000
--- a/src/sql/planner/rewrite/row_time.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
-use datafusion::common::{Column, Result as DFResult};
-use datafusion::logical_expr::Expr;
-
-use crate::sql::types::TIMESTAMP_FIELD;
-
-/// Rewrites `row_time()` scalar function calls to a column reference on `_timestamp`.
-pub struct RowTimeRewriter {}
-
-impl TreeNodeRewriter for RowTimeRewriter {
-    type Node = Expr;
-
-    fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
-        if let Expr::ScalarFunction(func) = &node
-            && func.name() == "row_time"
-        {
-            let transformed = Expr::Column(Column {
-                relation: None,
-                name: TIMESTAMP_FIELD.to_string(),
-                spans: Default::default(),
-            })
-            .alias("row_time()");
-            return Ok(Transformed::yes(transformed));
-        }
-        Ok(Transformed::no(node))
-    }
-}

From 3c94267334a879c351fc910a02e5cd6e3148cb44 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sat, 21 Mar 2026 18:13:55 +0800
Subject: [PATCH 07/44] update

---
 protocol/proto/fs_api.proto                   |  11 +
 src/coordinator/coordinator.rs                |   2 +-
 src/coordinator/execution/executor.rs         |  23 +-
 src/coordinator/plan/logical_plan_visitor.rs  |  54 +-
 src/coordinator/plan/lookup_table_plan.rs     |   2 +-
 .../plan/streaming_table_connector_plan.rs    |   2 +-
 src/coordinator/plan/streaming_table_plan.rs  |   2 +-
 src/datastream/mod.rs                         |   2 -
 src/lib.rs                                    |   1 -
 src/main.rs                                   |   1 -
 src/runtime/processor/wasm/wasm_processor.rs  |   1 +
 src/server/handler.rs                         |   2 +-
 .../plan => analysis}/aggregate_rewriter.rs   |   8 +-
 .../async_udf_rewriter.rs                     |   8 +-
 .../plan => analysis}/join_rewriter.rs        |   8 +-
 src/sql/analysis/mod.rs                       | 227 +++++++
 .../plan => analysis}/row_time_rewriter.rs    |   0
 .../sink_input_rewriter.rs                    |   5 +-
 .../source_metadata_visitor.rs                |   8 +-
 .../rewrite => analysis}/source_rewriter.rs   |  12 +-
 .../plan => analysis}/stream_rewriter.rs      |  12 +-
 .../streaming_window_analzer.rs               |   4 +-
 .../rewrite => analysis}/time_window.rs       |   0
 src/sql/{planner => analysis}/udafs.rs        |   0
 .../rewrite => analysis}/unnest_rewriter.rs   |   0
 .../window_function_rewriter.rs               |   6 +-
 src/sql/extensions/aggregate.rs               | 607 ++++++++++++++++++
 .../extension => extensions}/debezium.rs      | 129 ++--
 src/sql/extensions/join.rs                    | 120 ++++
 src/sql/extensions/key_calculation.rs         | 242 +++++++
 src/sql/extensions/lookup.rs                  | 194 ++++++
 .../{planner/extension => extensions}/mod.rs  | 251 +++++---
 src/sql/extensions/projection.rs              | 154 +++++
 src/sql/extensions/remote_table.rs            | 124 ++++
 src/sql/extensions/sink.rs                    | 168 +++++
 .../extension => extensions}/table_source.rs  |  58 +-
 src/sql/extensions/updating_aggregate.rs      | 165 +++++
 .../watermark_node.rs                         |  64 +-
 src/sql/extensions/window_fn.rs               | 123 ++++
 src/sql/functions/mod.rs                      |   2 +-
 .../logical_node}/logical.rs                  |  77 ++-
 src/sql/logical_node/mod.rs                   |   1 +
 src/sql/{physical => logical_planner}/mod.rs  |   7 +-
 .../logical_planner}/optimizers.rs            |  14 +-
 .../planner.rs}                               |  24 +-
 src/sql/mod.rs                                |  18 +-
 src/sql/{planner => }/parse.rs                |   0
 src/sql/planner/extension/aggregate.rs        | 348 ----------
 src/sql/planner/extension/join.rs             |  61 --
 src/sql/planner/extension/key_calculation.rs  | 138 ----
 src/sql/planner/extension/lookup.rs           | 127 ----
 src/sql/planner/extension/projection.rs       |  91 ---
 src/sql/planner/extension/remote_table.rs     |  71 --
 src/sql/planner/extension/sink.rs             | 128 ----
 .../planner/extension/updating_aggregate.rs   |  89 ---
 src/sql/planner/extension/window_fn.rs        |  62 --
 src/sql/planner/mod.rs                        | 348 ----------
 src/sql/planner/plan/mod.rs                   |  54 --
 src/sql/planner/rewrite/mod.rs                |  25 -
 src/sql/planner/schemas.rs                    |   5 -
 src/sql/planner/sql_to_plan.rs                |  22 -
 src/sql/{catalog => schema}/connector.rs      |  28 -
 .../{catalog => schema}/connector_table.rs    |  36 +-
 src/sql/{catalog => schema}/field_spec.rs     |   0
 src/sql/{catalog => schema}/insert.rs         |   2 +-
 src/sql/{catalog => schema}/mod.rs            |   4 +-
 src/sql/{catalog => schema}/optimizer.rs      |   2 +-
 .../{planner => schema}/schema_provider.rs    |  11 +-
 src/sql/{catalog => schema}/table.rs          |  23 +-
 src/sql/{catalog => schema}/utils.rs          |   0
 src/types/converter.rs                        |  83 +++
 src/types/df.rs                               | 370 ++++++-----
 src/types/mod.rs                              |   3 +-
 73 files changed, 3013 insertions(+), 2061 deletions(-)
 delete mode 100644 src/datastream/mod.rs
 rename src/sql/{planner/plan => analysis}/aggregate_rewriter.rs (97%)
 rename src/sql/{planner/rewrite => analysis}/async_udf_rewriter.rs (93%)
 rename src/sql/{planner/plan => analysis}/join_rewriter.rs (96%)
 create mode 100644 src/sql/analysis/mod.rs
 rename src/sql/{planner/plan => analysis}/row_time_rewriter.rs (100%)
 rename src/sql/{planner/rewrite => analysis}/sink_input_rewriter.rs (91%)
 rename src/sql/{planner/rewrite => analysis}/source_metadata_visitor.rs (86%)
 rename src/sql/{planner/rewrite => analysis}/source_rewriter.rs (96%)
 rename src/sql/{planner/plan => analysis}/stream_rewriter.rs (96%)
 rename src/sql/{planner/plan => analysis}/streaming_window_analzer.rs (98%)
 rename src/sql/{planner/rewrite => analysis}/time_window.rs (100%)
 rename src/sql/{planner => analysis}/udafs.rs (100%)
 rename src/sql/{planner/rewrite => analysis}/unnest_rewriter.rs (100%)
 rename src/sql/{planner/plan => analysis}/window_function_rewriter.rs (96%)
 create mode 100644 src/sql/extensions/aggregate.rs
 rename src/sql/{planner/extension => extensions}/debezium.rs (63%)
 create mode 100644 src/sql/extensions/join.rs
 create mode 100644 src/sql/extensions/key_calculation.rs
 create mode 100644 src/sql/extensions/lookup.rs
 rename src/sql/{planner/extension => extensions}/mod.rs (66%)
 create mode 100644 src/sql/extensions/projection.rs
 create mode 100644 src/sql/extensions/remote_table.rs
 create mode 100644 src/sql/extensions/sink.rs
 rename src/sql/{planner/extension => extensions}/table_source.rs (54%)
 create mode 100644 src/sql/extensions/updating_aggregate.rs
 rename src/sql/{planner/extension => extensions}/watermark_node.rs (57%)
 create mode 100644 src/sql/extensions/window_fn.rs
 rename src/{datastream => sql/logical_node}/logical.rs (80%)
 create mode 100644 src/sql/logical_node/mod.rs
 rename src/sql/{physical => logical_planner}/mod.rs (99%)
 rename src/{datastream => sql/logical_planner}/optimizers.rs (88%)
 rename src/sql/{planner/physical_planner.rs => logical_planner/planner.rs} (95%)
 rename src/sql/{planner => }/parse.rs (100%)
 delete mode 100644 src/sql/planner/extension/aggregate.rs
 delete mode 100644 src/sql/planner/extension/join.rs
 delete mode 100644 src/sql/planner/extension/key_calculation.rs
 delete mode 100644 src/sql/planner/extension/lookup.rs
 delete mode 100644 src/sql/planner/extension/projection.rs
 delete mode 100644 src/sql/planner/extension/remote_table.rs
 delete mode 100644 src/sql/planner/extension/sink.rs
 delete mode 100644 src/sql/planner/extension/updating_aggregate.rs
 delete mode 100644 src/sql/planner/extension/window_fn.rs
 delete mode 100644 src/sql/planner/mod.rs
 delete mode 100644 src/sql/planner/plan/mod.rs
 delete mode 100644 src/sql/planner/rewrite/mod.rs
 delete mode 100644 src/sql/planner/schemas.rs
 delete mode 100644 src/sql/planner/sql_to_plan.rs
 rename src/sql/{catalog => schema}/connector.rs (57%)
 rename src/sql/{catalog => schema}/connector_table.rs (91%)
 rename src/sql/{catalog => schema}/field_spec.rs (100%)
 rename src/sql/{catalog => schema}/insert.rs (97%)
 rename src/sql/{catalog => schema}/mod.rs (85%)
 rename src/sql/{catalog => schema}/optimizer.rs (98%)
 rename src/sql/{planner => schema}/schema_provider.rs (97%)
 rename src/sql/{catalog => schema}/table.rs (90%)
 rename src/sql/{catalog => schema}/utils.rs (100%)
 create mode 100644 src/types/converter.rs

diff --git a/protocol/proto/fs_api.proto b/protocol/proto/fs_api.proto
index 24525583..b178f6ea 100644
--- a/protocol/proto/fs_api.proto
+++ b/protocol/proto/fs_api.proto
@@ -12,6 +12,17 @@ message ConnectorOp {
   string description = 3;
 }
 
+message ValuePlanOperator {
+  string name = 1; // operator name
+  bytes physical_plan = 2; // NOTE(review): presumably a serialized physical plan; confirm encoding
+}
+
+message KeyPlanOperator {
+  string name = 1; // operator name
+  bytes physical_plan = 2; // NOTE(review): presumably a serialized physical plan; confirm encoding
+  repeated uint64 key_fields = 3; // NOTE(review): presumably indices of the key columns; confirm
+}
+
 message ProjectionOperator {
   string name = 1;
   FsSchema input_schema = 2;
diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs
index 378c670b..8dc55c4d 100644
--- a/src/coordinator/coordinator.rs
+++ b/src/coordinator/coordinator.rs
@@ -20,7 +20,7 @@ use crate::coordinator::execution::Executor;
 use crate::coordinator::plan::{LogicalPlanVisitor, LogicalPlanner, PlanNode};
 use crate::coordinator::statement::Statement;
 use crate::runtime::taskexecutor::TaskManager;
-use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::schema::StreamSchemaProvider;
 
 use super::execution_context::ExecutionContext;
 
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 2dfb6326..8285a2c5 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -19,13 +19,11 @@ use crate::coordinator::plan::{
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::taskexecutor::TaskManager;
+use crate::sql::schema::table::Table as CatalogTable;
+use crate::sql::analysis::{ StreamSchemaProvider};
 use std::sync::Arc;
 use thiserror::Error;
 use tracing::{debug, info};
-use crate::datastream::logical::{LogicalProgram, ProgramConfig};
-use crate::datastream::optimizers::ChainingOptimizer;
-use crate::sql::CompiledSql;
-use crate::sql::planner::{physical_planner, rewrite_sinks};
 
 #[derive(Error, Debug)]
 pub enum ExecuteError {
@@ -222,12 +220,21 @@ impl PlanVisitor for Executor {
 
     fn visit_streaming_table(
         &self,
-        _plan: &StreamingTable,
+        plan: &StreamingTable,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let result = Err(ExecuteError::Internal(
-            "StreamingTable execution not yet implemented".to_string(),
-        ));
+        let result = (|| -> Result<ExecuteResult, ExecuteError> {
+            let catalog_table =
+                CatalogTable::ConnectorTable(plan.connector_table.clone());
+            let mut schema_provider = StreamSchemaProvider::new();
+            schema_provider.insert_catalog_table(catalog_table.clone());
+
+
+            Ok(ExecuteResult::ok_with_data(
+                format!("Streaming table '{}' compiled successfully", plan.name),
+                empty_record_batch(),
+            ))
+        })();
         PlanVisitorResult::Execute(result)
     }
 
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index dfcf2e10..1daf5a16 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -13,8 +13,10 @@
 use std::sync::Arc;
 
 use datafusion::common::{Result, plan_datafusion_err, plan_err};
+use datafusion::execution::SessionStateBuilder;
 use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement};
 use datafusion_common::TableReference;
+use datafusion_execution::config::SessionConfig;
 use datafusion_expr::{Expr, Extension, LogicalPlan, col};
 use sqlparser::ast::Statement;
 use tracing::debug;
@@ -30,21 +32,24 @@ use crate::coordinator::statement::{
     StreamingTableStatement,
 };
 use crate::coordinator::tool::ConnectorOptions;
-use crate::sql::catalog::Table;
-use crate::sql::catalog::connector::ConnectionType;
-use crate::sql::catalog::connector_table::ConnectorTable;
-use crate::sql::catalog::field_spec::FieldSpec;
-use crate::sql::catalog::optimizer::produce_optimized_plan;
+use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig};
+use crate::sql::logical_planner::optimizers::ChainingOptimizer;
+use crate::sql::schema::Table;
+use crate::sql::schema::connector::ConnectionType;
+use crate::sql::schema::connector_table::ConnectorTable;
+use crate::sql::schema::field_spec::FieldSpec;
+use crate::sql::schema::optimizer::produce_optimized_plan;
 use crate::sql::functions::{is_json_union, serialize_outgoing_json};
-use crate::sql::planner::extension::sink::SinkExtension;
-use crate::sql::planner::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks};
+use crate::sql::extensions::sink::SinkExtension;
+use crate::sql::logical_planner::planner;
+use crate::sql::analysis::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks};
 use crate::sql::rewrite_plan;
 
 const CONNECTOR: &str = "connector";
 const PARTITION_BY: &str = "partition_by";
 const IDLE_MICROS: &str = "idle_time";
 
-/// 将 WITH 选项列表转为 key-value map，便于读取 connector 等配置。
+/// Convert `WITH` option list to a key-value map (e.g. connector settings).
 fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap<String, String> {
     options
         .iter()
@@ -153,6 +158,8 @@ impl LogicalPlanVisitor {
             primary_keys: Arc::new(vec![]), // PKs are inferred or explicitly set here
             inferred_fields: None,
             partition_exprs: Arc::new(partition_exprs),
+            lookup_cache_ttl:None,
+            lookup_cache_max_bytes:None,
         };
 
         // 6. Sink Extension & Final Rewrites
@@ -173,6 +180,37 @@ impl LogicalPlanVisitor {
         let final_extensions = rewrite_sinks(vec![plan_with_keys])?;
         let final_plan = final_extensions.into_iter().next().unwrap();
 
+
+
+        let mut config = SessionConfig::new();
+        config
+            .options_mut()
+            .optimizer
+            .enable_round_robin_repartition = false;
+        config.options_mut().optimizer.repartition_aggregations = false;
+        config.options_mut().optimizer.repartition_windows = false;
+        config.options_mut().optimizer.repartition_sorts = false;
+        config.options_mut().optimizer.repartition_joins = false;
+        config.options_mut().execution.target_partitions = 1;
+
+        let session_state = SessionStateBuilder::new()
+            .with_config(config)
+            .with_default_features()
+            .with_physical_optimizer_rules(vec![])
+            .build();
+
+        let mut plan_to_graph_visitor =
+            planner::PlanToGraphVisitor::new(&self.schema_provider, &session_state);
+
+        plan_to_graph_visitor.add_plan(final_plan.clone())?;
+
+        let graph = plan_to_graph_visitor.into_graph();
+
+        let mut program = LogicalProgram::new(graph, ProgramConfig::default());
+
+        program.optimize(&ChainingOptimizer {});
+
+
         Ok(Box::new(StreamingTable {
             name: table_name,
             comment: comment.clone(),
diff --git a/src/coordinator/plan/lookup_table_plan.rs b/src/coordinator/plan/lookup_table_plan.rs
index 889f57e1..e0ea06ba 100644
--- a/src/coordinator/plan/lookup_table_plan.rs
+++ b/src/coordinator/plan/lookup_table_plan.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use crate::sql::catalog::connector_table::ConnectorTable;
+use crate::sql::schema::connector_table::ConnectorTable;
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
diff --git a/src/coordinator/plan/streaming_table_connector_plan.rs b/src/coordinator/plan/streaming_table_connector_plan.rs
index be1cda31..c2407ec8 100644
--- a/src/coordinator/plan/streaming_table_connector_plan.rs
+++ b/src/coordinator/plan/streaming_table_connector_plan.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use crate::sql::catalog::connector_table::ConnectorTable;
+use crate::sql::schema::connector_table::ConnectorTable;
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs
index 577e6494..30e519f8 100644
--- a/src/coordinator/plan/streaming_table_plan.rs
+++ b/src/coordinator/plan/streaming_table_plan.rs
@@ -11,7 +11,7 @@
 // limitations under the License.
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
-use crate::sql::catalog::connector_table::ConnectorTable;
+use crate::sql::schema::connector_table::ConnectorTable;
 use datafusion::logical_expr::LogicalPlan;
 
 /// Plan node representing a fully resolved streaming table (DDL).
diff --git a/src/datastream/mod.rs b/src/datastream/mod.rs
deleted file mode 100644
index 994a96b4..00000000
--- a/src/datastream/mod.rs
+++ /dev/null
@@ -1,2 +0,0 @@
-pub mod logical;
-pub mod optimizers;
diff --git a/src/lib.rs b/src/lib.rs
index a41536c5..0a3c6dc6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,7 +17,6 @@
 pub mod api;
 pub mod config;
 pub mod coordinator;
-pub mod datastream;
 pub mod logging;
 pub mod runtime;
 pub mod server;
diff --git a/src/main.rs b/src/main.rs
index 29935d62..e847b16c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -15,7 +15,6 @@
 mod api;
 mod config;
 mod coordinator;
-mod datastream;
 mod logging;
 mod runtime;
 mod server;
diff --git a/src/runtime/processor/wasm/wasm_processor.rs b/src/runtime/processor/wasm/wasm_processor.rs
index 1afc9dcf..cd61be98 100644
--- a/src/runtime/processor/wasm/wasm_processor.rs
+++ b/src/runtime/processor/wasm/wasm_processor.rs
@@ -679,3 +679,4 @@ impl WasmProcessor for WasmProcessorImpl {
         Ok(())
     }
 }
+
diff --git a/src/server/handler.rs b/src/server/handler.rs
index bf9350e6..1920680c 100644
--- a/src/server/handler.rs
+++ b/src/server/handler.rs
@@ -29,7 +29,7 @@ use crate::coordinator::{
     CreateFunction, CreatePythonFunction, DataSet, DropFunction, ShowFunctions,
     ShowFunctionsResult, StartFunction, Statement, StopFunction,
 };
-use crate::sql::planner::parse::parse_sql;
+use crate::sql::parse::parse_sql;
 
 pub struct FunctionStreamServiceImpl {
     coordinator: Arc<Coordinator>,
diff --git a/src/sql/planner/plan/aggregate_rewriter.rs b/src/sql/analysis/aggregate_rewriter.rs
similarity index 97%
rename from src/sql/planner/plan/aggregate_rewriter.rs
rename to src/sql/analysis/aggregate_rewriter.rs
index 802fa180..04ac0896 100644
--- a/src/sql/planner/plan/aggregate_rewriter.rs
+++ b/src/sql/analysis/aggregate_rewriter.rs
@@ -5,10 +5,10 @@ use datafusion::logical_expr::{self, Aggregate, Expr, Extension, LogicalPlan, Pr
 use datafusion::prelude::col;
 use std::sync::Arc;
 
-use crate::sql::planner::StreamSchemaProvider;
-use crate::sql::planner::extension::aggregate::AggregateExtension;
-use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
-use crate::sql::planner::plan::streaming_window_analzer::StreamingWindowAnalzer;
+use crate::sql::schema::StreamSchemaProvider;
+use crate::sql::extensions::aggregate::AggregateExtension;
+use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer;
 use crate::sql::types::{
     DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
     schema_from_df_fields_with_metadata,
diff --git a/src/sql/planner/rewrite/async_udf_rewriter.rs b/src/sql/analysis/async_udf_rewriter.rs
similarity index 93%
rename from src/sql/planner/rewrite/async_udf_rewriter.rs
rename to src/sql/analysis/async_udf_rewriter.rs
index def3c4ef..9584c022 100644
--- a/src/sql/planner/rewrite/async_udf_rewriter.rs
+++ b/src/sql/analysis/async_udf_rewriter.rs
@@ -1,6 +1,6 @@
-use crate::sql::planner::extension::remote_table::RemoteTableExtension;
-use crate::sql::planner::extension::{ASYNC_RESULT_FIELD, AsyncUDFExtension};
-use crate::sql::planner::mod_prelude::StreamSchemaProvider;
+use crate::sql::extensions::remote_table::RemoteTableExtension;
+use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncUDFExtension};
+use crate::sql::schema::StreamSchemaProvider;
 use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
 use datafusion::common::{Column, Result as DFResult, TableReference, plan_err};
 use datafusion::logical_expr::expr::ScalarFunction;
@@ -88,6 +88,7 @@ impl TreeNodeRewriter for AsyncUdfRewriter<'_> {
         let Some((name, opts, arg_exprs)) = args else {
             return Ok(Transformed::no(LogicalPlan::Projection(projection)));
         };
+        let udf = self.provider.dylib_udfs.get(&name).unwrap().clone();
 
         let input = if matches!(*projection.input, LogicalPlan::Projection(..)) {
             Arc::new(LogicalPlan::Extension(Extension {
@@ -106,6 +107,7 @@ impl TreeNodeRewriter for AsyncUdfRewriter<'_> {
             node: Arc::new(AsyncUDFExtension {
                 input,
                 name,
+                udf,
                 arg_exprs,
                 final_exprs: projection.expr,
                 ordered: opts.ordered,
diff --git a/src/sql/planner/plan/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs
similarity index 96%
rename from src/sql/planner/plan/join_rewriter.rs
rename to src/sql/analysis/join_rewriter.rs
index f6031183..465d4620 100644
--- a/src/sql/planner/plan/join_rewriter.rs
+++ b/src/sql/analysis/join_rewriter.rs
@@ -1,7 +1,7 @@
-use crate::sql::planner::StreamSchemaProvider;
-use crate::sql::planner::extension::join::JoinExtension;
-use crate::sql::planner::extension::key_calculation::KeyCalculationExtension;
-use crate::sql::planner::plan::streaming_window_analzer::StreamingWindowAnalzer;
+use crate::sql::schema::StreamSchemaProvider;
+use crate::sql::extensions::join::JoinExtension;
+use crate::sql::extensions::key_calculation::KeyCalculationExtension;
+use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer;
 use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata};
 use crate::types::TIMESTAMP_FIELD;
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs
new file mode 100644
index 00000000..04230aa0
--- /dev/null
+++ b/src/sql/analysis/mod.rs
@@ -0,0 +1,227 @@
+#![allow(clippy::new_without_default)]
+
+pub(crate) mod aggregate_rewriter;
+pub(crate) mod join_rewriter;
+pub(crate) mod row_time_rewriter;
+pub(crate) mod stream_rewriter;
+pub(crate) mod streaming_window_analzer;
+pub(crate) mod window_function_rewriter;
+
+pub mod async_udf_rewriter;
+pub mod sink_input_rewriter;
+pub mod source_metadata_visitor;
+pub mod source_rewriter;
+pub mod time_window;
+pub mod unnest_rewriter;
+
+pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter};
+pub use sink_input_rewriter::SinkInputRewriter;
+pub use source_metadata_visitor::SourceMetadataVisitor;
+pub use source_rewriter::SourceRewriter;
+pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker, is_time_window};
+pub use unnest_rewriter::{UNNESTED_COL, UnnestRewriter};
+
+pub use crate::sql::schema::schema_provider::{
+    LogicalBatchInput, StreamSchemaProvider, StreamTable,
+};
+
+pub(crate) mod mod_prelude {
+    pub use super::StreamSchemaProvider; // re-export so `mod_prelude::StreamSchemaProvider` resolves
+}
+
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use datafusion::common::tree_node::{Transformed, TreeNode};
+use datafusion::common::{Result, plan_err};
+use datafusion::error::DataFusionError;
+use datafusion::execution::SessionStateBuilder;
+use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::prelude::SessionConfig;
+use datafusion::sql::TableReference;
+use datafusion::sql::sqlparser::ast::{OneOrManyWithParens, Statement};
+use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
+use datafusion::sql::sqlparser::parser::Parser;
+use tracing::{debug, info, instrument};
+
+use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig};
+use crate::sql::logical_planner::optimizers::ChainingOptimizer;
+use crate::sql::schema::insert::Insert;
+use crate::sql::schema::table::Table as CatalogTable;
+use crate::sql::functions::{is_json_union, serialize_outgoing_json};
+use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::extensions::projection::ProjectionExtension;
+use crate::sql::extensions::sink::SinkExtension;
+use crate::sql::extensions::{ StreamExtension};
+use crate::sql::logical_planner::planner::NamedNode;
+use crate::sql::types::SqlConfig;
+
+// ── Compilation pipeline ──────────────────────────────────────────────
+
+#[derive(Clone, Debug)]
+pub struct CompiledSql {
+    pub program: LogicalProgram, // the logical program produced for the statement
+    pub connection_ids: Vec<i64>, // NOTE(review): presumably ids of the connections used; confirm
+}
+
+/// Converts a SQL interval expression into a [`std::time::Duration`].
+///
+/// Accepts `INTERVAL '<value> <unit>'` or a bare single-quoted `'<value> <unit>'` string.
+fn duration_from_sql_expr(
+    expr: &datafusion::sql::sqlparser::ast::Expr,
+) -> Result<std::time::Duration> {
+    use datafusion::sql::sqlparser::ast::{Expr as SqlExpr, Value as SqlValue, ValueWithSpan};
+
+    // Unwrap `INTERVAL ...` to its inner value, remembering which form we started from
+    // because the two forms report different errors.
+    let (literal, from_interval) = match expr {
+        SqlExpr::Interval(interval) => (interval.value.as_ref(), true),
+        other => (other, false),
+    };
+
+    match literal {
+        SqlExpr::Value(ValueWithSpan {
+            value: SqlValue::SingleQuotedString(text),
+            ..
+        }) => parse_interval_to_duration(text),
+        other if from_interval => {
+            plan_err!("expected interval string literal, found {other}")
+        }
+        other => plan_err!("expected an interval expression, found {other}"),
+    }
+}
+
+/// Parses `'<value> <unit>'` (e.g. `'5 minutes'`) into a `Duration`; errors on `u64` overflow.
+fn parse_interval_to_duration(s: &str) -> Result<std::time::Duration> {
+    let [number, unit] = s.split_whitespace().collect::<Vec<_>>()[..] else {
+        return plan_err!("invalid interval string '{s}'; expected '<value> <unit>'");
+    };
+    let bad_number = || DataFusionError::Plan(format!("invalid interval number: {number}"));
+    let value: u64 = number.parse().map_err(|_| bad_number())?;
+    let secs = match unit.to_lowercase().as_str() { // `None` = seconds overflow `u64`
+        "second" | "seconds" | "s" => Some(value),
+        "minute" | "minutes" | "min" => value.checked_mul(60),
+        "hour" | "hours" | "h" => value.checked_mul(3600),
+        "day" | "days" | "d" => value.checked_mul(86400),
+        unit => return plan_err!("unsupported interval unit '{unit}'"),
+    };
+    secs.map(std::time::Duration::from_secs).ok_or_else(bad_number)
+}
+
+/// Groups the inputs of every named `SinkExtension` in `extensions` by the sink's node name.
+fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap<NamedNode, Vec<LogicalPlan>> {
+    let mut inputs_by_sink: HashMap<NamedNode, Vec<LogicalPlan>> = HashMap::new();
+    for plan in extensions {
+        let LogicalPlan::Extension(extension) = plan else {
+            continue;
+        };
+        let Some(sink) = extension.node.as_any().downcast_ref::<SinkExtension>() else {
+            continue;
+        };
+        let Some(sink_name) = sink.node_name() else {
+            continue;
+        };
+        let sink_entry = inputs_by_sink.entry(sink_name).or_default();
+        sink_entry.extend(sink.inputs().into_iter().cloned());
+    }
+    inputs_by_sink
+}
+
+/// Keys a partitioned sink's inputs by its partition expressions; other plans pass through.
+pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<LogicalPlan> {
+    let LogicalPlan::Extension(ref ext) = plan else {
+        return Ok(plan);
+    };
+
+    let Some(sink) = ext.node.as_any().downcast_ref::<SinkExtension>() else {
+        return Ok(plan);
+    };
+
+    let Some(partition_exprs) = sink.table.partition_exprs() else {
+        return Ok(plan);
+    };
+    if partition_exprs.is_empty() {
+        return Ok(plan);
+    }
+    // Wrap every sink input in a key calculation driven by the partition expressions.
+    let inputs = plan
+        .inputs()
+        .into_iter()
+        .map(|input| {
+            Ok(LogicalPlan::Extension(Extension {
+                node: Arc::new(KeyCalculationExtension {
+                    name: Some("key-calc-partition".to_string()),
+                    schema: input.schema().clone(),
+                    input: input.clone(),
+                    keys: KeysOrExprs::Exprs(partition_exprs.clone()),
+                }),
+            }))
+        })
+        .collect::<Result<_>>()?;
+    // "unkey" projects the sink schema's columns by name (`.shuffled()` semantics: TODO confirm).
+    use datafusion::prelude::col;
+    let unkey = LogicalPlan::Extension(Extension {
+        node: Arc::new(
+            ProjectionExtension::new(
+                inputs,
+                Some("unkey".to_string()),
+                sink.schema().iter().map(|(_, f)| col(f.name())).collect(),
+            )
+            .shuffled(),
+        ),
+    });
+
+    let node = sink.with_exprs_and_inputs(vec![], vec![unkey])?;
+    Ok(LogicalPlan::Extension(Extension {
+        node: Arc::new(node),
+    }))
+}
+
+/// Rewrites every plan with `SinkInputRewriter`, drops the plans the rewriter flags as
+/// removed, then passes each survivor through `maybe_add_key_extension_to_sink`.
+pub fn rewrite_sinks(extensions: Vec<LogicalPlan>) -> Result<Vec<LogicalPlan>> {
+    let mut sink_inputs = build_sink_inputs(&extensions);
+    let mut kept = Vec::new();
+    for plan in extensions {
+        let mut rewriter = SinkInputRewriter::new(&mut sink_inputs);
+        let Transformed { data, .. } = plan.rewrite(&mut rewriter)?;
+        if rewriter.was_removed {
+            continue;
+        }
+        kept.push(data);
+    }
+    kept.into_iter()
+        .map(maybe_add_key_extension_to_sink)
+        .collect()
+}
+
+/// Turns a plain DataFusion `LogicalPlan` into its streaming-aware form.
+///
+/// Runs `StreamRewriter` over the plan and its subqueries, then walks the result with
+/// `TimeWindowUdfChecker`.
+///
+/// # Errors
+///
+/// Propagates any error raised by the rewrite pass or by the checker.
+#[instrument(skip_all, level = "debug")]
+pub fn rewrite_plan(
+    plan: LogicalPlan,
+    schema_provider: &StreamSchemaProvider,
+) -> Result<LogicalPlan> {
+    info!("Starting streaming plan rewrite pipeline");
+
+    let mut stream_pass = stream_rewriter::StreamRewriter::new(schema_provider);
+    // Only the rewritten tree is needed; the `transformed` flag is ignored.
+    let streaming_plan = plan.rewrite_with_subqueries(&mut stream_pass)?.data;
+    streaming_plan.visit_with_subqueries(&mut TimeWindowUdfChecker {})?;
+
+    // The graphviz dump is only emitted in builds with debug assertions enabled.
+    if cfg!(debug_assertions) {
+        debug!(
+            "Streaming logical plan graphviz:\n{}",
+            streaming_plan.display_graphviz()
+        );
+    }
+    info!("Streaming plan rewrite completed successfully");
+    Ok(streaming_plan)
+}
diff --git a/src/sql/planner/plan/row_time_rewriter.rs b/src/sql/analysis/row_time_rewriter.rs
similarity index 100%
rename from src/sql/planner/plan/row_time_rewriter.rs
rename to src/sql/analysis/row_time_rewriter.rs
diff --git a/src/sql/planner/rewrite/sink_input_rewriter.rs b/src/sql/analysis/sink_input_rewriter.rs
similarity index 91%
rename from src/sql/planner/rewrite/sink_input_rewriter.rs
rename to src/sql/analysis/sink_input_rewriter.rs
index e6b6a0bd..b33ac647 100644
--- a/src/sql/planner/rewrite/sink_input_rewriter.rs
+++ b/src/sql/analysis/sink_input_rewriter.rs
@@ -1,10 +1,11 @@
-use crate::sql::planner::extension::sink::SinkExtension;
-use crate::sql::planner::extension::{NamedNode, StreamExtension};
+use crate::sql::extensions::sink::SinkExtension;
+use crate::sql::extensions::{StreamExtension};
 use datafusion::common::Result as DFResult;
 use datafusion::common::tree_node::{Transformed, TreeNodeRecursion, TreeNodeRewriter};
 use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
 use std::collections::HashMap;
 use std::sync::Arc;
+use crate::sql::logical_planner::planner::NamedNode;
 
 type SinkInputs = HashMap<NamedNode, Vec<LogicalPlan>>;
 
diff --git a/src/sql/planner/rewrite/source_metadata_visitor.rs b/src/sql/analysis/source_metadata_visitor.rs
similarity index 86%
rename from src/sql/planner/rewrite/source_metadata_visitor.rs
rename to src/sql/analysis/source_metadata_visitor.rs
index 168ff712..a49a7e72 100644
--- a/src/sql/planner/rewrite/source_metadata_visitor.rs
+++ b/src/sql/analysis/source_metadata_visitor.rs
@@ -1,6 +1,6 @@
-use crate::sql::planner::extension::sink::SinkExtension;
-use crate::sql::planner::extension::table_source::TableSourceExtension;
-use crate::sql::planner::mod_prelude::StreamSchemaProvider;
+use crate::sql::extensions::sink::SinkExtension;
+use crate::sql::extensions::table_source::TableSourceExtension;
+use crate::sql::schema::StreamSchemaProvider;
 use datafusion::common::Result as DFResult;
 use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor};
 use datafusion::logical_expr::{Extension, LogicalPlan};
@@ -39,7 +39,7 @@ impl<'a> SourceMetadataVisitor<'a> {
 
         let table = self.schema_provider.get_catalog_table(&table_name)?;
         match table {
-            crate::sql::catalog::table::Table::ConnectorTable(t) => t.id,
+            crate::sql::schema::table::Table::ConnectorTable(t) => t.id,
             _ => None,
         }
     }
diff --git a/src/sql/planner/rewrite/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs
similarity index 96%
rename from src/sql/planner/rewrite/source_rewriter.rs
rename to src/sql/analysis/source_rewriter.rs
index 27281b41..1bba1551 100644
--- a/src/sql/planner/rewrite/source_rewriter.rs
+++ b/src/sql/analysis/source_rewriter.rs
@@ -20,12 +20,12 @@ use datafusion::logical_expr::{
     self, BinaryExpr, Expr, Extension, LogicalPlan, Projection, TableScan,
 };
 
-use crate::sql::catalog::connector_table::ConnectorTable;
-use crate::sql::catalog::field_spec::FieldSpec;
-use crate::sql::catalog::table::Table;
-use crate::sql::planner::StreamSchemaProvider;
-use crate::sql::planner::extension::remote_table::RemoteTableExtension;
-use crate::sql::planner::extension::watermark_node::WatermarkNode;
+use crate::sql::schema::connector_table::ConnectorTable;
+use crate::sql::schema::field_spec::FieldSpec;
+use crate::sql::schema::table::Table;
+use crate::sql::schema::StreamSchemaProvider;
+use crate::sql::extensions::remote_table::RemoteTableExtension;
+use crate::sql::extensions::watermark_node::WatermarkNode;
 use crate::sql::types::TIMESTAMP_FIELD;
 
 /// Rewrites table scans into proper source nodes with projections and watermarks.
diff --git a/src/sql/planner/plan/stream_rewriter.rs b/src/sql/analysis/stream_rewriter.rs
similarity index 96%
rename from src/sql/planner/plan/stream_rewriter.rs
rename to src/sql/analysis/stream_rewriter.rs
index c3caed0e..999b1fb8 100644
--- a/src/sql/planner/plan/stream_rewriter.rs
+++ b/src/sql/analysis/stream_rewriter.rs
@@ -1,15 +1,15 @@
 use std::sync::Arc;
 
 use super::StreamSchemaProvider;
-use crate::sql::planner::extension::StreamExtension;
-use crate::sql::planner::extension::remote_table::RemoteTableExtension;
-use crate::sql::planner::plan::row_time_rewriter::RowTimeRewriter;
-use crate::sql::planner::plan::{
+use crate::sql::extensions::StreamExtension;
+use crate::sql::extensions::remote_table::RemoteTableExtension;
+use crate::sql::analysis::row_time_rewriter::RowTimeRewriter;
+use crate::sql::analysis::{
     aggregate_rewriter::AggregateRewriter, join_rewriter::JoinRewriter,
     window_function_rewriter::WindowFunctionRewriter,
 };
-use crate::sql::planner::rewrite::TimeWindowNullCheckRemover;
-use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
+use crate::sql::analysis::TimeWindowNullCheckRemover;
+use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field};
 use crate::sql::types::{DFField, TIMESTAMP_FIELD};
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{Column, DataFusionError, Result, Spans, TableReference, plan_err};
diff --git a/src/sql/planner/plan/streaming_window_analzer.rs b/src/sql/analysis/streaming_window_analzer.rs
similarity index 98%
rename from src/sql/planner/plan/streaming_window_analzer.rs
rename to src/sql/analysis/streaming_window_analzer.rs
index db3506b7..59ded792 100644
--- a/src/sql/planner/plan/streaming_window_analzer.rs
+++ b/src/sql/analysis/streaming_window_analzer.rs
@@ -5,8 +5,8 @@ use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor
 use datafusion::common::{Column, DFSchema, DataFusionError, Result};
 use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias};
 
-use crate::sql::planner::extension::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension};
-use crate::sql::planner::extension::join::JOIN_NODE_NAME;
+use crate::sql::extensions::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension};
+use crate::sql::extensions::join::JOIN_NODE_NAME;
 use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window};
 
 /// WindowDetectingVisitor identifies windowing strategies and tracks window-carrying fields
diff --git a/src/sql/planner/rewrite/time_window.rs b/src/sql/analysis/time_window.rs
similarity index 100%
rename from src/sql/planner/rewrite/time_window.rs
rename to src/sql/analysis/time_window.rs
diff --git a/src/sql/planner/udafs.rs b/src/sql/analysis/udafs.rs
similarity index 100%
rename from src/sql/planner/udafs.rs
rename to src/sql/analysis/udafs.rs
diff --git a/src/sql/planner/rewrite/unnest_rewriter.rs b/src/sql/analysis/unnest_rewriter.rs
similarity index 100%
rename from src/sql/planner/rewrite/unnest_rewriter.rs
rename to src/sql/analysis/unnest_rewriter.rs
diff --git a/src/sql/planner/plan/window_function_rewriter.rs b/src/sql/analysis/window_function_rewriter.rs
similarity index 96%
rename from src/sql/planner/plan/window_function_rewriter.rs
rename to src/sql/analysis/window_function_rewriter.rs
index 5c8e511b..ce580eaf 100644
--- a/src/sql/planner/plan/window_function_rewriter.rs
+++ b/src/sql/analysis/window_function_rewriter.rs
@@ -8,9 +8,9 @@ use datafusion_common::DataFusionError;
 use std::sync::Arc;
 use tracing::debug;
 
-use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
-use crate::sql::planner::extension::window_fn::WindowFunctionExtension;
-use crate::sql::planner::plan::streaming_window_analzer::{StreamingWindowAnalzer, extract_column};
+use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::extensions::window_fn::WindowFunctionExtension;
+use crate::sql::analysis::streaming_window_analzer::{StreamingWindowAnalzer, extract_column};
 use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields};
 
 /// WindowFunctionRewriter transforms standard SQL Window functions into streaming-compatible
diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs
new file mode 100644
index 00000000..c8c070f2
--- /dev/null
+++ b/src/sql/extensions/aggregate.rs
@@ -0,0 +1,607 @@
+use std::fmt::Formatter;
+use std::sync::Arc;
+use std::time::Duration;
+use arrow_array::types::IntervalMonthDayNanoType;
+use datafusion::common::{Column, DFSchemaRef, Result, ScalarValue, internal_err};
+use datafusion::logical_expr;
+use datafusion::logical_expr::{
+    BinaryExpr, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, expr::ScalarFunction,
+};
+use datafusion_common::{plan_err, DFSchema, DataFusionError};
+use datafusion_expr::Aggregate;
+use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec};
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use prost::Message;
+use protocol::grpc::api::{ SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::multifield_partial_ord;
+use crate::sql::logical_planner::{window, FsPhysicalExtensionCodec};
+use crate::sql::extensions::{ NodeWithIncomingEdges, StreamExtension, TimestampAppendExtension};
+use crate::sql::logical_planner::planner::{NamedNode, Planner, SplitPlanOutput};
+use crate::sql::types::{
+    DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers,
+    schema_from_df_fields, schema_from_df_fields_with_metadata,
+};
+use crate::types::{FsSchema, FsSchemaRef};
+
+pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension"; // returned by `AggregateExtension::name()`
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct AggregateExtension {
+    pub(crate) window_behavior: WindowBehavior, // selects which operator config `plan_node` builds
+    pub(crate) aggregate: LogicalPlan, // the only input reported by `inputs()`
+    pub(crate) schema: DFSchemaRef, // copied from `final_calculation`'s schema in `new`
+    pub(crate) key_fields: Vec<usize>, // key indices handed to `FsSchema::from_schema_keys`
+    pub(crate) final_calculation: LogicalPlan, // result of `Self::final_projection` (see `new`)
+}
+
+multifield_partial_ord!(AggregateExtension, aggregate, key_fields, final_calculation);
+
+impl AggregateExtension {
+    /// Builds the node from `aggregate`; panics if `Self::final_projection` rejects the plan.
+    pub fn new(
+        window_behavior: WindowBehavior,
+        aggregate: LogicalPlan,
+        key_fields: Vec<usize>,
+    ) -> Self {
+        let projection = Self::final_projection(&aggregate, window_behavior.clone()).unwrap();
+        let schema = projection.schema().clone();
+        Self {
+            window_behavior,
+            aggregate,
+            schema,
+            key_fields,
+            final_calculation: projection,
+        }
+    }
+
+    /// Plans a `TumblingWindowAggregate` node for windows of length `width`.
+    ///
+    /// The operator config carries the serialized binning function, the keyed input
+    /// schema, the partial/final aggregation plans and the final projection plan.
+    pub fn tumbling_window_config(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schema: DFSchemaRef,
+        width: Duration,
+    ) -> Result<LogicalNode> {
+        let binning_function = planner.binning_function_proto(width, input_schema.clone())?;
+        let split = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?;
+
+        let projection_plan = planner.sync_plan(&self.final_calculation)?;
+        let projection_node = PhysicalPlanNode::try_from_physical_plan(
+            projection_plan,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+        let keyed_input = FsSchema::from_schema_keys(
+            Arc::new(input_schema.as_ref().into()),
+            self.key_fields.clone(),
+        )?;
+
+        let config = TumblingWindowAggregateOperator {
+            name: "TumblingWindow".to_string(),
+            width_micros: width.as_micros() as u64,
+            binning_function: binning_function.encode_to_vec(),
+            input_schema: Some(keyed_input.into()),
+            partial_schema: Some(split.partial_schema.into()),
+            partial_aggregation_plan: split.partial_aggregation_plan.encode_to_vec(),
+            final_aggregation_plan: split.finish_plan.encode_to_vec(),
+            final_projection: Some(projection_node.encode_to_vec()),
+        };
+        let payload = config.encode_to_vec();
+        let description = format!("TumblingWindow<{}>", config.name);
+        Ok(LogicalNode::single(
+            index as u32,
+            format!("tumbling_{index}"),
+            OperatorName::TumblingWindowAggregate,
+            payload,
+            description,
+            1,
+        ))
+    }
+
+    /// Plans a `SlidingWindowAggregate` node from the given `width` and `slide`.
+    ///
+    /// Note that the binning function is derived from `slide`, not from `width`. Errors from
+    /// the planner, plan serialization or key-schema construction are propagated.
+    pub fn sliding_window_config(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schema: DFSchemaRef,
+        width: Duration,
+        slide: Duration,
+    ) -> Result<LogicalNode> {
+        let binning_function = planner.binning_function_proto(slide, input_schema.clone())?;
+        let split = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?;
+
+        // The final projection is shipped as a serialized physical plan.
+        let projection_plan = planner.sync_plan(&self.final_calculation)?;
+        let projection_node = PhysicalPlanNode::try_from_physical_plan(
+            projection_plan,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+        let keyed_input = FsSchema::from_schema_keys(
+            Arc::new(input_schema.as_ref().into()),
+            self.key_fields.clone(),
+        )?;
+
+        let config = SlidingWindowAggregateOperator {
+            name: format!("SlidingWindow<{width:?}>"),
+            width_micros: width.as_micros() as u64,
+            slide_micros: slide.as_micros() as u64,
+            binning_function: binning_function.encode_to_vec(),
+            input_schema: Some(keyed_input.into()),
+            partial_schema: Some(split.partial_schema.into()),
+            partial_aggregation_plan: split.partial_aggregation_plan.encode_to_vec(),
+            final_aggregation_plan: split.finish_plan.encode_to_vec(),
+            // Unlike the tumbling operator config, this field is not wrapped in `Option`.
+            final_projection: projection_node.encode_to_vec(),
+            // TODO add final aggregation.
+        };
+
+        Ok(LogicalNode::single(
+            index as u32,
+            format!("sliding_window_{index}"),
+            OperatorName::SlidingWindowAggregate,
+            config.encode_to_vec(),
+            "sliding window".to_string(),
+            1,
+        ))
+    }
+
+    /// Plans a `SessionWindowAggregate` node; the aggregate is re-planned without group keys.
+    ///
+    /// # Errors
+    /// Fails unless this is a non-nested session window over a `LogicalPlan::Aggregate`.
+    pub fn session_window_config(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schema: DFSchemaRef,
+    ) -> Result<LogicalNode> {
+        let WindowBehavior::FromOperator {
+            window: WindowType::Session { gap },
+            window_index,
+            window_field,
+            is_nested: false,
+        } = &self.window_behavior
+        else {
+            return plan_err!("expected session window");
+        };
+        let LogicalPlan::Aggregate(agg) = &self.aggregate else {
+            return plan_err!("expected aggregate");
+        };
+        let output_fields = fields_with_qualifiers(self.aggregate.schema());
+        let unkeyed_aggregate_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &output_fields[self.key_fields.len()..], // drop the leading key columns
+            self.aggregate.schema().metadata().clone(),
+        )?);
+        let unkeyed_aggregate = Aggregate::try_new_with_schema(
+            agg.input.clone(),
+            vec![], // no group expressions
+            agg.aggr_expr.clone(),
+            unkeyed_aggregate_schema,
+        )?;
+        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
+            planner.sync_plan(&LogicalPlan::Aggregate(unkeyed_aggregate))?,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+        let input_schema = FsSchema::from_schema_keys(
+            Arc::new(input_schema.as_ref().into()),
+            self.key_fields.clone(),
+        )?;
+
+        let config = SessionWindowAggregateOperator {
+            name: format!("session_window_{index}"),
+            gap_micros: gap.as_micros() as u64,
+            window_field_name: window_field.name().to_string(),
+            window_index: *window_index as u64,
+            input_schema: Some(input_schema.into()),
+            unkeyed_aggregate_schema: None, // NOTE(review): schema built above is not sent — confirm
+            partial_aggregation_plan: vec![],
+            final_aggregation_plan: physical_plan_node.encode_to_vec(),
+        };
+
+        Ok(LogicalNode::single(
+            index as u32,
+            format!("SessionWindow<{gap:?}>"), // NOTE(review): siblings embed `index` here — confirm
+            OperatorName::SessionWindowAggregate,
+            config.encode_to_vec(),
+            config.name.clone(),
+            1,
+        ))
+    }
+
+    /// Plans an instant (zero-width) window as a `TumblingWindowAggregate` node.
+    ///
+    /// The binning function is simply the `_timestamp` column and `width_micros` is set to 0.
+    /// The final projection plan is only attached when `use_final_projection` is true.
+    ///
+    /// # Errors
+    /// Propagates planner, serialization and key-schema errors.
+    pub fn instant_window_config(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schema: DFSchemaRef,
+        use_final_projection: bool,
+    ) -> Result<LogicalNode> {
+        let timestamp_expr = Expr::Column(Column::new_unqualified("_timestamp".to_string()));
+        let binning_function = planner.create_physical_expr(&timestamp_expr, &input_schema)?;
+        let binning_function_proto =
+            serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})?;
+
+        let final_projection = if use_final_projection {
+            let projection_plan = planner.sync_plan(&self.final_calculation)?;
+            let projection_node = PhysicalPlanNode::try_from_physical_plan(
+                projection_plan,
+                &FsPhysicalExtensionCodec::default(),
+            )?;
+            Some(projection_node.encode_to_vec())
+        } else {
+            None
+        };
+
+        let split = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?;
+        let keyed_input = FsSchema::from_schema_keys(
+            Arc::new(input_schema.as_ref().into()),
+            self.key_fields.clone(),
+        )?;
+
+        let config = TumblingWindowAggregateOperator {
+            name: "InstantWindow".to_string(),
+            width_micros: 0,
+            binning_function: binning_function_proto.encode_to_vec(),
+            input_schema: Some(keyed_input.into()),
+            partial_schema: Some(split.partial_schema.into()),
+            partial_aggregation_plan: split.partial_aggregation_plan.encode_to_vec(),
+            final_aggregation_plan: split.finish_plan.encode_to_vec(),
+            final_projection,
+        };
+
+        Ok(LogicalNode::single(
+            index as u32,
+            format!("instant_window_{index}"),
+            OperatorName::TumblingWindowAggregate,
+            config.encode_to_vec(),
+            "instant window".to_string(),
+            1,
+        ))
+    }
+
+    /// Final projection over the aggregate; assumes `_timestamp` holds the start of the bin.
+    pub fn final_projection(
+        aggregate_plan: &LogicalPlan,
+        window_behavior: WindowBehavior,
+    ) -> Result<LogicalPlan> {
+        let timestamp_field: DFField = aggregate_plan.inputs()[0] // panics if the plan has no inputs
+            .schema()
+            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)?
+            .into();
+        let timestamp_append = LogicalPlan::Extension(Extension {
+            node: Arc::new(TimestampAppendExtension::new(
+                aggregate_plan.clone(),
+                timestamp_field.qualifier().cloned(),
+            )),
+        });
+        let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema());
+        let mut aggregate_expressions: Vec<_> = aggregate_fields
+            .iter()
+            .map(|field| Expr::Column(field.qualified_column()))
+            .collect();
+        let (window_field, window_index, width, is_nested) = match window_behavior {
+            WindowBehavior::InData => return Ok(timestamp_append), // no window column is added
+            WindowBehavior::FromOperator {
+                window,
+                window_field,
+                window_index,
+                is_nested,
+            } => match window {
+                WindowType::Tumbling { width, .. } | WindowType::Sliding { width, .. } => {
+                    (window_field, window_index, width, is_nested)
+                }
+                WindowType::Session { .. } => {
+                    return Ok(LogicalPlan::Extension(Extension {
+                        node: Arc::new(WindowAppendExtension::new(
+                            timestamp_append,
+                            window_field,
+                            window_index,
+                        )),
+                    }));
+                }
+                WindowType::Instant => return Ok(timestamp_append), // no window column is added
+            },
+        };
+        if is_nested {
+            return Self::nested_final_projection(
+                timestamp_append,
+                window_field,
+                window_index,
+                width,
+            );
+        }
+        let timestamp_column =
+            Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name());
+        aggregate_fields.insert(window_index, window_field.clone()); // schema slot for the window column
+
+        let window_expression = Expr::ScalarFunction(ScalarFunction {
+            func: window(),
+            args: vec![
+                // copy bin_start as first argument
+                Expr::Column(timestamp_column.clone()),
+                // add width interval to _timestamp for bin end
+                Expr::BinaryExpr(BinaryExpr {
+                    left: Box::new(Expr::Column(timestamp_column.clone())),
+                    op: logical_expr::Operator::Plus,
+                    right: Box::new(Expr::Literal(
+                        ScalarValue::IntervalMonthDayNano(Some(
+                            IntervalMonthDayNanoType::make_value(0, 0, width.as_nanos() as i64),
+                        )),
+                        None,
+                    )),
+                }),
+            ],
+        });
+        aggregate_expressions.insert(
+            window_index,
+            window_expression
+                .alias_qualified(window_field.qualifier().cloned(), window_field.name()),
+        );
+        aggregate_fields.push(timestamp_field); // trailing timestamp field of the output schema
+        let bin_end_calculation = Expr::BinaryExpr(BinaryExpr {
+            left: Box::new(Expr::Column(timestamp_column.clone())),
+            op: logical_expr::Operator::Plus,
+            right: Box::new(Expr::Literal(
+                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(
+                    0,
+                    0,
+                    (width.as_nanos() - 1) as i64, // width minus one nanosecond
+                ))),
+                None,
+            )),
+        });
+        aggregate_expressions.push(bin_end_calculation); // expression for the trailing timestamp field
+        Ok(LogicalPlan::Projection(
+            logical_expr::Projection::try_new_with_schema(
+                aggregate_expressions,
+                Arc::new(timestamp_append),
+                Arc::new(schema_from_df_fields(&aggregate_fields)?),
+            )?,
+        ))
+    }
+
+    /// Builds the final projection for a nested window aggregate.
+    ///
+    /// Every field of `aggregate_plan` is passed through and a window column named after
+    /// `window_field` is inserted at `window_index`, spanning from
+    /// `TIMESTAMP_FIELD - (width - 1ns)` to `TIMESTAMP_FIELD + 1ns`.
+    ///
+    /// # Errors
+    /// Returns an error instead of panicking if the timestamp lookup or projection fails.
+    fn nested_final_projection(
+        aggregate_plan: LogicalPlan,
+        window_field: DFField,
+        window_index: usize,
+        width: Duration,
+    ) -> Result<LogicalPlan> {
+        let timestamp_field: DFField = aggregate_plan
+            .schema()
+            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)?
+            .into();
+        let timestamp_column =
+            Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name());
+
+        let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema());
+        let mut aggregate_expressions: Vec<_> = aggregate_fields
+            .iter()
+            .map(|field| Expr::Column(field.qualified_column()))
+            .collect();
+        aggregate_fields.insert(window_index, window_field.clone());
+        let interval_nanos = |nanos: i64| {
+            let value = IntervalMonthDayNanoType::make_value(0, 0, nanos);
+            Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(value)), None)
+        };
+        let window_expression = Expr::ScalarFunction(ScalarFunction {
+            func: window(),
+            args: vec![
+                // calculate the start of the bin
+                Expr::BinaryExpr(BinaryExpr {
+                    left: Box::new(Expr::Column(timestamp_column.clone())),
+                    op: logical_expr::Operator::Minus,
+                    right: Box::new(interval_nanos(width.as_nanos() as i64 - 1)),
+                }),
+                // add 1 nanosecond to the timestamp
+                Expr::BinaryExpr(BinaryExpr {
+                    left: Box::new(Expr::Column(timestamp_column.clone())),
+                    op: logical_expr::Operator::Plus,
+                    right: Box::new(interval_nanos(1)),
+                }),
+            ],
+        });
+        aggregate_expressions.insert(
+            window_index,
+            window_expression
+                .alias_qualified(window_field.qualifier().cloned(), window_field.name()),
+        );
+        Ok(LogicalPlan::Projection(
+            logical_expr::Projection::try_new_with_schema(
+                aggregate_expressions,
+                Arc::new(aggregate_plan),
+                Arc::new(schema_from_df_fields(&aggregate_fields)?),
+            )?,
+        ))
+    }
+}
+
+impl UserDefinedLogicalNodeCore for AggregateExtension {
+    fn name(&self) -> &str {
+        AGGREGATE_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.aggregate]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        let behavior = match &self.window_behavior {
+            WindowBehavior::InData => "InData".to_string(),
+            WindowBehavior::FromOperator { window, .. } => format!("FromOperator({window:?})"),
+        };
+        write!(f, "AggregateExtension: {} | window_behavior: {:?}", self.schema(), behavior)
+    }
+
+    /// Rebuilds the node around the single replacement input; a failing
+    /// `final_projection` is returned as an error rather than panicking as `Self::new` does.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [aggregate] = inputs.as_slice() else {
+            return internal_err!("input size inconsistent");
+        };
+        let final_calculation = Self::final_projection(aggregate, self.window_behavior.clone())?;
+        Ok(Self {
+            window_behavior: self.window_behavior.clone(),
+            aggregate: aggregate.clone(),
+            schema: final_calculation.schema().clone(),
+            key_fields: self.key_fields.clone(),
+            final_calculation,
+        })
+    }
+}
+
+impl StreamExtension for AggregateExtension {
+    /// Always returns `None`.
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Lowers the extension into one operator node with a single incoming shuffle edge.
+    ///
+    /// Operator selection:
+    /// - nested `FromOperator` windows and `InData` use `instant_window_config`;
+    /// - otherwise tumbling, sliding and session windows use their dedicated config;
+    /// - a non-nested `WindowType::Instant` is rejected.
+    ///
+    /// # Errors
+    /// Returns a plan error unless exactly one input schema is supplied, and propagates
+    /// schema-conversion and operator-planning failures.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        if input_schemas.len() != 1 {
+            return plan_err!("AggregateExtension should have exactly one input");
+        }
+        let input_schema = input_schemas[0].clone();
+        // Use `?` so an invalid input schema surfaces as an error, not a panic.
+        let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?);
+
+        let logical_node = match &self.window_behavior {
+            WindowBehavior::FromOperator { is_nested: true, .. } => {
+                self.instant_window_config(planner, index, input_df_schema, true)?
+            }
+            WindowBehavior::FromOperator { window, .. } => match window {
+                WindowType::Tumbling { width } => {
+                    self.tumbling_window_config(planner, index, input_df_schema, *width)?
+                }
+                WindowType::Sliding { width, slide } => {
+                    self.sliding_window_config(planner, index, input_df_schema, *width, *slide)?
+                }
+                WindowType::Instant => {
+                    return plan_err!("instant window not supported in aggregate extension");
+                }
+                WindowType::Session { gap: _ } => {
+                    self.session_window_config(planner, index, input_df_schema)?
+                }
+            },
+            WindowBehavior::InData => self
+                .instant_window_config(planner, index, input_df_schema, false)
+                .map_err(|e| e.context("instant window"))?,
+        };
+
+        let edge = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone());
+        Ok(NodeWithIncomingEdges {
+            node: logical_node,
+            edges: vec![edge],
+        })
+    }
+
+    /// Output schema of the extension with no key fields; panics if it cannot be converted.
+    fn output_schema(&self) -> FsSchema {
+        let output_schema = (*self.schema).clone().into();
+        FsSchema::from_schema_keys(Arc::new(output_schema), vec![]).unwrap()
+    }
+}
+
+/*
+This is a plan node that inserts a window field into the input schema at `window_index`.
+ */
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+struct WindowAppendExtension {
+    pub(crate) input: LogicalPlan, // the single input plan
+    pub(crate) window_field: DFField, // field inserted into the input's field list
+    pub(crate) window_index: usize, // position at which `window_field` is inserted
+    pub(crate) schema: DFSchemaRef, // input fields plus `window_field`, built in `new`
+}
+
+multifield_partial_ord!(WindowAppendExtension, input, window_index);
+
+impl WindowAppendExtension {
+    /// Wraps `input`, inserting `window_field` at `window_index` of its field list.
+    /// Panics if the index is out of range or the combined schema cannot be built.
+    fn new(input: LogicalPlan, window_field: DFField, window_index: usize) -> Self {
+        let source_schema = input.schema();
+        let mut output_fields = fields_with_qualifiers(source_schema);
+        output_fields.insert(window_index, window_field.clone());
+        let metadata = source_schema.metadata().clone();
+        let schema = schema_from_df_fields_with_metadata(&output_fields, metadata).unwrap();
+        let schema = Arc::new(schema);
+        Self { input, window_field, window_index, schema }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for WindowAppendExtension {
+    fn name(&self) -> &str {
+        "WindowAppendExtension"
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    /// Writes the inserted field and its index.
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let (field, index) = (&self.window_field, self.window_index);
+        write!(f, "WindowAppendExtension: field {field:?} at {index}")
+    }
+
+    /// Rebuilds the node around its single replacement input.
+    ///
+    /// Returns an internal error instead of panicking when `inputs` is not exactly one plan.
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [input] = inputs.as_slice() else {
+            return internal_err!("input size inconsistent");
+        };
+        Ok(Self::new(input.clone(), self.window_field.clone(), self.window_index))
+    }
+}
diff --git a/src/sql/planner/extension/debezium.rs b/src/sql/extensions/debezium.rs
similarity index 63%
rename from src/sql/planner/extension/debezium.rs
rename to src/sql/extensions/debezium.rs
index 1760533c..184de88d 100644
--- a/src/sql/planner/extension/debezium.rs
+++ b/src/sql/extensions/debezium.rs
@@ -1,22 +1,28 @@
+
+use super::{ StreamExtension};
+use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD};
 use std::sync::Arc;
 
-use datafusion::arrow::datatypes::{DataType, Field, Schema};
-use datafusion::common::{DFSchema, DFSchemaRef, Result, TableReference, plan_err};
+use arrow_schema::{DataType, Schema};
+
+use datafusion::common::{DFSchema, DFSchemaRef, Result, TableReference, internal_err, plan_err};
+use datafusion::error::DataFusionError;
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::physical_plan::DisplayAs;
 
-use super::{NamedNode, StreamExtension};
+use super::{NodeWithIncomingEdges};
 use crate::multifield_partial_ord;
-use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD};
+use crate::sql::logical_planner::updating_meta_field;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
 
 pub(crate) const DEBEZIUM_UNROLLING_EXTENSION_NAME: &str = "DebeziumUnrollingExtension";
 pub(crate) const TO_DEBEZIUM_EXTENSION_NAME: &str = "ToDebeziumExtension";
 
-/// Unrolls a Debezium-formatted (before/after/op) stream into individual rows
-/// with an updating metadata column.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct DebeziumUnrollingExtension {
-    pub(crate) input: LogicalPlan,
-    pub(crate) schema: DFSchemaRef,
+    input: LogicalPlan,
+    schema: DFSchemaRef,
     pub primary_keys: Vec<usize>,
     primary_key_names: Arc<Vec<String>>,
 }
@@ -45,23 +51,34 @@ impl DebeziumUnrollingExtension {
         let struct_schema: Vec<_> = input_schema
             .fields()
             .iter()
-            .filter(|field| field.name() != TIMESTAMP_FIELD)
+            .filter(|field| field.name() != TIMESTAMP_FIELD && field.name() != UPDATING_META_FIELD)
             .cloned()
             .collect();
 
         let struct_type = DataType::Struct(struct_schema.into());
 
-        let before = Arc::new(Field::new("before", struct_type.clone(), true));
-        let after = Arc::new(Field::new("after", struct_type, true));
-        let op = Arc::new(Field::new("op", DataType::Utf8, true));
+        let before = Arc::new(arrow::datatypes::Field::new(
+            "before",
+            struct_type.clone(),
+            true,
+        ));
+        let after = Arc::new(arrow::datatypes::Field::new(
+            "after",
+            struct_type.clone(),
+            true,
+        ));
+
+        let op = Arc::new(arrow::datatypes::Field::new("op", DataType::Utf8, true));
         let mut fields = vec![before, after, op];
 
-        if let Some(ts) = timestamp_field {
-            fields.push(Arc::new(ts));
+        if let Some(timestamp_field) = timestamp_field {
+            fields.push(Arc::new(timestamp_field));
         }
 
         let schema = match qualifier {
-            Some(q) => DFSchema::try_from_qualified_schema(q, &Schema::new(fields))?,
+            Some(qualifier) => {
+                DFSchema::try_from_qualified_schema(qualifier, &Schema::new(fields))?
+            }
             None => DFSchema::try_from(Schema::new(fields))?,
         };
         Ok(Arc::new(schema))
@@ -70,6 +87,7 @@ impl DebeziumUnrollingExtension {
     pub fn try_new(input: LogicalPlan, primary_keys: Arc<Vec<String>>) -> Result<Self> {
         let input_schema = input.schema();
 
+        // confirm that the input schema has before, after and op columns, and before and after match
         let Some(before_index) = input_schema.index_of_column_by_name(None, "before") else {
             return plan_err!("DebeziumUnrollingExtension requires a before column");
         };
@@ -90,11 +108,13 @@ impl DebeziumUnrollingExtension {
             );
         }
 
+        // check that op is a string
         let op_type = input_schema.field(op_index).data_type();
         if *op_type != DataType::Utf8 {
             return plan_err!("op column must be a string, not {}", op_type);
         }
 
+        // create the output schema
         let DataType::Struct(fields) = before_type else {
             return plan_err!(
                 "before and after columns must be structs, not {}",
@@ -102,41 +122,44 @@ impl DebeziumUnrollingExtension {
             );
         };
 
+        // get the primary keys
         let primary_key_idx = primary_keys
             .iter()
             .map(|pk| fields.find(pk).map(|(i, _)| i))
             .collect::<Option<Vec<_>>>()
             .ok_or_else(|| {
-                datafusion::error::DataFusionError::Plan(
-                    "primary key field not found in Debezium schema".to_string(),
-                )
+                DataFusionError::Plan("primary key field not found in Debezium schema".to_string())
             })?;
 
+        // determine the qualifier from the before and after columns
         let qualifier = match (
             input_schema.qualified_field(before_index).0,
             input_schema.qualified_field(after_index).0,
         ) {
-            (Some(bq), Some(aq)) => {
-                if bq != aq {
+            (Some(before_qualifier), Some(after_qualifier)) => {
+                if before_qualifier != after_qualifier {
                     return plan_err!("before and after columns must have the same alias");
                 }
-                Some(bq.clone())
+                Some(before_qualifier.clone())
             }
             (None, None) => None,
             _ => return plan_err!("before and after columns must both have an alias or neither"),
         };
 
-        let mut out_fields = fields.to_vec();
+        let mut fields = fields.to_vec();
+        fields.push(updating_meta_field());
 
-        let Some(input_ts_index) = input_schema.index_of_column_by_name(None, TIMESTAMP_FIELD)
+        let Some(input_timestamp_field) =
+            input_schema.index_of_column_by_name(None, TIMESTAMP_FIELD)
         else {
             return plan_err!("DebeziumUnrollingExtension requires a timestamp field");
         };
-        out_fields.push(Arc::new(input_schema.field(input_ts_index).clone()));
 
-        let arrow_schema = Schema::new(out_fields);
+        fields.push(Arc::new(input_schema.field(input_timestamp_field).clone()));
+        let arrow_schema = Schema::new(fields);
+
         let schema = match qualifier {
-            Some(q) => DFSchema::try_from_qualified_schema(q, &arrow_schema)?,
+            Some(qualifier) => DFSchema::try_from_qualified_schema(qualifier, &arrow_schema)?,
             None => DFSchema::try_from(arrow_schema)?,
         };
 
@@ -180,8 +203,17 @@ impl StreamExtension for DebeziumUnrollingExtension {
         None
     }
 
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    fn plan_node(
+        &self,
+        _planner: &Planner,
+        _index: usize,
+        _input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        plan_err!("DebeziumUnrollingExtension should not be planned")
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
     }
 
     fn transparent(&self) -> bool {
@@ -189,19 +221,19 @@ impl StreamExtension for DebeziumUnrollingExtension {
     }
 }
 
-/// Wraps an input stream into Debezium format (before/after/op) for updating sinks.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub(crate) struct ToDebeziumExtension {
-    pub(crate) input: Arc<LogicalPlan>,
-    pub(crate) schema: DFSchemaRef,
+    input: Arc<LogicalPlan>,
+    schema: DFSchemaRef,
 }
 
 multifield_partial_ord!(ToDebeziumExtension, input);
 
 impl ToDebeziumExtension {
     pub(crate) fn try_new(input: LogicalPlan) -> Result<Self> {
-        let schema = DebeziumUnrollingExtension::as_debezium_schema(input.schema(), None)
-            .expect("should be able to create ToDebeziumExtension");
+        // A schema that cannot be wrapped is a planning error, not a reason to panic:
+        // propagate it through the `Result` this constructor already returns.
+        let schema = DebeziumUnrollingExtension::as_debezium_schema(input.schema(), None)?;
         Ok(Self {
             input: Arc::new(input),
             schema,
@@ -209,6 +241,16 @@ impl ToDebeziumExtension {
     }
 }
 
+impl DisplayAs for ToDebeziumExtension {
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        write!(f, "ToDebeziumExtension")
+    }
+}
+
 impl UserDefinedLogicalNodeCore for ToDebeziumExtension {
     fn name(&self) -> &str {
         TO_DEBEZIUM_EXTENSION_NAME
@@ -222,7 +264,7 @@ impl UserDefinedLogicalNodeCore for ToDebeziumExtension {
         &self.schema
     }
 
-    fn expressions(&self) -> Vec<Expr> {
+    fn expressions(&self) -> Vec<datafusion::prelude::Expr> {
         vec![]
     }
 
@@ -230,7 +272,11 @@ impl UserDefinedLogicalNodeCore for ToDebeziumExtension {
         write!(f, "ToDebeziumExtension")
     }
 
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<datafusion::prelude::Expr>,
+        inputs: Vec<LogicalPlan>,
+    ) -> Result<Self> {
         Self::try_new(inputs[0].clone())
     }
 }
@@ -240,8 +286,17 @@ impl StreamExtension for ToDebeziumExtension {
         None
     }
 
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    fn plan_node(
+        &self,
+        _planner: &Planner,
+        _index: usize,
+        _input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        internal_err!("ToDebeziumExtension should not be planned")
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
     }
 
     fn transparent(&self) -> bool {
diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs
new file mode 100644
index 00000000..c28a6e01
--- /dev/null
+++ b/src/sql/extensions/join.rs
@@ -0,0 +1,120 @@
+use std::time::Duration;
+
+use datafusion::common::{DFSchemaRef, Result};
+use datafusion::logical_expr::expr::Expr;
+use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::types::StreamSchema;
+
+use std::sync::Arc;
+use datafusion_common::plan_err;
+use datafusion_proto::physical_plan::AsExecutionPlan;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use prost::Message;
+use protocol::grpc::api::JoinOperator;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::types::{FsSchema, FsSchemaRef};
+
+pub(crate) const JOIN_NODE_NAME: &str = "JoinNode";
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub struct JoinExtension {
+    pub(crate) rewritten_join: LogicalPlan,
+    pub(crate) is_instant: bool,
+    pub(crate) ttl: Option<Duration>,
+}
+
+impl StreamExtension for JoinExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Plans the join as one `Join`/`InstantJoin` operator with a left and a right input edge.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        let [left, right] = input_schemas.as_slice() else {
+            return plan_err!("join should have exactly two inputs");
+        };
+        let (left_schema, right_schema) = ((**left).clone(), (**right).clone());
+
+        // Serialize the physical plan of the rewritten join for the operator config.
+        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
+            planner.sync_plan(&self.rewritten_join)?,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+
+        let node_name = format!("join_{index}");
+        let config = JoinOperator {
+            name: node_name.clone(),
+            left_schema: Some(left_schema.clone().into()),
+            right_schema: Some(right_schema.clone().into()),
+            output_schema: Some(self.output_schema().into()),
+            join_plan: physical_plan_node.encode_to_vec(),
+            ttl_micros: self.ttl.map(|t| t.as_micros() as u64),
+        };
+
+        let operator_name = if self.is_instant {
+            OperatorName::InstantJoin
+        } else {
+            OperatorName::Join
+        };
+        let logical_node = LogicalNode::single(
+            index as u32,
+            node_name,
+            operator_name,
+            config.encode_to_vec(),
+            "join".to_string(),
+            1,
+        );
+
+        // One incoming edge per join side, each built with `project_all` over that side's schema.
+        Ok(NodeWithIncomingEdges {
+            node: logical_node,
+            edges: vec![
+                LogicalEdge::project_all(LogicalEdgeType::LeftJoin, left_schema),
+                LogicalEdge::project_all(LogicalEdgeType::RightJoin, right_schema),
+            ],
+        })
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(self.schema().inner().clone()).unwrap()
+    }
+}
+
+impl UserDefinedLogicalNodeCore for JoinExtension {
+    fn name(&self) -> &str {
+        JOIN_NODE_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.rewritten_join]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        self.rewritten_join.schema()
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "JoinExtension: {}", self.schema())
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self {
+            rewritten_join: inputs[0].clone(),
+            is_instant: self.is_instant,
+            ttl: self.ttl,
+        })
+    }
+}
diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs
new file mode 100644
index 00000000..e0edb67a
--- /dev/null
+++ b/src/sql/extensions/key_calculation.rs
@@ -0,0 +1,242 @@
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{Field, Schema};
+use datafusion::common::{DFSchemaRef, Result, internal_err};
+use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion_common::{plan_err, DFSchema};
+use datafusion_expr::col;
+use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec};
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use itertools::Itertools;
+use prost::Message;
+use protocol::grpc::api::{KeyPlanOperator, ProjectionOperator};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::multifield_partial_ord;
+use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::types::{
+    StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata,
+};
+use crate::types::{FsSchema, FsSchemaRef};
+
+pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension";
+
+/// Two ways of specifying keys — either as col indexes in the existing data or as a set of
+/// exprs to evaluate
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub enum KeysOrExprs {
+    Keys(Vec<usize>),
+    Exprs(Vec<Expr>),
+}
+
+/// Calculation for computing keyed data, with a vec of keys
+/// that will be used for shuffling data to the correct nodes.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct KeyCalculationExtension {
+    pub(crate) name: Option<String>,
+    pub(crate) input: LogicalPlan,
+    pub(crate) keys: KeysOrExprs,
+    pub(crate) schema: DFSchemaRef,
+}
+
+multifield_partial_ord!(KeyCalculationExtension, name, input, keys);
+
+impl KeyCalculationExtension {
+    pub fn new_named_and_trimmed(input: LogicalPlan, keys: Vec<usize>, name: String) -> Self {
+        let output_fields: Vec<_> = fields_with_qualifiers(input.schema())
+            .into_iter()
+            .enumerate()
+            .filter_map(|(index, field)| {
+                if !keys.contains(&index) {
+                    Some(field.clone())
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        let schema =
+            schema_from_df_fields_with_metadata(&output_fields, input.schema().metadata().clone())
+                .unwrap();
+        Self {
+            name: Some(name),
+            input,
+            keys: KeysOrExprs::Keys(keys),
+            schema: Arc::new(schema),
+        }
+    }
+    pub fn new(input: LogicalPlan, keys: KeysOrExprs) -> Self {
+        let schema = input.schema().clone();
+        Self {
+            name: None,
+            input,
+            keys,
+            schema,
+        }
+    }
+}
+
+impl StreamExtension for KeyCalculationExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Plans this node as an `ArrowKey` operator (index-based keys) or a `Projection` operator
+    /// (expression-based keys), connected to its single input by a forward edge.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        if input_schemas.len() != 1 {
+            return plan_err!("KeyCalculationExtension should have exactly one input");
+        }
+        let input_schema = (*input_schemas[0]).clone();
+        let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?);
+
+        let physical_plan = planner.sync_plan(&self.input)?;
+        let physical_plan_node: PhysicalPlanNode = PhysicalPlanNode::try_from_physical_plan(
+            physical_plan,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+
+        let (config, name) = match &self.keys {
+            KeysOrExprs::Keys(keys) => (
+                KeyPlanOperator {
+                    name: "key".into(),
+                    physical_plan: physical_plan_node.encode_to_vec(),
+                    key_fields: keys.iter().map(|k| *k as u64).collect(),
+                }
+                .encode_to_vec(),
+                OperatorName::ArrowKey,
+            ),
+            KeysOrExprs::Exprs(key_exprs) => {
+                // Key expressions come first, followed by a pass-through of every input column.
+                let exprs: Vec<Expr> = key_exprs
+                    .iter()
+                    .cloned()
+                    .chain(input_schema.schema.fields.iter().map(|f| col(f.name())))
+                    .collect();
+
+                // Ensure that the exprs generate the output schema. A mismatch is reported as
+                // an internal error instead of panicking inside the planner.
+                let output_schema = self.output_schema();
+                for (expr, expected) in exprs.iter().zip(output_schema.schema.fields()) {
+                    let (data_type, nullable) = expr.data_type_and_nullable(&input_df_schema)?;
+                    if data_type != *expected.data_type() || nullable != expected.is_nullable() {
+                        return internal_err!(
+                            "key expression {expr} does not match output field {expected:?}"
+                        );
+                    }
+                }
+
+                let physical_exprs = exprs
+                    .iter()
+                    .map(|expr| {
+                        let phys = planner
+                            .create_physical_expr(expr, &input_df_schema)
+                            .map_err(|err| err.context("in PARTITION BY"))?;
+                        let proto =
+                            serialize_physical_expr(&phys, &DefaultPhysicalExtensionCodec {})?;
+                        Ok(proto.encode_to_vec())
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+
+                let config = ProjectionOperator {
+                    name: self.name.as_deref().unwrap_or("key").to_string(),
+                    input_schema: Some(input_schema.clone().into()),
+                    output_schema: Some(output_schema.into()),
+                    exprs: physical_exprs,
+                };
+
+                (config.encode_to_vec(), OperatorName::Projection)
+            }
+        };
+
+        let node = LogicalNode::single(
+            index as u32,
+            format!("key_{index}"),
+            name,
+            config,
+            format!("ArrowKey<{}>", self.name.as_deref().unwrap_or("_")),
+            1,
+        );
+        let edge = LogicalEdge::project_all(LogicalEdgeType::Forward, input_schema);
+        Ok(NodeWithIncomingEdges {
+            node,
+            edges: vec![edge],
+        })
+    }
+
+    /// Describes the keyed output: the input columns keyed by index, or one `__key_{i}` column
+    /// per key expression followed by every input column.
+    fn output_schema(&self) -> FsSchema {
+        let input_schema = self.input.schema().as_ref();
+
+        match &self.keys {
+            KeysOrExprs::Keys(keys) => {
+                FsSchema::from_schema_keys(Arc::new(input_schema.into()), keys.clone()).unwrap()
+            }
+            KeysOrExprs::Exprs(exprs) => {
+                let key_fields = exprs.iter().enumerate().map(|(i, e)| {
+                    let (dt, nullable) = e.data_type_and_nullable(input_schema).unwrap();
+                    Arc::new(Field::new(format!("__key_{i}"), dt, nullable))
+                });
+
+                let fields: Vec<_> = key_fields
+                    .chain(input_schema.fields().iter().cloned())
+                    .collect();
+
+                // NOTE(review): the key columns occupy positions 0..exprs.len() above, yet the
+                // key indices passed here start at 1 — confirm this offset is intended.
+                let key_indices = (1..=exprs.len()).collect_vec();
+
+                FsSchema::from_schema_keys(Arc::new(Schema::new(fields)), key_indices).unwrap()
+            }
+        }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for KeyCalculationExtension {
+    fn name(&self) -> &str {
+        KEY_CALCULATION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "KeyCalculationExtension: {}", self.schema())
+    }
+
+    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if inputs.len() != 1 {
+            return internal_err!("input size inconsistent");
+        }
+
+        let keys = match &self.keys {
+            KeysOrExprs::Keys(k) => KeysOrExprs::Keys(k.clone()),
+            KeysOrExprs::Exprs(_) => KeysOrExprs::Exprs(exprs),
+        };
+
+        Ok(Self {
+            name: self.name.clone(),
+            input: inputs[0].clone(),
+            keys,
+            schema: self.schema.clone(),
+        })
+    }
+}
diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs
new file mode 100644
index 00000000..2dc76265
--- /dev/null
+++ b/src/sql/extensions/lookup.rs
@@ -0,0 +1,194 @@
+use datafusion::common::{Column, DFSchemaRef, JoinType, internal_err, plan_err};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::sql::TableReference;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use prost::Message;
+use std::fmt::Formatter;
+use std::sync::Arc;
+use protocol::grpc::api;
+use protocol::grpc::api::{ConnectorOp, LookupJoinCondition, LookupJoinOperator};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::multifield_partial_ord;
+use crate::sql::schema::ConnectorTable;
+use crate::sql::schema::utils::add_timestamp_field_arrow;
+use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::types::{FsSchema, FsSchemaRef};
+
+pub const SOURCE_EXTENSION_NAME: &str = "LookupSource";
+pub const JOIN_EXTENSION_NAME: &str = "LookupJoin";
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LookupSource {
+    pub(crate) table: ConnectorTable,
+    pub(crate) schema: DFSchemaRef,
+}
+
+multifield_partial_ord!(LookupSource, table);
+
+impl UserDefinedLogicalNodeCore for LookupSource {
+    fn name(&self) -> &str {
+        SOURCE_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "LookupSource: {}", self.schema)
+    }
+
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<Expr>,
+        inputs: Vec<LogicalPlan>,
+    ) -> datafusion::common::Result<Self> {
+        if !inputs.is_empty() {
+            return internal_err!("LookupSource cannot have inputs");
+        }
+
+        Ok(Self {
+            table: self.table.clone(),
+            schema: self.schema.clone(),
+        })
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LookupJoin {
+    pub(crate) input: LogicalPlan,
+    pub(crate) schema: DFSchemaRef,
+    pub(crate) connector: ConnectorTable,
+    pub(crate) on: Vec<(Expr, Column)>,
+    pub(crate) filter: Option<Expr>,
+    pub(crate) alias: Option<TableReference>,
+    pub(crate) join_type: JoinType,
+}
+
+multifield_partial_ord!(LookupJoin, input, connector, on, filter, alias);
+
+impl StreamExtension for LookupJoin {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Plans the `LookupJoin` operator and its single shuffled incoming edge; requires one input.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> datafusion::common::Result<NodeWithIncomingEdges> {
+        let [input_schema] = input_schemas.as_slice() else {
+            return plan_err!("LookupJoin should have exactly one input");
+        };
+        let schema = FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into()))?;
+        let lookup_arrow_schema = add_timestamp_field_arrow(self.connector.physical_schema());
+        let lookup_schema = FsSchema::from_schema_unkeyed(lookup_arrow_schema)?;
+        let cache_ttl = self.connector.lookup_cache_ttl;
+        let join_config = LookupJoinOperator {
+            input_schema: Some(schema.into()),
+            lookup_schema: Some(lookup_schema.into()),
+            connector: Some(ConnectorOp {
+                connector: self.connector.connector.clone(),
+                config: self.connector.config.clone(),
+                description: self.connector.description.clone(),
+            }),
+            key_exprs: self
+                .on
+                .iter()
+                .map(|(l, r)| {
+                    let expr = planner.create_physical_expr(l, &self.schema)?;
+                    let expr = serialize_physical_expr(&expr, &DefaultPhysicalExtensionCodec {})?;
+                    Ok(LookupJoinCondition {
+                        left_expr: expr.encode_to_vec(),
+                        right_key: r.name.clone(),
+                    })
+                })
+                .collect::<datafusion::error::Result<Vec<_>>>()?,
+            join_type: match self.join_type {
+                JoinType::Inner => api::JoinType::Inner as i32,
+                JoinType::Left => api::JoinType::Left as i32,
+                j => {
+                    return plan_err!(
+                        "unsupported join type '{j}' for lookup join; only inner and left joins are supported"
+                    );
+                }
+            },
+            ttl_micros: cache_ttl.map(|t| t.as_micros() as u64),
+            max_capacity_bytes: self.connector.lookup_cache_max_bytes,
+        };
+        let incoming_edge =
+            LogicalEdge::project_all(LogicalEdgeType::Shuffle, (**input_schema).clone());
+
+        Ok(NodeWithIncomingEdges {
+            node: LogicalNode::single(
+                index as u32,
+                format!("lookupjoin_{index}"),
+                OperatorName::LookupJoin,
+                join_config.encode_to_vec(),
+                format!("LookupJoin<{}>", self.connector.name),
+                1,
+            ),
+            edges: vec![incoming_edge],
+        })
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(self.schema.inner().clone()).unwrap()
+    }
+}
+
+impl UserDefinedLogicalNodeCore for LookupJoin {
+    fn name(&self) -> &str {
+        JOIN_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        let mut e: Vec<_> = self.on.iter().map(|(l, _)| l.clone()).collect();
+
+        if let Some(filter) = &self.filter {
+            e.push(filter.clone());
+        }
+
+        e
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "LookupJoinExtension: {}", self.schema)
+    }
+
+    fn with_exprs_and_inputs(
+        &self,
+        _: Vec<Expr>,
+        inputs: Vec<LogicalPlan>,
+    ) -> datafusion::common::Result<Self> {
+        Ok(Self {
+            input: inputs[0].clone(),
+            schema: self.schema.clone(),
+            connector: self.connector.clone(),
+            on: self.on.clone(),
+            filter: self.filter.clone(),
+            alias: self.alias.clone(),
+            join_type: self.join_type,
+        })
+    }
+}
\ No newline at end of file
diff --git a/src/sql/planner/extension/mod.rs b/src/sql/extensions/mod.rs
similarity index 66%
rename from src/sql/planner/extension/mod.rs
rename to src/sql/extensions/mod.rs
index 4de1892e..25632930 100644
--- a/src/sql/planner/extension/mod.rs
+++ b/src/sql/extensions/mod.rs
@@ -7,13 +7,33 @@ use datafusion::common::{DFSchemaRef, DataFusionError, Result, TableReference};
 use datafusion::logical_expr::{
     Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore,
 };
-
-use crate::datastream::logical::{LogicalEdge, LogicalNode};
-use crate::sql::planner::schemas::{add_timestamp_field, has_timestamp_field};
+use datafusion_common::internal_err;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use prost::Message;
+use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering};
+use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::logical_planner::updating_meta_field;
+use crate::sql::extensions::aggregate::AggregateExtension;
+use crate::sql::extensions::debezium::{DebeziumUnrollingExtension, ToDebeziumExtension};
+use crate::sql::extensions::join::JoinExtension;
+use crate::sql::extensions::key_calculation::KeyCalculationExtension;
+use crate::sql::extensions::lookup::LookupJoin;
+use crate::sql::extensions::projection::ProjectionExtension;
+use crate::sql::extensions::remote_table::RemoteTableExtension;
+use crate::sql::extensions::sink::SinkExtension;
+use crate::sql::extensions::table_source::TableSourceExtension;
+use crate::sql::extensions::updating_aggregate::UpdatingAggregateExtension;
+use crate::sql::extensions::watermark_node::WatermarkNode;
+use crate::sql::extensions::window_fn::WindowFunctionExtension;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field};
 use crate::sql::types::{
     DFField, StreamSchema, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields,
 };
-use crate::types::FsSchemaRef;
+use crate::types::{FsSchema, FsSchemaRef};
+
+pub const ASYNC_RESULT_FIELD: &str = "__async_result";
 
 pub(crate) mod aggregate;
 pub(crate) mod debezium;
@@ -28,43 +48,29 @@ pub(crate) mod updating_aggregate;
 pub(crate) mod watermark_node;
 pub(crate) mod window_fn;
 
-pub(crate) struct NodeWithIncomingEdges {
-    pub node: LogicalNode,
-    pub edges: Vec<LogicalEdge>,
-}
 
 pub(crate) trait StreamExtension: Debug {
     fn node_name(&self) -> Option<NamedNode>;
-
     fn plan_node(
         &self,
-        _planner: &super::physical_planner::Planner,
-        _index: usize,
-        _input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        Err(DataFusionError::NotImplemented(format!(
-            "plan_node not yet implemented for {:?}",
-            self
-        )))
-    }
-
-    fn output_schema(&self) -> StreamSchema;
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges>;
+    fn output_schema(&self) -> FsSchema;
     fn transparent(&self) -> bool {
         false
     }
 }
 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum NamedNode {
-    Source(TableReference),
-    Watermark(TableReference),
-    RemoteTable(TableReference),
-    Sink(TableReference),
+pub(crate) struct NodeWithIncomingEdges {
+    pub node: LogicalNode,
+    pub edges: Vec<LogicalEdge>,
 }
 
 fn try_from_t<T: StreamExtension + 'static>(
     node: &dyn UserDefinedLogicalNode,
-) -> std::result::Result<&dyn StreamExtension, ()> {
+) -> Result<&dyn StreamExtension, ()> {
     node.as_any()
         .downcast_ref::<T>()
         .map(|t| t as &dyn StreamExtension)
@@ -75,19 +81,6 @@ impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension {
     type Error = DataFusionError;
 
     fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result<Self, Self::Error> {
-        use aggregate::AggregateExtension;
-        use debezium::{DebeziumUnrollingExtension, ToDebeziumExtension};
-        use join::JoinExtension;
-        use key_calculation::KeyCalculationExtension;
-        use lookup::{LookupJoin, LookupSource};
-        use projection::ProjectionExtension;
-        use remote_table::RemoteTableExtension;
-        use sink::SinkExtension;
-        use table_source::TableSourceExtension;
-        use updating_aggregate::UpdatingAggregateExtension;
-        use watermark_node::WatermarkNode;
-        use window_fn::WindowFunctionExtension;
-
         try_from_t::<TableSourceExtension>(node)
             .or_else(|_| try_from_t::<WatermarkNode>(node))
             .or_else(|_| try_from_t::<SinkExtension>(node))
@@ -101,9 +94,7 @@ impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension {
             .or_else(|_| try_from_t::<DebeziumUnrollingExtension>(node))
             .or_else(|_| try_from_t::<UpdatingAggregateExtension>(node))
             .or_else(|_| try_from_t::<LookupJoin>(node))
-            .or_else(|_| try_from_t::<LookupSource>(node))
             .or_else(|_| try_from_t::<ProjectionExtension>(node))
-            .or_else(|_| try_from_t::<IsRetractExtension>(node))
             .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name())))
     }
 }
@@ -130,8 +121,8 @@ macro_rules! multifield_partial_ord {
                 Some(std::cmp::Ordering::Equal)
             }
         }
-    }
 }
+    }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub(crate) struct TimestampAppendExtension {
@@ -141,10 +132,11 @@ pub(crate) struct TimestampAppendExtension {
 }
 
 impl TimestampAppendExtension {
-    pub(crate) fn new(input: LogicalPlan, qualifier: Option<TableReference>) -> Self {
+    fn new(input: LogicalPlan, qualifier: Option<TableReference>) -> Self {
         if has_timestamp_field(input.schema()) {
             unreachable!(
-                "shouldn't be adding timestamp to a plan that already has it: {:?}",
+                "shouldn't be adding timestamp to a plan that already has it: plan :\n {:?}\n schema: {:?}",
+                input,
                 input.schema()
             );
         }
@@ -195,8 +187,111 @@ impl UserDefinedLogicalNodeCore for TimestampAppendExtension {
     }
 }
 
-/// Appends an `_updating_meta` and properly qualified `_timestamp` field
-/// to the output schema of an updating aggregate.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct AsyncUDFExtension {
+    pub(crate) input: Arc<LogicalPlan>,
+    pub(crate) name: String,
+    pub(crate) udf: DylibUdfConfig,
+    pub(crate) arg_exprs: Vec<Expr>,
+    pub(crate) final_exprs: Vec<Expr>,
+    pub(crate) ordered: bool,
+    pub(crate) max_concurrency: usize,
+    pub(crate) timeout: Duration,
+    pub(crate) final_schema: DFSchemaRef,
+}
+
+multifield_partial_ord!(
+    AsyncUDFExtension,
+    input,
+    name,
+    udf,
+    arg_exprs,
+    final_exprs,
+    ordered,
+    max_concurrency,
+    timeout
+);
+
+impl StreamExtension for AsyncUDFExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        let arg_exprs = self
+            .arg_exprs
+            .iter()
+            .map(|e| {
+                let p = planner.create_physical_expr(e, self.input.schema())?;
+                Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec())
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let mut final_fields = fields_with_qualifiers(self.input.schema());
+        final_fields.push(DFField::new(
+            None,
+            ASYNC_RESULT_FIELD,
+            self.udf.return_type.clone(),
+            true,
+        ));
+        let post_udf_schema = schema_from_df_fields(&final_fields)?;
+
+        let final_exprs = self
+            .final_exprs
+            .iter()
+            .map(|e| {
+                let p = planner.create_physical_expr(e, &post_udf_schema)?;
+                Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec())
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let config = AsyncUdfOperator {
+            name: self.name.clone(),
+            udf: Some(self.udf.clone().into()),
+            arg_exprs,
+            final_exprs,
+            ordering: if self.ordered {
+                AsyncUdfOrdering::Ordered as i32
+            } else {
+                AsyncUdfOrdering::Unordered as i32
+            },
+            max_concurrency: self.max_concurrency as u32,
+            timeout_micros: self.timeout.as_micros() as u64,
+        };
+
+        let node = LogicalNode::single(
+            index as u32,
+            format!("async_udf_{index}"),
+            OperatorName::AsyncUdf,
+            config.encode_to_vec(),
+            format!("async_udf<{}>", self.name),
+            1,
+        );
+
+        let incoming_edge =
+            LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone());
+        Ok(NodeWithIncomingEdges {
+            node,
+            edges: vec![incoming_edge],
+        })
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_fields(
+            self.final_schema
+                .fields()
+                .iter()
+                .map(|f| (**f).clone())
+                .collect(),
+        )
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub(crate) struct IsRetractExtension {
     pub(crate) input: LogicalPlan,
@@ -217,6 +312,7 @@ impl IsRetractExtension {
             DataType::Timestamp(TimeUnit::Nanosecond, None),
             false,
         );
+        output_fields.push((timestamp_qualifier.clone(), updating_meta_field()).into());
         let schema = Arc::new(schema_from_df_fields(&output_fields).unwrap());
         Self {
             input,
@@ -255,42 +351,6 @@ impl UserDefinedLogicalNodeCore for IsRetractExtension {
     }
 }
 
-impl StreamExtension for IsRetractExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
-    }
-}
-
-pub(crate) const ASYNC_RESULT_FIELD: &str = "__async_result";
-
-/// Extension node for async UDF calls in streaming projections.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct AsyncUDFExtension {
-    pub(crate) input: Arc<LogicalPlan>,
-    pub(crate) name: String,
-    pub(crate) arg_exprs: Vec<Expr>,
-    pub(crate) final_exprs: Vec<Expr>,
-    pub(crate) ordered: bool,
-    pub(crate) max_concurrency: usize,
-    pub(crate) timeout: Duration,
-    pub(crate) final_schema: DFSchemaRef,
-}
-
-multifield_partial_ord!(
-    AsyncUDFExtension,
-    input,
-    name,
-    arg_exprs,
-    final_exprs,
-    ordered,
-    max_concurrency,
-    timeout
-);
-
 impl UserDefinedLogicalNodeCore for AsyncUDFExtension {
     fn name(&self) -> &str {
         "AsyncUDFNode"
@@ -308,7 +368,7 @@ impl UserDefinedLogicalNodeCore for AsyncUDFExtension {
         self.arg_exprs
             .iter()
             .chain(self.final_exprs.iter())
-            .cloned()
+            .map(|e| e.to_owned())
             .collect()
     }
 
@@ -318,17 +378,16 @@ impl UserDefinedLogicalNodeCore for AsyncUDFExtension {
 
     fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
         if inputs.len() != 1 {
-            return Err(DataFusionError::Internal("input size inconsistent".into()));
+            return internal_err!("input size inconsistent");
         }
         if UserDefinedLogicalNode::expressions(self) != exprs {
-            return Err(DataFusionError::Internal(
-                "Tried to recreate async UDF node with different expressions".into(),
-            ));
+            return internal_err!("Tried to recreate async UDF node with different expressions");
         }
 
         Ok(Self {
             input: Arc::new(inputs[0].clone()),
             name: self.name.clone(),
+            udf: self.udf.clone(),
             arg_exprs: self.arg_exprs.clone(),
             final_exprs: self.final_exprs.clone(),
             ordered: self.ordered,
@@ -338,19 +397,3 @@ impl UserDefinedLogicalNodeCore for AsyncUDFExtension {
         })
     }
 }
-
-impl StreamExtension for AsyncUDFExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_fields(
-            self.final_schema
-                .fields()
-                .iter()
-                .map(|f| (**f).clone())
-                .collect(),
-        )
-    }
-}
diff --git a/src/sql/extensions/projection.rs b/src/sql/extensions/projection.rs
new file mode 100644
index 00000000..fa0f118b
--- /dev/null
+++ b/src/sql/extensions/projection.rs
@@ -0,0 +1,154 @@
+
+use datafusion::common::{DFSchema, DFSchemaRef, Result, internal_err};
+use std::{fmt::Formatter, sync::Arc};
+
+use super::{StreamExtension, NodeWithIncomingEdges};
+use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use itertools::Itertools;
+use prost::Message;
+use protocol::grpc::api::ProjectionOperator;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::multifield_partial_ord;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::types::{schema_from_df_fields, DFField};
+use crate::types::{FsSchema, FsSchemaRef};
+
+pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension";
+
+/// Projection node: evaluates `exprs` over its input, via a shuffle edge when `shuffle` is set.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct ProjectionExtension {
+    pub(crate) inputs: Vec<LogicalPlan>,
+    pub(crate) name: Option<String>,
+    pub(crate) exprs: Vec<Expr>,
+    pub(crate) schema: DFSchemaRef,
+    pub(crate) shuffle: bool,
+}
+
+multifield_partial_ord!(ProjectionExtension, name, exprs);
+
+impl ProjectionExtension {
+    pub(crate) fn new(inputs: Vec<LogicalPlan>, name: Option<String>, exprs: Vec<Expr>) -> Self {
+        let input_schema = inputs.first().unwrap().schema();
+        let fields = exprs
+            .iter()
+            .map(|e| DFField::from(e.to_field(input_schema).unwrap()))
+            .collect_vec();
+
+        let schema = Arc::new(schema_from_df_fields(&fields).unwrap());
+
+        Self {
+            inputs,
+            name,
+            exprs,
+            schema,
+            shuffle: false,
+        }
+    }
+
+    pub(crate) fn shuffled(mut self) -> Self {
+        self.shuffle = true;
+        self
+    }
+}
+
+impl StreamExtension for ProjectionExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Plans the projection operator node together with its single incoming edge.
+    /// Returns an internal error if no input schema is given or the input schemas differ.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        let Some(first) = input_schemas.first() else {
+            return internal_err!("projection node requires at least one input schema");
+        };
+        let input_schema = (**first).clone();
+        // the expressions are planned against one schema, so every input must share it
+        if input_schemas.iter().skip(1).any(|s| **s != input_schema) {
+            return internal_err!("all input schemas to a projection node must match");
+        }
+
+        let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?);
+        let mut physical_exprs = vec![];
+
+        for e in &self.exprs {
+            let phys = planner
+                .create_physical_expr(e, &input_df_schema)
+                .map_err(|e| e.context("projection"))?;
+            physical_exprs.push(
+                serialize_physical_expr(&phys, &DefaultPhysicalExtensionCodec {})?.encode_to_vec(),
+            );
+        }
+
+        let config = ProjectionOperator {
+            name: self.name.as_deref().unwrap_or("projection").to_string(),
+            input_schema: Some(input_schema.clone().into()),
+            output_schema: Some(self.output_schema().into()),
+            exprs: physical_exprs,
+        };
+
+        let node = LogicalNode::single(
+            index as u32,
+            format!("projection_{index}"),
+            OperatorName::Projection,
+            config.encode_to_vec(),
+            format!("ArrowProjection<{}>", self.name.as_deref().unwrap_or("_")),
+            1,
+        );
+
+        let edge_type = if self.shuffle {
+            LogicalEdgeType::Shuffle
+        } else {
+            LogicalEdgeType::Forward
+        };
+
+        Ok(NodeWithIncomingEdges {
+            node,
+            edges: vec![LogicalEdge::project_all(edge_type, input_schema)],
+        })
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_arrow().clone())).unwrap()
+    }
+}
+
+impl UserDefinedLogicalNodeCore for ProjectionExtension {
+    fn name(&self) -> &str {
+        PROJECTION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        self.inputs.iter().collect()
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "ProjectionExtension: {}", self.schema()) // label now matches `name()`
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self {
+            name: self.name.clone(),
+            inputs,
+            exprs: self.exprs.clone(), // `expressions()` reports none, so `_exprs` is empty
+            schema: self.schema.clone(),
+            shuffle: self.shuffle,
+        })
+    }
+}
diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs
new file mode 100644
index 00000000..91ef4d0e
--- /dev/null
+++ b/src/sql/extensions/remote_table.rs
@@ -0,0 +1,124 @@
+use std::{fmt::Formatter, sync::Arc};
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err, plan_err};
+
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode};
+use prost::Message;
+use protocol::grpc::api::ValuePlanOperator;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::multifield_partial_ord;
+use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::types::{FsSchema, FsSchemaRef};
+use super::{StreamExtension, NodeWithIncomingEdges};
+
+pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension";
+
+/* Lightweight extension that allows us to segment the graph and merge nodes with the same name.
+  An Extension Planner will be used to isolate computation to individual nodes.
+*/
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct RemoteTableExtension {
+    pub(crate) input: LogicalPlan,
+    pub(crate) name: TableReference,
+    pub(crate) schema: DFSchemaRef,
+    pub(crate) materialize: bool,
+}
+
+multifield_partial_ord!(RemoteTableExtension, input, name, materialize);
+
+impl StreamExtension for RemoteTableExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        if self.materialize {
+            Some(NamedNode::RemoteTable(self.name.to_owned()))
+        } else {
+            None
+        }
+    }
+
+    /// Plans this node as an `ArrowValue` operator that carries the physical plan of `input`.
+    ///
+    /// One forward edge is emitted for each entry of `input_schemas`.
+    ///
+    /// # Errors
+    /// Returns a plan error when `input_schemas` is empty or its entries are not all equal;
+    /// failures from physical planning and plan serialization are propagated.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        let Some(first) = input_schemas.first() else {
+            return plan_err!("RemoteTableExtension should have at least one input");
+        };
+        // several inputs are accepted, but only when they all carry the same schema
+        if input_schemas.iter().skip(1).any(|schema| schema != first) {
+            return plan_err!("If a node has multiple inputs, they must all have the same schema");
+        }
+
+        let physical_plan = planner.sync_plan(&self.input)?;
+        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
+            physical_plan,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+        let config = ValuePlanOperator {
+            name: format!("value_calculation({})", self.name),
+            physical_plan: physical_plan_node.encode_to_vec(),
+        };
+        let node = LogicalNode::single(
+            index as u32,
+            format!("value_{index}"),
+            OperatorName::ArrowValue,
+            config.encode_to_vec(),
+            self.name.to_string(),
+            1,
+        );
+
+        let edges = input_schemas
+            .into_iter()
+            .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone()))
+            .collect();
+        Ok(NodeWithIncomingEdges { node, edges })
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap()
+    }
+}
+
+impl UserDefinedLogicalNodeCore for RemoteTableExtension {
+    fn name(&self) -> &str {
+        REMOTE_TABLE_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "RemoteTableExtension: {}", self.schema)
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if inputs.len() != 1 {
+            return internal_err!("input size inconsistent");
+        }
+
+        Ok(Self {
+            input: inputs[0].clone(),
+            name: self.name.clone(),
+            schema: self.schema.clone(),
+            materialize: self.materialize,
+        })
+    }
+}
diff --git a/src/sql/extensions/sink.rs b/src/sql/extensions/sink.rs
new file mode 100644
index 00000000..7b58a7b4
--- /dev/null
+++ b/src/sql/extensions/sink.rs
@@ -0,0 +1,168 @@
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err};
+
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use prost::Message;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::multifield_partial_ord;
+use crate::sql::schema::Table;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
+use super::{
+    StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension,
+    remote_table::RemoteTableExtension,
+};
+
+pub(crate) const SINK_NODE_NAME: &str = "SinkExtension";
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct SinkExtension {
+    pub(crate) name: TableReference,
+    pub(crate) table: Table,
+    pub(crate) schema: DFSchemaRef,
+    inputs: Arc<Vec<LogicalPlan>>,
+}
+
+multifield_partial_ord!(SinkExtension, name, inputs);
+
+impl SinkExtension {
+    pub fn new(
+        name: TableReference,
+        table: Table,
+        mut schema: DFSchemaRef,
+        mut input: Arc<LogicalPlan>,
+    ) -> Result<Self> {
+        let input_is_updating = input
+            .schema()
+            .has_column_with_unqualified_name(UPDATING_META_FIELD);
+        match &table {
+            Table::ConnectorTable(connector_table) => {
+                match (input_is_updating, connector_table.is_updating()) {
+                    (_, true) => {
+                        let to_debezium_extension =
+                            ToDebeziumExtension::try_new(input.as_ref().clone())?;
+                        input = Arc::new(LogicalPlan::Extension(Extension {
+                            node: Arc::new(to_debezium_extension),
+                        }));
+                        schema = input.schema().clone();
+                    }
+                    (true, false) => {
+                        return plan_err!(
+                            "input is updating, but sink is not configured as an updating sink (hint: use `format = 'debezium_json'`)"
+                        );
+                    }
+                    (false, false) => {}
+                }
+            }
+            Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"),
+            Table::TableFromQuery { .. } => {}
+
+        }
+        Self::add_remote_if_necessary(&schema, &mut input);
+
+        let inputs = Arc::new(vec![(*input).clone()]);
+        Ok(Self {
+            name,
+            table,
+            schema,
+            inputs,
+        })
+    }
+
+    // The input to a sink needs to be a non-transparent logical plan extension.
+    // If it isn't, wrap the input in a RemoteTableExtension.
+    pub fn add_remote_if_necessary(schema: &DFSchemaRef, input: &mut Arc<LogicalPlan>) {
+        if let LogicalPlan::Extension(node) = input.as_ref() {
+            let arroyo_extension: &dyn StreamExtension = (&node.node).try_into().unwrap();
+            if !arroyo_extension.transparent() {
+                return;
+            }
+        }
+        let remote_table_extension = RemoteTableExtension {
+            input: input.as_ref().clone(),
+            name: TableReference::bare("sink projection"),
+            schema: schema.clone(),
+            materialize: false,
+        };
+        *input = Arc::new(LogicalPlan::Extension(Extension {
+            node: Arc::new(remote_table_extension),
+        }));
+    }
+}
+
+impl UserDefinedLogicalNodeCore for SinkExtension {
+    fn name(&self) -> &str {
+        SINK_NODE_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        self.inputs.iter().collect()
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "SinkExtension({:?}): {}", self.name, self.schema)
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self {
+            name: self.name.clone(),
+            table: self.table.clone(),
+            schema: self.schema.clone(),
+            inputs: Arc::new(inputs),
+        })
+    }
+}
+
+impl StreamExtension for SinkExtension {
+    /// Names the node `NamedNode::Sink` using this sink's table reference.
+    fn node_name(&self) -> Option<NamedNode> {
+        Some(NamedNode::Sink(self.name.clone()))
+    }
+
+    /// Emits the connector sink operator plus one forward edge per input schema.
+    /// Fails when the table cannot produce a connector op.
+    fn plan_node(
+        &self,
+        _planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        // Resolve the connector op once: it supplies both the config bytes and the description.
+        let connector_op = self
+            .table
+            .connector_op()
+            .map_err(|e| e.context("connector op"))?;
+
+        let node = LogicalNode::single(
+            index as u32,
+            format!("sink_{}_{}", self.name, index),
+            OperatorName::ConnectorSink,
+            connector_op.encode_to_vec(),
+            connector_op.description.clone(),
+            1,
+        );
+
+        let edges = input_schemas
+            .into_iter()
+            .map(|input_schema| {
+                LogicalEdge::project_all(LogicalEdgeType::Forward, (*input_schema).clone())
+            })
+            .collect();
+        Ok(NodeWithIncomingEdges { node, edges })
+    }
+
+    /// Returns a schema built from an empty field list.
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_fields(vec![])
+    }
+}
\ No newline at end of file
diff --git a/src/sql/planner/extension/table_source.rs b/src/sql/extensions/table_source.rs
similarity index 54%
rename from src/sql/planner/extension/table_source.rs
rename to src/sql/extensions/table_source.rs
index cab3ae3d..bdf470e2 100644
--- a/src/sql/planner/extension/table_source.rs
+++ b/src/sql/extensions/table_source.rs
@@ -1,15 +1,22 @@
 use std::sync::Arc;
 
-use datafusion::common::{DFSchemaRef, Result, TableReference};
-use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err};
 
-use super::{NamedNode, StreamExtension};
-use crate::multifield_partial_ord;
-use crate::sql::catalog::connector_table::ConnectorTable;
-use crate::sql::catalog::field_spec::FieldSpec;
-use crate::sql::planner::schemas::add_timestamp_field;
-use crate::sql::types::{StreamSchema, schema_from_df_fields};
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
 
+use prost::Message;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::multifield_partial_ord;
+use crate::sql::schema::{ConnectorTable, FieldSpec, Table};
+use crate::sql::schema::utils::add_timestamp_field;
+use crate::sql::extensions::debezium::DebeziumUnrollingExtension;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::types::schema_from_df_fields;
+use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
+use super::{
+    StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension,
+    remote_table::RemoteTableExtension,
+};
 pub(crate) const TABLE_SOURCE_NAME: &str = "TableSourceExtension";
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -36,11 +43,8 @@ impl TableSourceExtension {
         let base_schema = Arc::new(schema_from_df_fields(&physical_fields).unwrap());
 
         let schema = if table.is_updating() {
-            super::debezium::DebeziumUnrollingExtension::as_debezium_schema(
-                &base_schema,
-                Some(name.clone()),
-            )
-            .unwrap()
+            DebeziumUnrollingExtension::as_debezium_schema(&base_schema, Some(name.clone()))
+                .unwrap()
         } else {
             base_schema
         };
@@ -88,7 +92,31 @@ impl StreamExtension for TableSourceExtension {
         Some(NamedNode::Source(self.name.clone()))
     }
 
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap()
+    fn plan_node(
+        &self,
+        _planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        if !input_schemas.is_empty() {
+            return plan_err!("TableSourceExtension should not have inputs");
+        }
+        let sql_source = self.table.as_sql_source()?;
+        let node = LogicalNode::single(
+            index as u32,
+            format!("source_{}_{}", self.name, index),
+            OperatorName::ConnectorSource,
+            sql_source.source.config.encode_to_vec(),
+            sql_source.source.config.description.clone(),
+            1,
+        );
+        Ok(NodeWithIncomingEdges {
+            node,
+            edges: vec![],
+        })
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap()
     }
 }
diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs
new file mode 100644
index 00000000..fdb2bb1d
--- /dev/null
+++ b/src/sql/extensions/updating_aggregate.rs
@@ -0,0 +1,165 @@
+use datafusion::common::{DFSchemaRef, Result, TableReference, ToDFSchema, plan_err};
+use datafusion::logical_expr::expr::ScalarFunction;
+use datafusion::logical_expr::{
+    Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, col, lit,
+};
+use datafusion::prelude::named_struct;
+use datafusion::scalar::ScalarValue;
+use datafusion_proto::physical_plan::AsExecutionPlan;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use prost::Message;
+use std::sync::Arc;
+use std::time::Duration;
+use protocol::grpc::api::UpdatingAggregateOperator;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::functions::multi_hash;
+use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::extensions::{IsRetractExtension, NodeWithIncomingEdges, StreamExtension};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::types::{FsSchema, FsSchemaRef};
+
+pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension";
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub(crate) struct UpdatingAggregateExtension {
+    pub(crate) aggregate: LogicalPlan,
+    pub(crate) key_fields: Vec<usize>,
+    pub(crate) final_calculation: LogicalPlan,
+    pub(crate) timestamp_qualifier: Option<TableReference>,
+    pub(crate) ttl: Duration,
+}
+
+impl UpdatingAggregateExtension {
+    pub fn new(
+        aggregate: LogicalPlan,
+        key_fields: Vec<usize>,
+        timestamp_qualifier: Option<TableReference>,
+        ttl: Duration,
+    ) -> Result<Self> {
+        let final_calculation = LogicalPlan::Extension(Extension {
+            node: Arc::new(IsRetractExtension::new(
+                aggregate.clone(),
+                timestamp_qualifier.clone(),
+            )),
+        });
+
+        Ok(Self {
+            aggregate,
+            key_fields,
+            final_calculation,
+            timestamp_qualifier,
+            ttl,
+        })
+    }
+}
+
+impl UserDefinedLogicalNodeCore for UpdatingAggregateExtension {
+    fn name(&self) -> &str {
+        UPDATING_AGGREGATE_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.aggregate]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        self.final_calculation.schema()
+    }
+
+    fn expressions(&self) -> Vec<datafusion::prelude::Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "UpdatingAggregateExtension")
+    }
+
+    /// Rebuilds the node around a single replacement input, reusing keys, qualifier and TTL.
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<datafusion::prelude::Expr>,
+        inputs: Vec<LogicalPlan>,
+    ) -> Result<Self> {
+        if inputs.len() != 1 {
+            return plan_err!("input size inconsistent");
+        }
+        let (keys, qualifier) = (self.key_fields.clone(), self.timestamp_qualifier.clone());
+        Self::new(inputs[0].clone(), keys, qualifier, self.ttl)
+    }
+}
+
+impl StreamExtension for UpdatingAggregateExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        if input_schemas.len() != 1 {
+            return plan_err!(
+                "UpdatingAggregateExtension requires exactly one input schema, found {}",
+                input_schemas.len()
+            );
+        }
+
+        let input_schema = input_schemas[0].clone();
+        let input_dfschema = input_schema.schema.clone().to_dfschema()?;
+
+        let aggregate_exec = PhysicalPlanNode::try_from_physical_plan(
+            planner.sync_plan(&self.aggregate)?,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+
+        let key_exprs: Vec<Expr> = self
+            .key_fields
+            .iter()
+            .map(|&i| col(input_schema.schema.field(i).name()))
+            .collect();
+        let hash_expr = if key_exprs.is_empty() {
+            Expr::Literal(ScalarValue::FixedSizeBinary(16, Some(vec![0; 16])), None)
+        } else {
+            Expr::ScalarFunction(ScalarFunction {
+                func: multi_hash(),
+                args: key_exprs,
+            })
+        };
+
+        let updating_meta_expr =
+            named_struct(vec![lit("is_retract"), lit(false), lit("id"), hash_expr]);
+
+        let config = UpdatingAggregateOperator {
+            name: "UpdatingAggregate".to_string(),
+            input_schema: Some((*input_schema).clone().into()),
+            final_schema: Some(self.output_schema().into()),
+            aggregate_exec: aggregate_exec.encode_to_vec(),
+            metadata_expr: planner
+                .serialize_as_physical_expr(&updating_meta_expr, &input_dfschema)?,
+            flush_interval_micros: 10_000_000,
+            ttl_micros: self.ttl.as_micros() as u64,
+        };
+
+        let node = LogicalNode::single(
+            index as u32,
+            format!("updating_aggregate_{index}"),
+            OperatorName::UpdatingAggregate,
+            config.encode_to_vec(),
+            "UpdatingAggregate".to_string(),
+            1,
+        );
+
+        let edge = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone());
+
+        Ok(NodeWithIncomingEdges {
+            node,
+            edges: vec![edge],
+        })
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap()
+    }
+}
diff --git a/src/sql/planner/extension/watermark_node.rs b/src/sql/extensions/watermark_node.rs
similarity index 57%
rename from src/sql/planner/extension/watermark_node.rs
rename to src/sql/extensions/watermark_node.rs
index a06bdb9a..f13b3472 100644
--- a/src/sql/planner/extension/watermark_node.rs
+++ b/src/sql/extensions/watermark_node.rs
@@ -1,19 +1,20 @@
-use std::fmt::Formatter;
-use std::sync::Arc;
-
 use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err};
 use datafusion::error::DataFusionError;
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
-
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use prost::Message;
+use std::fmt::Formatter;
+use std::sync::Arc;
+use protocol::grpc::api::ExpressionWatermarkConfig;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::multifield_partial_ord;
-use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::planner::schemas::add_timestamp_field;
-use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD};
+use crate::sql::schema::utils::add_timestamp_field;
+use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::types::{FsSchema, FsSchemaRef};
 
 pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode";
-
-/// Represents a watermark node in the streaming query plan.
-/// Watermarks track event-time progress and enable time-based operations.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct WatermarkNode {
     pub input: LogicalPlan,
@@ -62,7 +63,7 @@ impl UserDefinedLogicalNodeCore for WatermarkNode {
 
         let timestamp_index = self
             .schema
-            .index_of_column_by_name(Some(&self.qualifier), TIMESTAMP_FIELD)
+            .index_of_column_by_name(Some(&self.qualifier), "_timestamp")
             .ok_or_else(|| DataFusionError::Plan("missing timestamp column".to_string()))?;
 
         Ok(Self {
@@ -80,8 +81,55 @@ impl StreamExtension for WatermarkNode {
         Some(NamedNode::Watermark(self.qualifier.clone()))
     }
 
-    fn output_schema(&self) -> StreamSchema {
-        self.stream_schema()
+    /// Builds the `ExpressionWatermark` logical node for this watermark and the
+    /// single `Forward` edge that feeds it.
+    ///
+    /// The watermark expression is planned against this node's schema and stored,
+    /// protobuf-encoded, in the operator config.
+    ///
+    /// # Errors
+    /// Returns an error if `input_schemas` is empty, or if the watermark
+    /// expression cannot be planned or serialized.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        // Report a malformed plan as an error; indexing `input_schemas[0]`
+        // directly would panic on an empty vector.
+        let Some(input_schema) = input_schemas.first() else {
+            return internal_err!("WatermarkNode requires an input schema");
+        };
+
+        let expression = planner.create_physical_expr(&self.watermark_expression, &self.schema)?;
+        let expression = serialize_physical_expr(&expression, &DefaultPhysicalExtensionCodec {})?;
+        let config = ExpressionWatermarkConfig {
+            period_micros: 1_000_000,
+            idle_time_micros: None,
+            expression: expression.encode_to_vec(),
+            input_schema: Some(self.arroyo_schema().into()),
+        };
+        let node = LogicalNode::single(
+            index as u32,
+            format!("watermark_{index}"),
+            OperatorName::ExpressionWatermark,
+            config.encode_to_vec(),
+            "watermark".to_string(),
+            1,
+        );
+
+        let incoming_edge =
+            LogicalEdge::project_all(LogicalEdgeType::Forward, input_schema.as_ref().clone());
+        Ok(NodeWithIncomingEdges {
+            node,
+            edges: vec![incoming_edge],
+        })
+    }
+
+    /// Schema of the records this node emits; delegates to `arroyo_schema`.
+    fn output_schema(&self) -> FsSchema {
+        self.arroyo_schema()
     }
 }
 
@@ -93,7 +124,7 @@ impl WatermarkNode {
     ) -> Result<Self> {
         let schema = add_timestamp_field(input.schema().clone(), Some(qualifier.clone()))?;
         let timestamp_index = schema
-            .index_of_column_by_name(None, TIMESTAMP_FIELD)
+            .index_of_column_by_name(None, "_timestamp")
             .ok_or_else(|| DataFusionError::Plan("missing _timestamp column".to_string()))?;
         Ok(Self {
             input,
@@ -103,8 +134,7 @@ impl WatermarkNode {
             timestamp_index,
         })
     }
-
-    pub(crate) fn stream_schema(&self) -> StreamSchema {
-        StreamSchema::new_unkeyed(Arc::new(self.schema.as_ref().into()), self.timestamp_index)
+    pub(crate) fn arroyo_schema(&self) -> FsSchema {
+        FsSchema::new_unkeyed(Arc::new(self.schema.as_ref().into()), self.timestamp_index)
     }
 }
diff --git a/src/sql/extensions/window_fn.rs b/src/sql/extensions/window_fn.rs
new file mode 100644
index 00000000..1c8b5687
--- /dev/null
+++ b/src/sql/extensions/window_fn.rs
@@ -0,0 +1,154 @@
+use std::sync::Arc;
+use datafusion::common::{Column, DFSchema, DFSchemaRef, Result, plan_err};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode};
+use prost::Message;
+use protocol::grpc::api::WindowFunctionOperator;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::types::TIMESTAMP_FIELD;
+use crate::types::{FsSchema, FsSchemaRef};
+use super::{ NodeWithIncomingEdges, StreamExtension};
+
+/// Name reported by [`WindowFunctionExtension`] through `UserDefinedLogicalNodeCore::name`.
+pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension";
+
+/// Logical-plan extension that wraps a window-function plan so it can be
+/// lowered to a `WindowFunction` operator in the logical graph.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub(crate) struct WindowFunctionExtension {
+    /// Plan evaluated by the operator; also this node's only input.
+    window_plan: LogicalPlan,
+    /// Key field indices as passed to `new`; not read by `plan_node`.
+    key_fields: Vec<usize>,
+}
+
+impl WindowFunctionExtension {
+    /// Wraps `window_plan` together with the indices of its key fields.
+    pub fn new(window_plan: LogicalPlan, key_fields: Vec<usize>) -> Self {
+        Self {
+            window_plan,
+            key_fields,
+        }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for WindowFunctionExtension {
+    fn name(&self) -> &str {
+        WINDOW_FUNCTION_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.window_plan]
+    }
+
+    /// The node exposes the wrapped plan's schema unchanged.
+    fn schema(&self) -> &DFSchemaRef {
+        self.window_plan.schema()
+    }
+
+    fn expressions(&self) -> Vec<datafusion::prelude::Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "WindowFunction: {}", self.schema())
+    }
+
+    /// Rebuilds the node around a replacement input, keeping `key_fields`.
+    ///
+    /// # Errors
+    /// Returns a plan error unless exactly one input is supplied.
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<datafusion::prelude::Expr>,
+        inputs: Vec<LogicalPlan>,
+    ) -> Result<Self> {
+        // Take ownership of the single input instead of indexing and cloning;
+        // indexing `inputs[0]` would panic on an empty vector.
+        let mut inputs = inputs.into_iter();
+        match (inputs.next(), inputs.next()) {
+            (Some(window_plan), None) => Ok(Self::new(window_plan, self.key_fields.clone())),
+            _ => plan_err!("WindowFunctionExtension requires exactly one input"),
+        }
+    }
+}
+
+impl StreamExtension for WindowFunctionExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Lowers this extension to a `WindowFunction` logical node fed by one
+    /// `Shuffle` edge carrying the input schema.
+    ///
+    /// The operator config holds the serialized `TIMESTAMP_FIELD` column
+    /// expression as its binning function and the physical plan produced by
+    /// `Planner::sync_plan` for the wrapped window plan.
+    ///
+    /// # Errors
+    /// Returns an error if `input_schemas` does not contain exactly one
+    /// schema, or if schema conversion, planning, or serialization fails.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        if input_schemas.len() != 1 {
+            return plan_err!("WindowFunctionExtension requires exactly one input");
+        }
+        let input_schema = input_schemas[0].clone();
+        // Propagate a failed schema conversion as an error rather than panicking.
+        let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?);
+
+        let binning_function = planner.create_physical_expr(
+            &Expr::Column(Column::new_unqualified(TIMESTAMP_FIELD.to_string())),
+            &input_df_schema,
+        )?;
+        let binning_function_proto =
+            serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})?;
+
+        let window_plan = planner.sync_plan(&self.window_plan)?;
+        let codec = FsPhysicalExtensionCodec::default();
+        let window_plan_proto = PhysicalPlanNode::try_from_physical_plan(window_plan, &codec)?;
+
+        let config = WindowFunctionOperator {
+            name: "WindowFunction".to_string(),
+            input_schema: Some(input_schema.as_ref().clone().into()),
+            binning_function: binning_function_proto.encode_to_vec(),
+            window_function_plan: window_plan_proto.encode_to_vec(),
+        };
+
+        let logical_node = LogicalNode::single(
+            index as u32,
+            format!("window_function_{index}"),
+            OperatorName::WindowFunction,
+            config.encode_to_vec(),
+            "window function".to_string(),
+            1,
+        );
+
+        let edge = LogicalEdge::project_all(
+            // TODO: detect when this shuffle is unnecessary
+            LogicalEdgeType::Shuffle,
+            input_schema.as_ref().clone(),
+        );
+
+        Ok(NodeWithIncomingEdges {
+            node: logical_node,
+            edges: vec![edge],
+        })
+    }
+
+    /// Unkeyed `FsSchema` derived from this node's DataFusion schema.
+    ///
+    /// NOTE(review): panics if `FsSchema::from_schema_unkeyed` rejects the
+    /// schema; the trait signature leaves no way to return the error.
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).unwrap()
+    }
+}
diff --git a/src/sql/functions/mod.rs b/src/sql/functions/mod.rs
index 84d3c7d4..bfd59654 100644
--- a/src/sql/functions/mod.rs
+++ b/src/sql/functions/mod.rs
@@ -1,4 +1,4 @@
-use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::schema::StreamSchemaProvider;
 use datafusion::arrow::array::{
     Array, ArrayRef, StringArray, UnionArray,
     builder::{FixedSizeBinaryBuilder, ListBuilder, StringBuilder},
diff --git a/src/datastream/logical.rs b/src/sql/logical_node/logical.rs
similarity index 80%
rename from src/datastream/logical.rs
rename to src/sql/logical_node/logical.rs
index a6486760..13560a3e 100644
--- a/src/datastream/logical.rs
+++ b/src/sql/logical_node/logical.rs
@@ -1,7 +1,5 @@
 use itertools::Itertools;
 
-use crate::datastream::optimizers::Optimizer;
-use crate::sql::types::StreamSchema;
 use datafusion::arrow::datatypes::DataType;
 use petgraph::Direction;
 use petgraph::dot::Dot;
@@ -9,7 +7,11 @@ use petgraph::graph::DiGraph;
 use std::collections::{HashMap, HashSet};
 use std::fmt::{Debug, Display, Formatter};
 use std::sync::Arc;
+use datafusion_proto::protobuf::ArrowType;
+use prost::Message;
 use strum::{Display, EnumString};
+use protocol::grpc::api;
+use crate::types::FsSchema;
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
 pub enum OperatorName {
@@ -52,18 +54,18 @@ impl Display for LogicalEdgeType {
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct LogicalEdge {
     pub edge_type: LogicalEdgeType,
-    pub schema: Arc<StreamSchema>,
+    pub schema: Arc<FsSchema>,
 }
 
 impl LogicalEdge {
-    pub fn new(edge_type: LogicalEdgeType, schema: StreamSchema) -> Self {
+    pub fn new(edge_type: LogicalEdgeType, schema: FsSchema) -> Self {
         LogicalEdge {
             edge_type,
             schema: Arc::new(schema),
         }
     }
 
-    pub fn project_all(edge_type: LogicalEdgeType, schema: StreamSchema) -> Self {
+    pub fn project_all(edge_type: LogicalEdgeType, schema: FsSchema) -> Self {
         LogicalEdge {
             edge_type,
             schema: Arc::new(schema),
@@ -81,7 +83,7 @@ pub struct ChainedLogicalOperator {
 #[derive(Clone, Debug)]
 pub struct OperatorChain {
     pub(crate) operators: Vec<ChainedLogicalOperator>,
-    pub(crate) edges: Vec<Arc<StreamSchema>>,
+    pub(crate) edges: Vec<Arc<FsSchema>>,
 }
 
 impl OperatorChain {
@@ -94,7 +96,7 @@ impl OperatorChain {
 
     pub fn iter(
         &self,
-    ) -> impl Iterator<Item = (&ChainedLogicalOperator, Option<&Arc<StreamSchema>>)> {
+    ) -> impl Iterator<Item = (&ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
         self.operators
             .iter()
             .zip_longest(self.edges.iter())
@@ -104,7 +106,7 @@ impl OperatorChain {
 
     pub fn iter_mut(
         &mut self,
-    ) -> impl Iterator<Item = (&mut ChainedLogicalOperator, Option<&Arc<StreamSchema>>)> {
+    ) -> impl Iterator<Item = (&mut ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
         self.operators
             .iter_mut()
             .zip_longest(self.edges.iter())
@@ -190,6 +192,18 @@ impl Debug for LogicalNode {
 
 pub type LogicalGraph = DiGraph<LogicalNode, LogicalEdge>;
 
+/// A rewrite pass over a [`LogicalGraph`].
+pub trait Optimizer {
+    /// Runs one pass over `plan`; `optimize` keeps calling this for as long as
+    /// it returns `true`.
+    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool;
+
+    /// Applies [`Optimizer::optimize_once`] repeatedly until it returns `false`.
+    fn optimize(&self, plan: &mut LogicalGraph) {
+        while self.optimize_once(plan) {}
+    }
+}
+
 #[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)]
 pub struct DylibUdfConfig {
     pub dylib_path: String,
@@ -315,3 +329,52 @@ impl LogicalProgram {
         s
     }
 }
+
+/// Converts the planner's UDF description into its protobuf form, encoding
+/// every Arrow data type as serialized `ArrowType` bytes.
+///
+/// # Panics
+/// Panics with "unsupported data type" if `ArrowType::try_from` rejects an
+/// argument type or the return type.
+impl From<DylibUdfConfig> for api::DylibUdfConfig {
+    fn from(from: DylibUdfConfig) -> Self {
+        let encode = |data_type: &DataType| {
+            ArrowType::try_from(data_type)
+                .expect("unsupported data type")
+                .encode_to_vec()
+        };
+
+        api::DylibUdfConfig {
+            dylib_path: from.dylib_path,
+            arg_types: from.arg_types.iter().map(encode).collect(),
+            return_type: encode(&from.return_type),
+            aggregate: from.aggregate,
+            is_async: from.is_async,
+        }
+    }
+}
+
+/// Rebuilds the planner's UDF description from its protobuf form by decoding
+/// each serialized `ArrowType` back into an Arrow `DataType`.
+///
+/// # Panics
+/// Panics with "invalid arrow type" if any type's bytes fail to decode or
+/// convert.
+impl From<api::DylibUdfConfig> for DylibUdfConfig {
+    fn from(from: api::DylibUdfConfig) -> Self {
+        // One decoding path for argument and return types, so every failure
+        // carries the same message instead of a bare `unwrap()` panic.
+        let decode = |bytes: &[u8]| {
+            let arrow_type = ArrowType::decode(bytes).expect("invalid arrow type");
+            DataType::try_from(&arrow_type).expect("invalid arrow type")
+        };
+
+        DylibUdfConfig {
+            dylib_path: from.dylib_path,
+            arg_types: from.arg_types.iter().map(|t| decode(t.as_slice())).collect(),
+            return_type: decode(from.return_type.as_slice()),
+            aggregate: from.aggregate,
+            is_async: from.is_async,
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/sql/logical_node/mod.rs b/src/sql/logical_node/mod.rs
new file mode 100644
index 00000000..82d25f24
--- /dev/null
+++ b/src/sql/logical_node/mod.rs
@@ -0,0 +1 @@
+pub mod logical;
diff --git a/src/sql/physical/mod.rs b/src/sql/logical_planner/mod.rs
similarity index 99%
rename from src/sql/physical/mod.rs
rename to src/sql/logical_planner/mod.rs
index bfb37f11..e4db07a0 100644
--- a/src/sql/physical/mod.rs
+++ b/src/sql/logical_planner/mod.rs
@@ -29,8 +29,8 @@ use std::{
 
 use crate::make_udf_function;
 use crate::sql::functions::MultiHashFunction;
-use crate::sql::planner::rewrite::UNNESTED_COL;
-use crate::sql::planner::schemas::window_arrow_struct;
+use crate::sql::analysis::UNNESTED_COL;
+use crate::sql::schema::utils::window_arrow_struct;
 use crate::types::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
 use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type};
 use datafusion::catalog::memory::MemorySourceConfig;
@@ -56,6 +56,9 @@ use std::fmt::Debug;
 use tokio::sync::mpsc::UnboundedReceiver;
 use tokio_stream::wrappers::UnboundedReceiverStream;
 
+pub(crate) mod planner;
+pub mod optimizers;
+
 // ─────────────────── Updating Meta Helpers ───────────────────
 
 pub fn updating_meta_fields() -> Fields {
diff --git a/src/datastream/optimizers.rs b/src/sql/logical_planner/optimizers.rs
similarity index 88%
rename from src/datastream/optimizers.rs
rename to src/sql/logical_planner/optimizers.rs
index 2d258aff..bdf32657 100644
--- a/src/datastream/optimizers.rs
+++ b/src/sql/logical_planner/optimizers.rs
@@ -1,20 +1,8 @@
-use crate::datastream::logical::{LogicalEdgeType, LogicalGraph};
+use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer};
 use petgraph::prelude::*;
 use petgraph::visit::NodeRef;
 use std::mem;
 
-pub trait Optimizer {
-    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool;
-
-    fn optimize(&self, plan: &mut LogicalGraph) {
-        loop {
-            if !self.optimize_once(plan) {
-                break;
-            }
-        }
-    }
-}
-
 pub struct ChainingOptimizer {}
 
 fn remove_in_place<N, E>(graph: &mut DiGraph<N, E>, node: NodeIndex) {
diff --git a/src/sql/planner/physical_planner.rs b/src/sql/logical_planner/planner.rs
similarity index 95%
rename from src/sql/planner/physical_planner.rs
rename to src/sql/logical_planner/planner.rs
index e7e1cf60..150b86f1 100644
--- a/src/sql/planner/physical_planner.rs
+++ b/src/sql/logical_planner/planner.rs
@@ -26,22 +26,33 @@ use tokio::runtime::Builder;
 use tokio::sync::oneshot;
 
 use async_trait::async_trait;
+use datafusion_common::TableReference;
 use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
 use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
 
-use crate::datastream::logical::{LogicalEdge, LogicalGraph, LogicalNode};
-use crate::sql::physical::{
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalGraph, LogicalNode};
+use crate::sql::logical_planner::{
     DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec,
 };
-use crate::sql::planner::StreamSchemaProvider;
-use crate::sql::planner::extension::debezium::{
+use crate::sql::extensions::debezium::{
     DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME,
 };
-use crate::sql::planner::extension::key_calculation::KeyCalculationExtension;
-use crate::sql::planner::extension::{NamedNode, NodeWithIncomingEdges, StreamExtension};
-use crate::sql::planner::schemas::add_timestamp_field_arrow;
+use crate::sql::extensions::key_calculation::KeyCalculationExtension;
+use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::schema::utils::add_timestamp_field_arrow;
+use crate::sql::schema::StreamSchemaProvider;
 use crate::types::{FsSchema, FsSchemaRef};
 
+/// Identifies a named plan node: each variant tags a `TableReference` with the
+/// role of the node it belongs to.
+#[derive(Debug, Eq, Hash, PartialEq)]
+pub(crate) enum NamedNode {
+    Source(TableReference),
+    Watermark(TableReference),
+    RemoteTable(TableReference),
+    Sink(TableReference),
+}
+
 pub(crate) struct PlanToGraphVisitor<'a> {
     graph: DiGraph<LogicalNode, LogicalEdge>,
     output_schemas: HashMap<NodeIndex, FsSchemaRef>,
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index e0931530..be44d979 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -10,14 +10,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-pub mod catalog;
+pub mod schema;
 pub mod functions;
-pub mod physical;
-pub mod planner;
+pub mod parse;
+pub mod logical_node;
+pub mod logical_planner;
+pub mod analysis;
+pub(crate) mod extensions;
 pub mod types;
 
-pub use planner::StreamSchemaProvider;
-pub use planner::parse::parse_sql;
-pub use planner::plan::rewrite_plan;
-pub use planner::sql_to_plan::statement_to_plan;
-pub use planner::{CompiledSql, parse_and_get_arrow_program, parse_sql_statements};
+pub use schema::StreamSchemaProvider;
+pub use parse::parse_sql;
+pub use analysis::rewrite_plan;
+pub use analysis::{CompiledSql};
diff --git a/src/sql/planner/parse.rs b/src/sql/parse.rs
similarity index 100%
rename from src/sql/planner/parse.rs
rename to src/sql/parse.rs
diff --git a/src/sql/planner/extension/aggregate.rs b/src/sql/planner/extension/aggregate.rs
deleted file mode 100644
index 878d3cc5..00000000
--- a/src/sql/planner/extension/aggregate.rs
+++ /dev/null
@@ -1,348 +0,0 @@
-use std::fmt::Formatter;
-use std::sync::Arc;
-use std::time::Duration;
-
-use datafusion::arrow::datatypes::DataType;
-use datafusion::common::{Column, DFSchemaRef, Result, ScalarValue, internal_err};
-use datafusion::logical_expr;
-use datafusion::logical_expr::{
-    BinaryExpr, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, expr::ScalarFunction,
-};
-
-use crate::multifield_partial_ord;
-use crate::sql::planner::extension::{NamedNode, StreamExtension, TimestampAppendExtension};
-use crate::sql::types::{
-    DFField, StreamSchema, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers,
-    schema_from_df_fields, schema_from_df_fields_with_metadata,
-};
-
-pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension";
-
-/// Extension node for windowed aggregate operations in streaming SQL.
-/// Supports tumbling, sliding, session, and instant window aggregations.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct AggregateExtension {
-    pub(crate) window_behavior: WindowBehavior,
-    pub(crate) aggregate: LogicalPlan,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) key_fields: Vec<usize>,
-    pub(crate) final_calculation: LogicalPlan,
-}
-
-multifield_partial_ord!(AggregateExtension, aggregate, key_fields, final_calculation);
-
-impl AggregateExtension {
-    pub fn new(
-        window_behavior: WindowBehavior,
-        aggregate: LogicalPlan,
-        key_fields: Vec<usize>,
-    ) -> Self {
-        let final_calculation =
-            Self::final_projection(&aggregate, window_behavior.clone()).unwrap();
-        Self {
-            window_behavior,
-            aggregate,
-            schema: final_calculation.schema().clone(),
-            key_fields,
-            final_calculation,
-        }
-    }
-
-    /// Build the final projection after aggregation, which adds the window struct
-    /// and computes the output timestamp based on the window behavior.
-    pub fn final_projection(
-        aggregate_plan: &LogicalPlan,
-        window_behavior: WindowBehavior,
-    ) -> Result<LogicalPlan> {
-        let timestamp_field: DFField = aggregate_plan.inputs()[0]
-            .schema()
-            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)?
-            .into();
-        let timestamp_append = LogicalPlan::Extension(Extension {
-            node: Arc::new(TimestampAppendExtension::new(
-                aggregate_plan.clone(),
-                timestamp_field.qualifier().cloned(),
-            )),
-        });
-        let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema());
-        let mut aggregate_expressions: Vec<_> = aggregate_fields
-            .iter()
-            .map(|field| Expr::Column(field.qualified_column()))
-            .collect();
-
-        let (window_field, window_index, width, is_nested) = match window_behavior {
-            WindowBehavior::InData => return Ok(timestamp_append),
-            WindowBehavior::FromOperator {
-                window,
-                window_field,
-                window_index,
-                is_nested,
-            } => match window {
-                WindowType::Tumbling { width, .. } | WindowType::Sliding { width, .. } => {
-                    (window_field, window_index, width, is_nested)
-                }
-                WindowType::Session { .. } => {
-                    return Ok(LogicalPlan::Extension(Extension {
-                        node: Arc::new(WindowAppendExtension::new(
-                            timestamp_append,
-                            window_field,
-                            window_index,
-                        )),
-                    }));
-                }
-                WindowType::Instant => return Ok(timestamp_append),
-            },
-        };
-
-        if is_nested {
-            return Self::nested_final_projection(
-                timestamp_append,
-                window_field,
-                window_index,
-                width,
-            );
-        }
-
-        let timestamp_column =
-            Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name());
-        aggregate_fields.insert(window_index, window_field.clone());
-
-        let window_expression = Self::build_window_struct_expr(&timestamp_column, width);
-        aggregate_expressions.insert(
-            window_index,
-            window_expression
-                .alias_qualified(window_field.qualifier().cloned(), window_field.name()),
-        );
-        aggregate_fields.push(timestamp_field);
-
-        let bin_end_calculation = Expr::BinaryExpr(BinaryExpr {
-            left: Box::new(Expr::Column(timestamp_column.clone())),
-            op: logical_expr::Operator::Plus,
-            right: Box::new(Expr::Literal(
-                ScalarValue::IntervalMonthDayNano(Some(
-                    datafusion::arrow::datatypes::IntervalMonthDayNanoType::make_value(
-                        0,
-                        0,
-                        (width.as_nanos() - 1) as i64,
-                    ),
-                )),
-                None,
-            )),
-        });
-        aggregate_expressions.push(bin_end_calculation);
-
-        Ok(LogicalPlan::Projection(
-            logical_expr::Projection::try_new_with_schema(
-                aggregate_expressions,
-                Arc::new(timestamp_append),
-                Arc::new(schema_from_df_fields(&aggregate_fields)?),
-            )?,
-        ))
-    }
-
-    fn build_window_struct_expr(timestamp_column: &Column, width: Duration) -> Expr {
-        let start_expr = Expr::Column(timestamp_column.clone());
-        let end_expr = Expr::BinaryExpr(BinaryExpr {
-            left: Box::new(Expr::Column(timestamp_column.clone())),
-            op: logical_expr::Operator::Plus,
-            right: Box::new(Expr::Literal(
-                ScalarValue::IntervalMonthDayNano(Some(
-                    datafusion::arrow::datatypes::IntervalMonthDayNanoType::make_value(
-                        0,
-                        0,
-                        width.as_nanos() as i64,
-                    ),
-                )),
-                None,
-            )),
-        });
-
-        Expr::ScalarFunction(ScalarFunction {
-            func: Arc::new(datafusion::logical_expr::ScalarUDF::new_from_impl(
-                WindowStructUdf {},
-            )),
-            args: vec![start_expr, end_expr],
-        })
-    }
-
-    fn nested_final_projection(
-        aggregate_plan: LogicalPlan,
-        window_field: DFField,
-        window_index: usize,
-        width: Duration,
-    ) -> Result<LogicalPlan> {
-        let timestamp_field: DFField = aggregate_plan
-            .schema()
-            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
-            .unwrap()
-            .into();
-        let timestamp_column =
-            Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name());
-
-        let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema());
-        let mut aggregate_expressions: Vec<_> = aggregate_fields
-            .iter()
-            .map(|field| Expr::Column(field.qualified_column()))
-            .collect();
-        aggregate_fields.insert(window_index, window_field.clone());
-
-        let window_expression = Self::build_window_struct_expr(&timestamp_column, width);
-        aggregate_expressions.insert(
-            window_index,
-            window_expression
-                .alias_qualified(window_field.qualifier().cloned(), window_field.name()),
-        );
-
-        Ok(LogicalPlan::Projection(
-            logical_expr::Projection::try_new_with_schema(
-                aggregate_expressions,
-                Arc::new(aggregate_plan),
-                Arc::new(schema_from_df_fields(&aggregate_fields).unwrap()),
-            )
-            .unwrap(),
-        ))
-    }
-}
-
-impl UserDefinedLogicalNodeCore for AggregateExtension {
-    fn name(&self) -> &str {
-        AGGREGATE_EXTENSION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.aggregate]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(
-            f,
-            "AggregateExtension: {} | window_behavior: {:?}",
-            self.schema(),
-            match &self.window_behavior {
-                WindowBehavior::InData => "InData".to_string(),
-                WindowBehavior::FromOperator { window, .. } => format!("FromOperator({window:?})"),
-            }
-        )
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
-        }
-        Ok(Self::new(
-            self.window_behavior.clone(),
-            inputs[0].clone(),
-            self.key_fields.clone(),
-        ))
-    }
-}
-
-impl StreamExtension for AggregateExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        let output_schema = (*self.schema).clone().into();
-        StreamSchema::from_schema_keys(Arc::new(output_schema), vec![]).unwrap()
-    }
-}
-
-/// Extension for appending window struct (start, end) to the output
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct WindowAppendExtension {
-    pub(crate) input: LogicalPlan,
-    pub(crate) window_field: DFField,
-    pub(crate) window_index: usize,
-    pub(crate) schema: DFSchemaRef,
-}
-
-multifield_partial_ord!(WindowAppendExtension, input, window_index);
-
-impl WindowAppendExtension {
-    fn new(input: LogicalPlan, window_field: DFField, window_index: usize) -> Self {
-        let mut fields = fields_with_qualifiers(input.schema());
-        fields.insert(window_index, window_field.clone());
-        let metadata = input.schema().metadata().clone();
-        Self {
-            input,
-            window_field,
-            window_index,
-            schema: Arc::new(schema_from_df_fields_with_metadata(&fields, metadata).unwrap()),
-        }
-    }
-}
-
-impl UserDefinedLogicalNodeCore for WindowAppendExtension {
-    fn name(&self) -> &str {
-        "WindowAppendExtension"
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(
-            f,
-            "WindowAppendExtension: field {:?} at {}",
-            self.window_field, self.window_index
-        )
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self::new(
-            inputs[0].clone(),
-            self.window_field.clone(),
-            self.window_index,
-        ))
-    }
-}
-
-/// Placeholder UDF to construct the window struct at plan time
-#[derive(Debug)]
-struct WindowStructUdf;
-
-impl datafusion::logical_expr::ScalarUDFImpl for WindowStructUdf {
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    fn name(&self) -> &str {
-        "window"
-    }
-
-    fn signature(&self) -> &datafusion::logical_expr::Signature {
-        &datafusion::logical_expr::Signature {
-            type_signature: datafusion::logical_expr::TypeSignature::Any(2),
-            volatility: datafusion::logical_expr::Volatility::Immutable,
-        }
-    }
-
-    fn return_type(&self, _args: &[DataType]) -> Result<DataType> {
-        Ok(crate::sql::planner::schemas::window_arrow_struct())
-    }
-
-    fn invoke_with_args(
-        &self,
-        _args: datafusion::logical_expr::ScalarFunctionArgs,
-    ) -> Result<datafusion::logical_expr::ColumnarValue> {
-        unimplemented!("WindowStructUdf is a plan-time-only function")
-    }
-}
diff --git a/src/sql/planner/extension/join.rs b/src/sql/planner/extension/join.rs
deleted file mode 100644
index 3857fee7..00000000
--- a/src/sql/planner/extension/join.rs
+++ /dev/null
@@ -1,61 +0,0 @@
-use std::time::Duration;
-
-use datafusion::common::{DFSchemaRef, Result};
-use datafusion::logical_expr::expr::Expr;
-use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore};
-
-use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::types::StreamSchema;
-
-use std::sync::Arc;
-
-pub(crate) const JOIN_NODE_NAME: &str = "JoinNode";
-
-/// Extension node for streaming joins.
-/// Supports instant joins (windowed, no state) and updating joins (with TTL-based state).
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
-pub struct JoinExtension {
-    pub(crate) rewritten_join: LogicalPlan,
-    pub(crate) is_instant: bool,
-    pub(crate) ttl: Option<Duration>,
-}
-
-impl StreamExtension for JoinExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap()
-    }
-}
-
-impl UserDefinedLogicalNodeCore for JoinExtension {
-    fn name(&self) -> &str {
-        JOIN_NODE_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.rewritten_join]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        self.rewritten_join.schema()
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "JoinExtension: {}", self.schema())
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self {
-            rewritten_join: inputs[0].clone(),
-            is_instant: self.is_instant,
-            ttl: self.ttl,
-        })
-    }
-}
diff --git a/src/sql/planner/extension/key_calculation.rs b/src/sql/planner/extension/key_calculation.rs
deleted file mode 100644
index c90b6d1d..00000000
--- a/src/sql/planner/extension/key_calculation.rs
+++ /dev/null
@@ -1,138 +0,0 @@
-use std::fmt::Formatter;
-use std::sync::Arc;
-
-use datafusion::arrow::datatypes::{Field, Schema};
-use datafusion::common::{DFSchemaRef, Result, internal_err};
-use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
-
-use crate::multifield_partial_ord;
-use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::types::{
-    StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata,
-};
-
-pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension";
-
-/// Two ways of specifying keys: column indices or expressions to evaluate
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
-pub enum KeysOrExprs {
-    Keys(Vec<usize>),
-    Exprs(Vec<Expr>),
-}
-
-/// Calculation for computing keyed data, used for shuffling data to correct nodes
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct KeyCalculationExtension {
-    pub(crate) name: Option<String>,
-    pub(crate) input: LogicalPlan,
-    pub(crate) keys: KeysOrExprs,
-    pub(crate) schema: DFSchemaRef,
-}
-
-multifield_partial_ord!(KeyCalculationExtension, name, input, keys);
-
-impl KeyCalculationExtension {
-    pub fn new_named_and_trimmed(input: LogicalPlan, keys: Vec<usize>, name: String) -> Self {
-        let output_fields: Vec<_> = fields_with_qualifiers(input.schema())
-            .into_iter()
-            .enumerate()
-            .filter_map(|(index, field)| {
-                if !keys.contains(&index) {
-                    Some(field.clone())
-                } else {
-                    None
-                }
-            })
-            .collect();
-
-        let schema =
-            schema_from_df_fields_with_metadata(&output_fields, input.schema().metadata().clone())
-                .unwrap();
-        Self {
-            name: Some(name),
-            input,
-            keys: KeysOrExprs::Keys(keys),
-            schema: Arc::new(schema),
-        }
-    }
-
-    pub fn new(input: LogicalPlan, keys: KeysOrExprs) -> Self {
-        let schema = input.schema().clone();
-        Self {
-            name: None,
-            input,
-            keys,
-            schema,
-        }
-    }
-}
-
-impl StreamExtension for KeyCalculationExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        let input_schema = self.input.schema().as_ref();
-        match &self.keys {
-            KeysOrExprs::Keys(keys) => {
-                StreamSchema::from_schema_keys(Arc::new(input_schema.into()), keys.clone()).unwrap()
-            }
-            KeysOrExprs::Exprs(exprs) => {
-                let mut fields = vec![];
-                for (i, e) in exprs.iter().enumerate() {
-                    let (dt, nullable) = e.data_type_and_nullable(input_schema).unwrap();
-                    fields.push(Field::new(format!("__key_{i}"), dt, nullable).into());
-                }
-                for f in input_schema.fields().iter() {
-                    fields.push(f.clone());
-                }
-                StreamSchema::from_schema_keys(
-                    Arc::new(Schema::new(fields)),
-                    (1..=exprs.len()).collect(),
-                )
-                .unwrap()
-            }
-        }
-    }
-}
-
-impl UserDefinedLogicalNodeCore for KeyCalculationExtension {
-    fn name(&self) -> &str {
-        KEY_CALCULATION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "KeyCalculationExtension: {}", self.schema())
-    }
-
-    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
-        }
-
-        let keys = match &self.keys {
-            KeysOrExprs::Keys(k) => KeysOrExprs::Keys(k.clone()),
-            KeysOrExprs::Exprs(_) => KeysOrExprs::Exprs(exprs),
-        };
-
-        Ok(Self {
-            name: self.name.clone(),
-            input: inputs[0].clone(),
-            keys,
-            schema: self.schema.clone(),
-        })
-    }
-}
diff --git a/src/sql/planner/extension/lookup.rs b/src/sql/planner/extension/lookup.rs
deleted file mode 100644
index daa4b094..00000000
--- a/src/sql/planner/extension/lookup.rs
+++ /dev/null
@@ -1,127 +0,0 @@
-use std::fmt::Formatter;
-use std::sync::Arc;
-
-use datafusion::common::{Column, DFSchemaRef, JoinType, Result, TableReference, internal_err};
-use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
-
-use super::{NamedNode, StreamExtension};
-use crate::multifield_partial_ord;
-use crate::sql::catalog::connector_table::ConnectorTable;
-use crate::sql::types::StreamSchema;
-
-pub const SOURCE_EXTENSION_NAME: &str = "LookupSource";
-pub const JOIN_EXTENSION_NAME: &str = "LookupJoin";
-
-/// Represents a lookup table source in the streaming plan.
-/// Lookup sources provide point-query access to external state.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct LookupSource {
-    pub(crate) table: ConnectorTable,
-    pub(crate) schema: DFSchemaRef,
-}
-
-multifield_partial_ord!(LookupSource, table);
-
-impl UserDefinedLogicalNodeCore for LookupSource {
-    fn name(&self) -> &str {
-        SOURCE_EXTENSION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "LookupSource: {}", self.schema)
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        if !inputs.is_empty() {
-            return internal_err!("LookupSource cannot have inputs");
-        }
-        Ok(Self {
-            table: self.table.clone(),
-            schema: self.schema.clone(),
-        })
-    }
-}
-
-impl StreamExtension for LookupSource {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
-    }
-}
-
-/// Represents a lookup join: a streaming input joined against a lookup table.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct LookupJoin {
-    pub(crate) input: LogicalPlan,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) connector: ConnectorTable,
-    pub(crate) on: Vec<(Expr, Column)>,
-    pub(crate) filter: Option<Expr>,
-    pub(crate) alias: Option<TableReference>,
-    pub(crate) join_type: JoinType,
-}
-
-multifield_partial_ord!(LookupJoin, input, connector, on, filter, alias);
-
-impl UserDefinedLogicalNodeCore for LookupJoin {
-    fn name(&self) -> &str {
-        JOIN_EXTENSION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        let mut e: Vec<_> = self.on.iter().map(|(l, _)| l.clone()).collect();
-        if let Some(filter) = &self.filter {
-            e.push(filter.clone());
-        }
-        e
-    }
-
-    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "LookupJoinExtension: {}", self.schema)
-    }
-
-    fn with_exprs_and_inputs(&self, _: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self {
-            input: inputs[0].clone(),
-            schema: self.schema.clone(),
-            connector: self.connector.clone(),
-            on: self.on.clone(),
-            filter: self.filter.clone(),
-            alias: self.alias.clone(),
-            join_type: self.join_type,
-        })
-    }
-}
-
-impl StreamExtension for LookupJoin {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
-    }
-}
diff --git a/src/sql/planner/extension/projection.rs b/src/sql/planner/extension/projection.rs
deleted file mode 100644
index e6dc8ce7..00000000
--- a/src/sql/planner/extension/projection.rs
+++ /dev/null
@@ -1,91 +0,0 @@
-use std::fmt::Formatter;
-use std::sync::Arc;
-
-use datafusion::common::{DFSchemaRef, Result};
-use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
-
-use crate::multifield_partial_ord;
-use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::types::{DFField, StreamSchema, schema_from_df_fields};
-
-pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension";
-
-/// Projection operations for streaming SQL plans.
-/// Handles column projections, shuffles for key-based operations, etc.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct ProjectionExtension {
-    pub(crate) inputs: Vec<LogicalPlan>,
-    pub(crate) name: Option<String>,
-    pub(crate) exprs: Vec<Expr>,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) shuffle: bool,
-}
-
-multifield_partial_ord!(ProjectionExtension, name, exprs);
-
-impl ProjectionExtension {
-    pub(crate) fn new(inputs: Vec<LogicalPlan>, name: Option<String>, exprs: Vec<Expr>) -> Self {
-        let input_schema = inputs.first().unwrap().schema();
-        let fields: Vec<DFField> = exprs
-            .iter()
-            .map(|e| DFField::from(e.to_field(input_schema).unwrap()))
-            .collect();
-
-        let schema = Arc::new(schema_from_df_fields(&fields).unwrap());
-
-        Self {
-            inputs,
-            name,
-            exprs,
-            schema,
-            shuffle: false,
-        }
-    }
-
-    pub(crate) fn shuffled(mut self) -> Self {
-        self.shuffle = true;
-        self
-    }
-}
-
-impl StreamExtension for ProjectionExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema.as_arrow().clone())).unwrap()
-    }
-}
-
-impl UserDefinedLogicalNodeCore for ProjectionExtension {
-    fn name(&self) -> &str {
-        PROJECTION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        self.inputs.iter().collect()
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "ProjectionExtension: {}", self.schema())
-    }
-
-    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self {
-            name: self.name.clone(),
-            inputs,
-            exprs,
-            schema: self.schema.clone(),
-            shuffle: self.shuffle,
-        })
-    }
-}
diff --git a/src/sql/planner/extension/remote_table.rs b/src/sql/planner/extension/remote_table.rs
deleted file mode 100644
index 2d81cafc..00000000
--- a/src/sql/planner/extension/remote_table.rs
+++ /dev/null
@@ -1,71 +0,0 @@
-use std::fmt::Formatter;
-use std::sync::Arc;
-
-use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err};
-use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
-
-use crate::multifield_partial_ord;
-use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::types::StreamSchema;
-
-pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension";
-
-/// Lightweight extension that segments the execution graph and enables merging
-/// nodes with the same name. Allows materializing intermediate results.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct RemoteTableExtension {
-    pub(crate) input: LogicalPlan,
-    pub(crate) name: TableReference,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) materialize: bool,
-}
-
-multifield_partial_ord!(RemoteTableExtension, input, name, materialize);
-
-impl StreamExtension for RemoteTableExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        if self.materialize {
-            Some(NamedNode::RemoteTable(self.name.to_owned()))
-        } else {
-            None
-        }
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap()
-    }
-}
-
-impl UserDefinedLogicalNodeCore for RemoteTableExtension {
-    fn name(&self) -> &str {
-        REMOTE_TABLE_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "RemoteTableExtension: {}", self.schema)
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
-        }
-        Ok(Self {
-            input: inputs[0].clone(),
-            name: self.name.clone(),
-            schema: self.schema.clone(),
-            materialize: self.materialize,
-        })
-    }
-}
diff --git a/src/sql/planner/extension/sink.rs b/src/sql/planner/extension/sink.rs
deleted file mode 100644
index e73a8383..00000000
--- a/src/sql/planner/extension/sink.rs
+++ /dev/null
@@ -1,128 +0,0 @@
-use std::sync::Arc;
-
-use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err};
-use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
-
-use super::debezium::ToDebeziumExtension;
-use super::remote_table::RemoteTableExtension;
-use super::{NamedNode, StreamExtension};
-use crate::multifield_partial_ord;
-use crate::sql::catalog::table::Table;
-use crate::sql::types::StreamSchema;
-
-pub(crate) const SINK_NODE_NAME: &str = "SinkExtension";
-
-/// Extension node representing a sink (output) in the streaming plan.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct SinkExtension {
-    pub(crate) name: TableReference,
-    pub(crate) table: Table,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) inputs: Arc<Vec<LogicalPlan>>,
-}
-
-multifield_partial_ord!(SinkExtension, name, inputs);
-
-impl SinkExtension {
-    pub fn new(
-        name: TableReference,
-        table: Table,
-        mut schema: DFSchemaRef,
-        mut input: Arc<LogicalPlan>,
-    ) -> Result<Self> {
-        match &table {
-            Table::ConnectorTable(connector_table) => {
-                if connector_table.is_updating() {
-                    let to_debezium = ToDebeziumExtension::try_new(input.as_ref().clone())?;
-                    input = Arc::new(LogicalPlan::Extension(Extension {
-                        node: Arc::new(to_debezium),
-                    }));
-                    schema = input.schema().clone();
-                }
-            }
-            Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"),
-            Table::TableFromQuery { .. } => {}
-        }
-
-        Self::add_remote_if_necessary(&schema, &mut input);
-
-        let inputs = Arc::new(vec![(*input).clone()]);
-        Ok(Self {
-            name,
-            table,
-            schema,
-            inputs,
-        })
-    }
-
-    pub fn add_remote_if_necessary(schema: &DFSchemaRef, input: &mut Arc<LogicalPlan>) {
-        if let LogicalPlan::Extension(node) = input.as_ref() {
-            let Ok(ext): Result<&dyn StreamExtension, _> = (&node.node).try_into() else {
-                // not a StreamExtension, wrap it
-                let remote = RemoteTableExtension {
-                    input: input.as_ref().clone(),
-                    name: TableReference::bare("sink projection"),
-                    schema: schema.clone(),
-                    materialize: false,
-                };
-                *input = Arc::new(LogicalPlan::Extension(Extension {
-                    node: Arc::new(remote),
-                }));
-                return;
-            };
-            if !ext.transparent() {
-                return;
-            }
-        }
-        let remote = RemoteTableExtension {
-            input: input.as_ref().clone(),
-            name: TableReference::bare("sink projection"),
-            schema: schema.clone(),
-            materialize: false,
-        };
-        *input = Arc::new(LogicalPlan::Extension(Extension {
-            node: Arc::new(remote),
-        }));
-    }
-}
-
-impl UserDefinedLogicalNodeCore for SinkExtension {
-    fn name(&self) -> &str {
-        SINK_NODE_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        self.inputs.iter().collect()
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "SinkExtension({:?}): {}", self.name, self.schema)
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self {
-            name: self.name.clone(),
-            table: self.table.clone(),
-            schema: self.schema.clone(),
-            inputs: Arc::new(inputs),
-        })
-    }
-}
-
-impl StreamExtension for SinkExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        Some(NamedNode::Sink(self.name.clone()))
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_fields(vec![])
-    }
-}
diff --git a/src/sql/planner/extension/updating_aggregate.rs b/src/sql/planner/extension/updating_aggregate.rs
deleted file mode 100644
index 758edc67..00000000
--- a/src/sql/planner/extension/updating_aggregate.rs
+++ /dev/null
@@ -1,89 +0,0 @@
-use std::sync::Arc;
-use std::time::Duration;
-
-use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err};
-use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
-
-use super::{IsRetractExtension, NamedNode, StreamExtension};
-use crate::sql::types::StreamSchema;
-
-pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension";
-
-/// Extension node for updating (non-windowed) aggregations.
-/// Maintains state with TTL and emits retraction/update pairs.
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
-pub(crate) struct UpdatingAggregateExtension {
-    pub(crate) aggregate: LogicalPlan,
-    pub(crate) key_fields: Vec<usize>,
-    pub(crate) final_calculation: LogicalPlan,
-    pub(crate) timestamp_qualifier: Option<TableReference>,
-    pub(crate) ttl: Duration,
-}
-
-impl UpdatingAggregateExtension {
-    pub fn new(
-        aggregate: LogicalPlan,
-        key_fields: Vec<usize>,
-        timestamp_qualifier: Option<TableReference>,
-        ttl: Duration,
-    ) -> Result<Self> {
-        let final_calculation = LogicalPlan::Extension(Extension {
-            node: Arc::new(IsRetractExtension::new(
-                aggregate.clone(),
-                timestamp_qualifier.clone(),
-            )),
-        });
-
-        Ok(Self {
-            aggregate,
-            key_fields,
-            final_calculation,
-            timestamp_qualifier,
-            ttl,
-        })
-    }
-}
-
-impl UserDefinedLogicalNodeCore for UpdatingAggregateExtension {
-    fn name(&self) -> &str {
-        UPDATING_AGGREGATE_EXTENSION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.aggregate]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        self.final_calculation.schema()
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "UpdatingAggregateExtension")
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        if inputs.len() != 1 {
-            return plan_err!("UpdatingAggregateExtension expects exactly one input");
-        }
-        Self::new(
-            inputs[0].clone(),
-            self.key_fields.clone(),
-            self.timestamp_qualifier.clone(),
-            self.ttl,
-        )
-    }
-}
-
-impl StreamExtension for UpdatingAggregateExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap()
-    }
-}
diff --git a/src/sql/planner/extension/window_fn.rs b/src/sql/planner/extension/window_fn.rs
deleted file mode 100644
index 95832183..00000000
--- a/src/sql/planner/extension/window_fn.rs
+++ /dev/null
@@ -1,62 +0,0 @@
-use std::sync::Arc;
-
-use datafusion::common::{DFSchemaRef, Result};
-use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
-
-use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::types::StreamSchema;
-
-pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension";
-
-/// Extension for window functions (e.g., ROW_NUMBER, RANK) over windowed input.
-/// Window functions require already-windowed input and are evaluated per-window.
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
-pub(crate) struct WindowFunctionExtension {
-    pub(crate) window_plan: LogicalPlan,
-    pub(crate) key_fields: Vec<usize>,
-}
-
-impl WindowFunctionExtension {
-    pub fn new(window_plan: LogicalPlan, key_fields: Vec<usize>) -> Self {
-        Self {
-            window_plan,
-            key_fields,
-        }
-    }
-}
-
-impl UserDefinedLogicalNodeCore for WindowFunctionExtension {
-    fn name(&self) -> &str {
-        WINDOW_FUNCTION_EXTENSION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.window_plan]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        self.window_plan.schema()
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "WindowFunction: {}", self.schema())
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self::new(inputs[0].clone(), self.key_fields.clone()))
-    }
-}
-
-impl StreamExtension for WindowFunctionExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn output_schema(&self) -> StreamSchema {
-        StreamSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).unwrap()
-    }
-}
diff --git a/src/sql/planner/mod.rs b/src/sql/planner/mod.rs
deleted file mode 100644
index c85c0fb2..00000000
--- a/src/sql/planner/mod.rs
+++ /dev/null
@@ -1,348 +0,0 @@
-#![allow(clippy::new_without_default)]
-
-pub(crate) mod extension;
-pub mod parse;
-pub(crate) mod physical_planner;
-pub mod plan;
-pub mod rewrite;
-pub mod schema_provider;
-pub mod schemas;
-pub mod sql_to_plan;
-
-pub(crate) mod mod_prelude {
-    pub use super::StreamSchemaProvider;
-}
-
-pub use schema_provider::{LogicalBatchInput, StreamSchemaProvider, StreamTable};
-
-use std::collections::{HashMap, HashSet};
-use std::sync::Arc;
-
-use datafusion::common::tree_node::TreeNode;
-use datafusion::common::{Result, plan_err};
-use datafusion::error::DataFusionError;
-use datafusion::execution::SessionStateBuilder;
-use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
-use datafusion::prelude::SessionConfig;
-use datafusion::sql::TableReference;
-use datafusion::sql::sqlparser::ast::{OneOrManyWithParens, Statement};
-use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
-use datafusion::sql::sqlparser::parser::Parser;
-use tracing::debug;
-
-use crate::datastream::logical::{LogicalProgram, ProgramConfig};
-use crate::datastream::optimizers::ChainingOptimizer;
-use crate::sql::catalog::insert::Insert;
-use crate::sql::catalog::table::Table as CatalogTable;
-use crate::sql::functions::{is_json_union, serialize_outgoing_json};
-use crate::sql::planner::extension::key_calculation::{KeyCalculationExtension, KeysOrExprs};
-use crate::sql::planner::extension::projection::ProjectionExtension;
-use crate::sql::planner::extension::sink::SinkExtension;
-use crate::sql::planner::extension::{NamedNode, StreamExtension};
-use crate::sql::planner::plan::rewrite_plan;
-use crate::sql::planner::rewrite::{SinkInputRewriter, SourceMetadataVisitor};
-use crate::sql::types::SqlConfig;
-
-// ── Compilation pipeline ──────────────────────────────────────────────
-
-#[derive(Clone, Debug)]
-pub struct CompiledSql {
-    pub program: LogicalProgram,
-    pub connection_ids: Vec<i64>,
-}
-
-pub fn parse_sql_statements(
-    sql: &str,
-) -> std::result::Result<Vec<Statement>, datafusion::sql::sqlparser::parser::ParserError> {
-    Parser::parse_sql(&FunctionStreamDialect {}, sql)
-}
-
-fn try_handle_set_variable(
-    statement: &Statement,
-    schema_provider: &mut StreamSchemaProvider,
-) -> Result<bool> {
-    if let Statement::SetVariable {
-        variables, value, ..
-    } = statement
-    {
-        let OneOrManyWithParens::One(opt) = variables else {
-            return plan_err!("invalid syntax for `SET` call");
-        };
-
-        if opt.to_string() != "updating_ttl" {
-            return plan_err!(
-                "invalid option '{}'; supported options are 'updating_ttl'",
-                opt
-            );
-        }
-
-        if value.len() != 1 {
-            return plan_err!("invalid `SET updating_ttl` call; expected exactly one expression");
-        }
-
-        let duration = duration_from_sql_expr(&value[0])?;
-        schema_provider.planning_options.ttl = duration;
-
-        return Ok(true);
-    }
-
-    Ok(false)
-}
-
-fn duration_from_sql_expr(
-    expr: &datafusion::sql::sqlparser::ast::Expr,
-) -> Result<std::time::Duration> {
-    use datafusion::sql::sqlparser::ast::Expr as SqlExpr;
-    use datafusion::sql::sqlparser::ast::Value as SqlValue;
-    use datafusion::sql::sqlparser::ast::ValueWithSpan;
-
-    match expr {
-        SqlExpr::Interval(interval) => {
-            let value_str = match interval.value.as_ref() {
-                SqlExpr::Value(ValueWithSpan {
-                    value: SqlValue::SingleQuotedString(s),
-                    ..
-                }) => s.clone(),
-                other => return plan_err!("expected interval string literal, found {other}"),
-            };
-
-            parse_interval_to_duration(&value_str)
-        }
-        SqlExpr::Value(ValueWithSpan {
-            value: SqlValue::SingleQuotedString(s),
-            ..
-        }) => parse_interval_to_duration(s),
-        other => plan_err!("expected an interval expression, found {other}"),
-    }
-}
-
-fn parse_interval_to_duration(s: &str) -> Result<std::time::Duration> {
-    let parts: Vec<&str> = s.trim().split_whitespace().collect();
-    if parts.len() != 2 {
-        return plan_err!("invalid interval string '{s}'; expected '<value> <unit>'");
-    }
-    let value: u64 = parts[0]
-        .parse()
-        .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {}", parts[0])))?;
-    match parts[1].to_lowercase().as_str() {
-        "second" | "seconds" | "s" => Ok(std::time::Duration::from_secs(value)),
-        "minute" | "minutes" | "min" => Ok(std::time::Duration::from_secs(value * 60)),
-        "hour" | "hours" | "h" => Ok(std::time::Duration::from_secs(value * 3600)),
-        "day" | "days" | "d" => Ok(std::time::Duration::from_secs(value * 86400)),
-        unit => plan_err!("unsupported interval unit '{unit}'"),
-    }
-}
-
-fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap<NamedNode, Vec<LogicalPlan>> {
-    let mut sink_inputs = HashMap::<NamedNode, Vec<LogicalPlan>>::new();
-    for extension in extensions.iter() {
-        if let LogicalPlan::Extension(ext) = extension {
-            if let Some(sink_node) = ext.node.as_any().downcast_ref::<SinkExtension>() {
-                if let Some(named_node) = sink_node.node_name() {
-                    let inputs = sink_node
-                        .inputs()
-                        .into_iter()
-                        .cloned()
-                        .collect::<Vec<LogicalPlan>>();
-                    sink_inputs.entry(named_node).or_default().extend(inputs);
-                }
-            }
-        }
-    }
-    sink_inputs
-}
-
-pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<LogicalPlan> {
-    let LogicalPlan::Extension(ref ext) = plan else {
-        return Ok(plan);
-    };
-
-    let Some(sink) = ext.node.as_any().downcast_ref::<SinkExtension>() else {
-        return Ok(plan);
-    };
-
-    let Some(partition_exprs) = sink.table.partition_exprs() else {
-        return Ok(plan);
-    };
-
-    if partition_exprs.is_empty() {
-        return Ok(plan);
-    }
-
-    let inputs = plan
-        .inputs()
-        .into_iter()
-        .map(|input| {
-            Ok(LogicalPlan::Extension(Extension {
-                node: Arc::new(KeyCalculationExtension {
-                    name: Some("key-calc-partition".to_string()),
-                    schema: input.schema().clone(),
-                    input: input.clone(),
-                    keys: KeysOrExprs::Exprs(partition_exprs.clone()),
-                }),
-            }))
-        })
-        .collect::<Result<_>>()?;
-
-    use datafusion::prelude::col;
-    let unkey = LogicalPlan::Extension(Extension {
-        node: Arc::new(
-            ProjectionExtension::new(
-                inputs,
-                Some("unkey".to_string()),
-                sink.schema().iter().map(|(_, f)| col(f.name())).collect(),
-            )
-            .shuffled(),
-        ),
-    });
-
-    let node = sink.with_exprs_and_inputs(vec![], vec![unkey])?;
-    Ok(LogicalPlan::Extension(Extension {
-        node: Arc::new(node),
-    }))
-}
-
-pub fn rewrite_sinks(extensions: Vec<LogicalPlan>) -> Result<Vec<LogicalPlan>> {
-    let mut sink_inputs = build_sink_inputs(&extensions);
-    let mut new_extensions = vec![];
-    for extension in extensions {
-        let mut rewriter = SinkInputRewriter::new(&mut sink_inputs);
-        let result = extension.rewrite(&mut rewriter)?;
-        if !rewriter.was_removed {
-            new_extensions.push(result.data);
-        }
-    }
-
-    new_extensions
-        .into_iter()
-        .map(maybe_add_key_extension_to_sink)
-        .collect()
-}
-
-pub async fn parse_and_get_arrow_program(
-    query: String,
-    mut schema_provider: StreamSchemaProvider,
-    _config: SqlConfig,
-) -> Result<CompiledSql> {
-    let mut config = SessionConfig::new();
-    config
-        .options_mut()
-        .optimizer
-        .enable_round_robin_repartition = false;
-    config.options_mut().optimizer.repartition_aggregations = false;
-    config.options_mut().optimizer.repartition_windows = false;
-    config.options_mut().optimizer.repartition_sorts = false;
-    config.options_mut().optimizer.repartition_joins = false;
-    config.options_mut().execution.target_partitions = 1;
-
-    let session_state = SessionStateBuilder::new()
-        .with_config(config)
-        .with_default_features()
-        .with_physical_optimizer_rules(vec![])
-        .build();
-
-    let mut inserts = vec![];
-    for statement in parse_sql_statements(&query)? {
-        if try_handle_set_variable(&statement, &mut schema_provider)? {
-            continue;
-        }
-
-        if let Some(table) = CatalogTable::try_from_statement(&statement, &schema_provider)? {
-            schema_provider.insert_catalog_table(table);
-        } else {
-            inserts.push(Insert::try_from_statement(&statement, &schema_provider)?);
-        };
-    }
-
-    if inserts.is_empty() {
-        return plan_err!("The provided SQL does not contain a query");
-    }
-
-    let mut used_connections = HashSet::new();
-    let mut extensions = vec![];
-
-    for insert in inserts {
-        let (plan, sink_name) = match insert {
-            Insert::InsertQuery {
-                sink_name,
-                logical_plan,
-            } => (logical_plan, Some(sink_name)),
-            Insert::Anonymous { logical_plan } => (logical_plan, None),
-        };
-
-        let mut plan_rewrite = rewrite_plan(plan, &schema_provider)?;
-
-        if plan_rewrite
-            .schema()
-            .fields()
-            .iter()
-            .any(|f| is_json_union(f.data_type()))
-        {
-            plan_rewrite = serialize_outgoing_json(&schema_provider, Arc::new(plan_rewrite));
-        }
-
-        debug!("Plan = {}", plan_rewrite.display_graphviz());
-
-        let mut metadata = SourceMetadataVisitor::new(&schema_provider);
-        plan_rewrite.visit_with_subqueries(&mut metadata)?;
-        used_connections.extend(metadata.connection_ids.iter());
-
-        let sink = match sink_name {
-            Some(sink_name) => {
-                let table = schema_provider
-                    .get_catalog_table_mut(&sink_name)
-                    .ok_or_else(|| {
-                        DataFusionError::Plan(format!("Connection {sink_name} not found"))
-                    })?;
-                match table {
-                    CatalogTable::ConnectorTable(c) => {
-                        if let Some(id) = c.id {
-                            used_connections.insert(id);
-                        }
-
-                        SinkExtension::new(
-                            TableReference::bare(sink_name),
-                            table.clone(),
-                            plan_rewrite.schema().clone(),
-                            Arc::new(plan_rewrite),
-                        )
-                    }
-                    CatalogTable::LookupTable(_) => {
-                        plan_err!("lookup (temporary) tables cannot be inserted into")
-                    }
-                    CatalogTable::TableFromQuery { .. } => {
-                        plan_err!(
-                            "shouldn't be inserting more data into a table made with CREATE TABLE AS"
-                        )
-                    }
-                }
-            }
-            None => {
-                return plan_err!(
-                    "Anonymous query is not supported; use INSERT INTO <sink> SELECT ..."
-                );
-            }
-        };
-        extensions.push(LogicalPlan::Extension(Extension {
-            node: Arc::new(sink?),
-        }));
-    }
-
-    let extensions = rewrite_sinks(extensions)?;
-
-    let mut plan_to_graph_visitor =
-        physical_planner::PlanToGraphVisitor::new(&schema_provider, &session_state);
-    for extension in extensions {
-        plan_to_graph_visitor.add_plan(extension)?;
-    }
-    let graph = plan_to_graph_visitor.into_graph();
-
-    let mut program = LogicalProgram::new(graph, ProgramConfig::default());
-
-    program.optimize(&ChainingOptimizer {});
-
-    Ok(CompiledSql {
-        program,
-        connection_ids: used_connections.into_iter().collect(),
-    })
-}
diff --git a/src/sql/planner/plan/mod.rs b/src/sql/planner/plan/mod.rs
deleted file mode 100644
index c734a88b..00000000
--- a/src/sql/planner/plan/mod.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-use datafusion::common::Result;
-use datafusion::common::tree_node::{Transformed, TreeNode};
-use datafusion::logical_expr::LogicalPlan;
-use tracing::{debug, info, instrument};
-
-use crate::sql::planner::StreamSchemaProvider;
-use crate::sql::planner::plan::stream_rewriter::StreamRewriter;
-use crate::sql::planner::rewrite::TimeWindowUdfChecker;
-
-// Module declarations
-pub(crate) mod aggregate_rewriter;
-pub(crate) mod join_rewriter;
-pub(crate) mod row_time_rewriter;
-pub(crate) mod stream_rewriter;
-pub(crate) mod streaming_window_analzer;
-pub(crate) mod window_function_rewriter;
-
-/// Entry point for transforming a standard DataFusion LogicalPlan into a
-/// Streaming-aware LogicalPlan.
-///
-/// This function coordinates multiple rewriting passes and ensures the
-/// resulting plan satisfies streaming constraints.
-#[instrument(skip_all, level = "debug")]
-pub fn rewrite_plan(
-    plan: LogicalPlan,
-    schema_provider: &StreamSchemaProvider,
-) -> Result<LogicalPlan> {
-    info!("Starting streaming plan rewrite pipeline");
-
-    // Phase 1: Core Transformation
-    // This pass handles the structural changes (Aggregates, Joins, Windows)
-    // using a Bottom-Up traversal.
-    let mut rewriter = StreamRewriter::new(schema_provider);
-    let Transformed {
-        data: rewritten_plan,
-        ..
-    } = plan.rewrite_with_subqueries(&mut rewriter)?;
-
-    // Phase 2: Post-rewrite Validation
-    // Ensure that the rewritten plan doesn't violate specific streaming UDF rules.
-    rewritten_plan.visit_with_subqueries(&mut TimeWindowUdfChecker {})?;
-
-    // Phase 3: Observability & Debugging
-    // Industrial engines use Graphviz or specialized Explain formats for plan diffs.
-    if cfg!(debug_assertions) {
-        debug!(
-            "Streaming logical plan graphviz:\n{}",
-            rewritten_plan.display_graphviz()
-        );
-    }
-
-    info!("Streaming plan rewrite completed successfully");
-    Ok(rewritten_plan)
-}
diff --git a/src/sql/planner/rewrite/mod.rs b/src/sql/planner/rewrite/mod.rs
deleted file mode 100644
index bfebae4c..00000000
--- a/src/sql/planner/rewrite/mod.rs
+++ /dev/null
@@ -1,25 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-pub mod async_udf_rewriter;
-pub mod sink_input_rewriter;
-pub mod source_metadata_visitor;
-pub mod source_rewriter;
-pub mod time_window;
-pub mod unnest_rewriter;
-
-pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter};
-pub use sink_input_rewriter::SinkInputRewriter;
-pub use source_metadata_visitor::SourceMetadataVisitor;
-pub use source_rewriter::SourceRewriter;
-pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker, is_time_window};
-pub use unnest_rewriter::{UNNESTED_COL, UnnestRewriter};
diff --git a/src/sql/planner/schemas.rs b/src/sql/planner/schemas.rs
deleted file mode 100644
index f903db83..00000000
--- a/src/sql/planner/schemas.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-// Re-export schema utilities from catalog::utils.
-// Kept for backward compatibility with existing planner imports.
-pub use crate::sql::catalog::utils::{
-    add_timestamp_field, add_timestamp_field_arrow, has_timestamp_field, window_arrow_struct,
-};
diff --git a/src/sql/planner/sql_to_plan.rs b/src/sql/planner/sql_to_plan.rs
deleted file mode 100644
index 049cd18e..00000000
--- a/src/sql/planner/sql_to_plan.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-use datafusion::common::Result;
-use datafusion::logical_expr::LogicalPlan;
-use datafusion::sql::sqlparser::ast::Statement;
-use tracing::debug;
-
-use crate::sql::planner::StreamSchemaProvider;
-
-/// Stage 2: Statement → LogicalPlan
-///
-/// Converts a parsed SQL AST statement into a DataFusion logical plan
-/// using the StreamSchemaProvider as the catalog context.
-pub fn statement_to_plan(
-    statement: Statement,
-    schema_provider: &StreamSchemaProvider,
-) -> Result<LogicalPlan> {
-    let sql_to_rel = datafusion::sql::planner::SqlToRel::new(schema_provider);
-    let plan = sql_to_rel.sql_statement_to_plan(statement)?;
-
-    debug!("Logical plan:\n{}", plan.display_graphviz());
-
-    Ok(plan)
-}
diff --git a/src/sql/catalog/connector.rs b/src/sql/schema/connector.rs
similarity index 57%
rename from src/sql/catalog/connector.rs
rename to src/sql/schema/connector.rs
index 01176d47..06a3df92 100644
--- a/src/sql/catalog/connector.rs
+++ b/src/sql/schema/connector.rs
@@ -29,31 +29,3 @@ impl fmt::Display for ConnectionType {
         }
     }
 }
-
-/// A connector operation that describes how to interact with an external system.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct ConnectorOp {
-    pub connector: String,
-    pub config: String,
-    pub description: String,
-}
-
-impl ConnectorOp {
-    pub fn new(connector: impl Into<String>, config: impl Into<String>) -> Self {
-        let connector = connector.into();
-        let description = connector.clone();
-        Self {
-            connector,
-            config: config.into(),
-            description,
-        }
-    }
-}
-
-/// Configuration for a connection profile (e.g., Kafka broker, Pulsar endpoint).
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct ConnectionProfile {
-    pub name: String,
-    pub connector: String,
-    pub config: std::collections::HashMap<String, String>,
-}
diff --git a/src/sql/catalog/connector_table.rs b/src/sql/schema/connector_table.rs
similarity index 91%
rename from src/sql/catalog/connector_table.rs
rename to src/sql/schema/connector_table.rs
index 8dae1745..25e37184 100644
--- a/src/sql/catalog/connector_table.rs
+++ b/src/sql/schema/connector_table.rs
@@ -16,10 +16,11 @@ use std::time::Duration;
 use datafusion::arrow::datatypes::{FieldRef, Schema};
 use datafusion::common::{Result, plan_err};
 use datafusion::logical_expr::Expr;
-
-use super::connector::{ConnectionType, ConnectorOp};
+use protocol::grpc::api::ConnectorOp;
 use super::field_spec::FieldSpec;
 use crate::multifield_partial_ord;
+use crate::sql::schema::ConnectionType;
+use crate::sql::schema::table::SqlSource;
 use crate::sql::types::ProcessingMode;
 
 /// Represents a table backed by an external connector (e.g., Kafka, Pulsar, NATS).
@@ -38,6 +39,8 @@ pub struct ConnectorTable {
     pub primary_keys: Arc<Vec<String>>,
     pub inferred_fields: Option<Vec<FieldRef>>,
     pub partition_exprs: Arc<Option<Vec<Expr>>>,
+    pub lookup_cache_max_bytes: Option<u64>,
+    pub lookup_cache_ttl: Option<Duration>,
 }
 
 multifield_partial_ord!(
@@ -74,6 +77,8 @@ impl ConnectorTable {
             primary_keys: Arc::new(Vec::new()),
             inferred_fields: None,
             partition_exprs: Arc::new(None),
+            lookup_cache_max_bytes: None,
+            lookup_cache_ttl: None,
         }
     }
 
@@ -160,40 +165,41 @@ impl ConnectorTable {
             ConnectionType::Sink | ConnectionType::Lookup => {
                 return plan_err!("cannot read from sink");
             }
-        }
+        };
 
         if self.is_updating() && self.has_virtual_fields() {
-            return plan_err!("can't read from a source with virtual fields and update mode");
+            return plan_err!("can't read from a source with virtual fields and update mode.");
         }
 
         let timestamp_override = self.timestamp_override()?;
         let watermark_column = self.watermark_column()?;
 
-        Ok(SourceOperator {
-            name: self.name.clone(),
-            connector_op: self.connector_op(),
-            processing_mode: self.processing_mode(),
-            idle_time: self.idle_time,
-            struct_fields: self
+        let source = SqlSource {
+            id: self.id,
+            struct_def: self
                 .fields
                 .iter()
                 .filter(|f| !f.is_virtual())
                 .map(|f| Arc::new(f.field().clone()))
                 .collect(),
+            config: self.connector_op(),
+            processing_mode: self.processing_mode(),
+            idle_time: self.idle_time,
+        };
+
+        Ok(SourceOperator {
+            name: self.name.clone(),
+            source,
             timestamp_override,
             watermark_column,
         })
     }
 }
 
-/// A fully resolved source operator ready for execution graph construction.
 #[derive(Debug, Clone)]
 pub struct SourceOperator {
     pub name: String,
-    pub connector_op: ConnectorOp,
-    pub processing_mode: ProcessingMode,
-    pub idle_time: Option<Duration>,
-    pub struct_fields: Vec<FieldRef>,
+    pub source: SqlSource,
     pub timestamp_override: Option<Expr>,
     pub watermark_column: Option<Expr>,
 }
diff --git a/src/sql/catalog/field_spec.rs b/src/sql/schema/field_spec.rs
similarity index 100%
rename from src/sql/catalog/field_spec.rs
rename to src/sql/schema/field_spec.rs
diff --git a/src/sql/catalog/insert.rs b/src/sql/schema/insert.rs
similarity index 97%
rename from src/sql/catalog/insert.rs
rename to src/sql/schema/insert.rs
index a4a3814a..fe91325b 100644
--- a/src/sql/catalog/insert.rs
+++ b/src/sql/schema/insert.rs
@@ -15,7 +15,7 @@ use datafusion::logical_expr::{DmlStatement, LogicalPlan, WriteOp};
 use datafusion::sql::sqlparser::ast::Statement;
 
 use super::optimizer::produce_optimized_plan;
-use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::schema::StreamSchemaProvider;
 
 /// Represents an INSERT operation in a streaming SQL pipeline.
 #[derive(Debug)]
diff --git a/src/sql/catalog/mod.rs b/src/sql/schema/mod.rs
similarity index 85%
rename from src/sql/catalog/mod.rs
rename to src/sql/schema/mod.rs
index 39c7bfcd..0bf7e4ea 100644
--- a/src/sql/catalog/mod.rs
+++ b/src/sql/schema/mod.rs
@@ -15,11 +15,13 @@ pub mod connector_table;
 pub mod field_spec;
 pub mod insert;
 pub mod optimizer;
+pub mod schema_provider;
 pub mod table;
 pub mod utils;
 
-pub use connector::{ConnectionType, ConnectorOp};
+pub use connector::{ConnectionType};
 pub use connector_table::{ConnectorTable, SourceOperator};
 pub use field_spec::FieldSpec;
 pub use insert::Insert;
+pub use schema_provider::{LogicalBatchInput, StreamSchemaProvider, StreamTable};
 pub use table::Table;
diff --git a/src/sql/catalog/optimizer.rs b/src/sql/schema/optimizer.rs
similarity index 98%
rename from src/sql/catalog/optimizer.rs
rename to src/sql/schema/optimizer.rs
index 15abe61e..fbb64845 100644
--- a/src/sql/catalog/optimizer.rs
+++ b/src/sql/schema/optimizer.rs
@@ -41,7 +41,7 @@ use datafusion::optimizer::simplify_expressions::SimplifyExpressions;
 use datafusion::sql::planner::SqlToRel;
 use datafusion::sql::sqlparser::ast::Statement;
 
-use crate::sql::planner::StreamSchemaProvider;
+use crate::sql::schema::StreamSchemaProvider;
 
 /// Converts a SQL statement into an optimized DataFusion logical plan.
 ///
diff --git a/src/sql/planner/schema_provider.rs b/src/sql/schema/schema_provider.rs
similarity index 97%
rename from src/sql/planner/schema_provider.rs
rename to src/sql/schema/schema_provider.rs
index d860fd6c..11c0d461 100644
--- a/src/sql/planner/schema_provider.rs
+++ b/src/sql/schema/schema_provider.rs
@@ -15,9 +15,9 @@ use datafusion::optimizer::Analyzer;
 use datafusion::sql::TableReference;
 use datafusion::sql::planner::ContextProvider;
 use unicase::UniCase;
-
-use crate::sql::catalog::table::Table as CatalogTable;
-use crate::sql::planner::schemas::window_arrow_struct;
+use crate::sql::logical_node::logical::DylibUdfConfig;
+use crate::sql::schema::table::Table as CatalogTable;
+use crate::sql::schema::utils::window_arrow_struct;
 use crate::sql::types::{PlaceholderUdf, PlanningOptions};
 
 #[derive(Clone, Default)]
@@ -28,6 +28,7 @@ pub struct StreamSchemaProvider {
     pub functions: HashMap<String, Arc<ScalarUDF>>,
     pub aggregate_functions: HashMap<String, Arc<AggregateUDF>>,
     pub window_functions: HashMap<String, Arc<WindowUDF>>,
+    pub dylib_udfs: HashMap<String, DylibUdfConfig>,
     config_options: datafusion::config::ConfigOptions,
     pub expr_planners: Vec<Arc<dyn ExprPlanner>>,
     pub planning_options: PlanningOptions,
@@ -97,7 +98,7 @@ impl datafusion::datasource::TableProvider for LogicalBatchInput {
         _filters: &[Expr],
         _limit: Option<usize>,
     ) -> Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>> {
-        Ok(Arc::new(crate::sql::physical::FsMemExec::new(
+        Ok(Arc::new(crate::sql::logical_planner::FsMemExec::new(
             self.table_name.clone(),
             self.schema.clone(),
         )))
@@ -238,7 +239,7 @@ impl StreamSchemaProvider {
     pub fn get_async_udf_options(
         &self,
         _name: &str,
-    ) -> Option<crate::sql::planner::rewrite::AsyncOptions> {
+    ) -> Option<crate::sql::analysis::AsyncOptions> {
         // TODO: implement async UDF lookup
         None
     }
diff --git a/src/sql/catalog/table.rs b/src/sql/schema/table.rs
similarity index 90%
rename from src/sql/catalog/table.rs
rename to src/sql/schema/table.rs
index a997680b..21f064fe 100644
--- a/src/sql/catalog/table.rs
+++ b/src/sql/schema/table.rs
@@ -11,18 +11,18 @@
 // limitations under the License.
 
 use std::sync::Arc;
-
+use std::time::Duration;
 use datafusion::arrow::datatypes::FieldRef;
 use datafusion::common::{Result, plan_err};
 use datafusion::logical_expr::{Extension, LogicalPlan};
 use datafusion::sql::sqlparser::ast::Statement;
-
+use protocol::grpc::api::ConnectorOp;
 use super::connector_table::ConnectorTable;
 use super::optimizer::produce_optimized_plan;
-use crate::sql::planner::StreamSchemaProvider;
-use crate::sql::planner::extension::remote_table::RemoteTableExtension;
-use crate::sql::planner::plan::rewrite_plan;
-use crate::sql::types::DFField;
+use crate::sql::schema::StreamSchemaProvider;
+use crate::sql::extensions::remote_table::RemoteTableExtension;
+use crate::sql::analysis::rewrite_plan;
+use crate::sql::types::{DFField, ProcessingMode};
 
 /// Represents all table types in the FunctionStream SQL catalog.
 #[allow(clippy::enum_variant_names)]
@@ -137,7 +137,7 @@ impl Table {
         Ok(())
     }
 
-    pub fn connector_op(&self) -> Result<super::connector::ConnectorOp> {
+    pub fn connector_op(&self) -> Result<ConnectorOp> {
         match self {
             Table::ConnectorTable(c) | Table::LookupTable(c) => Ok(c.connector_op()),
             Table::TableFromQuery { .. } => plan_err!("can't write to a query-defined table"),
@@ -151,3 +151,12 @@ impl Table {
         }
     }
 }
+
+#[derive(Clone, Debug)]
+pub struct SqlSource {
+    pub id: Option<i64>,
+    pub struct_def: Vec<FieldRef>,
+    pub config: ConnectorOp,
+    pub processing_mode: ProcessingMode,
+    pub idle_time: Option<Duration>,
+}
diff --git a/src/sql/catalog/utils.rs b/src/sql/schema/utils.rs
similarity index 100%
rename from src/sql/catalog/utils.rs
rename to src/sql/schema/utils.rs
diff --git a/src/types/converter.rs b/src/types/converter.rs
new file mode 100644
index 00000000..8f6a2ba8
--- /dev/null
+++ b/src/types/converter.rs
@@ -0,0 +1,83 @@
+use std::sync::Arc;
+use arrow::row::{OwnedRow, RowConverter, RowParser, Rows, SortField};
+use arrow_array::{Array, ArrayRef, BooleanArray};
+use arrow_schema::{ArrowError, DataType};
+
+/// The `Empty` variant exists because a row converter without sort fields emits empty `Rows`.
+#[derive(Debug)]
+pub enum Converter {
+    RowConverter(RowConverter),
+    Empty(RowConverter, Arc<dyn Array>),
+}
+
+impl Converter {
+    pub fn new(sort_fields: Vec<SortField>) -> Result<Self, ArrowError> {
+        if sort_fields.is_empty() {
+            let array = Arc::new(BooleanArray::from(vec![false]));
+            Ok(Self::Empty(
+                RowConverter::new(vec![SortField::new(DataType::Boolean)])?,
+                array,
+            ))
+        } else {
+            Ok(Self::RowConverter(RowConverter::new(sort_fields)?))
+        }
+    }
+
+    /// Encodes `columns` into the row format and returns only the first row, owned.
+    /// The `Empty` variant ignores `columns` and encodes its stored placeholder array.
+    pub fn convert_columns(&self, columns: &[Arc<dyn Array>]) -> Result<OwnedRow, ArrowError> {
+        let rows = match self {
+            Converter::RowConverter(converter) => converter.convert_columns(columns)?,
+            Converter::Empty(converter, placeholder) => {
+                converter.convert_columns(std::slice::from_ref(placeholder))?
+            }
+        };
+        Ok(rows.row(0).owned())
+    }
+
+    pub fn convert_all_columns(
+        &self,
+        columns: &[Arc<dyn Array>],
+        num_rows: usize,
+    ) -> Result<Rows, ArrowError> {
+        match self {
+            Converter::RowConverter(row_converter) => Ok(row_converter.convert_columns(columns)?),
+            Converter::Empty(row_converter, _array) => {
+                let array = Arc::new(BooleanArray::from(vec![false; num_rows]));
+                Ok(row_converter.convert_columns(&[array])?)
+            }
+        }
+    }
+
+    pub fn convert_rows(
+        &self,
+        rows: Vec<arrow::row::Row<'_>>,
+    ) -> Result<Vec<ArrayRef>, ArrowError> {
+        match self {
+            Converter::RowConverter(row_converter) => Ok(row_converter.convert_rows(rows)?),
+            Converter::Empty(_row_converter, _array) => Ok(vec![]),
+        }
+    }
+
+    /// Decodes raw row-format byte slices back into columns.
+    /// Each slice is parsed with the row converter's parser before conversion; the
+    /// `Empty` variant performs no parsing and returns an empty column list.
+    pub fn convert_raw_rows(&self, row_bytes: Vec<&[u8]>) -> Result<Vec<ArrayRef>, ArrowError> {
+        let Converter::RowConverter(converter) = self else {
+            return Ok(vec![]);
+        };
+        let parser = converter.parser();
+        let parsed = row_bytes
+            .into_iter()
+            .map(|bytes| parser.parse(bytes))
+            .collect::<Vec<_>>();
+        converter.convert_rows(parsed)
+    }
+
+    pub fn parser(&self) -> Option<RowParser> {
+        match self {
+            Converter::RowConverter(r) => Some(r.parser()),
+            Converter::Empty(_, _) => None,
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/types/df.rs b/src/types/df.rs
index 30b4eb9c..7266bb6b 100644
--- a/src/types/df.rs
+++ b/src/types/df.rs
@@ -4,22 +4,89 @@ use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuil
 use datafusion::arrow::error::ArrowError;
 use datafusion::common::{DataFusionError, Result as DFResult};
 use std::sync::Arc;
-
-use super::TIMESTAMP_FIELD;
-use crate::sql::types::StreamSchema;
+use std::time::SystemTime;
+use arrow::compute::{filter_record_batch, lexsort_to_indices, partition, take, SortColumn};
+use arrow::compute::kernels::cmp::gt_eq;
+use arrow::compute::kernels::numeric::div;
+use arrow::row::SortField;
+use arrow_array::{PrimitiveArray, UInt64Array};
+use arrow_array::types::UInt64Type;
+use protocol::grpc::api;
+use super::{to_nanos, TIMESTAMP_FIELD};
+use std::ops::Range;
+use crate::types::converter::Converter;
 
 pub type FsSchemaRef = Arc<FsSchema>;
 
-/// Core streaming schema with timestamp and key tracking.
-/// Analogous to Arroyo's `ArroyoSchema`.
 #[derive(Debug, Clone, Eq, PartialEq, Hash)]
 pub struct FsSchema {
     pub schema: Arc<Schema>,
     pub timestamp_index: usize,
     key_indices: Option<Vec<usize>>,
+    /// If defined, these indices are used for routing (i.e., which subtask gets which piece of data)
     routing_key_indices: Option<Vec<usize>>,
 }
 
+impl TryFrom<api::FsSchema> for FsSchema {
+    type Error = DataFusionError;
+    fn try_from(schema_proto: api::FsSchema) -> Result<Self, DataFusionError> {
+        let schema: Schema = serde_json::from_str(&schema_proto.arrow_schema)
+            .map_err(|e| DataFusionError::Plan(format!("Invalid arrow schema: {e}")))?;
+        let timestamp_index = schema_proto.timestamp_index as usize;
+
+        let key_indices = schema_proto.has_keys.then(|| {
+            schema_proto
+                .key_indices
+                .into_iter()
+                .map(|index| index as usize)
+                .collect()
+        });
+
+        let routing_key_indices = schema_proto.has_routing_keys.then(|| {
+            schema_proto
+                .routing_key_indices
+                .into_iter()
+                .map(|index| index as usize)
+                .collect()
+        });
+
+        Ok(Self {
+            schema: Arc::new(schema),
+            timestamp_index,
+            key_indices,
+            routing_key_indices,
+        })
+    }
+}
+
+impl From<FsSchema> for api::FsSchema {
+    fn from(schema: FsSchema) -> Self {
+        let arrow_schema = serde_json::to_string(schema.schema.as_ref()).unwrap();
+        let timestamp_index = schema.timestamp_index as u32;
+
+        let has_keys = schema.key_indices.is_some();
+        let key_indices = schema
+            .key_indices
+            .map(|ks| ks.into_iter().map(|index| index as u32).collect())
+            .unwrap_or_default();
+
+        let has_routing_keys = schema.routing_key_indices.is_some();
+        let routing_key_indices = schema
+            .routing_key_indices
+            .map(|ks| ks.into_iter().map(|index| index as u32).collect())
+            .unwrap_or_default();
+
+        Self {
+            arrow_schema,
+            timestamp_index,
+            key_indices,
+            has_keys,
+            routing_key_indices,
+            has_routing_keys,
+        }
+    }
+}
+
 impl FsSchema {
     pub fn new(
         schema: Arc<Schema>,
@@ -34,7 +101,6 @@ impl FsSchema {
             routing_key_indices,
         }
     }
-
     pub fn new_unkeyed(schema: Arc<Schema>, timestamp_index: usize) -> Self {
         Self {
             schema,
@@ -43,7 +109,6 @@ impl FsSchema {
             routing_key_indices: None,
         }
     }
-
     pub fn new_keyed(schema: Arc<Schema>, timestamp_index: usize, key_indices: Vec<usize>) -> Self {
         Self {
             schema,
@@ -141,22 +206,100 @@ impl FsSchema {
         self.key_indices.as_ref()
     }
 
-    pub fn sort_field_indices(&self, with_timestamp: bool) -> Vec<usize> {
-        let mut indices = vec![];
+    pub fn filter_by_time(
+        &self,
+        batch: RecordBatch,
+        cutoff: Option<SystemTime>,
+    ) -> Result<RecordBatch, ArrowError> {
+        let Some(cutoff) = cutoff else {
+            // no watermark, so we just return the same batch.
+            return Ok(batch);
+        };
+        // filter out late data
+        let timestamp_column = batch
+            .column(self.timestamp_index)
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| ArrowError::CastError(
+                format!("failed to downcast column {} of {:?} to timestamp. Schema is supposed to be {:?}",
+                        self.timestamp_index, batch, self.schema)))?;
+        let cutoff_scalar = TimestampNanosecondArray::new_scalar(to_nanos(cutoff) as i64);
+        let on_time = gt_eq(timestamp_column, &cutoff_scalar)?;
+        filter_record_batch(&batch, &on_time)
+    }
+
+    pub fn sort_columns(&self, batch: &RecordBatch, with_timestamp: bool) -> Vec<SortColumn> {
+        let mut columns = vec![];
         if let Some(keys) = &self.key_indices {
-            indices.extend(keys.iter().copied());
+            columns.extend(keys.iter().map(|index| SortColumn {
+                values: batch.column(*index).clone(),
+                options: None,
+            }));
         }
         if with_timestamp {
-            indices.push(self.timestamp_index);
+            columns.push(SortColumn {
+                values: batch.column(self.timestamp_index).clone(),
+                options: None,
+            });
+        }
+        columns
+    }
+
+    pub fn sort_fields(&self, with_timestamp: bool) -> Vec<SortField> {
+        let mut sort_fields = vec![];
+        if let Some(keys) = &self.key_indices {
+            sort_fields.extend(keys.iter());
         }
+        if with_timestamp {
+            sort_fields.push(self.timestamp_index);
+        }
+        self.sort_fields_by_indices(&sort_fields)
+    }
+
+    fn sort_fields_by_indices(&self, indices: &[usize]) -> Vec<SortField> {
         indices
+            .iter()
+            .map(|index| SortField::new(self.schema.field(*index).data_type().clone()))
+            .collect()
+    }
+
+    pub fn converter(&self, with_timestamp: bool) -> Result<Converter, ArrowError> {
+        Converter::new(self.sort_fields(with_timestamp))
+    }
+
+    /// Builds a [`Converter`] over the value columns: every field except the key
+    /// columns (if any), the `generation_index` column and, unless `with_timestamp`
+    /// is set, the timestamp column.
+    ///
+    /// An out-of-range `generation_index` excludes nothing.
+    /// Fails only if [`Converter::new`] rejects the resulting sort fields.
+    pub fn value_converter(
+        &self,
+        with_timestamp: bool,
+        generation_index: usize,
+    ) -> Result<Converter, ArrowError> {
+        // An unkeyed schema simply has no key columns to exclude.
+        let keys = self.key_indices.as_deref().unwrap_or(&[]);
+        // Exclude by value, not by position: a positional `Vec::remove` shifts the
+        // remaining elements, so removing `generation_index` first and then
+        // `timestamp_index` would drop the wrong column whenever the generation
+        // column precedes the timestamp column.
+        let indices = (0..self.schema.fields().len())
+            .filter(|index| {
+                !keys.contains(index)
+                    && (with_timestamp || *index != self.timestamp_index)
+                    && *index != generation_index
+            })
+            .collect::<Vec<_>>();
+        Converter::new(self.sort_fields_by_indices(&indices))
     }
 
     pub fn value_indices(&self, with_timestamp: bool) -> Vec<usize> {
         let field_count = self.schema.fields().len();
         match &self.key_indices {
             None => {
-                let mut indices: Vec<usize> = (0..field_count).collect();
+                let mut indices = (0..field_count).collect::<Vec<_>>();
+
                 if !with_timestamp {
                     indices.remove(self.timestamp_index);
                 }
@@ -166,10 +309,51 @@ impl FsSchema {
                 .filter(|index| {
                     !keys.contains(index) && (with_timestamp || *index != self.timestamp_index)
                 })
-                .collect(),
+                .collect::<Vec<_>>(),
         }
     }
 
+    /// Reorders the rows of `batch` by the columns `sort_columns` selects for it; an unkeyed
+    /// schema sorted without its timestamp comes back untouched. Arrow errors are propagated.
+    pub fn sort(
+        &self,
+        batch: RecordBatch,
+        with_timestamp: bool,
+    ) -> Result<RecordBatch, ArrowError> {
+        if self.key_indices.is_none() && !with_timestamp {
+            return Ok(batch);
+        }
+        let sort_indices = lexsort_to_indices(&self.sort_columns(&batch, with_timestamp), None)?;
+        let columns = batch
+            .columns()
+            .iter()
+            .map(|column| take(column, &sort_indices, None))
+            .collect::<Result<Vec<_>, ArrowError>>()?;
+        RecordBatch::try_new(batch.schema(), columns)
+    }
+
+    pub fn partition(
+        &self,
+        batch: &RecordBatch,
+        with_timestamp: bool,
+    ) -> Result<Vec<Range<usize>>, ArrowError> {
+        if self.key_indices.is_none() && !with_timestamp {
+            #[allow(clippy::single_range_in_vec_init)]
+            return Ok(vec![0..batch.num_rows()]);
+        }
+        // Past the guard: gather the columns to split on, routing keys first, then timestamp.
+        let mut partition_columns = vec![];
+        // NOTE(review): the guard tests `key_indices` but this reads `routing_keys()` - confirm.
+        if let Some(keys) = &self.routing_keys() {
+            partition_columns.extend(keys.iter().map(|index| batch.column(*index).clone()));
+        }
+        if with_timestamp {
+            partition_columns.push(batch.column(self.timestamp_index).clone());
+        }
+        // Row ranges come from `partition` (presumably Arrow's partition kernel - confirm).
+        Ok(partition(&partition_columns)?.ranges())
+    }
+
     pub fn unkeyed_batch(&self, batch: &RecordBatch) -> Result<RecordBatch, ArrowError> {
         if self.key_indices.is_none() {
             return Ok(batch.clone());
@@ -190,7 +374,7 @@ impl FsSchema {
                 .fields()
                 .iter()
                 .enumerate()
-                .filter(|(index, _)| !key_indices.contains(index))
+                .filter(|(index, _field)| !key_indices.contains(index))
                 .map(|(_, field)| field.as_ref().clone())
                 .collect::<Vec<_>>(),
         );
@@ -239,156 +423,18 @@ impl FsSchema {
     ) -> Result<Self, ArrowError> {
         let mut fields = self.schema.fields.to_vec();
         fields.extend(new_fields.map(Arc::new));
-        self.with_fields(fields)
-    }
-}
-
-/// Proto serialization: convert between FsSchema and the proto `FsSchema` message.
-///
-/// Schema is encoded as JSON using Arrow's `SchemaRef` JSON representation.
-/// This approach avoids depending on serde for `arrow_schema::Schema` directly.
-impl FsSchema {
-    pub fn to_proto(&self) -> protocol::grpc::api::FsSchema {
-        let arrow_schema = schema_to_json_string(&self.schema);
-        let timestamp_index = self.timestamp_index as u32;
-
-        let has_keys = self.key_indices.is_some();
-        let key_indices = self
-            .key_indices
-            .as_ref()
-            .map(|ks| ks.iter().map(|i| *i as u32).collect())
-            .unwrap_or_default();
-
-        let has_routing_keys = self.routing_key_indices.is_some();
-        let routing_key_indices = self
-            .routing_key_indices
-            .as_ref()
-            .map(|ks| ks.iter().map(|i| *i as u32).collect())
-            .unwrap_or_default();
 
-        protocol::grpc::api::FsSchema {
-            arrow_schema,
-            timestamp_index,
-            key_indices,
-            has_keys,
-            routing_key_indices,
-            has_routing_keys,
-        }
-    }
-
-    pub fn from_proto(proto: protocol::grpc::api::FsSchema) -> Result<Self, DataFusionError> {
-        let schema = schema_from_json_string(&proto.arrow_schema)?;
-        let timestamp_index = proto.timestamp_index as usize;
-
-        let key_indices = proto
-            .has_keys
-            .then(|| proto.key_indices.into_iter().map(|i| i as usize).collect());
-
-        let routing_key_indices = proto.has_routing_keys.then(|| {
-            proto
-                .routing_key_indices
-                .into_iter()
-                .map(|i| i as usize)
-                .collect()
-        });
-
-        Ok(Self {
-            schema: Arc::new(schema),
-            timestamp_index,
-            key_indices,
-            routing_key_indices,
-        })
-    }
-}
-
-fn schema_to_json_string(schema: &Schema) -> String {
-    let json_fields: Vec<serde_json::Value> = schema
-        .fields()
-        .iter()
-        .map(|f| {
-            serde_json::json!({
-                "name": f.name(),
-                "data_type": format!("{:?}", f.data_type()),
-                "nullable": f.is_nullable(),
-            })
-        })
-        .collect();
-    serde_json::to_string(&json_fields).unwrap()
-}
-
-fn schema_from_json_string(s: &str) -> Result<Schema, DataFusionError> {
-    let json_fields: Vec<serde_json::Value> = serde_json::from_str(s)
-        .map_err(|e| DataFusionError::Plan(format!("Invalid schema JSON: {e}")))?;
-
-    let fields: Vec<Field> = json_fields
-        .into_iter()
-        .map(|v| {
-            let name = v["name"]
-                .as_str()
-                .ok_or_else(|| DataFusionError::Plan("missing field name".into()))?
-                .to_string();
-            let nullable = v["nullable"].as_bool().unwrap_or(true);
-            let dt_str = v["data_type"]
-                .as_str()
-                .ok_or_else(|| DataFusionError::Plan("missing data_type".into()))?;
-            let data_type = parse_debug_data_type(dt_str)?;
-            Ok(Field::new(name, data_type, nullable))
-        })
-        .collect::<Result<_, DataFusionError>>()?;
-
-    Ok(Schema::new(fields))
-}
-
-fn parse_debug_data_type(s: &str) -> Result<DataType, DataFusionError> {
-    match s {
-        "Boolean" => Ok(DataType::Boolean),
-        "Int8" => Ok(DataType::Int8),
-        "Int16" => Ok(DataType::Int16),
-        "Int32" => Ok(DataType::Int32),
-        "Int64" => Ok(DataType::Int64),
-        "UInt8" => Ok(DataType::UInt8),
-        "UInt16" => Ok(DataType::UInt16),
-        "UInt32" => Ok(DataType::UInt32),
-        "UInt64" => Ok(DataType::UInt64),
-        "Float16" => Ok(DataType::Float16),
-        "Float32" => Ok(DataType::Float32),
-        "Float64" => Ok(DataType::Float64),
-        "Utf8" => Ok(DataType::Utf8),
-        "LargeUtf8" => Ok(DataType::LargeUtf8),
-        "Binary" => Ok(DataType::Binary),
-        "LargeBinary" => Ok(DataType::LargeBinary),
-        "Date32" => Ok(DataType::Date32),
-        "Date64" => Ok(DataType::Date64),
-        "Null" => Ok(DataType::Null),
-        s if s.starts_with("Timestamp(Nanosecond") => {
-            Ok(DataType::Timestamp(TimeUnit::Nanosecond, None))
-        }
-        s if s.starts_with("Timestamp(Microsecond") => {
-            Ok(DataType::Timestamp(TimeUnit::Microsecond, None))
-        }
-        s if s.starts_with("Timestamp(Millisecond") => {
-            Ok(DataType::Timestamp(TimeUnit::Millisecond, None))
-        }
-        s if s.starts_with("Timestamp(Second") => Ok(DataType::Timestamp(TimeUnit::Second, None)),
-        _ => Err(DataFusionError::Plan(format!(
-            "Unsupported data type in schema JSON: {s}"
-        ))),
-    }
-}
-
-impl From<StreamSchema> for FsSchema {
-    fn from(s: StreamSchema) -> Self {
-        FsSchema {
-            schema: s.schema,
-            timestamp_index: s.timestamp_index,
-            key_indices: s.key_indices,
-            routing_key_indices: None,
-        }
+        self.with_fields(fields)
     }
 }
 
-impl From<StreamSchema> for Arc<FsSchema> {
-    fn from(s: StreamSchema) -> Self {
-        Arc::new(FsSchema::from(s))
-    }
+pub fn server_for_hash_array(
+    hash: &PrimitiveArray<UInt64Type>,
+    n: usize,
+) -> Result<PrimitiveArray<UInt64Type>, ArrowError> {
+    if n == 1 {
+        return Ok(hash.unary(|_| 0)); // one server owns every hash; `u64::MAX / 1 + 1` overflows
+    }
+    let division = div(hash, &UInt64Array::new_scalar(u64::MAX / (n as u64) + 1))?;
+    Ok(division.as_any().downcast_ref::<UInt64Array>().unwrap().clone())
 }
diff --git a/src/types/mod.rs b/src/types/mod.rs
index ddf7baca..4da0a030 100644
--- a/src/types/mod.rs
+++ b/src/types/mod.rs
@@ -28,12 +28,13 @@ pub mod operator_config;
 pub mod task_info;
 pub mod time_utils;
 pub mod worker;
+mod converter;
 
 // ── Re-exports from existing modules ──
 pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType};
 pub use date::{DatePart, DateTruncPrecision};
 pub use debezium::{Debezium, DebeziumOp, UpdatingData};
-pub use hash::{HASH_SEEDS, range_for_server, server_for_hash};
+pub use hash::{range_for_server, server_for_hash, HASH_SEEDS};
 pub use message::{ArrowMessage, CheckpointBarrier, SignalMessage, Watermark};
 pub use task_info::{ChainInfo, TaskInfo};
 pub use time_utils::{from_micros, from_millis, from_nanos, to_micros, to_millis, to_nanos};

From 94879daac37c8485c025efa18c372f9b4fe31fb6 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sat, 21 Mar 2026 19:15:11 +0800
Subject: [PATCH 08/44] update

---
 src/lib.rs                                   |   2 -
 src/main.rs                                  |   2 -
 src/sql/analysis/join_rewriter.rs            |   2 +-
 src/sql/analysis/mod.rs                      |  13 -
 src/{ => sql}/api/checkpoints.rs             |   2 +-
 src/{ => sql}/api/connections.rs             |   4 +-
 src/{ => sql}/api/metrics.rs                 |   0
 src/{ => sql}/api/mod.rs                     |   0
 src/{ => sql}/api/pipelines.rs               |   2 +-
 src/{ => sql}/api/public_ids.rs              |   0
 src/{ => sql}/api/schema_resolver.rs         |   0
 src/{ => sql}/api/udfs.rs                    |   0
 src/{ => sql}/api/var_str.rs                 |   0
 src/{types => sql/common}/arrow_ext.rs       |   0
 src/{types => sql/common}/control.rs         |   0
 src/{types => sql/common}/converter.rs       |   0
 src/{types => sql/common}/date.rs            |   0
 src/{types => sql/common}/debezium.rs        |   0
 src/{types => sql/common}/errors.rs          |   2 +-
 src/{types => sql/common}/formats.rs         |   0
 src/{types/df.rs => sql/common/fs_schema.rs} |   6 +-
 src/{types => sql/common}/hash.rs            |   0
 src/{types => sql/common}/message.rs         |   0
 src/{types => sql/common}/mod.rs             |  10 +-
 src/{types => sql/common}/operator_config.rs |   0
 src/{types => sql/common}/task_info.rs       |   0
 src/{types => sql/common}/time_utils.rs      |   0
 src/{types => sql/common}/worker.rs          |   0
 src/sql/extensions/aggregate.rs              |   2 +-
 src/sql/extensions/async_udf.rs              | 187 +++++++++
 src/sql/extensions/constants.rs              |  13 +
 src/sql/extensions/debezium.rs               |   2 +-
 src/sql/extensions/extension_try_from.rs     |  70 ++++
 src/sql/extensions/is_retract.rs             |  80 ++++
 src/sql/extensions/join.rs                   |   2 +-
 src/sql/extensions/key_calculation.rs        |   2 +-
 src/sql/extensions/lookup.rs                 |   2 +-
 src/sql/extensions/macros.rs                 |  28 ++
 src/sql/extensions/mod.rs                    | 408 ++-----------------
 src/sql/extensions/projection.rs             |   2 +-
 src/sql/extensions/remote_table.rs           |   2 +-
 src/sql/extensions/sink.rs                   |   2 +-
 src/sql/extensions/stream_extension.rs       |  38 ++
 src/sql/extensions/table_source.rs           |   2 +-
 src/sql/extensions/timestamp_append.rs       |  80 ++++
 src/sql/extensions/updating_aggregate.rs     |   2 +-
 src/sql/extensions/watermark_node.rs         |   2 +-
 src/sql/extensions/window_fn.rs              |   2 +-
 src/sql/logical_node/logical.rs              |   2 +-
 src/sql/logical_planner/compiled_sql.rs      |  21 +
 src/sql/logical_planner/mod.rs               |   5 +-
 src/sql/logical_planner/planner.rs           |   2 +-
 src/sql/mod.rs                               |   5 +-
 src/sql/parse.rs                             |   5 -
 src/sql/types/data_type.rs                   |   2 +-
 55 files changed, 582 insertions(+), 433 deletions(-)
 rename src/{ => sql}/api/checkpoints.rs (98%)
 rename src/{ => sql}/api/connections.rs (99%)
 rename src/{ => sql}/api/metrics.rs (100%)
 rename src/{ => sql}/api/mod.rs (100%)
 rename src/{ => sql}/api/pipelines.rs (98%)
 rename src/{ => sql}/api/public_ids.rs (100%)
 rename src/{ => sql}/api/schema_resolver.rs (100%)
 rename src/{ => sql}/api/udfs.rs (100%)
 rename src/{ => sql}/api/var_str.rs (100%)
 rename src/{types => sql/common}/arrow_ext.rs (100%)
 rename src/{types => sql/common}/control.rs (100%)
 rename src/{types => sql/common}/converter.rs (100%)
 rename src/{types => sql/common}/date.rs (100%)
 rename src/{types => sql/common}/debezium.rs (100%)
 rename src/{types => sql/common}/errors.rs (96%)
 rename src/{types => sql/common}/formats.rs (100%)
 rename src/{types/df.rs => sql/common/fs_schema.rs} (98%)
 rename src/{types => sql/common}/hash.rs (100%)
 rename src/{types => sql/common}/message.rs (100%)
 rename src/{types => sql/common}/mod.rs (90%)
 rename src/{types => sql/common}/operator_config.rs (100%)
 rename src/{types => sql/common}/task_info.rs (100%)
 rename src/{types => sql/common}/time_utils.rs (100%)
 rename src/{types => sql/common}/worker.rs (100%)
 create mode 100644 src/sql/extensions/async_udf.rs
 create mode 100644 src/sql/extensions/constants.rs
 create mode 100644 src/sql/extensions/extension_try_from.rs
 create mode 100644 src/sql/extensions/is_retract.rs
 create mode 100644 src/sql/extensions/macros.rs
 create mode 100644 src/sql/extensions/stream_extension.rs
 create mode 100644 src/sql/extensions/timestamp_append.rs
 create mode 100644 src/sql/logical_planner/compiled_sql.rs

diff --git a/src/lib.rs b/src/lib.rs
index 0a3c6dc6..a6bb4d28 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,7 +14,6 @@
 
 #![allow(dead_code)]
 
-pub mod api;
 pub mod config;
 pub mod coordinator;
 pub mod logging;
@@ -22,4 +21,3 @@ pub mod runtime;
 pub mod server;
 pub mod sql;
 pub mod storage;
-pub mod types;
diff --git a/src/main.rs b/src/main.rs
index e847b16c..562b1526 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -12,7 +12,6 @@
 
 #![allow(dead_code)]
 
-mod api;
 mod config;
 mod coordinator;
 mod logging;
@@ -20,7 +19,6 @@ mod runtime;
 mod server;
 mod sql;
 mod storage;
-mod types;
 
 use anyhow::{Context, Result};
 use std::thread;
diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs
index 465d4620..520af335 100644
--- a/src/sql/analysis/join_rewriter.rs
+++ b/src/sql/analysis/join_rewriter.rs
@@ -3,7 +3,7 @@ use crate::sql::extensions::join::JoinExtension;
 use crate::sql::extensions::key_calculation::KeyCalculationExtension;
 use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer;
 use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata};
-use crate::types::TIMESTAMP_FIELD;
+use crate::sql::common::TIMESTAMP_FIELD;
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{
     Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference,
diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs
index 04230aa0..e13e2b7e 100644
--- a/src/sql/analysis/mod.rs
+++ b/src/sql/analysis/mod.rs
@@ -25,10 +25,6 @@ pub use crate::sql::schema::schema_provider::{
     LogicalBatchInput, StreamSchemaProvider, StreamTable,
 };
 
-pub(crate) mod mod_prelude {
-    pub use super::StreamSchemaProvider;
-}
-
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
@@ -44,7 +40,6 @@ use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
 use datafusion::sql::sqlparser::parser::Parser;
 use tracing::{debug, info, instrument};
 
-use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig};
 use crate::sql::logical_planner::optimizers::ChainingOptimizer;
 use crate::sql::schema::insert::Insert;
 use crate::sql::schema::table::Table as CatalogTable;
@@ -56,14 +51,6 @@ use crate::sql::extensions::{ StreamExtension};
 use crate::sql::logical_planner::planner::NamedNode;
 use crate::sql::types::SqlConfig;
 
-// ── Compilation pipeline ──────────────────────────────────────────────
-
-#[derive(Clone, Debug)]
-pub struct CompiledSql {
-    pub program: LogicalProgram,
-    pub connection_ids: Vec<i64>,
-}
-
 fn duration_from_sql_expr(
     expr: &datafusion::sql::sqlparser::ast::Expr,
 ) -> Result<std::time::Duration> {
diff --git a/src/api/checkpoints.rs b/src/sql/api/checkpoints.rs
similarity index 98%
rename from src/api/checkpoints.rs
rename to src/sql/api/checkpoints.rs
index 8462f311..243cae40 100644
--- a/src/api/checkpoints.rs
+++ b/src/sql/api/checkpoints.rs
@@ -1,4 +1,4 @@
-use crate::types::to_micros;
+use crate::sql::common::to_micros;
 use serde::{Deserialize, Serialize};
 use std::time::SystemTime;
 
diff --git a/src/api/connections.rs b/src/sql/api/connections.rs
similarity index 99%
rename from src/api/connections.rs
rename to src/sql/api/connections.rs
index eb69690e..d88dee75 100644
--- a/src/api/connections.rs
+++ b/src/sql/api/connections.rs
@@ -1,5 +1,5 @@
-use crate::types::formats::{BadData, Format, Framing};
-use crate::types::{FsExtensionType, FsSchema};
+use crate::sql::common::formats::{BadData, Format, Framing};
+use crate::sql::common::{FsExtensionType, FsSchema};
 use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit};
 use serde::ser::SerializeMap;
 use serde::{Deserialize, Serialize, Serializer};
diff --git a/src/api/metrics.rs b/src/sql/api/metrics.rs
similarity index 100%
rename from src/api/metrics.rs
rename to src/sql/api/metrics.rs
diff --git a/src/api/mod.rs b/src/sql/api/mod.rs
similarity index 100%
rename from src/api/mod.rs
rename to src/sql/api/mod.rs
diff --git a/src/api/pipelines.rs b/src/sql/api/pipelines.rs
similarity index 98%
rename from src/api/pipelines.rs
rename to src/sql/api/pipelines.rs
index 3c77ce7a..8b42036c 100644
--- a/src/api/pipelines.rs
+++ b/src/sql/api/pipelines.rs
@@ -1,5 +1,5 @@
 use super::udfs::Udf;
-use crate::types::control::ErrorDomain;
+use crate::sql::common::control::ErrorDomain;
 use serde::{Deserialize, Serialize};
 
 #[derive(Serialize, Deserialize, Clone, Debug)]
diff --git a/src/api/public_ids.rs b/src/sql/api/public_ids.rs
similarity index 100%
rename from src/api/public_ids.rs
rename to src/sql/api/public_ids.rs
diff --git a/src/api/schema_resolver.rs b/src/sql/api/schema_resolver.rs
similarity index 100%
rename from src/api/schema_resolver.rs
rename to src/sql/api/schema_resolver.rs
diff --git a/src/api/udfs.rs b/src/sql/api/udfs.rs
similarity index 100%
rename from src/api/udfs.rs
rename to src/sql/api/udfs.rs
diff --git a/src/api/var_str.rs b/src/sql/api/var_str.rs
similarity index 100%
rename from src/api/var_str.rs
rename to src/sql/api/var_str.rs
diff --git a/src/types/arrow_ext.rs b/src/sql/common/arrow_ext.rs
similarity index 100%
rename from src/types/arrow_ext.rs
rename to src/sql/common/arrow_ext.rs
diff --git a/src/types/control.rs b/src/sql/common/control.rs
similarity index 100%
rename from src/types/control.rs
rename to src/sql/common/control.rs
diff --git a/src/types/converter.rs b/src/sql/common/converter.rs
similarity index 100%
rename from src/types/converter.rs
rename to src/sql/common/converter.rs
diff --git a/src/types/date.rs b/src/sql/common/date.rs
similarity index 100%
rename from src/types/date.rs
rename to src/sql/common/date.rs
diff --git a/src/types/debezium.rs b/src/sql/common/debezium.rs
similarity index 100%
rename from src/types/debezium.rs
rename to src/sql/common/debezium.rs
diff --git a/src/types/errors.rs b/src/sql/common/errors.rs
similarity index 96%
rename from src/types/errors.rs
rename to src/sql/common/errors.rs
index 2c425c93..bcda8667 100644
--- a/src/types/errors.rs
+++ b/src/sql/common/errors.rs
@@ -42,7 +42,7 @@ impl From<datafusion::error::DataFusionError> for DataflowError {
 #[macro_export]
 macro_rules! connector_err {
     ($($arg:tt)*) => {
-        $crate::types::errors::DataflowError::Connector(format!($($arg)*))
+        $crate::sql::common::errors::DataflowError::Connector(format!($($arg)*))
     };
 }
 
diff --git a/src/types/formats.rs b/src/sql/common/formats.rs
similarity index 100%
rename from src/types/formats.rs
rename to src/sql/common/formats.rs
diff --git a/src/types/df.rs b/src/sql/common/fs_schema.rs
similarity index 98%
rename from src/types/df.rs
rename to src/sql/common/fs_schema.rs
index 7266bb6b..e1507e3e 100644
--- a/src/types/df.rs
+++ b/src/sql/common/fs_schema.rs
@@ -1,3 +1,7 @@
+//! FunctionStream table/stream schema: Arrow [`Schema`] plus timestamp index and optional key columns.
+//!
+//! [`Schema`]: datafusion::arrow::datatypes::Schema
+
 use datafusion::arrow::array::builder::{ArrayBuilder, make_builder};
 use datafusion::arrow::array::{RecordBatch, TimestampNanosecondArray};
 use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit};
@@ -14,7 +18,7 @@ use arrow_array::types::UInt64Type;
 use protocol::grpc::api;
 use super::{to_nanos, TIMESTAMP_FIELD};
 use std::ops::Range;
-use crate::types::converter::Converter;
+use crate::sql::common::converter::Converter;
 
 pub type FsSchemaRef = Arc<FsSchema>;
 
diff --git a/src/types/hash.rs b/src/sql/common/hash.rs
similarity index 100%
rename from src/types/hash.rs
rename to src/sql/common/hash.rs
diff --git a/src/types/message.rs b/src/sql/common/message.rs
similarity index 100%
rename from src/types/message.rs
rename to src/sql/common/message.rs
diff --git a/src/types/mod.rs b/src/sql/common/mod.rs
similarity index 90%
rename from src/types/mod.rs
rename to src/sql/common/mod.rs
index 4da0a030..d03511c0 100644
--- a/src/types/mod.rs
+++ b/src/sql/common/mod.rs
@@ -10,16 +10,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Core types shared across the FunctionStream system.
+//! Shared core types and constants for FunctionStream (`crate::sql::common`).
 //!
-//! This module provides fundamental types used by the runtime, SQL planner,
-//! coordinator, and other subsystems — analogous to `arroyo-types` + `arroyo-rpc` in Arroyo.
+//! Used by the runtime, SQL planner, coordinator, and other subsystems —
+//! analogous to `arroyo-types` + `arroyo-rpc` in Arroyo.
 
 pub mod arrow_ext;
 pub mod control;
 pub mod date;
 pub mod debezium;
-pub mod df;
+pub mod fs_schema;
 pub mod errors;
 pub mod formats;
 pub mod hash;
@@ -45,7 +45,7 @@ pub use control::{
     CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp,
     ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError,
 };
-pub use df::{FsSchema, FsSchemaRef};
+pub use fs_schema::{FsSchema, FsSchemaRef};
 pub use errors::DataflowError;
 pub use formats::{BadData, Format, Framing, JsonFormat};
 pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
diff --git a/src/types/operator_config.rs b/src/sql/common/operator_config.rs
similarity index 100%
rename from src/types/operator_config.rs
rename to src/sql/common/operator_config.rs
diff --git a/src/types/task_info.rs b/src/sql/common/task_info.rs
similarity index 100%
rename from src/types/task_info.rs
rename to src/sql/common/task_info.rs
diff --git a/src/types/time_utils.rs b/src/sql/common/time_utils.rs
similarity index 100%
rename from src/types/time_utils.rs
rename to src/sql/common/time_utils.rs
diff --git a/src/types/worker.rs b/src/sql/common/worker.rs
similarity index 100%
rename from src/types/worker.rs
rename to src/sql/common/worker.rs
diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs
index c8c070f2..12cde08c 100644
--- a/src/sql/extensions/aggregate.rs
+++ b/src/sql/extensions/aggregate.rs
@@ -23,7 +23,7 @@ use crate::sql::types::{
     DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers,
     schema_from_df_fields, schema_from_df_fields_with_metadata,
 };
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 
 pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension";
 
diff --git a/src/sql/extensions/async_udf.rs b/src/sql/extensions/async_udf.rs
new file mode 100644
index 00000000..da0bdff1
--- /dev/null
+++ b/src/sql/extensions/async_udf.rs
@@ -0,0 +1,187 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
+use std::sync::Arc;
+use std::time::Duration;
+
+use datafusion::common::{DFSchemaRef, Result};
+use datafusion::logical_expr::{
+    Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore,
+};
+use datafusion_common::internal_err;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use prost::Message;
+use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering};
+
+use crate::multifield_partial_ord;
+use crate::sql::extensions::constants::ASYNC_RESULT_FIELD;
+use crate::sql::extensions::stream_extension::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::logical_node::logical::{
+    DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName,
+};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields};
+use crate::sql::common::{FsSchema, FsSchemaRef};
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct AsyncUDFExtension {
+    pub(crate) input: Arc<LogicalPlan>, // upstream plan; `arg_exprs` are planned on its schema
+    pub(crate) name: String, // operator name, also shown as `async_udf<name>`
+    pub(crate) udf: DylibUdfConfig, // its `return_type` types the async result column
+    pub(crate) arg_exprs: Vec<Expr>, // serialized into `AsyncUdfOperator::arg_exprs`
+    pub(crate) final_exprs: Vec<Expr>, // planned on the input schema plus the result column
+    pub(crate) ordered: bool, // true -> `AsyncUdfOrdering::Ordered`, false -> `Unordered`
+    pub(crate) max_concurrency: usize, // sent to the operator config as `u32`
+    pub(crate) timeout: Duration, // sent to the operator config in microseconds
+    pub(crate) final_schema: DFSchemaRef, // returned by `schema()`, source of `output_schema()`
+}
+// NOTE(review): `final_schema` is the only field not listed in the macro call below - confirm.
+multifield_partial_ord!(
+    AsyncUDFExtension,
+    input,
+    name,
+    udf,
+    arg_exprs,
+    final_exprs,
+    ordered,
+    max_concurrency,
+    timeout
+);
+
+impl StreamExtension for AsyncUDFExtension {
+    fn node_name(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Emits one `AsyncUdf` operator node plus a forward edge carrying the first input schema.
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        let Some(input_schema) = input_schemas.first() else {
+            return internal_err!("async UDF node requires an input schema");
+        };
+        let arg_exprs = self
+            .arg_exprs
+            .iter()
+            .map(|e| {
+                let p = planner.create_physical_expr(e, self.input.schema())?;
+                Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec())
+            })
+            .collect::<Result<Vec<_>>>()?;
+        // Final expressions see the input columns plus one nullable column for the UDF result.
+        let mut final_fields = fields_with_qualifiers(self.input.schema());
+        final_fields.push(DFField::new(
+            None,
+            ASYNC_RESULT_FIELD,
+            self.udf.return_type.clone(),
+            true,
+        ));
+        let post_udf_schema = schema_from_df_fields(&final_fields)?;
+        let final_exprs = self
+            .final_exprs
+            .iter()
+            .map(|e| {
+                let p = planner.create_physical_expr(e, &post_udf_schema)?;
+                Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec())
+            })
+            .collect::<Result<Vec<_>>>()?;
+        let config = AsyncUdfOperator {
+            name: self.name.clone(),
+            udf: Some(self.udf.clone().into()),
+            arg_exprs,
+            final_exprs,
+            ordering: if self.ordered {
+                AsyncUdfOrdering::Ordered as i32
+            } else {
+                AsyncUdfOrdering::Unordered as i32
+            },
+            max_concurrency: self.max_concurrency as u32,
+            timeout_micros: self.timeout.as_micros() as u64,
+        };
+        let node = LogicalNode::single(
+            index as u32,
+            format!("async_udf_{index}"),
+            OperatorName::AsyncUdf,
+            config.encode_to_vec(),
+            format!("async_udf<{}>", self.name),
+            1,
+        );
+        let incoming_edge =
+            LogicalEdge::project_all(LogicalEdgeType::Forward, input_schema.as_ref().clone());
+        Ok(NodeWithIncomingEdges {
+            node,
+            edges: vec![incoming_edge],
+        })
+    }
+
+    fn output_schema(&self) -> FsSchema {
+        FsSchema::from_fields(
+            self.final_schema
+                .fields()
+                .iter()
+                .map(|f| (**f).clone())
+                .collect(),
+        )
+    }
+}
+
+impl UserDefinedLogicalNodeCore for AsyncUDFExtension {
+    /// Fixed node name reported for this extension.
+    fn name(&self) -> &str {
+        "AsyncUDFNode"
+    }
+
+    /// The single upstream plan this node reads from.
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    /// Schema of the rows this node produces.
+    fn schema(&self) -> &DFSchemaRef {
+        &self.final_schema
+    }
+
+    /// Argument expressions first, then the final expressions.
+    fn expressions(&self) -> Vec<Expr> {
+        let mut all = self.arg_exprs.clone();
+        all.extend(self.final_exprs.iter().cloned());
+        all
+    }
+
+    /// Explain output: the UDF name followed by the final schema.
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "AsyncUdfExtension<{}>: {}", self.name, self.final_schema)
+    }
+
+    /// Rebuilds the node on top of a replacement input.
+    ///
+    /// Exactly one input is accepted and `exprs` must equal the current expressions; the
+    /// node never adopts rewritten expressions, so anything else is an internal error.
+    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let [input] = inputs.as_slice() else {
+            return internal_err!("input size inconsistent");
+        };
+        if exprs != UserDefinedLogicalNode::expressions(self) {
+            return internal_err!("Tried to recreate async UDF node with different expressions");
+        }
+        // Only `input` changes; every other field is carried over from `self`.
+        Ok(Self {
+            input: Arc::new(input.clone()),
+            ..self.clone()
+        })
+    }
+}
diff --git a/src/sql/extensions/constants.rs b/src/sql/extensions/constants.rs
new file mode 100644
index 00000000..4f90ca6e
--- /dev/null
+++ b/src/sql/extensions/constants.rs
@@ -0,0 +1,13 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub const ASYNC_RESULT_FIELD: &str = "__async_result"; // column name for an async UDF's result
diff --git a/src/sql/extensions/debezium.rs b/src/sql/extensions/debezium.rs
index 184de88d..84407ee4 100644
--- a/src/sql/extensions/debezium.rs
+++ b/src/sql/extensions/debezium.rs
@@ -14,7 +14,7 @@ use super::{NodeWithIncomingEdges};
 use crate::multifield_partial_ord;
 use crate::sql::logical_planner::updating_meta_field;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
+use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
 
 pub(crate) const DEBEZIUM_UNROLLING_EXTENSION_NAME: &str = "DebeziumUnrollingExtension";
 pub(crate) const TO_DEBEZIUM_EXTENSION_NAME: &str = "ToDebeziumExtension";
diff --git a/src/sql/extensions/extension_try_from.rs b/src/sql/extensions/extension_try_from.rs
new file mode 100644
index 00000000..eb042a90
--- /dev/null
+++ b/src/sql/extensions/extension_try_from.rs
@@ -0,0 +1,70 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use datafusion::common::{DataFusionError, Result};
+use datafusion::logical_expr::UserDefinedLogicalNode;
+
+use crate::sql::extensions::aggregate::AggregateExtension;
+use crate::sql::extensions::async_udf::AsyncUDFExtension;
+use crate::sql::extensions::debezium::{DebeziumUnrollingExtension, ToDebeziumExtension};
+use crate::sql::extensions::join::JoinExtension;
+use crate::sql::extensions::key_calculation::KeyCalculationExtension;
+use crate::sql::extensions::lookup::LookupJoin;
+use crate::sql::extensions::projection::ProjectionExtension;
+use crate::sql::extensions::remote_table::RemoteTableExtension;
+use crate::sql::extensions::sink::SinkExtension;
+use crate::sql::extensions::stream_extension::StreamExtension;
+use crate::sql::extensions::table_source::TableSourceExtension;
+use crate::sql::extensions::updating_aggregate::UpdatingAggregateExtension;
+use crate::sql::extensions::watermark_node::WatermarkNode;
+use crate::sql::extensions::window_fn::WindowFunctionExtension;
+
+fn try_from_t<T: StreamExtension + 'static>(
+    node: &dyn UserDefinedLogicalNode,
+) -> std::result::Result<&dyn StreamExtension, ()> {
+    node.as_any()
+        .downcast_ref::<T>()
+        .map(|t| t as &dyn StreamExtension)
+        .ok_or(())
+}
+
+impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension {
+    type Error = DataFusionError;
+
+    fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result<Self, Self::Error> {
+        try_from_t::<TableSourceExtension>(node)
+            .or_else(|_| try_from_t::<WatermarkNode>(node))
+            .or_else(|_| try_from_t::<SinkExtension>(node))
+            .or_else(|_| try_from_t::<KeyCalculationExtension>(node))
+            .or_else(|_| try_from_t::<AggregateExtension>(node))
+            .or_else(|_| try_from_t::<RemoteTableExtension>(node))
+            .or_else(|_| try_from_t::<JoinExtension>(node))
+            .or_else(|_| try_from_t::<WindowFunctionExtension>(node))
+            .or_else(|_| try_from_t::<AsyncUDFExtension>(node))
+            .or_else(|_| try_from_t::<ToDebeziumExtension>(node))
+            .or_else(|_| try_from_t::<DebeziumUnrollingExtension>(node))
+            .or_else(|_| try_from_t::<UpdatingAggregateExtension>(node))
+            .or_else(|_| try_from_t::<LookupJoin>(node))
+            .or_else(|_| try_from_t::<ProjectionExtension>(node))
+            .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name())))
+    }
+}
+
+impl<'a> TryFrom<&'a Arc<dyn UserDefinedLogicalNode>> for &'a dyn StreamExtension {
+    type Error = DataFusionError;
+
+    fn try_from(node: &'a Arc<dyn UserDefinedLogicalNode>) -> Result<Self, Self::Error> {
+        TryFrom::try_from(node.as_ref())
+    }
+}
diff --git a/src/sql/extensions/is_retract.rs b/src/sql/extensions/is_retract.rs
new file mode 100644
index 00000000..4375b716
--- /dev/null
+++ b/src/sql/extensions/is_retract.rs
@@ -0,0 +1,80 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{DataType, TimeUnit};
+use datafusion::common::{DFSchemaRef, Result, TableReference};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::multifield_partial_ord;
+use crate::sql::logical_planner::updating_meta_field;
+use crate::sql::types::{DFField, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields};
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct IsRetractExtension {
+    pub(crate) input: LogicalPlan,
+    pub(crate) schema: DFSchemaRef,
+    pub(crate) timestamp_qualifier: Option<TableReference>,
+}
+
+multifield_partial_ord!(IsRetractExtension, input, timestamp_qualifier);
+
+impl IsRetractExtension {
+    pub(crate) fn new(input: LogicalPlan, timestamp_qualifier: Option<TableReference>) -> Self {
+        let mut output_fields = fields_with_qualifiers(input.schema());
+
+        let timestamp_index = output_fields.len() - 1;
+        output_fields[timestamp_index] = DFField::new(
+            timestamp_qualifier.clone(),
+            TIMESTAMP_FIELD,
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            false,
+        );
+        output_fields.push((timestamp_qualifier.clone(), updating_meta_field()).into());
+        let schema = Arc::new(schema_from_df_fields(&output_fields).unwrap());
+        Self {
+            input,
+            schema,
+            timestamp_qualifier,
+        }
+    }
+}
+
+impl UserDefinedLogicalNodeCore for IsRetractExtension {
+    fn name(&self) -> &str {
+        "IsRetractExtension"
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "IsRetractExtension")
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self::new(
+            inputs[0].clone(),
+            self.timestamp_qualifier.clone(),
+        ))
+    }
+}
diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs
index c28a6e01..74dcfde6 100644
--- a/src/sql/extensions/join.rs
+++ b/src/sql/extensions/join.rs
@@ -16,7 +16,7 @@ use protocol::grpc::api::JoinOperator;
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::sql::logical_planner::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 
 pub(crate) const JOIN_NODE_NAME: &str = "JoinNode";
 
diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs
index e0edb67a..3a94f592 100644
--- a/src/sql/extensions/key_calculation.rs
+++ b/src/sql/extensions/key_calculation.rs
@@ -20,7 +20,7 @@ use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::{
     StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata,
 };
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 
 pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension";
 
diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs
index 2dc76265..c2ef8f28 100644
--- a/src/sql/extensions/lookup.rs
+++ b/src/sql/extensions/lookup.rs
@@ -14,7 +14,7 @@ use crate::sql::schema::ConnectorTable;
 use crate::sql::schema::utils::add_timestamp_field_arrow;
 use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 
 pub const SOURCE_EXTENSION_NAME: &str = "LookupSource";
 pub const JOIN_EXTENSION_NAME: &str = "LookupJoin";
diff --git a/src/sql/extensions/macros.rs b/src/sql/extensions/macros.rs
new file mode 100644
index 00000000..4ce649c2
--- /dev/null
+++ b/src/sql/extensions/macros.rs
@@ -0,0 +1,28 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#[macro_export]
+macro_rules! multifield_partial_ord {
+    ($ty:ty, $($field:tt), *) => {
+        impl PartialOrd for $ty {
+            fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+                $(
+                    let cmp = self.$field.partial_cmp(&other.$field)?;
+                    if cmp != std::cmp::Ordering::Equal {
+                        return Some(cmp);
+                    }
+                )*
+                Some(std::cmp::Ordering::Equal)
+            }
+        }
+    };
+}
diff --git a/src/sql/extensions/mod.rs b/src/sql/extensions/mod.rs
index 25632930..a78ca419 100644
--- a/src/sql/extensions/mod.rs
+++ b/src/sql/extensions/mod.rs
@@ -1,39 +1,22 @@
-use std::fmt::{Debug, Formatter};
-use std::sync::Arc;
-use std::time::Duration;
-
-use datafusion::arrow::datatypes::{DataType, TimeUnit};
-use datafusion::common::{DFSchemaRef, DataFusionError, Result, TableReference};
-use datafusion::logical_expr::{
-    Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore,
-};
-use datafusion_common::internal_err;
-use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
-use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
-use prost::Message;
-use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering};
-use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
-use crate::sql::logical_planner::updating_meta_field;
-use crate::sql::extensions::aggregate::AggregateExtension;
-use crate::sql::extensions::debezium::{DebeziumUnrollingExtension, ToDebeziumExtension};
-use crate::sql::extensions::join::JoinExtension;
-use crate::sql::extensions::key_calculation::KeyCalculationExtension;
-use crate::sql::extensions::lookup::LookupJoin;
-use crate::sql::extensions::projection::ProjectionExtension;
-use crate::sql::extensions::remote_table::RemoteTableExtension;
-use crate::sql::extensions::sink::SinkExtension;
-use crate::sql::extensions::table_source::TableSourceExtension;
-use crate::sql::extensions::updating_aggregate::UpdatingAggregateExtension;
-use crate::sql::extensions::watermark_node::WatermarkNode;
-use crate::sql::extensions::window_fn::WindowFunctionExtension;
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field};
-use crate::sql::types::{
-    DFField, StreamSchema, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields,
-};
-use crate::types::{FsSchema, FsSchemaRef};
-
-pub const ASYNC_RESULT_FIELD: &str = "__async_result";
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod macros;
+
+pub(crate) mod constants;
+pub(crate) use constants::ASYNC_RESULT_FIELD;
+
+pub(crate) mod stream_extension;
+pub(crate) use stream_extension::{NodeWithIncomingEdges, StreamExtension};
 
 pub(crate) mod aggregate;
 pub(crate) mod debezium;
@@ -48,352 +31,13 @@ pub(crate) mod updating_aggregate;
 pub(crate) mod watermark_node;
 pub(crate) mod window_fn;
 
+pub(crate) mod timestamp_append;
+pub(crate) use timestamp_append::TimestampAppendExtension;
 
-pub(crate) trait StreamExtension: Debug {
-    fn node_name(&self) -> Option<NamedNode>;
-    fn plan_node(
-        &self,
-        planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges>;
-    fn output_schema(&self) -> FsSchema;
-    fn transparent(&self) -> bool {
-        false
-    }
-}
-
-pub(crate) struct NodeWithIncomingEdges {
-    pub node: LogicalNode,
-    pub edges: Vec<LogicalEdge>,
-}
-
-fn try_from_t<T: StreamExtension + 'static>(
-    node: &dyn UserDefinedLogicalNode,
-) -> Result<&dyn StreamExtension, ()> {
-    node.as_any()
-        .downcast_ref::<T>()
-        .map(|t| t as &dyn StreamExtension)
-        .ok_or(())
-}
-
-impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension {
-    type Error = DataFusionError;
-
-    fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result<Self, Self::Error> {
-        try_from_t::<TableSourceExtension>(node)
-            .or_else(|_| try_from_t::<WatermarkNode>(node))
-            .or_else(|_| try_from_t::<SinkExtension>(node))
-            .or_else(|_| try_from_t::<KeyCalculationExtension>(node))
-            .or_else(|_| try_from_t::<AggregateExtension>(node))
-            .or_else(|_| try_from_t::<RemoteTableExtension>(node))
-            .or_else(|_| try_from_t::<JoinExtension>(node))
-            .or_else(|_| try_from_t::<WindowFunctionExtension>(node))
-            .or_else(|_| try_from_t::<AsyncUDFExtension>(node))
-            .or_else(|_| try_from_t::<ToDebeziumExtension>(node))
-            .or_else(|_| try_from_t::<DebeziumUnrollingExtension>(node))
-            .or_else(|_| try_from_t::<UpdatingAggregateExtension>(node))
-            .or_else(|_| try_from_t::<LookupJoin>(node))
-            .or_else(|_| try_from_t::<ProjectionExtension>(node))
-            .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name())))
-    }
-}
-
-impl<'a> TryFrom<&'a Arc<dyn UserDefinedLogicalNode>> for &'a dyn StreamExtension {
-    type Error = DataFusionError;
-
-    fn try_from(node: &'a Arc<dyn UserDefinedLogicalNode>) -> Result<Self, Self::Error> {
-        TryFrom::try_from(node.as_ref())
-    }
-}
-
-#[macro_export]
-macro_rules! multifield_partial_ord {
-    ($ty:ty, $($field:tt), *) => {
-        impl PartialOrd for $ty {
-            fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-                $(
-                    let cmp = self.$field.partial_cmp(&other.$field)?;
-                    if cmp != std::cmp::Ordering::Equal {
-                        return Some(cmp);
-                    }
-                )*
-                Some(std::cmp::Ordering::Equal)
-            }
-        }
-}
-    }
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct TimestampAppendExtension {
-    pub(crate) input: LogicalPlan,
-    pub(crate) qualifier: Option<TableReference>,
-    pub(crate) schema: DFSchemaRef,
-}
-
-impl TimestampAppendExtension {
-    fn new(input: LogicalPlan, qualifier: Option<TableReference>) -> Self {
-        if has_timestamp_field(input.schema()) {
-            unreachable!(
-                "shouldn't be adding timestamp to a plan that already has it: plan :\n {:?}\n schema: {:?}",
-                input,
-                input.schema()
-            );
-        }
-        let schema = add_timestamp_field(input.schema().clone(), qualifier.clone()).unwrap();
-        Self {
-            input,
-            qualifier,
-            schema,
-        }
-    }
-}
-
-multifield_partial_ord!(TimestampAppendExtension, input, qualifier);
-
-impl UserDefinedLogicalNodeCore for TimestampAppendExtension {
-    fn name(&self) -> &str {
-        "TimestampAppendExtension"
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(
-            f,
-            "TimestampAppendExtension({:?}): {}",
-            self.qualifier,
-            self.schema
-                .fields()
-                .iter()
-                .map(|f| f.name().to_string())
-                .collect::<Vec<_>>()
-                .join(", ")
-        )
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self::new(inputs[0].clone(), self.qualifier.clone()))
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct AsyncUDFExtension {
-    pub(crate) input: Arc<LogicalPlan>,
-    pub(crate) name: String,
-    pub(crate) udf: DylibUdfConfig,
-    pub(crate) arg_exprs: Vec<Expr>,
-    pub(crate) final_exprs: Vec<Expr>,
-    pub(crate) ordered: bool,
-    pub(crate) max_concurrency: usize,
-    pub(crate) timeout: Duration,
-    pub(crate) final_schema: DFSchemaRef,
-}
-
-multifield_partial_ord!(
-    AsyncUDFExtension,
-    input,
-    name,
-    udf,
-    arg_exprs,
-    final_exprs,
-    ordered,
-    max_concurrency,
-    timeout
-);
-
-impl StreamExtension for AsyncUDFExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn plan_node(
-        &self,
-        planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        let arg_exprs = self
-            .arg_exprs
-            .iter()
-            .map(|e| {
-                let p = planner.create_physical_expr(e, self.input.schema())?;
-                Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec())
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        let mut final_fields = fields_with_qualifiers(self.input.schema());
-        final_fields.push(DFField::new(
-            None,
-            ASYNC_RESULT_FIELD,
-            self.udf.return_type.clone(),
-            true,
-        ));
-        let post_udf_schema = schema_from_df_fields(&final_fields)?;
-
-        let final_exprs = self
-            .final_exprs
-            .iter()
-            .map(|e| {
-                let p = planner.create_physical_expr(e, &post_udf_schema)?;
-                Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec())
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        let config = AsyncUdfOperator {
-            name: self.name.clone(),
-            udf: Some(self.udf.clone().into()),
-            arg_exprs,
-            final_exprs,
-            ordering: if self.ordered {
-                AsyncUdfOrdering::Ordered as i32
-            } else {
-                AsyncUdfOrdering::Unordered as i32
-            },
-            max_concurrency: self.max_concurrency as u32,
-            timeout_micros: self.timeout.as_micros() as u64,
-        };
-
-        let node = LogicalNode::single(
-            index as u32,
-            format!("async_udf_{index}"),
-            OperatorName::AsyncUdf,
-            config.encode_to_vec(),
-            format!("async_udf<{}>", self.name),
-            1,
-        );
-
-        let incoming_edge =
-            LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone());
-        Ok(NodeWithIncomingEdges {
-            node,
-            edges: vec![incoming_edge],
-        })
-    }
-
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_fields(
-            self.final_schema
-                .fields()
-                .iter()
-                .map(|f| (**f).clone())
-                .collect(),
-        )
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct IsRetractExtension {
-    pub(crate) input: LogicalPlan,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) timestamp_qualifier: Option<TableReference>,
-}
-
-multifield_partial_ord!(IsRetractExtension, input, timestamp_qualifier);
-
-impl IsRetractExtension {
-    pub(crate) fn new(input: LogicalPlan, timestamp_qualifier: Option<TableReference>) -> Self {
-        let mut output_fields = fields_with_qualifiers(input.schema());
-
-        let timestamp_index = output_fields.len() - 1;
-        output_fields[timestamp_index] = DFField::new(
-            timestamp_qualifier.clone(),
-            TIMESTAMP_FIELD,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            false,
-        );
-        output_fields.push((timestamp_qualifier.clone(), updating_meta_field()).into());
-        let schema = Arc::new(schema_from_df_fields(&output_fields).unwrap());
-        Self {
-            input,
-            schema,
-            timestamp_qualifier,
-        }
-    }
-}
-
-impl UserDefinedLogicalNodeCore for IsRetractExtension {
-    fn name(&self) -> &str {
-        "IsRetractExtension"
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "IsRetractExtension")
-    }
-
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self::new(
-            inputs[0].clone(),
-            self.timestamp_qualifier.clone(),
-        ))
-    }
-}
-
-impl UserDefinedLogicalNodeCore for AsyncUDFExtension {
-    fn name(&self) -> &str {
-        "AsyncUDFNode"
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.final_schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        self.arg_exprs
-            .iter()
-            .chain(self.final_exprs.iter())
-            .map(|e| e.to_owned())
-            .collect()
-    }
-
-    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "AsyncUdfExtension<{}>: {}", self.name, self.final_schema)
-    }
+pub(crate) mod async_udf;
+pub(crate) use async_udf::AsyncUDFExtension;
 
-    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
-        }
-        if UserDefinedLogicalNode::expressions(self) != exprs {
-            return internal_err!("Tried to recreate async UDF node with different expressions");
-        }
+pub(crate) mod is_retract;
+pub(crate) use is_retract::IsRetractExtension;
 
-        Ok(Self {
-            input: Arc::new(inputs[0].clone()),
-            name: self.name.clone(),
-            udf: self.udf.clone(),
-            arg_exprs: self.arg_exprs.clone(),
-            final_exprs: self.final_exprs.clone(),
-            ordered: self.ordered,
-            max_concurrency: self.max_concurrency,
-            timeout: self.timeout,
-            final_schema: self.final_schema.clone(),
-        })
-    }
-}
+mod extension_try_from;
diff --git a/src/sql/extensions/projection.rs b/src/sql/extensions/projection.rs
index fa0f118b..ff319d12 100644
--- a/src/sql/extensions/projection.rs
+++ b/src/sql/extensions/projection.rs
@@ -13,7 +13,7 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNod
 use crate::multifield_partial_ord;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::{schema_from_df_fields, DFField};
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 
 pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension";
 
diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs
index 91ef4d0e..570a3393 100644
--- a/src/sql/extensions/remote_table.rs
+++ b/src/sql/extensions/remote_table.rs
@@ -10,7 +10,7 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNod
 use crate::multifield_partial_ord;
 use crate::sql::logical_planner::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 use super::{StreamExtension, NodeWithIncomingEdges};
 
 pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension";
diff --git a/src/sql/extensions/sink.rs b/src/sql/extensions/sink.rs
index 7b58a7b4..a1112c4b 100644
--- a/src/sql/extensions/sink.rs
+++ b/src/sql/extensions/sink.rs
@@ -9,7 +9,7 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNod
 use crate::multifield_partial_ord;
 use crate::sql::schema::Table;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
+use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
 use super::{
     StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension,
     remote_table::RemoteTableExtension,
diff --git a/src/sql/extensions/stream_extension.rs b/src/sql/extensions/stream_extension.rs
new file mode 100644
index 00000000..76954529
--- /dev/null
+++ b/src/sql/extensions/stream_extension.rs
@@ -0,0 +1,38 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Debug;
+
+use datafusion::common::Result;
+
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalNode};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::common::{FsSchema, FsSchemaRef};
+
+pub(crate) trait StreamExtension: Debug {
+    fn node_name(&self) -> Option<NamedNode>;
+    fn plan_node(
+        &self,
+        planner: &Planner,
+        index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges>;
+    fn output_schema(&self) -> FsSchema;
+    fn transparent(&self) -> bool {
+        false
+    }
+}
+
+pub(crate) struct NodeWithIncomingEdges {
+    pub node: LogicalNode,
+    pub edges: Vec<LogicalEdge>,
+}
diff --git a/src/sql/extensions/table_source.rs b/src/sql/extensions/table_source.rs
index bdf470e2..0b069bbf 100644
--- a/src/sql/extensions/table_source.rs
+++ b/src/sql/extensions/table_source.rs
@@ -12,7 +12,7 @@ use crate::sql::schema::utils::add_timestamp_field;
 use crate::sql::extensions::debezium::DebeziumUnrollingExtension;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::schema_from_df_fields;
-use crate::types::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
+use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
 use super::{
     StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension,
     remote_table::RemoteTableExtension,
diff --git a/src/sql/extensions/timestamp_append.rs b/src/sql/extensions/timestamp_append.rs
new file mode 100644
index 00000000..069b288a
--- /dev/null
+++ b/src/sql/extensions/timestamp_append.rs
@@ -0,0 +1,80 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::common::{DFSchemaRef, Result, TableReference};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+
+use crate::multifield_partial_ord;
+use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field};
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct TimestampAppendExtension {
+    pub(crate) input: LogicalPlan,
+    pub(crate) qualifier: Option<TableReference>,
+    pub(crate) schema: DFSchemaRef,
+}
+
+impl TimestampAppendExtension {
+    pub(crate) fn new(input: LogicalPlan, qualifier: Option<TableReference>) -> Self {
+        if has_timestamp_field(input.schema()) {
+            unreachable!(
+                "shouldn't be adding timestamp to a plan that already has it: plan :\n {:?}\n schema: {:?}",
+                input,
+                input.schema()
+            );
+        }
+        let schema = add_timestamp_field(input.schema().clone(), qualifier.clone()).unwrap();
+        Self {
+            input,
+            qualifier,
+            schema,
+        }
+    }
+}
+
+multifield_partial_ord!(TimestampAppendExtension, input, qualifier);
+
+impl UserDefinedLogicalNodeCore for TimestampAppendExtension {
+    fn name(&self) -> &str {
+        "TimestampAppendExtension"
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.input]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "TimestampAppendExtension({:?}): {}",
+            self.qualifier,
+            self.schema
+                .fields()
+                .iter()
+                .map(|f| f.name().to_string())
+                .collect::<Vec<_>>()
+                .join(", ")
+        )
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self::new(inputs[0].clone(), self.qualifier.clone()))
+    }
+}
diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs
index fdb2bb1d..8220945b 100644
--- a/src/sql/extensions/updating_aggregate.rs
+++ b/src/sql/extensions/updating_aggregate.rs
@@ -16,7 +16,7 @@ use crate::sql::functions::multi_hash;
 use crate::sql::logical_planner::FsPhysicalExtensionCodec;
 use crate::sql::extensions::{IsRetractExtension, NodeWithIncomingEdges, StreamExtension};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 
 pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension";
 
diff --git a/src/sql/extensions/watermark_node.rs b/src/sql/extensions/watermark_node.rs
index f13b3472..5ef8aa49 100644
--- a/src/sql/extensions/watermark_node.rs
+++ b/src/sql/extensions/watermark_node.rs
@@ -12,7 +12,7 @@ use crate::multifield_partial_ord;
 use crate::sql::schema::utils::add_timestamp_field;
 use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 
 pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode";
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
diff --git a/src/sql/extensions/window_fn.rs b/src/sql/extensions/window_fn.rs
index 1c8b5687..c2594546 100644
--- a/src/sql/extensions/window_fn.rs
+++ b/src/sql/extensions/window_fn.rs
@@ -10,7 +10,7 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNod
 use crate::sql::logical_planner::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::TIMESTAMP_FIELD;
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 use super::{ NodeWithIncomingEdges, StreamExtension};
 
 pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension";
diff --git a/src/sql/logical_node/logical.rs b/src/sql/logical_node/logical.rs
index 13560a3e..9fa139d1 100644
--- a/src/sql/logical_node/logical.rs
+++ b/src/sql/logical_node/logical.rs
@@ -11,7 +11,7 @@ use datafusion_proto::protobuf::ArrowType;
 use prost::Message;
 use strum::{Display, EnumString};
 use protocol::grpc::api;
-use crate::types::FsSchema;
+use crate::sql::common::FsSchema;
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
 pub enum OperatorName {
diff --git a/src/sql/logical_planner/compiled_sql.rs b/src/sql/logical_planner/compiled_sql.rs
new file mode 100644
index 00000000..e0525097
--- /dev/null
+++ b/src/sql/logical_planner/compiled_sql.rs
@@ -0,0 +1,21 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::sql::logical_node::logical::LogicalProgram;
+
+// ── Compilation pipeline ──────────────────────────────────────────────
+/// A compiled SQL artifact: the [`LogicalProgram`] together with its `connection_ids`.
+#[derive(Clone, Debug)]
+pub struct CompiledSql {
+    pub program: LogicalProgram,
+    pub connection_ids: Vec<i64>,
+}
diff --git a/src/sql/logical_planner/mod.rs b/src/sql/logical_planner/mod.rs
index e4db07a0..8b7d9e76 100644
--- a/src/sql/logical_planner/mod.rs
+++ b/src/sql/logical_planner/mod.rs
@@ -31,7 +31,7 @@ use crate::make_udf_function;
 use crate::sql::functions::MultiHashFunction;
 use crate::sql::analysis::UNNESTED_COL;
 use crate::sql::schema::utils::window_arrow_struct;
-use crate::types::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
+use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
 use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type};
 use datafusion::catalog::memory::MemorySourceConfig;
 use datafusion::datasource::memory::DataSourceExec;
@@ -56,9 +56,12 @@ use std::fmt::Debug;
 use tokio::sync::mpsc::UnboundedReceiver;
 use tokio_stream::wrappers::UnboundedReceiverStream;
 
+pub mod compiled_sql;
 pub(crate) mod planner;
 pub mod optimizers;
 
+pub use compiled_sql::CompiledSql;
+
 // ─────────────────── Updating Meta Helpers ───────────────────
 
 pub fn updating_meta_fields() -> Fields {
diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs
index 150b86f1..45d373c3 100644
--- a/src/sql/logical_planner/planner.rs
+++ b/src/sql/logical_planner/planner.rs
@@ -41,7 +41,7 @@ use crate::sql::extensions::key_calculation::KeyCalculationExtension;
 use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
 use crate::sql::schema::utils::add_timestamp_field_arrow;
 use crate::sql::schema::StreamSchemaProvider;
-use crate::types::{FsSchema, FsSchemaRef};
+use crate::sql::common::{FsSchema, FsSchemaRef};
 
 #[derive(Eq, Hash, PartialEq)]
 #[derive(Debug)]
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index be44d979..32c0dce9 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -10,6 +10,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+pub mod common;
+pub mod api;
+
 pub mod schema;
 pub mod functions;
 pub mod parse;
@@ -22,4 +25,4 @@ pub mod types;
 pub use schema::StreamSchemaProvider;
 pub use parse::parse_sql;
 pub use analysis::rewrite_plan;
-pub use analysis::{CompiledSql};
+pub use logical_planner::CompiledSql;
diff --git a/src/sql/parse.rs b/src/sql/parse.rs
index bdb4d481..1b4be38a 100644
--- a/src/sql/parse.rs
+++ b/src/sql/parse.rs
@@ -23,11 +23,6 @@ use crate::coordinator::{
     Statement as CoordinatorStatement, StopFunction, StreamingTableStatement,
 };
 
-/// Stage 1: String → Vec<Box<dyn Statement>>
-///
-/// Parses SQL using FunctionStreamDialect (from sqlparser-rs), then classifies
-/// each statement into a concrete coordinator Statement type.
-/// A single SQL input may contain multiple statements (separated by `;`).
 pub fn parse_sql(query: &str) -> Result<Vec<Box<dyn CoordinatorStatement>>> {
     let trimmed = query.trim();
     if trimmed.is_empty() {
diff --git a/src/sql/types/data_type.rs b/src/sql/types/data_type.rs
index 57edc3c9..66076da3 100644
--- a/src/sql/types/data_type.rs
+++ b/src/sql/types/data_type.rs
@@ -5,7 +5,7 @@ use datafusion::arrow::datatypes::{
 };
 use datafusion::common::{Result, plan_datafusion_err, plan_err};
 
-use crate::types::FsExtensionType;
+use crate::sql::common::FsExtensionType;
 
 pub fn convert_data_type(
     sql_type: &datafusion::sql::sqlparser::ast::DataType,

From d647ea15f9b749e75f0b8e2ecda56f5f722c8c57 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sat, 21 Mar 2026 22:23:02 +0800
Subject: [PATCH 09/44] feat(coordinator): accept a StreamSchemaProvider and add CREATE STREAMING TABLE tests

---
 src/coordinator/coordinator.rs               | 378 ++++++++-
 src/coordinator/plan/logical_plan_visitor.rs |  76 +-
 src/coordinator/statement/streaming_table.rs |   7 +-
 src/sql/extensions/remote_table.rs           |  90 ++-
 src/sql/frontend_sql_coverage_tests.rs       | 807 +++++++++++++++++++
 src/sql/logical_planner/planner.rs           |   1 -
 src/sql/mod.rs                               |   3 +
 src/sql/parse.rs                             |  69 +-
 8 files changed, 1370 insertions(+), 61 deletions(-)
 create mode 100644 src/sql/frontend_sql_coverage_tests.rs

diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs
index 8dc55c4d..0ddca660 100644
--- a/src/coordinator/coordinator.rs
+++ b/src/coordinator/coordinator.rs
@@ -37,12 +37,28 @@ impl Coordinator {
         Self {}
     }
 
-    pub fn execute(&self, stmt: &dyn Statement) -> ExecuteResult {
+    /// Runs the analyze, logical-plan and optimize steps for `stmt`; nothing is executed.
+    pub fn compile_plan(
+        &self,
+        stmt: &dyn Statement,
+        schema_provider: StreamSchemaProvider,
+    ) -> Result<Box<dyn PlanNode>, anyhow::Error> {
+        let context = ExecutionContext::new();
+        let analysis = self.step_analyze(&context, stmt)?;
+        self.step_optimize(&analysis, self.step_build_logical_plan(&analysis, schema_provider)?)
+    }
+
+    /// Same as [`Self::execute`], but uses the provided catalog / stream tables (e.g. tests).
+    pub fn execute_with_schema_provider(
+        &self,
+        stmt: &dyn Statement,
+        schema_provider: StreamSchemaProvider,
+    ) -> ExecuteResult {
         let start_time = Instant::now();
         let context = ExecutionContext::new();
         let execution_id = context.execution_id;
 
-        match self.execute_pipeline(&context, stmt) {
+        match self.execute_pipeline(&context, stmt, schema_provider) {
             Ok(result) => {
                 log::debug!(
                     "[{}] Execution completed in {}ms",
@@ -63,13 +79,18 @@ impl Coordinator {
         }
     }
 
+    pub fn execute(&self, stmt: &dyn Statement) -> ExecuteResult {
+        self.execute_with_schema_provider(stmt, StreamSchemaProvider::new())
+    }
+
     fn execute_pipeline(
         &self,
         context: &ExecutionContext,
         stmt: &dyn Statement,
+        schema_provider: StreamSchemaProvider,
     ) -> Result<ExecuteResult> {
         let analysis = self.step_analyze(context, stmt)?;
-        let plan = self.step_build_logical_plan(&analysis)?;
+        let plan = self.step_build_logical_plan(&analysis, schema_provider)?;
         let optimized_plan = self.step_optimize(&analysis, plan)?;
         self.step_execute(optimized_plan)
     }
@@ -90,8 +111,11 @@ impl Coordinator {
         result
     }
 
-    fn step_build_logical_plan(&self, analysis: &Analysis) -> Result<Box<dyn PlanNode>> {
-        let schema_provider = StreamSchemaProvider::new();
+    fn step_build_logical_plan(
+        &self,
+        analysis: &Analysis,
+        schema_provider: StreamSchemaProvider,
+    ) -> Result<Box<dyn PlanNode>> {
         let visitor = LogicalPlanVisitor::new(schema_provider);
         let plan = visitor.visit(analysis);
         Ok(plan)
@@ -137,3 +161,347 @@ impl Coordinator {
         result
     }
 }
+
+#[cfg(test)]
+mod create_streaming_table_coordinator_tests {
+    use std::sync::Arc;
+
+    use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+
+    use crate::sql::common::TIMESTAMP_FIELD;
+    use crate::sql::parse::parse_sql;
+    use crate::sql::schema::StreamSchemaProvider;
+
+    use super::Coordinator;
+
+    fn fake_stream_schema_provider() -> StreamSchemaProvider {
+        let mut provider = StreamSchemaProvider::new();
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int64, false),
+            Field::new(
+                TIMESTAMP_FIELD,
+                DataType::Timestamp(TimeUnit::Nanosecond, None),
+                false,
+            ),
+        ]));
+        provider.add_source_table(
+            "src".to_string(),
+            schema,
+            Some(TIMESTAMP_FIELD.to_string()),
+            None,
+        );
+        provider
+    }
+
+    fn fake_stream_schema_provider_with_v() -> StreamSchemaProvider {
+        let mut provider = StreamSchemaProvider::new();
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int64, false),
+            Field::new("v", DataType::Utf8, true),
+            Field::new(
+                TIMESTAMP_FIELD,
+                DataType::Timestamp(TimeUnit::Nanosecond, None),
+                false,
+            ),
+        ]));
+        provider.add_source_table(
+            "src".to_string(),
+            schema,
+            Some(TIMESTAMP_FIELD.to_string()),
+            None,
+        );
+        provider
+    }
+
+    fn fake_src_dim_provider() -> StreamSchemaProvider {
+        let mut provider = fake_stream_schema_provider_with_v();
+        let dim = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int64, false),
+            Field::new("name", DataType::Utf8, true),
+            Field::new("amt", DataType::Float64, true),
+            Field::new(
+                TIMESTAMP_FIELD,
+                DataType::Timestamp(TimeUnit::Nanosecond, None),
+                false,
+            ),
+        ]));
+        provider.add_source_table(
+            "dim".to_string(),
+            dim,
+            Some(TIMESTAMP_FIELD.to_string()),
+            None,
+        );
+        provider
+    }
+
+    fn assert_coordinator_streaming_build_ok(
+        sql: &str,
+        provider: StreamSchemaProvider,
+        expect_sink_substring: &str,
+        expect_connector_substring: &str,
+    ) {
+        let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
+        assert_eq!(stmts.len(), 1);
+        let plan = Coordinator::new()
+            .compile_plan(stmts[0].as_ref(), provider)
+            .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}"));
+        let rendered = format!("{plan:?}");
+        assert!(rendered.contains("StreamingTable"), "{rendered}");
+        assert!(
+            rendered.contains(expect_sink_substring),
+            "expected sink name fragment {expect_sink_substring:?} in:\n{rendered}"
+        );
+        assert!(
+            rendered.contains(expect_connector_substring),
+            "expected connector fragment {expect_connector_substring:?} in:\n{rendered}"
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_select_star_kafka() {
+        assert_coordinator_streaming_build_ok(
+            concat!(
+                "CREATE STREAMING TABLE my_sink ",
+                "WITH ('connector' = 'kafka') ",
+                "AS SELECT * FROM src",
+            ),
+            fake_stream_schema_provider(),
+            "my_sink",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_memory_connector() {
+        assert_coordinator_streaming_build_ok(
+            "CREATE STREAMING TABLE mem_out WITH ('connector'='memory') AS SELECT * FROM src",
+            fake_stream_schema_provider(),
+            "mem_out",
+            "memory",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_postgres_connector() {
+        assert_coordinator_streaming_build_ok(
+            "CREATE STREAMING TABLE pg_out WITH ('connector'='postgres') AS SELECT id FROM src",
+            fake_stream_schema_provider(),
+            "pg_out",
+            "postgres",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_partition_by_and_idle_time() {
+        assert_coordinator_streaming_build_ok(
+            concat!(
+                "CREATE STREAMING TABLE part_idle ",
+                "WITH ('connector'='kafka', 'partition_by'='id', 'idle_time'='30 seconds') ",
+                "AS SELECT * FROM src",
+            ),
+            fake_stream_schema_provider(),
+            "part_idle",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_project_timestamp_columns() {
+        let sql = format!(
+            "CREATE STREAMING TABLE ts_cols WITH ('connector'='kafka') AS SELECT id, {ts} FROM src",
+            ts = TIMESTAMP_FIELD
+        );
+        assert_coordinator_streaming_build_ok(
+            &sql,
+            fake_stream_schema_provider(),
+            "ts_cols",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_where_filters() {
+        let p = fake_stream_schema_provider_with_v();
+        for (label, body) in [
+            ("eq", "SELECT * FROM src WHERE id = 1"),
+            ("range", "SELECT * FROM src WHERE id > 0 AND id < 100"),
+            ("in_list", "SELECT * FROM src WHERE id IN (1, 2, 3)"),
+            ("between", "SELECT * FROM src WHERE id BETWEEN 1 AND 10"),
+            ("like", "SELECT * FROM src WHERE v LIKE 'a%'"),
+            ("null", "SELECT * FROM src WHERE v IS NULL"),
+        ] {
+            let sql = format!(
+                "CREATE STREAMING TABLE sink_w_{label} WITH ('connector'='kafka') AS {body}"
+            );
+            assert_coordinator_streaming_build_ok(&sql, p.clone(), &format!("sink_w_{label}"), "kafka");
+        }
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_case_coalesce_cast() {
+        let ts = TIMESTAMP_FIELD;
+        let sql = format!(
+            "CREATE STREAMING TABLE sink_expr WITH ('connector'='kafka') AS \
+             SELECT CASE WHEN id < 0 THEN 0 ELSE id END AS c, COALESCE(v, 'x') AS v2, \
+             CAST(id AS DOUBLE) AS id_f, {ts} FROM src"
+        );
+        assert_coordinator_streaming_build_ok(
+            &sql,
+            fake_stream_schema_provider_with_v(),
+            "sink_expr",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_row_time_projection() {
+        let ts = TIMESTAMP_FIELD;
+        let sql = format!(
+            "CREATE STREAMING TABLE sink_rt WITH ('connector'='kafka') AS \
+             SELECT row_time(), id, {ts} FROM src"
+        );
+        assert_coordinator_streaming_build_ok(
+            &sql,
+            fake_stream_schema_provider(),
+            "sink_rt",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_scalar_funcs_projection() {
+        let ts = TIMESTAMP_FIELD;
+        let sql = format!(
+            "CREATE STREAMING TABLE sink_scalar WITH ('connector'='kafka') AS \
+             SELECT ABS(id), UPPER(v), LOWER(v), BTRIM(v), CHARACTER_LENGTH(v), {ts} FROM src"
+        );
+        assert_coordinator_streaming_build_ok(
+            &sql,
+            fake_stream_schema_provider_with_v(),
+            "sink_scalar",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_cte() {
+        let ts = TIMESTAMP_FIELD;
+        let sql = format!(
+            "CREATE STREAMING TABLE sink_cte WITH ('connector'='kafka') AS \
+             WITH t AS (SELECT id, {ts} FROM src WHERE id > 0) SELECT * FROM t"
+        );
+        assert_coordinator_streaming_build_ok(
+            &sql,
+            fake_stream_schema_provider(),
+            "sink_cte",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_cte_chain() {
+        let sql = "CREATE STREAMING TABLE sink_cte2 WITH ('connector'='kafka') AS \
+             WITH a AS (SELECT id FROM src), b AS (SELECT id FROM a WHERE id > 1) SELECT * FROM b";
+        assert_coordinator_streaming_build_ok(
+            sql,
+            fake_stream_schema_provider(),
+            "sink_cte2",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_sink_name_with_digits() {
+        assert_coordinator_streaming_build_ok(
+            "CREATE STREAMING TABLE out_sink_01 WITH ('connector'='kafka') AS SELECT * FROM src",
+            fake_stream_schema_provider(),
+            "out_sink_01",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_subquery_in_from() {
+        let ts = TIMESTAMP_FIELD;
+        let sql = format!(
+            "CREATE STREAMING TABLE sink_sq WITH ('connector'='kafka') AS \
+             SELECT * FROM (SELECT id, {ts} FROM src WHERE id >= 0) AS x"
+        );
+        assert_coordinator_streaming_build_ok(
+            &sql,
+            fake_stream_schema_provider(),
+            "sink_sq",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_nested_subqueries() {
+        let sql = "CREATE STREAMING TABLE sink_nest WITH ('connector'='kafka') AS \
+             SELECT * FROM (SELECT * FROM (SELECT id FROM src) AS i2) AS i1";
+        assert_coordinator_streaming_build_ok(
+            sql,
+            fake_stream_schema_provider(),
+            "sink_nest",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_union_all() {
+        let ts = TIMESTAMP_FIELD;
+        let sql = format!(
+            "CREATE STREAMING TABLE sink_union WITH ('connector'='kafka') AS \
+             SELECT id, v, {ts} FROM src \
+             UNION ALL \
+             SELECT id, name AS v, {ts} FROM dim"
+        );
+        assert_coordinator_streaming_build_ok(
+            &sql,
+            fake_src_dim_provider(),
+            "sink_union",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_nullif_regexp() {
+        let ts = TIMESTAMP_FIELD;
+        let sql = format!(
+            "CREATE STREAMING TABLE sink_re WITH ('connector'='kafka') AS \
+             SELECT id, NULLIF(v, ''), REGEXP_LIKE(v, '^x'), {ts} FROM src"
+        );
+        assert_coordinator_streaming_build_ok(
+            &sql,
+            fake_stream_schema_provider_with_v(),
+            "sink_re",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_build_create_streaming_table_not_and_or_where() {
+        let p = fake_stream_schema_provider_with_v();
+        assert_coordinator_streaming_build_ok(
+            "CREATE STREAMING TABLE sink_bool WITH ('connector'='kafka') AS \
+             SELECT * FROM src WHERE NOT (id = 0) AND (v IS NOT NULL OR id > 0)",
+            p,
+            "sink_bool",
+            "kafka",
+        );
+    }
+
+    #[test]
+    fn coordinator_sql_create_streaming_table_compiles_full_pipeline() {
+        assert_coordinator_streaming_build_ok(
+            concat!(
+                "CREATE STREAMING TABLE my_sink ",
+                "WITH ('connector' = 'kafka') ",
+                "AS SELECT * FROM src",
+            ),
+            fake_stream_schema_provider(),
+            "my_sink",
+            "kafka",
+        );
+    }
+}
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 1daf5a16..93f8776a 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -49,7 +49,6 @@ const CONNECTOR: &str = "connector";
 const PARTITION_BY: &str = "partition_by";
 const IDLE_MICROS: &str = "idle_time";
 
-/// Convert `WITH` option list to a key-value map (e.g. connector settings).
 fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap<String, String> {
     options
         .iter()
@@ -83,8 +82,6 @@ impl LogicalPlanVisitor {
             _ => panic!("LogicalPlanVisitor should return Plan"),
         }
     }
-    /// Builds the logical plan for 'CREATE STREAMING TABLE'.
-    /// This orchestrates the transformation from a SQL Query to a stateful Sink.
     fn build_create_streaming_table_plan(
         &self,
         stmt: &StreamingTableStatement,
@@ -102,8 +99,6 @@ impl LogicalPlanVisitor {
         let table_name = name.to_string();
         debug!("Compiling Streaming Table Sink for: {}", table_name);
 
-        // 1. Connector Options Extraction
-        // Extract 'connector' (Kafka, Postgres, etc.) and other physical properties.
         let mut opts = ConnectorOptions::new(with_options, &None)?;
         let connector = opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| {
             plan_datafusion_err!(
@@ -113,14 +108,10 @@ impl LogicalPlanVisitor {
             )
         })?;
 
-        // 2. Query Optimization & Streaming Rewrite
-        // Convert the standard SQL query into a streaming-aware logical plan.
         let base_plan =
             produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?;
         let mut plan = rewrite_plan(base_plan, &self.schema_provider)?;
 
-        // 3. Outgoing Data Serialization
-        // If the query produces internal types (like JSON Union), inject a serialization layer.
         if plan
             .schema()
             .fields()
@@ -130,11 +121,8 @@ impl LogicalPlanVisitor {
             plan = serialize_outgoing_json(&self.schema_provider, Arc::new(plan));
         }
 
-        // 4. Sink Metadata & Partitioning Logic
-        // Determine how data should be partitioned before hitting the external system.
         let partition_exprs = self.resolve_partition_expressions(&mut opts)?;
 
-        // Map DataFusion fields to Arroyo FieldSpecs for the connector.
         let fields: Vec<FieldSpec> = plan
             .schema()
             .fields()
@@ -142,28 +130,24 @@ impl LogicalPlanVisitor {
             .map(|f| FieldSpec::Struct((**f).clone()))
             .collect();
 
-        // 5. Connector Table Construction
-        // This object acts as the 'Identity Card' for the Sink in the physical cluster.
         let connector_table = ConnectorTable {
             id: None,
             connector,
             name: table_name.clone(),
             connection_type: ConnectionType::Sink,
             fields,
-            config: "".to_string(), // Filled by the coordinator later
+            config: "".to_string(),
             description: comment.clone().unwrap_or_default(),
             event_time_field: None,
             watermark_field: None,
             idle_time: opts.pull_opt_duration(IDLE_MICROS)?,
-            primary_keys: Arc::new(vec![]), // PKs are inferred or explicitly set here
+            primary_keys: Arc::new(vec![]),
             inferred_fields: None,
             partition_exprs: Arc::new(partition_exprs),
             lookup_cache_ttl:None,
             lookup_cache_max_bytes:None,
         };
 
-        // 6. Sink Extension & Final Rewrites
-        // Wrap the plan in a SinkExtension and ensure Key/Partition alignment.
         let sink_extension = SinkExtension::new(
             TableReference::bare(table_name.clone()),
             Table::ConnectorTable(connector_table.clone()),
@@ -171,12 +155,10 @@ impl LogicalPlanVisitor {
             Arc::new(plan),
         )?;
 
-        // Ensure the data distribution matches the Sink's requirements (e.g., Shuffle by Partition Key)
         let plan_with_keys = maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension {
             node: Arc::new(sink_extension),
         }))?;
 
-        // Global pass to wire inputs and handle shared sub-plans
         let final_extensions = rewrite_sinks(vec![plan_with_keys])?;
         let final_plan = final_extensions.into_iter().next().unwrap();
 
@@ -328,3 +310,57 @@ impl StatementVisitor for LogicalPlanVisitor {
         }
     }
 }
+
+#[cfg(test)]
+mod create_streaming_table_tests {
+    use std::sync::Arc;
+
+    use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+    use datafusion::sql::sqlparser::ast::Statement as DFStatement;
+    use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
+    use datafusion::sql::sqlparser::parser::Parser;
+
+    use crate::sql::common::TIMESTAMP_FIELD;
+    use crate::sql::rewrite_plan;
+    use crate::sql::schema::optimizer::produce_optimized_plan;
+    use crate::sql::schema::StreamSchemaProvider;
+
+    fn schema_provider_with_src() -> StreamSchemaProvider {
+        let mut provider = StreamSchemaProvider::new();
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int64, false),
+            Field::new(
+                TIMESTAMP_FIELD,
+                DataType::Timestamp(TimeUnit::Nanosecond, None),
+                false,
+            ),
+        ]));
+        provider.add_source_table(
+            "src".to_string(),
+            schema,
+            Some(TIMESTAMP_FIELD.to_string()),
+            None,
+        );
+        provider
+    }
+
+    #[test]
+    fn create_streaming_table_query_plans_and_rewrites() {
+        let sql =
+            "CREATE STREAMING TABLE my_sink WITH ('connector' = 'kafka') AS SELECT * FROM src";
+        let dialect = FunctionStreamDialect {};
+        let ast = Parser::parse_sql(&dialect, sql).expect("parse CREATE STREAMING TABLE");
+        let DFStatement::CreateStreamingTable { query, .. } = &ast[0] else {
+            panic!("expected CreateStreamingTable, got {:?}", ast[0]);
+        };
+        let provider = schema_provider_with_src();
+        let base = produce_optimized_plan(&DFStatement::Query(query.clone()), &provider)
+            .expect("produce optimized logical plan for sink query");
+        let rewritten = rewrite_plan(base, &provider).expect("streaming rewrite_plan");
+        let dot = format!("{}", rewritten.display_graphviz());
+        assert!(
+            dot.contains("src") || dot.contains("Src"),
+            "rewritten plan should reference source; got subgraph:\n{dot}"
+        );
+    }
+}
diff --git a/src/coordinator/statement/streaming_table.rs b/src/coordinator/statement/streaming_table.rs
index 48fd25e9..86ec1a85 100644
--- a/src/coordinator/statement/streaming_table.rs
+++ b/src/coordinator/statement/streaming_table.rs
@@ -14,11 +14,10 @@ use datafusion::sql::sqlparser::ast::Statement as DFStatement;
 
 use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
 
-/// Represents an INSERT INTO or standalone SELECT/query that creates a streaming table/pipeline.
+/// Wrapper for **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (parsed AST).
 ///
-/// In the streaming SQL context, both INSERT INTO (writing to a sink)
-/// and standalone SELECT (anonymous computation) are treated as
-/// data-producing operations that create/feed into the streaming pipeline.
+/// The coordinator `parse_sql` frontend does **not** support `INSERT`; streaming sinks are
+/// defined only via **`CREATE STREAMING TABLE`** (and regular tables via **`CREATE TABLE`**).
 #[derive(Debug)]
 pub struct StreamingTableStatement {
     pub statement: DFStatement,
diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs
index 570a3393..0bd2706f 100644
--- a/src/sql/extensions/remote_table.rs
+++ b/src/sql/extensions/remote_table.rs
@@ -28,6 +28,67 @@ pub(crate) struct RemoteTableExtension {
 
 multifield_partial_ord!(RemoteTableExtension, input, name, materialize);
 
+impl RemoteTableExtension {
+    /// Builds the `ArrowValue` logical node that evaluates this extension's input plan.
+    ///
+    /// The input is planned with `Planner::sync_plan`, encoded through
+    /// `FsPhysicalExtensionCodec`, and embedded in a `ValuePlanOperator` config.
+    /// Both planning entry points share this so the emitted operator cannot drift apart.
+    fn build_value_node(
+        planner: &Planner,
+        index: usize,
+        this: &RemoteTableExtension,
+    ) -> Result<LogicalNode> {
+        let physical_plan = planner.sync_plan(&this.input)?;
+        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
+            physical_plan,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+        let config = ValuePlanOperator {
+            name: format!("value_calculation({})", this.name),
+            physical_plan: physical_plan_node.encode_to_vec(),
+        };
+        Ok(LogicalNode::single(
+            index as u32,
+            format!("value_{index}"),
+            OperatorName::ArrowValue,
+            config.encode_to_vec(),
+            this.name.to_string(),
+            1,
+        ))
+    }
+
+    /// Plans the node without any incoming edges.
+    ///
+    /// Used by the `StreamExtension` implementation when no input schemas are supplied.
+    fn plan_node_inlined(
+        planner: &Planner,
+        index: usize,
+        this: &RemoteTableExtension,
+    ) -> Result<NodeWithIncomingEdges> {
+        Ok(NodeWithIncomingEdges {
+            node: Self::build_value_node(planner, index, this)?,
+            edges: vec![],
+        })
+    }
+
+    /// Plans the node with one incoming edge per input schema.
+    ///
+    /// Each edge is a `Forward` edge built with `LogicalEdge::project_all` from its schema.
+    fn plan_node_with_edges(
+        planner: &Planner,
+        index: usize,
+        this: &RemoteTableExtension,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<NodeWithIncomingEdges> {
+        let node = Self::build_value_node(planner, index, this)?;
+        let edges = input_schemas
+            .into_iter()
+            .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone()))
+            .collect();
+        Ok(NodeWithIncomingEdges { node, edges })
+    }
+}
 impl StreamExtension for RemoteTableExtension {
     fn node_name(&self) -> Option<NamedNode> {
         if self.materialize {
@@ -44,10 +105,11 @@ impl StreamExtension for RemoteTableExtension {
         input_schemas: Vec<FsSchemaRef>,
     ) -> Result<NodeWithIncomingEdges> {
         match input_schemas.len() {
-            0 => return plan_err!("RemoteTableExtension should have exactly one input"),
+            0 => {
+                return Self::plan_node_inlined(planner, index, self);
+            }
             1 => {}
             _multiple_inputs => {
-                // check they are all the same
                 let first = input_schemas[0].clone();
                 for schema in input_schemas.iter().skip(1) {
                     if *schema != first {
@@ -58,29 +120,7 @@ impl StreamExtension for RemoteTableExtension {
                 }
             }
         }
-        let physical_plan = planner.sync_plan(&self.input)?;
-        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
-            physical_plan,
-            &FsPhysicalExtensionCodec::default(),
-        )?;
-        let config = ValuePlanOperator {
-            name: format!("value_calculation({})", self.name),
-            physical_plan: physical_plan_node.encode_to_vec(),
-        };
-        let node = LogicalNode::single(
-            index as u32,
-            format!("value_{index}"),
-            OperatorName::ArrowValue,
-            config.encode_to_vec(),
-            self.name.to_string(),
-            1,
-        );
-
-        let edges = input_schemas
-            .into_iter()
-            .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone()))
-            .collect();
-        Ok(NodeWithIncomingEdges { node, edges })
+        Self::plan_node_with_edges(planner, index, self, input_schemas)
     }
 
     fn output_schema(&self) -> FsSchema {
diff --git a/src/sql/frontend_sql_coverage_tests.rs b/src/sql/frontend_sql_coverage_tests.rs
new file mode 100644
index 00000000..fa730614
--- /dev/null
+++ b/src/sql/frontend_sql_coverage_tests.rs
@@ -0,0 +1,807 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! SQL parse and streaming-related tests.
+
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+use datafusion::sql::sqlparser::ast::Statement as DFStatement;
+use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
+use datafusion::sql::sqlparser::parser::Parser;
+
+use crate::coordinator::Coordinator;
+use crate::sql::common::TIMESTAMP_FIELD;
+use crate::sql::parse::parse_sql;
+use crate::sql::rewrite_plan;
+use crate::sql::schema::optimizer::produce_optimized_plan;
+use crate::sql::schema::StreamSchemaProvider;
+
+fn assert_parses_as(sql: &str, type_prefix: &str) {
+    let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse failed for {sql:?}: {e}"));
+    assert!(!stmts.is_empty(), "{sql}");
+    let dbg = format!("{:?}", stmts[0]);
+    assert!(
+        dbg.starts_with(type_prefix),
+        "sql={sql:?} expected prefix {type_prefix}, got {dbg}"
+    );
+}
+
+fn assert_parse_fails(sql: &str) {
+    assert!(
+        parse_sql(sql).is_err(),
+        "expected parse/classify failure for {sql:?}"
+    );
+}
+
+fn fake_src_stream_provider() -> StreamSchemaProvider {
+    let mut provider = StreamSchemaProvider::new();
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int64, false),
+        Field::new("v", DataType::Utf8, true),
+        Field::new(
+            TIMESTAMP_FIELD,
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            false,
+        ),
+    ]));
+    provider.add_source_table(
+        "src".to_string(),
+        schema,
+        Some(TIMESTAMP_FIELD.to_string()),
+        None,
+    );
+    provider
+}
+
+fn compile_first(coordinator: &Coordinator, sql: &str, provider: StreamSchemaProvider) {
+    let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
+    coordinator
+        .compile_plan(stmts[0].as_ref(), provider)
+        .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}"));
+}
+
+fn compile_first_streaming(sql: &str) {
+    compile_first(
+        &Coordinator::new(),
+        sql,
+        fake_src_stream_provider(),
+    );
+}
+
+fn fake_src_dim_stream_provider() -> StreamSchemaProvider {
+    let mut provider = fake_src_stream_provider();
+    let dim_schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int64, false),
+        Field::new("name", DataType::Utf8, true),
+        Field::new("amt", DataType::Float64, true),
+        Field::new(
+            TIMESTAMP_FIELD,
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            false,
+        ),
+    ]));
+    provider.add_source_table(
+        "dim".to_string(),
+        dim_schema,
+        Some(TIMESTAMP_FIELD.to_string()),
+        None,
+    );
+    provider
+}
+
+fn compile_streaming_select_body(body: &str, provider: StreamSchemaProvider) {
+    let sql = format!(
+        "CREATE STREAMING TABLE sink_shape_cov WITH ('connector'='kafka') AS {body}"
+    );
+    compile_first(&Coordinator::new(), &sql, provider);
+}
+
+fn assert_streaming_select_logical_rewrites(body: &str, provider: &StreamSchemaProvider) {
+    let sql = format!(
+        "CREATE STREAMING TABLE sink_lr WITH ('connector'='kafka') AS {body}"
+    );
+    let dialect = FunctionStreamDialect {};
+    let stmts = Parser::parse_sql(&dialect, &sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
+    let DFStatement::CreateStreamingTable { query, .. } = &stmts[0] else {
+        panic!("expected CreateStreamingTable, got {:?}", stmts[0]);
+    };
+    let plan = produce_optimized_plan(&DFStatement::Query(query.clone()), provider)
+        .unwrap_or_else(|e| panic!("produce_optimized_plan {sql:?}: {e:#}"));
+    rewrite_plan(plan, provider).unwrap_or_else(|e| panic!("rewrite_plan {sql:?}: {e:#}"));
+}
+
+fn assert_streaming_select_logical_rewrite_err_contains(
+    body: &str,
+    provider: &StreamSchemaProvider,
+    needle: &str,
+) {
+    let sql = format!(
+        "CREATE STREAMING TABLE sink_lr WITH ('connector'='kafka') AS {body}"
+    );
+    let dialect = FunctionStreamDialect {};
+    let stmts = Parser::parse_sql(&dialect, &sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
+    let DFStatement::CreateStreamingTable { query, .. } = &stmts[0] else {
+        panic!("expected CreateStreamingTable, got {:?}", stmts[0]);
+    };
+    let plan = produce_optimized_plan(&DFStatement::Query(query.clone()), provider)
+        .unwrap_or_else(|e| panic!("produce_optimized_plan {sql:?}: {e:#}"));
+    let err = rewrite_plan(plan, provider).unwrap_err();
+    let msg = err.to_string();
+    assert!(
+        msg.contains(needle),
+        "expected '{needle}' in rewrite error, got: {msg}"
+    );
+}
+
+#[test]
+fn parse_create_function_double_quoted_path_style() {
+    assert_parses_as(
+        r#"CREATE FUNCTION WITH ("function_path"='./a.wasm', "config_path"='./b.yml')"#,
+        "CreateFunction",
+    );
+}
+
+#[test]
+fn parse_create_function_extra_numeric_and_bool_like_strings() {
+    assert_parses_as(
+        r#"CREATE FUNCTION WITH (
+            'function_path'='./f.wasm',
+            'config_path'='./c.yml',
+            'parallelism'='8',
+            'dry_run'='false'
+        )"#,
+        "CreateFunction",
+    );
+}
+
+#[test]
+fn parse_create_function_fails_without_function_path() {
+    let err = parse_sql("CREATE FUNCTION WITH ('config_path'='./only.yml')").unwrap_err();
+    let s = err.to_string();
+    assert!(
+        s.contains("function_path") || s.contains("CREATE FUNCTION"),
+        "{s}"
+    );
+}
+
+#[test]
+fn parse_drop_function_quoted_name() {
+    assert_parses_as(r#"DROP FUNCTION "my-pipeline""#, "DropFunction");
+}
+
+#[test]
+fn parse_start_stop_function_dotted_style_name() {
+    assert_parses_as("START FUNCTION job.v1.main", "StartFunction");
+    assert_parses_as("STOP FUNCTION job.v1.main", "StopFunction");
+}
+
+#[test]
+fn parse_show_functions_extra_whitespace() {
+    assert_parses_as("  SHOW   FUNCTIONS  ", "ShowFunctions");
+}
+
+#[test]
+fn parse_create_table_multiple_columns_types() {
+    assert_parses_as(
+        "CREATE TABLE metrics (ts TIMESTAMP, name VARCHAR, val DOUBLE, ok BOOLEAN)",
+        "CreateTable",
+    );
+}
+
+#[test]
+fn parse_create_table_with_not_null_and_precision() {
+    assert_parses_as(
+        "CREATE TABLE t (id BIGINT NOT NULL, code DECIMAL(10,2))",
+        "CreateTable",
+    );
+}
+
+#[test]
+fn parse_create_table_if_not_exists_if_dialect_accepts() {
+    if let Ok(stmts) = parse_sql("CREATE TABLE IF NOT EXISTS guard (id INT)") {
+        assert!(format!("{:?}", stmts[0]).starts_with("CreateTable"));
+    }
+}
+
+#[test]
+fn parse_streaming_table_select_star() {
+    assert_parses_as(
+        "CREATE STREAMING TABLE s1 WITH ('connector'='kafka') AS SELECT * FROM src",
+        "StreamingTableStatement",
+    );
+}
+
+#[test]
+fn parse_streaming_table_select_columns() {
+    assert_parses_as(
+        "CREATE STREAMING TABLE s2 WITH ('connector'='memory') AS SELECT id, v FROM src",
+        "StreamingTableStatement",
+    );
+}
+
+#[test]
+fn parse_streaming_table_with_partition_by() {
+    let sql = format!(
+        "CREATE STREAMING TABLE s3 WITH ('connector' = 'kafka', 'partition_by' = 'id') AS SELECT id, {} FROM src",
+        TIMESTAMP_FIELD
+    );
+    assert_parses_as(&sql, "StreamingTableStatement");
+}
+
+#[test]
+fn parse_streaming_table_with_idle_time_option() {
+    assert_parses_as(
+        "CREATE STREAMING TABLE s4 WITH ('connector'='kafka', 'idle_time'='30s') AS SELECT * FROM src",
+        "StreamingTableStatement",
+    );
+}
+
+#[test]
+fn parse_streaming_table_sink_name_snake_and_digits() {
+    assert_parses_as(
+        "CREATE STREAMING TABLE sink_01_out WITH ('connector'='memory') AS SELECT 1",
+        "StreamingTableStatement",
+    );
+}
+
+#[test]
+fn parse_streaming_table_comment_before_as_if_supported() {
+    let sql = "CREATE STREAMING TABLE c1 WITH ('connector'='kafka') COMMENT 'out' AS SELECT * FROM src";
+    if let Ok(stmts) = parse_sql(sql) {
+        assert!(
+            format!("{:?}", stmts[0]).starts_with("StreamingTableStatement"),
+            "{stmts:?}"
+        );
+    }
+}
+
+#[test]
+fn parse_three_semicolon_separated_statements() {
+    let sql = concat!(
+        "CREATE FUNCTION WITH ('function_path'='./x.wasm'); ",
+        "CREATE TABLE meta (id INT); ",
+        "CREATE STREAMING TABLE out1 WITH ('connector'='kafka') AS SELECT 1",
+    );
+    let stmts = parse_sql(sql).unwrap();
+    assert_eq!(stmts.len(), 3);
+    assert!(format!("{:?}", stmts[0]).starts_with("CreateFunction"));
+    assert!(format!("{:?}", stmts[1]).starts_with("CreateTable"));
+    assert!(format!("{:?}", stmts[2]).starts_with("StreamingTableStatement"));
+}
+
+#[test]
+fn parse_rejects_insert_with_columns_list() {
+    assert_parse_fails("INSERT INTO t (a,b) VALUES (1,2)");
+}
+
+#[test]
+fn parse_rejects_update_delete() {
+    assert_parse_fails("UPDATE src SET id = 1");
+    assert_parse_fails("DELETE FROM src WHERE id = 0");
+}
+
+#[test]
+fn parse_rejects_merge_explain() {
+    assert_parse_fails("EXPLAIN SELECT 1");
+    assert_parse_fails("MERGE INTO t USING s ON true WHEN MATCHED THEN UPDATE SET x=1");
+}
+
+#[test]
+fn parse_rejects_create_schema_database() {
+    assert_parse_fails("CREATE SCHEMA s");
+    assert_parse_fails("CREATE DATABASE d");
+}
+
+#[test]
+fn compile_streaming_select_star_from_src() {
+    compile_first_streaming(concat!(
+        "CREATE STREAMING TABLE kafka_all ",
+        "WITH ('connector'='kafka') ",
+        "AS SELECT * FROM src",
+    ));
+}
+
+#[test]
+fn compile_streaming_select_id_v_from_src() {
+    let sql = format!(
+        "CREATE STREAMING TABLE kafka_cols WITH ('connector'='kafka') AS SELECT id, v, {} FROM src",
+        TIMESTAMP_FIELD
+    );
+    compile_first_streaming(&sql);
+}
+
+#[test]
+fn compile_streaming_memory_connector() {
+    compile_first_streaming(
+        "CREATE STREAMING TABLE mem_sink WITH ('connector'='memory') AS SELECT * FROM src",
+    );
+}
+
+#[test]
+fn compile_streaming_with_partition_by_id() {
+    compile_first_streaming(concat!(
+        "CREATE STREAMING TABLE part_sink ",
+        "WITH ('connector'='kafka', 'partition_by'='id') ",
+        "AS SELECT * FROM src",
+    ));
+}
+
+#[test]
+fn compile_streaming_connector_postgres_string() {
+    compile_first_streaming(
+        "CREATE STREAMING TABLE pg_sink WITH ('connector'='postgres') AS SELECT id FROM src",
+    );
+}
+
+#[test]
+#[should_panic(expected = "connector")]
+fn compile_streaming_fails_without_connector() {
+    let sql = "CREATE STREAMING TABLE bad WITH ('partition_by'='id') AS SELECT * FROM src";
+    // `compile_first` panics on `Err`, so a returned error also satisfies `should_panic`.
+    compile_first(&Coordinator::new(), sql, fake_src_stream_provider());
+}
+
+#[test]
+fn compile_plan_show_functions() {
+    let stmts = parse_sql("SHOW FUNCTIONS").unwrap();
+    Coordinator::new()
+        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
+        .expect("ShowFunctions plan");
+}
+
+#[test]
+fn compile_plan_start_stop_drop_function() {
+    for sql in [
+        "START FUNCTION t1",
+        "STOP FUNCTION t1",
+        "DROP FUNCTION t1",
+    ] {
+        let stmts = parse_sql(sql).unwrap();
+        Coordinator::new()
+            .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
+            .unwrap_or_else(|e| panic!("{sql}: {e:#}"));
+    }
+}
+
+#[test]
+fn compile_plan_create_function() {
+    let sql =
+        "CREATE FUNCTION WITH ('function_path'='./x.wasm', 'config_path'='./c.yml')";
+    let stmts = parse_sql(sql).unwrap();
+    Coordinator::new()
+        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
+        .expect("CreateFunction plan");
+}
+
+#[test]
+fn compile_plan_create_table_simple_ddl() {
+    let sql = "CREATE TABLE local_only (id INT, name VARCHAR)";
+    let stmts = parse_sql(sql).unwrap();
+    Coordinator::new()
+        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
+        .expect("CreateTable plan");
+}
+
+#[test]
+fn streaming_where_eq_ne_and_or_not() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE id = 1 AND (v <> 'x' OR NOT (id < 0))",
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE id > 0 AND id <= 100 AND id >= 1",
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        &format!("SELECT id, v, {ts} FROM src WHERE (id = 2 OR id = 3) AND v IS NOT NULL"),
+        fake_src_stream_provider(),
+    );
+}
+
+#[test]
+fn streaming_where_in_between_like_null() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE id IN (1, 2, 3)",
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE id NOT IN (99, 100)",
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE id BETWEEN 1 AND 10",
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE v LIKE 'pre%'",
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE v IS NULL",
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        &format!("SELECT id, v, {ts} FROM src WHERE v IS NOT NULL OR id = 0"),
+        fake_src_stream_provider(),
+    );
+}
+
+#[test]
+fn streaming_where_scalar_subquery() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_dim_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT src.id, src.v, src.{ts} FROM src \
+             WHERE src.id = (SELECT MAX(dim.id) FROM dim)"
+        ),
+        &p,
+    );
+}
+
+#[test]
+#[should_panic(expected = "window")]
+fn streaming_where_in_subquery_currently_panics() {
+    let p = fake_src_dim_stream_provider();
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE id IN (SELECT id FROM dim WHERE amt IS NOT NULL)",
+        p,
+    );
+}
+
+#[test]
+#[should_panic(expected = "window")]
+fn streaming_where_exists_correlated_currently_panics() {
+    let p = fake_src_dim_stream_provider();
+    compile_streaming_select_body(
+        "SELECT * FROM src WHERE EXISTS (SELECT 1 FROM dim WHERE dim.id = src.id)",
+        p,
+    );
+}
+
+#[test]
+fn streaming_select_case_coalesce_cast() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        &format!(
+            "SELECT CASE WHEN id < 0 THEN 0 WHEN id > 1000 THEN 1000 ELSE id END AS c, v, {ts} FROM src"
+        ),
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        &format!("SELECT COALESCE(v, 'na') AS v2, id, {ts} FROM src"),
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        &format!("SELECT CAST(id AS DOUBLE) AS id_f, {ts} FROM src"),
+        fake_src_stream_provider(),
+    );
+}
+
+#[test]
+fn streaming_select_row_time_distinct() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        &format!("SELECT row_time(), id, v, {ts} FROM src"),
+        fake_src_stream_provider(),
+    );
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites("SELECT DISTINCT id FROM src", &p);
+}
+
+/// Compiles streaming sinks whose `FROM` clause is a derived table, nested one and two
+/// levels deep; `compile_streaming_select_body` panics if parsing or planning fails.
+#[test]
+fn streaming_from_subquery_nested() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        &format!("SELECT * FROM (SELECT id, v, {ts} FROM src WHERE id > 0) AS t"),
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        "SELECT * FROM (SELECT * FROM (SELECT id FROM src) AS i2) AS i1",
+        fake_src_stream_provider(),
+    );
+}
+
+/// Compiles streaming sinks fed by a single CTE and by a CTE that reads from another CTE;
+/// `compile_streaming_select_body` panics if parsing or planning fails.
+#[test]
+fn streaming_with_cte_single_and_chain() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        &format!(
+            "WITH a AS (SELECT id, v, {ts} FROM src WHERE id > 0) SELECT * FROM a"
+        ),
+        fake_src_stream_provider(),
+    );
+    compile_streaming_select_body(
+        "WITH a AS (SELECT id FROM src), b AS (SELECT id FROM a WHERE id > 1) SELECT * FROM b",
+        fake_src_stream_provider(),
+    );
+}
+
+#[test]
+fn streaming_group_by_updating_aggregate_bundle() {
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, COUNT(*), SUM(id), AVG(id), MIN(v), MAX(v) FROM src GROUP BY id",
+        &p,
+    );
+}
+
+#[test]
+fn streaming_group_by_count_distinct_and_stats() {
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, COUNT(DISTINCT v), STDDEV_POP(id), VAR_POP(id) FROM src GROUP BY id",
+        &p,
+    );
+}
+
+#[test]
+fn streaming_group_by_having() {
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, COUNT(*) AS c FROM src GROUP BY id HAVING COUNT(*) >= 0",
+        &p,
+    );
+}
+
+#[test]
+fn streaming_group_by_tumble_window() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT tumble(INTERVAL '1' MINUTE) AS w, id, COUNT(*) AS c, MAX({ts}) AS max_evt \
+             FROM src GROUP BY tumble(INTERVAL '1' MINUTE), id"
+        ),
+        &p,
+    );
+}
+
+#[test]
+fn streaming_group_by_hop_window() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT hop(INTERVAL '1' MINUTE, INTERVAL '3' MINUTE) AS w, id, SUM(id), MAX({ts}) AS max_evt \
+             FROM src GROUP BY hop(INTERVAL '1' MINUTE, INTERVAL '3' MINUTE), id"
+        ),
+        &p,
+    );
+}
+
+#[test]
+fn streaming_window_row_number_over_tumble_aggregate() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT ROW_NUMBER() OVER (PARTITION BY w ORDER BY max_evt) AS rn, id, w, max_evt \
+             FROM ( \
+               SELECT tumble(INTERVAL '1' MINUTE) AS w, id, MAX({ts}) AS max_evt \
+               FROM src \
+               GROUP BY tumble(INTERVAL '1' MINUTE), id \
+             ) AS x"
+        ),
+        &p,
+    );
+}
+
+#[test]
+fn streaming_inner_join_eq_and_compound_on() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_dim_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT src.id, src.v, dim.name, src.{ts} \
+             FROM src INNER JOIN dim ON src.id = dim.id"
+        ),
+        &p,
+    );
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT src.id, dim.amt, src.{ts} \
+             FROM src JOIN dim ON src.id = dim.id AND dim.amt > CAST(0 AS DOUBLE)"
+        ),
+        &p,
+    );
+}
+
+#[test]
+#[ignore]
+fn streaming_self_join_inner_ignored() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        &format!(
+            "SELECT a.id, b.v, a.{ts} \
+             FROM src AS a JOIN src AS b ON a.id = b.id AND a.v = b.v"
+        ),
+        fake_src_stream_provider(),
+    );
+}
+
+#[test]
+fn streaming_join_subquery_branch() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_dim_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT src.id, src.v, j.name, src.{ts} \
+             FROM src JOIN (SELECT id, name FROM dim) AS j ON src.id = j.id"
+        ),
+        &p,
+    );
+}
+
+#[test]
+fn streaming_union_all_compatible_schemas() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_dim_stream_provider();
+    compile_streaming_select_body(
+        &format!(
+            "SELECT id, v, {ts} FROM src \
+             UNION ALL \
+             SELECT id, name AS v, {ts} FROM dim"
+        ),
+        p,
+    );
+}
+
+#[test]
+fn streaming_logical_group_by_two_keys_and_filter_agg() {
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, v, COUNT(*) AS c FROM src GROUP BY id, v",
+        &p,
+    );
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, SUM(id) FILTER (WHERE v IS NOT NULL) AS s FROM src GROUP BY id",
+        &p,
+    );
+}
+
+#[test]
+fn streaming_logical_more_builtin_aggregates() {
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, STDDEV_POP(CAST(id AS DOUBLE)), COVAR_SAMP(CAST(id AS DOUBLE), CAST(id AS DOUBLE)), \
+         COVAR_POP(CAST(id AS DOUBLE), CAST(id AS DOUBLE)) \
+         FROM src GROUP BY id",
+        &p,
+    );
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, CORR(CAST(id AS DOUBLE), CAST(id AS DOUBLE)) FROM src GROUP BY id",
+        &p,
+    );
+}
+
+#[test]
+fn streaming_logical_bit_and_bool_aggregates() {
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, BIT_AND(id), BIT_OR(id), BIT_XOR(id) FROM src GROUP BY id",
+        &p,
+    );
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, BOOL_AND(id > 0), BOOL_OR(id < 100000) FROM src GROUP BY id",
+        &p,
+    );
+}
+
+#[test]
+fn streaming_logical_array_agg_and_list_union() {
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        "SELECT id, ARRAY_AGG(v) FROM src GROUP BY id",
+        &p,
+    );
+}
+
+#[test]
+fn streaming_logical_scalar_funcs_on_projection() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        &format!(
+            "SELECT ABS(id), POWER(CAST(id AS DOUBLE), 2.0), UPPER(v), LOWER(v), BTRIM(v), \
+             CHARACTER_LENGTH(v), CONCAT(v, '_x'), {ts} FROM src"
+        ),
+        fake_src_stream_provider(),
+    );
+}
+
+#[test]
+fn streaming_logical_nullif_regexp() {
+    let ts = TIMESTAMP_FIELD;
+    compile_streaming_select_body(
+        &format!(
+            "SELECT id, NULLIF(v, ''), REGEXP_LIKE(v, '^a'), {ts} FROM src WHERE v IS NOT NULL OR id = 0"
+        ),
+        fake_src_stream_provider(),
+    );
+}
+
+#[test]
+fn streaming_window_first_value_over_tumbled_subquery() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT FIRST_VALUE(id) OVER (PARTITION BY w ORDER BY max_evt) AS fv, w, id \
+             FROM ( \
+               SELECT tumble(INTERVAL '1' MINUTE) AS w, id, MAX({ts}) AS max_evt \
+               FROM src GROUP BY tumble(INTERVAL '1' MINUTE), id \
+             ) AS x"
+        ),
+        &p,
+    );
+}
+
+#[test]
+fn streaming_window_lag_over_tumbled_subquery() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT LAG(id, 1) OVER (PARTITION BY w ORDER BY max_evt) AS prev_id, w, id \
+             FROM ( \
+               SELECT tumble(INTERVAL '2' MINUTE) AS w, id, MAX({ts}) AS max_evt \
+               FROM src GROUP BY tumble(INTERVAL '2' MINUTE), id \
+             ) AS x"
+        ),
+        &p,
+    );
+}
+
+#[test]
+fn streaming_window_lead_over_tumbled_subquery() {
+    let ts = TIMESTAMP_FIELD;
+    let p = fake_src_stream_provider();
+    assert_streaming_select_logical_rewrites(
+        &format!(
+            "SELECT LEAD(id, 1) OVER (PARTITION BY w ORDER BY max_evt) AS next_id, w \
+             FROM ( \
+               SELECT tumble(INTERVAL '2' MINUTE) AS w, id, MAX({ts}) AS max_evt \
+               FROM src GROUP BY tumble(INTERVAL '2' MINUTE), id \
+             ) AS x"
+        ),
+        &p,
+    );
+}
+
+#[test]
+fn streaming_logical_full_outer_join_errors() {
+    let p = fake_src_dim_stream_provider();
+    assert_streaming_select_logical_rewrite_err_contains(
+        "SELECT src.id, dim.name FROM src FULL OUTER JOIN dim ON src.id = dim.id",
+        &p,
+        "inner",
+    );
+}
+
+#[test]
+#[should_panic(expected = "Non-inner")]
+fn streaming_left_join_errors_without_window() {
+    let ts = TIMESTAMP_FIELD;
+    let sql = format!(
+        "CREATE STREAMING TABLE sink_left WITH ('connector'='kafka') AS \
+         SELECT src.id, dim.name, src.{ts} FROM src LEFT JOIN dim ON src.id = dim.id"
+    );
+    // `compile_first` panics on `Err`, so a returned error also satisfies `should_panic`.
+    compile_first(&Coordinator::new(), &sql, fake_src_dim_stream_provider());
+}
diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs
index 45d373c3..0f2075c1 100644
--- a/src/sql/logical_planner/planner.rs
+++ b/src/sql/logical_planner/planner.rs
@@ -96,7 +96,6 @@ impl<'a> Planner<'a> {
         let fut = self.planner.create_physical_plan(plan, self.session_state);
         let (tx, mut rx) = oneshot::channel();
         thread::scope(|s| {
-            let _handle = tokio::runtime::Handle::current();
             let builder = thread::Builder::new();
             let builder = if cfg!(debug_assertions) {
                 builder.stack_size(10_000_000)
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index 32c0dce9..6e17e0f2 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -26,3 +26,6 @@ pub use schema::StreamSchemaProvider;
 pub use parse::parse_sql;
 pub use analysis::rewrite_plan;
 pub use logical_planner::CompiledSql;
+
+#[cfg(test)]
+mod frontend_sql_coverage_tests;
diff --git a/src/sql/parse.rs b/src/sql/parse.rs
index 1b4be38a..78c8bac0 100644
--- a/src/sql/parse.rs
+++ b/src/sql/parse.rs
@@ -10,6 +10,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//! Coordinator-facing SQL parsing (`parse_sql`).
+//!
+//! **Data-definition / pipeline shape (this entry point)**  
+//! Only these table-related forms are supported:
+//! - **`CREATE TABLE ...`** (including `CREATE TABLE ... AS SELECT` where the planner accepts it)
+//! - **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (streaming sink DDL)
+//!
+//! **`INSERT` is not supported** here — use `CREATE TABLE ... AS SELECT` or
+//! `CREATE STREAMING TABLE ... AS SELECT` to define the query shape instead.
+//!
+//! Other supported statements include function lifecycle (`CREATE FUNCTION WITH`, `START FUNCTION`, …).
+
 use std::collections::HashMap;
 
 use datafusion::common::{Result, plan_err};
@@ -62,7 +74,14 @@ fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>
         s @ DFStatement::CreateStreamingTable { .. } => {
             Ok(Box::new(StreamingTableStatement::new(s)))
         }
-        other => plan_err!("Unsupported SQL statement: {other}"),
+        DFStatement::Insert { .. } => plan_err!(
+            "INSERT is not supported; only CREATE TABLE and CREATE STREAMING TABLE (with AS SELECT) \
+             are supported for defining table/query pipelines in this SQL frontend"
+        ),
+        other => plan_err!(
+            "Unsupported SQL statement: {other}. \
+             For tables/pipelines use CREATE TABLE or CREATE STREAMING TABLE ... AS SELECT; INSERT is not supported."
+        ),
     }
 }
 
@@ -139,10 +158,31 @@ mod tests {
         assert!(is_type(stmt.as_ref(), "CreateTable"));
     }
 
+    /// `CREATE STREAMING TABLE` is the sink DDL supported by FunctionStream (not `CREATE STREAM TABLE`).
     #[test]
-    fn test_parse_insert_statement() {
-        let stmt = first_stmt("INSERT INTO sink SELECT * FROM source");
-        assert!(is_type(stmt.as_ref(), "CreateStreamingTableStatement"));
+    fn test_parse_create_streaming_table() {
+        let sql = concat!(
+            "CREATE STREAMING TABLE my_sink ",
+            "WITH ('connector' = 'kafka') ",
+            "AS SELECT id FROM src",
+        );
+        let stmt = first_stmt(sql);
+        assert!(
+            is_type(stmt.as_ref(), "StreamingTableStatement"),
+            "expected StreamingTableStatement, got {:?}",
+            stmt
+        );
+    }
+
+    #[test]
+    fn test_parse_create_streaming_table_case_insensitive() {
+        let sql = concat!(
+            "create streaming table out_q ",
+            "with ('connector' = 'memory') ",
+            "as select 1 as x",
+        );
+        let stmt = first_stmt(sql);
+        assert!(is_type(stmt.as_ref(), "StreamingTableStatement"));
     }
 
     #[test]
@@ -163,11 +203,14 @@ mod tests {
 
     #[test]
     fn test_parse_multiple_statements() {
-        let sql = "CREATE TABLE t1 (id INT); INSERT INTO sink SELECT * FROM t1";
+        let sql = concat!(
+            "CREATE TABLE t1 (id INT); ",
+            "CREATE STREAMING TABLE sk WITH ('connector' = 'kafka') AS SELECT id FROM t1",
+        );
         let stmts = parse_sql(sql).unwrap();
         assert_eq!(stmts.len(), 2);
         assert!(is_type(stmts[0].as_ref(), "CreateTable"));
-        assert!(is_type(stmts[1].as_ref(), "CreateStreamingTableStatement"));
+        assert!(is_type(stmts[1].as_ref(), "StreamingTableStatement"));
     }
 
     #[test]
@@ -182,6 +225,20 @@ mod tests {
         assert!(result.is_err());
     }
 
+    #[test]
+    fn test_insert_not_supported() {
+        let err = parse_sql("INSERT INTO sink SELECT * FROM src").unwrap_err();
+        let msg = err.to_string();
+        assert!(
+            msg.contains("INSERT") && msg.contains("not supported"),
+            "expected explicit INSERT rejection, got: {msg}"
+        );
+        assert!(
+            msg.contains("CREATE TABLE") || msg.contains("CREATE STREAMING TABLE"),
+            "error should mention supported alternatives, got: {msg}"
+        );
+    }
+
     #[test]
     fn test_parse_with_extra_properties() {
         let sql = r#"CREATE FUNCTION WITH (

From 13e1341db50cea310cf281ef73c24f7b74d3c129 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 22 Mar 2026 01:45:15 +0800
Subject: [PATCH 10/44] update

---
 src/coordinator/execution/executor.rs         |   2 +-
 src/coordinator/plan/logical_plan_visitor.rs  |  59 +-
 src/coordinator/plan/lookup_table_plan.rs     |   4 +-
 .../plan/streaming_table_connector_plan.rs    |   4 +-
 src/coordinator/plan/streaming_table_plan.rs  |   4 +-
 src/coordinator/tool/mod.rs                   |   4 +-
 src/sql/analysis/aggregate_rewriter.rs        |  18 +-
 src/sql/analysis/async_udf_rewriter.rs        |  34 +-
 src/sql/analysis/join_rewriter.rs             |  22 +-
 src/sql/analysis/mod.rs                       |  35 +-
 src/sql/analysis/sink_input_rewriter.rs       |   8 +-
 src/sql/analysis/source_metadata_visitor.rs   |  18 +-
 src/sql/analysis/source_rewriter.rs           | 105 ++-
 src/sql/analysis/stream_rewriter.rs           |  24 +-
 src/sql/analysis/streaming_window_analzer.rs  |  18 +-
 src/sql/analysis/window_function_rewriter.rs  |  14 +-
 .../tool => sql/common}/connector_options.rs  |  10 +
 src/sql/common/format_from_opts.rs            | 162 ++++
 src/sql/common/mod.rs                         |   5 +-
 src/sql/extensions/aggregate.rs               | 776 +++++++++---------
 src/sql/extensions/async_udf.rs               | 259 +++---
 src/sql/extensions/constants.rs               |   1 +
 src/sql/extensions/debezium.rs                | 442 ++++++----
 src/sql/extensions/extension_try_from.rs      |  66 +-
 src/sql/extensions/join.rs                    | 230 ++++--
 src/sql/extensions/key_calculation.rs         | 369 +++++----
 src/sql/extensions/lookup.rs                  | 302 ++++---
 src/sql/extensions/mod.rs                     |  10 +-
 src/sql/extensions/projection.rs              | 263 ++++--
 src/sql/extensions/remote_table.rs            | 231 +++---
 src/sql/extensions/sink.rs                    | 292 ++++---
 src/sql/extensions/stream_extension.rs        |  38 -
 .../streaming_operator_blueprint.rs           |  65 ++
 src/sql/extensions/table_source.rs            | 195 +++--
 src/sql/extensions/timestamp_append.rs        | 104 ++-
 src/sql/extensions/updating_aggregate.rs      | 264 +++---
 src/sql/extensions/watermark_node.rs          | 263 ++++--
 src/sql/extensions/window_fn.rs               | 123 ---
 src/sql/extensions/windows_function.rs        | 197 +++++
 src/sql/frontend_sql_coverage_tests.rs        |   2 +-
 src/sql/logical_node/logical.rs               | 378 ---------
 .../logical_node/logical/dylib_udf_config.rs  |  71 ++
 src/sql/logical_node/logical/logical_edge.rs  |  57 ++
 src/sql/logical_node/logical/logical_graph.rs |  30 +
 src/sql/logical_node/logical/logical_node.rs  |  71 ++
 .../logical_node/logical/logical_program.rs   | 123 +++
 src/sql/logical_node/logical/mod.rs           |  30 +
 .../logical_node/logical/operator_chain.rs    |  80 ++
 src/sql/logical_node/logical/operator_name.rs |  32 +
 .../logical_node/logical/program_config.rs    |  22 +
 .../logical_node/logical/python_udf_config.rs |  23 +
 .../{optimizers.rs => optimizers/chaining.rs} |  18 +-
 .../optimizers/datafusion_logical.rs}         |   0
 src/sql/logical_planner/optimizers/mod.rs     |  20 +
 src/sql/logical_planner/planner.rs            |  57 +-
 src/sql/mod.rs                                |   2 +-
 src/sql/schema/column_descriptor.rs           | 136 +++
 .../{connector.rs => connection_type.rs}      |   0
 src/sql/schema/connector_table.rs             | 205 -----
 src/sql/schema/data_encoding_format.rs        |  82 ++
 src/sql/schema/field_spec.rs                  |  52 --
 src/sql/schema/insert.rs                      |  55 --
 src/sql/schema/mod.rs                         |  36 +-
 src/sql/schema/schema_context.rs              |  37 +
 src/sql/schema/schema_provider.rs             | 417 ++++++----
 src/sql/schema/source_table.rs                | 564 +++++++++++++
 src/sql/schema/table.rs                       |  36 +-
 src/sql/schema/table_execution_unit.rs        |  33 +
 src/sql/schema/table_role.rs                  | 110 +++
 src/sql/schema/temporal_pipeline_config.rs    |  57 ++
 70 files changed, 4966 insertions(+), 2910 deletions(-)
 rename src/{coordinator/tool => sql/common}/connector_options.rs (96%)
 create mode 100644 src/sql/common/format_from_opts.rs
 delete mode 100644 src/sql/extensions/stream_extension.rs
 create mode 100644 src/sql/extensions/streaming_operator_blueprint.rs
 delete mode 100644 src/sql/extensions/window_fn.rs
 create mode 100644 src/sql/extensions/windows_function.rs
 delete mode 100644 src/sql/logical_node/logical.rs
 create mode 100644 src/sql/logical_node/logical/dylib_udf_config.rs
 create mode 100644 src/sql/logical_node/logical/logical_edge.rs
 create mode 100644 src/sql/logical_node/logical/logical_graph.rs
 create mode 100644 src/sql/logical_node/logical/logical_node.rs
 create mode 100644 src/sql/logical_node/logical/logical_program.rs
 create mode 100644 src/sql/logical_node/logical/mod.rs
 create mode 100644 src/sql/logical_node/logical/operator_chain.rs
 create mode 100644 src/sql/logical_node/logical/operator_name.rs
 create mode 100644 src/sql/logical_node/logical/program_config.rs
 create mode 100644 src/sql/logical_node/logical/python_udf_config.rs
 rename src/sql/logical_planner/{optimizers.rs => optimizers/chaining.rs} (81%)
 rename src/sql/{schema/optimizer.rs => logical_planner/optimizers/datafusion_logical.rs} (100%)
 create mode 100644 src/sql/logical_planner/optimizers/mod.rs
 create mode 100644 src/sql/schema/column_descriptor.rs
 rename src/sql/schema/{connector.rs => connection_type.rs} (100%)
 delete mode 100644 src/sql/schema/connector_table.rs
 create mode 100644 src/sql/schema/data_encoding_format.rs
 delete mode 100644 src/sql/schema/field_spec.rs
 delete mode 100644 src/sql/schema/insert.rs
 create mode 100644 src/sql/schema/schema_context.rs
 create mode 100644 src/sql/schema/source_table.rs
 create mode 100644 src/sql/schema/table_execution_unit.rs
 create mode 100644 src/sql/schema/table_role.rs
 create mode 100644 src/sql/schema/temporal_pipeline_config.rs

diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 8285a2c5..4dae91d5 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -225,7 +225,7 @@ impl PlanVisitor for Executor {
     ) -> PlanVisitorResult {
         let result = (|| -> Result<ExecuteResult, ExecuteError> {
             let catalog_table =
-                CatalogTable::ConnectorTable(plan.connector_table.clone());
+                CatalogTable::ConnectorTable(plan.source_table.clone());
             let mut schema_provider = StreamSchemaProvider::new();
             schema_provider.insert_catalog_table(catalog_table.clone());
 
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 93f8776a..4a747fdf 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -33,21 +33,19 @@ use crate::coordinator::statement::{
 };
 use crate::coordinator::tool::ConnectorOptions;
 use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig};
-use crate::sql::logical_planner::optimizers::ChainingOptimizer;
+use crate::sql::logical_planner::optimizers::{ChainingOptimizer, produce_optimized_plan};
 use crate::sql::schema::Table;
-use crate::sql::schema::connector::ConnectionType;
-use crate::sql::schema::connector_table::ConnectorTable;
-use crate::sql::schema::field_spec::FieldSpec;
-use crate::sql::schema::optimizer::produce_optimized_plan;
+use crate::sql::schema::ConnectionType;
+use crate::sql::schema::source_table::SourceTable;
+use crate::sql::schema::ColumnDescriptor;
 use crate::sql::functions::{is_json_union, serialize_outgoing_json};
-use crate::sql::extensions::sink::SinkExtension;
+use crate::sql::extensions::sink::StreamEgressNode;
 use crate::sql::logical_planner::planner;
 use crate::sql::analysis::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks};
 use crate::sql::rewrite_plan;
 
 const CONNECTOR: &str = "connector";
 const PARTITION_BY: &str = "partition_by";
-const IDLE_MICROS: &str = "idle_time";
 
 fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap<String, String> {
     options
@@ -108,6 +106,8 @@ impl LogicalPlanVisitor {
             )
         })?;
 
+        let partition_exprs = self.resolve_partition_expressions(&mut opts)?;
+
         let base_plan =
             produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?;
         let mut plan = rewrite_plan(base_plan, &self.schema_provider)?;
@@ -121,38 +121,33 @@ impl LogicalPlanVisitor {
             plan = serialize_outgoing_json(&self.schema_provider, Arc::new(plan));
         }
 
-        let partition_exprs = self.resolve_partition_expressions(&mut opts)?;
-
-        let fields: Vec<FieldSpec> = plan
+        let fields: Vec<ColumnDescriptor> = plan
             .schema()
             .fields()
             .iter()
-            .map(|f| FieldSpec::Struct((**f).clone()))
+            .map(|f| ColumnDescriptor::from((**f).clone()))
             .collect();
 
-        let connector_table = ConnectorTable {
-            id: None,
-            connector,
-            name: table_name.clone(),
-            connection_type: ConnectionType::Sink,
+        let mut source_table = SourceTable::from_options(
+            &table_name,
+            &connector,
+            false,
             fields,
-            config: "".to_string(),
-            description: comment.clone().unwrap_or_default(),
-            event_time_field: None,
-            watermark_field: None,
-            idle_time: opts.pull_opt_duration(IDLE_MICROS)?,
-            primary_keys: Arc::new(vec![]),
-            inferred_fields: None,
-            partition_exprs: Arc::new(partition_exprs),
-            lookup_cache_ttl:None,
-            lookup_cache_max_bytes:None,
-        };
+            vec![],
+            None,
+            &mut opts,
+            None,
+            &self.schema_provider,
+            Some(ConnectionType::Sink),
+            comment.clone().unwrap_or_default(),
+        )?;
+        source_table.partition_exprs = Arc::new(partition_exprs);
 
-        let sink_extension = SinkExtension::new(
+        let sink_extension = StreamEgressNode::try_new(
             TableReference::bare(table_name.clone()),
-            Table::ConnectorTable(connector_table.clone()),
+            Table::ConnectorTable(source_table.clone()),
             plan.schema().clone(),
-            Arc::new(plan),
+            plan,
         )?;
 
         let plan_with_keys = maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension {
@@ -196,7 +191,7 @@ impl LogicalPlanVisitor {
         Ok(Box::new(StreamingTable {
             name: table_name,
             comment: comment.clone(),
-            connector_table,
+            source_table,
             logical_plan: final_plan,
         }))
     }
@@ -322,7 +317,7 @@ mod create_streaming_table_tests {
 
     use crate::sql::common::TIMESTAMP_FIELD;
     use crate::sql::rewrite_plan;
-    use crate::sql::schema::optimizer::produce_optimized_plan;
+    use crate::sql::logical_planner::optimizers::produce_optimized_plan;
     use crate::sql::schema::StreamSchemaProvider;
 
     fn schema_provider_with_src() -> StreamSchemaProvider {
diff --git a/src/coordinator/plan/lookup_table_plan.rs b/src/coordinator/plan/lookup_table_plan.rs
index e0ea06ba..65103b61 100644
--- a/src/coordinator/plan/lookup_table_plan.rs
+++ b/src/coordinator/plan/lookup_table_plan.rs
@@ -10,14 +10,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use crate::sql::schema::connector_table::ConnectorTable;
+use crate::sql::schema::source_table::SourceTable;
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
 /// Plan node that exposes a lookup table config as a logical plan input.
 #[derive(Debug)]
 pub struct LookupTablePlan {
-    pub table: ConnectorTable,
+    pub table: SourceTable,
 }
 
 impl PlanNode for LookupTablePlan {
diff --git a/src/coordinator/plan/streaming_table_connector_plan.rs b/src/coordinator/plan/streaming_table_connector_plan.rs
index c2407ec8..214e2e15 100644
--- a/src/coordinator/plan/streaming_table_connector_plan.rs
+++ b/src/coordinator/plan/streaming_table_connector_plan.rs
@@ -10,14 +10,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use crate::sql::schema::connector_table::ConnectorTable;
+use crate::sql::schema::source_table::SourceTable;
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
 /// Plan node that exposes a connector table config as a logical plan input.
 #[derive(Debug)]
 pub struct StreamingTableConnectorPlan {
-    pub table: ConnectorTable,
+    pub table: SourceTable,
 }
 
 impl PlanNode for StreamingTableConnectorPlan {
diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs
index 30e519f8..01b8dbb8 100644
--- a/src/coordinator/plan/streaming_table_plan.rs
+++ b/src/coordinator/plan/streaming_table_plan.rs
@@ -11,7 +11,7 @@
 // limitations under the License.
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
-use crate::sql::schema::connector_table::ConnectorTable;
+use crate::sql::schema::source_table::SourceTable;
 use datafusion::logical_expr::LogicalPlan;
 
 /// Plan node representing a fully resolved streaming table (DDL).
@@ -19,7 +19,7 @@ use datafusion::logical_expr::LogicalPlan;
 pub struct StreamingTable {
     pub name: String,
     pub comment: Option<String>,
-    pub connector_table: ConnectorTable,
+    pub source_table: SourceTable,
     pub logical_plan: LogicalPlan,
 }
 
diff --git a/src/coordinator/tool/mod.rs b/src/coordinator/tool/mod.rs
index 95d6a7ed..8ef77230 100644
--- a/src/coordinator/tool/mod.rs
+++ b/src/coordinator/tool/mod.rs
@@ -1,3 +1 @@
-mod connector_options;
-
-pub use connector_options::{ConnectorOptions, FromOpts};
+pub use crate::sql::common::ConnectorOptions;
diff --git a/src/sql/analysis/aggregate_rewriter.rs b/src/sql/analysis/aggregate_rewriter.rs
index 04ac0896..f11b53d0 100644
--- a/src/sql/analysis/aggregate_rewriter.rs
+++ b/src/sql/analysis/aggregate_rewriter.rs
@@ -1,13 +1,13 @@
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err};
 use datafusion::functions_aggregate::expr_fn::max;
-use datafusion::logical_expr::{self, Aggregate, Expr, Extension, LogicalPlan, Projection};
+use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, Projection};
 use datafusion::prelude::col;
 use std::sync::Arc;
 
 use crate::sql::schema::StreamSchemaProvider;
-use crate::sql::extensions::aggregate::AggregateExtension;
-use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs};
+use crate::sql::extensions::aggregate::StreamWindowAggregateNode;
+use crate::sql::extensions::key_calculation::{KeyExtractionNode, KeyExtractionStrategy};
 use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer;
 use crate::sql::types::{
     DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers, find_window,
@@ -83,7 +83,7 @@ impl TreeNodeRewriter for AggregateRewriter<'_> {
         let keyed_input =
             self.build_keyed_input(agg.input.clone(), &agg.group_expr, &key_fields)?;
 
-        // 5. Build the final AggregateExtension for the physical planner.
+        // 5. Build the final StreamWindowAggregateNode for the physical planner.
         let mut internal_fields = fields_with_qualifiers(&agg.schema);
         if let WindowBehavior::FromOperator { window_index, .. } = &behavior {
             internal_fields.remove(*window_index);
@@ -100,11 +100,11 @@ impl TreeNodeRewriter for AggregateRewriter<'_> {
             internal_schema,
         )?;
 
-        let extension = AggregateExtension::new(
+        let extension = StreamWindowAggregateNode::try_new(
             behavior,
             LogicalPlan::Aggregate(rewritten_agg),
             (0..key_count).collect(),
-        );
+        )?;
 
         Ok(Transformed::yes(LogicalPlan::Extension(Extension {
             node: Arc::new(extension),
@@ -118,7 +118,7 @@ impl<'a> AggregateRewriter<'a> {
     }
 
     /// [Internal] Builds the physical Key Calculation layer required for distributed Shuffling.
-    /// This wraps the input in a Projection and a KeyCalculationExtension.
+    /// This wraps the input in a Projection and a KeyExtractionNode.
     fn build_keyed_input(
         &self,
         input: Arc<LogicalPlan>,
@@ -151,9 +151,9 @@ impl<'a> AggregateRewriter<'a> {
             LogicalPlan::Projection(Projection::try_new_with_schema(exprs, input, key_schema)?);
 
         Ok(LogicalPlan::Extension(Extension {
-            node: Arc::new(KeyCalculationExtension::new(
+            node: Arc::new(KeyExtractionNode::new(
                 projection,
-                KeysOrExprs::Keys((0..key_count).collect()),
+                KeyExtractionStrategy::ColumnIndices((0..key_count).collect()),
             )),
         }))
     }
diff --git a/src/sql/analysis/async_udf_rewriter.rs b/src/sql/analysis/async_udf_rewriter.rs
index 9584c022..0ad4dfc2 100644
--- a/src/sql/analysis/async_udf_rewriter.rs
+++ b/src/sql/analysis/async_udf_rewriter.rs
@@ -1,5 +1,5 @@
-use crate::sql::extensions::remote_table::RemoteTableExtension;
-use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncUDFExtension};
+use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
+use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncFunctionExecutionNode};
 use crate::sql::schema::StreamSchemaProvider;
 use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
 use datafusion::common::{Column, Result as DFResult, TableReference, plan_err};
@@ -92,11 +92,11 @@ impl TreeNodeRewriter for AsyncUdfRewriter<'_> {
 
         let input = if matches!(*projection.input, LogicalPlan::Projection(..)) {
             Arc::new(LogicalPlan::Extension(Extension {
-                node: Arc::new(RemoteTableExtension {
-                    input: (*projection.input).clone(),
-                    name: TableReference::bare("subquery_projection"),
-                    schema: projection.input.schema().clone(),
-                    materialize: false,
+                node: Arc::new(RemoteTableBoundaryNode {
+                    upstream_plan: (*projection.input).clone(),
+                    table_identifier: TableReference::bare("subquery_projection"),
+                    resolved_schema: projection.input.schema().clone(),
+                    requires_materialization: false,
                 }),
             }))
         } else {
@@ -104,16 +104,16 @@ impl TreeNodeRewriter for AsyncUdfRewriter<'_> {
         };
 
         Ok(Transformed::yes(LogicalPlan::Extension(Extension {
-            node: Arc::new(AsyncUDFExtension {
-                input,
-                name,
-                udf,
-                arg_exprs,
-                final_exprs: projection.expr,
-                ordered: opts.ordered,
-                max_concurrency: opts.max_concurrency,
-                timeout: opts.timeout,
-                final_schema: projection.schema,
+            node: Arc::new(AsyncFunctionExecutionNode {
+                upstream_plan: input,
+                operator_name: name,
+                function_config: udf,
+                invocation_args: arg_exprs,
+                result_projections: projection.expr,
+                preserve_ordering: opts.ordered,
+                concurrency_limit: opts.max_concurrency,
+                execution_timeout: opts.timeout,
+                resolved_schema: projection.schema,
             }),
         })))
     }
diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs
index 520af335..e9efe96b 100644
--- a/src/sql/analysis/join_rewriter.rs
+++ b/src/sql/analysis/join_rewriter.rs
@@ -1,6 +1,6 @@
 use crate::sql::schema::StreamSchemaProvider;
-use crate::sql::extensions::join::JoinExtension;
-use crate::sql::extensions::key_calculation::KeyCalculationExtension;
+use crate::sql::extensions::join::StreamingJoinNode;
+use crate::sql::extensions::key_calculation::KeyExtractionNode;
 use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer;
 use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata};
 use crate::sql::common::TIMESTAMP_FIELD;
@@ -62,7 +62,7 @@ impl<'a> JoinRewriter<'a> {
         }
     }
 
-    /// [Internal] Wraps a join input in a KeyCalculation layer to facilitate Shuffle/KeyBy distribution.
+    /// [Internal] Wraps a join input in a key-extraction layer to facilitate shuffle / key-by distribution.
     fn build_keyed_side(
         &self,
         input: Arc<LogicalPlan>,
@@ -85,11 +85,11 @@ impl<'a> JoinRewriter<'a> {
             .collect();
 
         let projection = Projection::try_new(projection_exprs, input)?;
-        let key_ext = KeyCalculationExtension::new_named_and_trimmed(
+        let key_ext = KeyExtractionNode::try_new_with_projection(
             LogicalPlan::Projection(projection),
             (0..key_count).collect(),
             side.to_string(),
-        );
+        )?;
 
         Ok(LogicalPlan::Extension(Extension {
             node: Arc::new(key_ext),
@@ -209,13 +209,13 @@ impl TreeNodeRewriter for JoinRewriter<'_> {
         // 4. Resolve Output Watermark (Timestamp Projection)
         let plan_with_timestamp = self.apply_timestamp_resolution(rewritten_join)?;
 
-        // 5. Wrap in JoinExtension for Physical Planning
-        let ttl = (!is_instant).then_some(self.schema_provider.planning_options.ttl);
-        let extension = JoinExtension {
-            rewritten_join: plan_with_timestamp,
+        // 5. Wrap in StreamingJoinNode for physical planning
+        let state_retention_ttl = (!is_instant).then_some(self.schema_provider.planning_options.ttl);
+        let extension = StreamingJoinNode::new(
+            plan_with_timestamp,
             is_instant,
-            ttl,
-        };
+            state_retention_ttl,
+        );
 
         Ok(Transformed::yes(LogicalPlan::Extension(Extension {
             node: Arc::new(extension),
diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs
index e13e2b7e..697d8c97 100644
--- a/src/sql/analysis/mod.rs
+++ b/src/sql/analysis/mod.rs
@@ -41,13 +41,12 @@ use datafusion::sql::sqlparser::parser::Parser;
 use tracing::{debug, info, instrument};
 
 use crate::sql::logical_planner::optimizers::ChainingOptimizer;
-use crate::sql::schema::insert::Insert;
 use crate::sql::schema::table::Table as CatalogTable;
 use crate::sql::functions::{is_json_union, serialize_outgoing_json};
-use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs};
-use crate::sql::extensions::projection::ProjectionExtension;
-use crate::sql::extensions::sink::SinkExtension;
-use crate::sql::extensions::{ StreamExtension};
+use crate::sql::extensions::key_calculation::{KeyExtractionNode, KeyExtractionStrategy};
+use crate::sql::extensions::projection::StreamProjectionNode;
+use crate::sql::extensions::sink::StreamEgressNode;
+use crate::sql::extensions::StreamingOperatorBlueprint;
 use crate::sql::logical_planner::planner::NamedNode;
 use crate::sql::types::SqlConfig;
 
@@ -99,8 +98,8 @@ fn build_sink_inputs(extensions: &[LogicalPlan]) -> HashMap<NamedNode, Vec<Logic
     let mut sink_inputs = HashMap::<NamedNode, Vec<LogicalPlan>>::new();
     for extension in extensions.iter() {
         if let LogicalPlan::Extension(ext) = extension {
-            if let Some(sink_node) = ext.node.as_any().downcast_ref::<SinkExtension>() {
-                if let Some(named_node) = sink_node.node_name() {
+            if let Some(sink_node) = ext.node.as_any().downcast_ref::<StreamEgressNode>() {
+                if let Some(named_node) = sink_node.operator_identity() {
                     let inputs = sink_node
                         .inputs()
                         .into_iter()
@@ -119,11 +118,11 @@ pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<Logic
         return Ok(plan);
     };
 
-    let Some(sink) = ext.node.as_any().downcast_ref::<SinkExtension>() else {
+    let Some(sink) = ext.node.as_any().downcast_ref::<StreamEgressNode>() else {
         return Ok(plan);
     };
 
-    let Some(partition_exprs) = sink.table.partition_exprs() else {
+    let Some(partition_exprs) = sink.destination_table.partition_exprs() else {
         return Ok(plan);
     };
 
@@ -136,11 +135,13 @@ pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<Logic
         .into_iter()
         .map(|input| {
             Ok(LogicalPlan::Extension(Extension {
-                node: Arc::new(KeyCalculationExtension {
-                    name: Some("key-calc-partition".to_string()),
-                    schema: input.schema().clone(),
-                    input: input.clone(),
-                    keys: KeysOrExprs::Exprs(partition_exprs.clone()),
+                node: Arc::new(KeyExtractionNode {
+                    operator_label: Some("key-calc-partition".to_string()),
+                    resolved_schema: input.schema().clone(),
+                    upstream_plan: input.clone(),
+                    extraction_strategy: KeyExtractionStrategy::CalculatedExpressions(
+                        partition_exprs.clone(),
+                    ),
                 }),
             }))
         })
@@ -149,12 +150,12 @@ pub(crate) fn maybe_add_key_extension_to_sink(plan: LogicalPlan) -> Result<Logic
     use datafusion::prelude::col;
     let unkey = LogicalPlan::Extension(Extension {
         node: Arc::new(
-            ProjectionExtension::new(
+            StreamProjectionNode::try_new(
                 inputs,
                 Some("unkey".to_string()),
                 sink.schema().iter().map(|(_, f)| col(f.name())).collect(),
-            )
-            .shuffled(),
+            )?
+            .with_shuffle_routing(),
         ),
     });
 
diff --git a/src/sql/analysis/sink_input_rewriter.rs b/src/sql/analysis/sink_input_rewriter.rs
index b33ac647..e491a75a 100644
--- a/src/sql/analysis/sink_input_rewriter.rs
+++ b/src/sql/analysis/sink_input_rewriter.rs
@@ -1,5 +1,5 @@
-use crate::sql::extensions::sink::SinkExtension;
-use crate::sql::extensions::{StreamExtension};
+use crate::sql::extensions::sink::StreamEgressNode;
+use crate::sql::extensions::StreamingOperatorBlueprint;
 use datafusion::common::Result as DFResult;
 use datafusion::common::tree_node::{Transformed, TreeNodeRecursion, TreeNodeRewriter};
 use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
@@ -29,8 +29,8 @@ impl TreeNodeRewriter for SinkInputRewriter<'_> {
 
     fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
         if let LogicalPlan::Extension(extension) = &node {
-            if let Some(sink_node) = extension.node.as_any().downcast_ref::<SinkExtension>() {
-                if let Some(named_node) = sink_node.node_name() {
+            if let Some(sink_node) = extension.node.as_any().downcast_ref::<StreamEgressNode>() {
+                if let Some(named_node) = sink_node.operator_identity() {
                     if let Some(inputs) = self.sink_inputs.remove(&named_node) {
                         let new_node = LogicalPlan::Extension(Extension {
                             node: Arc::new(sink_node.with_exprs_and_inputs(vec![], inputs)?),
diff --git a/src/sql/analysis/source_metadata_visitor.rs b/src/sql/analysis/source_metadata_visitor.rs
index a49a7e72..0d2e1455 100644
--- a/src/sql/analysis/source_metadata_visitor.rs
+++ b/src/sql/analysis/source_metadata_visitor.rs
@@ -1,5 +1,5 @@
-use crate::sql::extensions::sink::SinkExtension;
-use crate::sql::extensions::table_source::TableSourceExtension;
+use crate::sql::extensions::sink::{StreamEgressNode, STREAM_EGRESS_NODE_NAME};
+use crate::sql::extensions::table_source::{StreamIngestionNode, STREAM_INGESTION_NODE_NAME};
 use crate::sql::schema::StreamSchemaProvider;
 use datafusion::common::Result as DFResult;
 use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor};
@@ -26,20 +26,20 @@ impl<'a> SourceMetadataVisitor<'a> {
         };
 
         let table_name = match node.name() {
-            "TableSourceExtension" => {
-                let ext = node.as_any().downcast_ref::<TableSourceExtension>()?;
-                ext.name.to_string()
+            name if name == STREAM_INGESTION_NODE_NAME => {
+                let ext = node.as_any().downcast_ref::<StreamIngestionNode>()?;
+                ext.source_identifier.to_string()
             }
-            "SinkExtension" => {
-                let ext = node.as_any().downcast_ref::<SinkExtension>()?;
-                ext.name.to_string()
+            name if name == STREAM_EGRESS_NODE_NAME => {
+                let ext = node.as_any().downcast_ref::<StreamEgressNode>()?;
+                ext.target_identifier.to_string()
             }
             _ => return None,
         };
 
         let table = self.schema_provider.get_catalog_table(&table_name)?;
         match table {
-            crate::sql::schema::table::Table::ConnectorTable(t) => t.id,
+            crate::sql::schema::table::Table::ConnectorTable(t) => t.registry_id,
             _ => None,
         }
     }
diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs
index 1bba1551..d642afd5 100644
--- a/src/sql/analysis/source_rewriter.rs
+++ b/src/sql/analysis/source_rewriter.rs
@@ -20,12 +20,12 @@ use datafusion::logical_expr::{
     self, BinaryExpr, Expr, Extension, LogicalPlan, Projection, TableScan,
 };
 
-use crate::sql::schema::connector_table::ConnectorTable;
-use crate::sql::schema::field_spec::FieldSpec;
+use crate::sql::schema::source_table::SourceTable;
+use crate::sql::schema::ColumnDescriptor;
 use crate::sql::schema::table::Table;
 use crate::sql::schema::StreamSchemaProvider;
-use crate::sql::extensions::remote_table::RemoteTableExtension;
-use crate::sql::extensions::watermark_node::WatermarkNode;
+use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
+use crate::sql::extensions::watermark_node::EventTimeWatermarkNode;
 use crate::sql::types::TIMESTAMP_FIELD;
 
 /// Rewrites table scans into proper source nodes with projections and watermarks.
@@ -34,22 +34,35 @@ pub struct SourceRewriter<'a> {
 }
 
 impl SourceRewriter<'_> {
-    fn watermark_expression(table: &ConnectorTable) -> DFResult<Expr> {
-        match table.watermark_field.clone() {
+    /// Expression projecting `col` under `qualifier`: aliased computation logic, else a column ref.
+    fn projection_expr_for_column(col: &ColumnDescriptor, qualifier: &TableReference) -> Expr {
+        let relation = Some(qualifier.clone());
+        let name = col.arrow_field().name().to_string();
+        match col.computation_logic() {
+            Some(logic) => logic.clone().alias_qualified(relation, name),
+            None => Expr::Column(Column {
+                relation,
+                name,
+                spans: Default::default(),
+            }),
+        }
+    }
+
+    fn watermark_expression(table: &SourceTable) -> DFResult<Expr> {
+        match table.temporal_config.watermark_strategy_column.clone() {
             Some(watermark_field) => table
-                .fields
+                .schema_specs
                 .iter()
-                .find_map(|f| {
-                    if f.field().name() == &watermark_field {
-                        return match f {
-                            FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => {
-                                Some(Expr::Column(Column {
-                                    relation: None,
-                                    name: field.name().to_string(),
-                                    spans: Default::default(),
-                                }))
-                            }
-                            FieldSpec::Virtual { expression, .. } => Some(*expression.clone()),
+                .find_map(|c| {
+                    if c.arrow_field().name() == watermark_field.as_str() {
+                        return if let Some(expr) = c.computation_logic() {
+                            Some(expr.clone())
+                        } else {
+                            Some(Expr::Column(Column {
+                                relation: None,
+                                name: c.arrow_field().name().to_string(),
+                                spans: Default::default(),
+                            }))
                         };
                     }
                     None
@@ -73,47 +86,27 @@ impl SourceRewriter<'_> {
     }
 
     fn projection_expressions(
-        table: &ConnectorTable,
+        table: &SourceTable,
         qualifier: &TableReference,
         projection: &Option<Vec<usize>>,
     ) -> DFResult<Vec<Expr>> {
         let mut expressions: Vec<Expr> = table
-            .fields
+            .schema_specs
             .iter()
-            .map(|field| match field {
-                FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => {
-                    Expr::Column(Column {
-                        relation: Some(qualifier.clone()),
-                        name: field.name().to_string(),
-                        spans: Default::default(),
-                    })
-                }
-                FieldSpec::Virtual { field, expression } => expression
-                    .clone()
-                    .alias_qualified(Some(qualifier.clone()), field.name().to_string()),
-            })
+            .map(|col| Self::projection_expr_for_column(col, qualifier))
             .collect();
 
         if let Some(proj) = projection {
             expressions = proj.iter().map(|i| expressions[*i].clone()).collect();
         }
 
-        if let Some(event_time_field) = table.event_time_field.clone() {
+        if let Some(event_time_field) = table.temporal_config.event_column.clone() {
             let expr = table
-                .fields
+                .schema_specs
                 .iter()
-                .find_map(|f| {
-                    if f.field().name() == &event_time_field {
-                        return match f {
-                            FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => {
-                                Some(Expr::Column(Column {
-                                    relation: Some(qualifier.clone()),
-                                    name: field.name().to_string(),
-                                    spans: Default::default(),
-                                }))
-                            }
-                            FieldSpec::Virtual { expression, .. } => Some(*expression.clone()),
-                        };
+                .find_map(|c| {
+                    if c.arrow_field().name() == event_time_field.as_str() {
+                        return Some(Self::projection_expr_for_column(c, qualifier));
                     }
                     None
                 })
@@ -133,10 +126,10 @@ impl SourceRewriter<'_> {
         Ok(expressions)
     }
 
-    fn projection(&self, table_scan: &TableScan, table: &ConnectorTable) -> DFResult<LogicalPlan> {
+    fn projection(&self, table_scan: &TableScan, table: &SourceTable) -> DFResult<LogicalPlan> {
         let qualifier = table_scan.table_name.clone();
 
-        // TODO: replace with TableSourceExtension when available
+        // TODO: replace with StreamIngestionNode when available
         let source_input = LogicalPlan::TableScan(table_scan.clone());
 
         Ok(LogicalPlan::Projection(Projection::try_new(
@@ -148,27 +141,27 @@ impl SourceRewriter<'_> {
     fn mutate_connector_table(
         &self,
         table_scan: &TableScan,
-        table: &ConnectorTable,
+        table: &SourceTable,
     ) -> DFResult<Transformed<LogicalPlan>> {
         let input = self.projection(table_scan, table)?;
 
         let schema = input.schema().clone();
         let remote = LogicalPlan::Extension(Extension {
-            node: Arc::new(RemoteTableExtension {
-                input,
-                name: table_scan.table_name.to_owned(),
-                schema,
-                materialize: true,
+            node: Arc::new(RemoteTableBoundaryNode {
+                upstream_plan: input,
+                table_identifier: table_scan.table_name.to_owned(),
+                resolved_schema: schema,
+                requires_materialization: true,
             }),
         });
 
-        let watermark_node = WatermarkNode::new(
+        let watermark_node = EventTimeWatermarkNode::try_new(
             remote,
             table_scan.table_name.clone(),
             Self::watermark_expression(table)?,
         )
         .map_err(|err| {
-            DataFusionError::Internal(format!("failed to create watermark expression: {err}"))
+            DataFusionError::Internal(format!("failed to create watermark node: {err}"))
         })?;
 
         Ok(Transformed::yes(LogicalPlan::Extension(Extension {
diff --git a/src/sql/analysis/stream_rewriter.rs b/src/sql/analysis/stream_rewriter.rs
index 999b1fb8..22ed3c83 100644
--- a/src/sql/analysis/stream_rewriter.rs
+++ b/src/sql/analysis/stream_rewriter.rs
@@ -1,8 +1,8 @@
 use std::sync::Arc;
 
 use super::StreamSchemaProvider;
-use crate::sql::extensions::StreamExtension;
-use crate::sql::extensions::remote_table::RemoteTableExtension;
+use crate::sql::extensions::StreamingOperatorBlueprint;
+use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
 use crate::sql::analysis::row_time_rewriter::RowTimeRewriter;
 use crate::sql::analysis::{
     aggregate_rewriter::AggregateRewriter, join_rewriter::JoinRewriter,
@@ -137,7 +137,7 @@ impl<'a> StreamRewriter<'a> {
         Ok(Transformed::yes(LogicalPlan::Projection(projection)))
     }
 
-    /// Harmonizes schemas across Union branches and wraps them in RemoteTableExtensions.
+    /// Harmonizes schemas across Union branches and wraps them in RemoteTableBoundaryNodes.
     ///
     /// This ensures that all inputs to a UNION operation share the exact same schema metadata,
     /// preventing "Schema Drift" where different branches have different field qualifiers.
@@ -151,23 +151,23 @@ impl<'a> StreamRewriter<'a> {
             // Optimization: If the node is already a non-transparent Extension,
             // we skip wrapping to avoid unnecessary nesting of logical nodes.
             if let LogicalPlan::Extension(Extension { node }) = input.as_ref() {
-                let stream_ext: &dyn StreamExtension = node.try_into().map_err(|e| {
-                    DataFusionError::Internal(format!("Failed to resolve StreamExtension: {}", e))
+                let stream_ext: &dyn StreamingOperatorBlueprint = node.try_into().map_err(|e| {
+                    DataFusionError::Internal(format!("Failed to resolve StreamingOperatorBlueprint: {}", e))
                 })?;
 
-                if !stream_ext.transparent() {
+                if !stream_ext.is_passthrough_boundary() {
                     continue;
                 }
             }
 
-            // Wrap each branch in a RemoteTableExtension.
+            // Wrap each branch in a RemoteTableBoundaryNode.
             // This acts as a logical "bridge" that forces the input to adopt the master_schema,
             // effectively stripping away branch-specific qualifiers (e.g., table aliases).
-            let remote_ext = Arc::new(RemoteTableExtension {
-                input: input.as_ref().clone(),
-                name: TableReference::bare("union_input"),
-                schema: master_schema.clone(),
-                materialize: false, // Internal logical boundary only; does not require physical sink.
+            let remote_ext = Arc::new(RemoteTableBoundaryNode {
+                upstream_plan: input.as_ref().clone(),
+                table_identifier: TableReference::bare("union_input"),
+                resolved_schema: master_schema.clone(),
+                requires_materialization: false, // Internal logical boundary only; does not require physical sink.
             });
 
             // Atomically replace the input with the wrapped version.
diff --git a/src/sql/analysis/streaming_window_analzer.rs b/src/sql/analysis/streaming_window_analzer.rs
index 59ded792..5eed3d2b 100644
--- a/src/sql/analysis/streaming_window_analzer.rs
+++ b/src/sql/analysis/streaming_window_analzer.rs
@@ -1,12 +1,12 @@
 use std::collections::HashSet;
 use std::sync::Arc;
 
-use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor};
+use datafusion::common::tree_node::{TreeNodeRecursion, TreeNodeVisitor};
 use datafusion::common::{Column, DFSchema, DataFusionError, Result};
-use datafusion::logical_expr::{Aggregate, Expr, Extension, LogicalPlan, expr::Alias};
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan, expr::Alias};
 
-use crate::sql::extensions::aggregate::{AGGREGATE_EXTENSION_NAME, AggregateExtension};
-use crate::sql::extensions::join::JOIN_NODE_NAME;
+use crate::sql::extensions::aggregate::{STREAM_AGG_EXTENSION_NAME, StreamWindowAggregateNode};
+use crate::sql::extensions::join::STREAM_JOIN_NODE_TYPE;
 use crate::sql::types::{DFField, WindowBehavior, WindowType, fields_with_qualifiers, find_window};
 
 /// WindowDetectingVisitor identifies windowing strategies and tracks window-carrying fields
@@ -89,7 +89,7 @@ impl TreeNodeVisitor<'_> for StreamingWindowAnalzer {
     fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
         // Joins require cross-branch validation to ensure left and right sides align on time.
         if let LogicalPlan::Extension(Extension { node }) = node
-            && node.name() == JOIN_NODE_NAME
+            && node.name() == STREAM_JOIN_NODE_TYPE
         {
             let mut branch_windows = HashSet::new();
             for input in node.inputs() {
@@ -159,16 +159,16 @@ impl TreeNodeVisitor<'_> for StreamingWindowAnalzer {
             }
 
             LogicalPlan::Extension(Extension { node })
-                if node.name() == AGGREGATE_EXTENSION_NAME =>
+                if node.name() == STREAM_AGG_EXTENSION_NAME =>
             {
                 let ext = node
                     .as_any()
-                    .downcast_ref::<AggregateExtension>()
+                    .downcast_ref::<StreamWindowAggregateNode>()
                     .ok_or_else(|| {
-                        DataFusionError::Internal("AggregateExtension node is malformed".into())
+                        DataFusionError::Internal("StreamWindowAggregateNode is malformed".into())
                     })?;
 
-                match &ext.window_behavior {
+                match &ext.window_spec {
                     WindowBehavior::FromOperator {
                         window,
                         window_field,
diff --git a/src/sql/analysis/window_function_rewriter.rs b/src/sql/analysis/window_function_rewriter.rs
index ce580eaf..8f195325 100644
--- a/src/sql/analysis/window_function_rewriter.rs
+++ b/src/sql/analysis/window_function_rewriter.rs
@@ -8,8 +8,8 @@ use datafusion_common::DataFusionError;
 use std::sync::Arc;
 use tracing::debug;
 
-use crate::sql::extensions::key_calculation::{KeyCalculationExtension, KeysOrExprs};
-use crate::sql::extensions::window_fn::WindowFunctionExtension;
+use crate::sql::extensions::key_calculation::{KeyExtractionNode, KeyExtractionStrategy};
+use crate::sql::extensions::windows_function::StreamingWindowFunctionNode;
 use crate::sql::analysis::streaming_window_analzer::{StreamingWindowAnalzer, extract_column};
 use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields};
 
@@ -66,7 +66,7 @@ impl WindowFunctionRewriter {
         Ok(matched[0])
     }
 
-    /// Wraps the input in a Projection and KeyCalculationExtension to handle data distribution.
+    /// Wraps the input in a Projection and KeyExtractionNode to handle data distribution.
     fn build_keyed_input(
         &self,
         input: Arc<LogicalPlan>,
@@ -101,11 +101,11 @@ impl WindowFunctionRewriter {
         let projection =
             LogicalPlan::Projection(Projection::try_new_with_schema(exprs, input, keyed_schema)?);
 
-        // 3. Wrap in KeyCalculationExtension for the physical planner
+        // 3. Wrap in KeyExtractionNode for the physical planner
         Ok(LogicalPlan::Extension(Extension {
-            node: Arc::new(KeyCalculationExtension::new(
+            node: Arc::new(KeyExtractionNode::new(
                 projection,
-                KeysOrExprs::Keys((0..key_count).collect()),
+                KeyExtractionStrategy::ColumnIndices((0..key_count).collect()),
             )),
         }))
     }
@@ -182,7 +182,7 @@ impl TreeNodeRewriter for WindowFunctionRewriter {
             LogicalPlan::Window(Window::try_new(vec![final_wf_expr], Arc::new(sorted_plan))?);
 
         Ok(Transformed::yes(LogicalPlan::Extension(Extension {
-            node: Arc::new(WindowFunctionExtension::new(
+            node: Arc::new(StreamingWindowFunctionNode::new(
                 rewritten_window,
                 (0..key_count).collect(),
             )),
diff --git a/src/coordinator/tool/connector_options.rs b/src/sql/common/connector_options.rs
similarity index 96%
rename from src/coordinator/tool/connector_options.rs
rename to src/sql/common/connector_options.rs
index de39872f..308d5197 100644
--- a/src/coordinator/tool/connector_options.rs
+++ b/src/sql/common/connector_options.rs
@@ -307,6 +307,16 @@ impl ConnectorOptions {
     pub fn contains_key(&self, key: &str) -> bool {
         self.options.contains_key(key)
     }
+
+    /// Takes every option still stored in `self` and returns them as string pairs,
+    /// rendering each value via `Display` (for connector runtime config).
+    ///
+    /// `self.options` is replaced by its `Default` value; no path here returns `Err`.
+    pub fn drain_remaining_string_values(&mut self) -> DFResult<HashMap<String, String>> {
+        let remaining = std::mem::take(&mut self.options);
+        let rendered = remaining.into_iter().map(|(key, value)| (key, value.to_string()));
+        Ok(rendered.collect())
+    }
 }
 
 fn duration_from_sql_expr(expr: &Expr) -> Result<Duration, DataFusionError> {
diff --git a/src/sql/common/format_from_opts.rs b/src/sql/common/format_from_opts.rs
new file mode 100644
index 00000000..dc9a43da
--- /dev/null
+++ b/src/sql/common/format_from_opts.rs
@@ -0,0 +1,162 @@
+//! Parse `WITH` clause format / framing / bad-data options (Arroyo-compatible keys).
+
+use std::str::FromStr;
+
+use datafusion::common::{Result as DFResult, plan_datafusion_err, plan_err};
+
+use super::connector_options::ConnectorOptions;
+use super::formats::{
+    AvroFormat, BadData, DecimalEncoding, Format, Framing, JsonCompression, JsonFormat,
+    NewlineDelimitedFraming, ParquetCompression, ParquetFormat, ProtobufFormat, RawBytesFormat,
+    RawStringFormat, TimestampFormat,
+};
+
+impl JsonFormat {
+    /// Builds a [`JsonFormat`] from the `json.*` keys of a `WITH` clause.
+    ///
+    /// Recognised keys are pulled out of `opts`; a key that is absent keeps the
+    /// value from `JsonFormat::default()`. Returns a plan error when a value cannot
+    /// be parsed or when `json.confluent_schema_version` does not fit in a `u32`.
+    pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self> {
+        let mut j = JsonFormat::default();
+        j.confluent_schema_registry = opts
+            .pull_opt_bool("json.confluent_schema_registry")?
+            .unwrap_or(j.confluent_schema_registry);
+        if let Some(v) = opts.pull_opt_u64("json.confluent_schema_version")? {
+            // Checked conversion: a bare `as u32` would silently wrap oversized ids.
+            j.schema_id = Some(u32::try_from(v).map_err(|_| {
+                plan_datafusion_err!("invalid json.confluent_schema_version '{v}'")
+            })?);
+        }
+        j.include_schema = opts.pull_opt_bool("json.include_schema")?.unwrap_or(j.include_schema);
+        j.debezium = opts.pull_opt_bool("json.debezium")?.unwrap_or(j.debezium);
+        j.unstructured = opts.pull_opt_bool("json.unstructured")?.unwrap_or(j.unstructured);
+        if let Some(s) = opts.pull_opt_str("json.timestamp_format")? {
+            j.timestamp_format = TimestampFormat::try_from(s.as_str())
+                .map_err(|_| plan_datafusion_err!("invalid json.timestamp_format '{s}'"))?;
+        }
+        if let Some(s) = opts.pull_opt_str("json.decimal_encoding")? {
+            j.decimal_encoding = DecimalEncoding::try_from(s.as_str())
+                .map_err(|_| plan_datafusion_err!("invalid json.decimal_encoding '{s}'"))?;
+        }
+        if let Some(s) = opts.pull_opt_str("json.compression")? {
+            j.compression = JsonCompression::from_str(&s)
+                .map_err(|e| plan_datafusion_err!("invalid json.compression: {e}"))?;
+        }
+        Ok(j)
+    }
+}
+
+impl Format {
+    /// Builds the [`Format`] named by the `format` option; returns `Ok(None)` when it is unset.
+    pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Option<Self>> {
+        let Some(name) = opts.pull_opt_str("format")? else {
+            return Ok(None);
+        };
+        Ok(Some(match name.to_lowercase().as_str() {
+            kind @ ("json" | "debezium_json") => {
+                let mut json = JsonFormat::from_opts(opts)?;
+                json.debezium |= kind == "debezium_json"; // forced on for `debezium_json`
+                Format::Json(json)
+            }
+            "avro" => Format::Avro(AvroFormat::from_opts(opts)?),
+            "parquet" => Format::Parquet(ParquetFormat::from_opts(opts)?),
+            "protobuf" => Format::Protobuf(ProtobufFormat::from_opts(opts)?),
+            "raw_string" => Format::RawString(RawStringFormat {}),
+            "raw_bytes" => Format::RawBytes(RawBytesFormat {}),
+            _ => return plan_err!("unknown format '{name}'"),
+        }))
+    }
+}
+
+impl AvroFormat {
+    /// Builds an [`AvroFormat`] from the `avro.*` options; flags default to `false`.
+    /// Returns a plan error when `avro.schema_id` does not fit in a `u32`.
+    fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self> {
+        let mut a = AvroFormat {
+            confluent_schema_registry: opts
+                .pull_opt_bool("avro.confluent_schema_registry")?
+                .unwrap_or(false),
+            raw_datums: opts.pull_opt_bool("avro.raw_datums")?.unwrap_or(false),
+            into_unstructured_json: opts
+                .pull_opt_bool("avro.into_unstructured_json")?
+                .unwrap_or(false),
+            schema_id: None,
+        };
+        if let Some(v) = opts.pull_opt_u64("avro.schema_id")? {
+            // Checked conversion: a bare `as u32` would silently wrap oversized ids.
+            a.schema_id = Some(u32::try_from(v).map_err(|_| {
+                plan_datafusion_err!("invalid avro.schema_id '{v}'")
+            })?);
+        }
+        Ok(a)
+    }
+}
+
+impl ParquetFormat {
+    /// Builds a [`ParquetFormat`] from the `parquet.*` options on top of the defaults;
+    /// a `parquet.compression` value that fails to parse is reported as a plan error.
+    fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self> {
+        let mut pq = ParquetFormat::default();
+        if let Some(codec) = opts.pull_opt_str("parquet.compression")? {
+            pq.compression = ParquetCompression::from_str(&codec)
+                .map_err(|e| plan_datafusion_err!("invalid parquet.compression: {e}"))?;
+        }
+        pq.row_group_bytes = opts.pull_opt_u64("parquet.row_group_bytes")?.or(pq.row_group_bytes);
+        Ok(pq)
+    }
+}
+
+impl ProtobufFormat {
+    /// Builds a [`ProtobufFormat`] from the `protobuf.*` options.
+    ///
+    /// Boolean flags that are not supplied default to `false`, `message_name` stays
+    /// `None` unless `protobuf.message_name` is given, and `compiled_schema` is
+    /// always left as `None` here.
+    fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self> {
+        // Field initialisers run top to bottom, so the options are pulled in the
+        // order they are written below.
+        Ok(ProtobufFormat {
+            into_unstructured_json: opts
+                .pull_opt_bool("protobuf.into_unstructured_json")?
+                .unwrap_or(false),
+            message_name: opts.pull_opt_str("protobuf.message_name")?,
+            compiled_schema: None,
+            confluent_schema_registry: opts
+                .pull_opt_bool("protobuf.confluent_schema_registry")?
+                .unwrap_or(false),
+            length_delimited: opts
+                .pull_opt_bool("protobuf.length_delimited")?
+                .unwrap_or(false),
+        })
+    }
+}
+
+impl Framing {
+    /// Builds the optional [`Framing`] from `framing.method`; `Ok(None)` when it is unset.
+    pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Option<Self>> {
+        let Some(method) = opts.pull_opt_str("framing.method")? else {
+            return Ok(None);
+        };
+        // Compare case-insensitively, like the `format` and `bad_data` options.
+        match method.to_lowercase().as_str() {
+            "newline" | "newline_delimited" => Ok(Some(Framing::Newline(NewlineDelimitedFraming {
+                max_line_length: opts.pull_opt_u64("framing.max_line_length")?,
+            }))),
+            _ => plan_err!("unknown framing.method '{method}'"),
+        }
+    }
+}
+
+impl BadData {
+    /// Parses the `bad_data` option, matched case-insensitively against `fail` and `drop`.
+    pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self> {
+        // An absent option behaves exactly like an explicit `fail`.
+        let raw = opts.pull_opt_str("bad_data")?.unwrap_or_else(|| String::from("fail"));
+        match raw.to_lowercase().as_str() {
+            "fail" => Ok(BadData::Fail {}),
+            "drop" => Ok(BadData::Drop {}),
+            _ => plan_err!("invalid bad_data '{raw}'"),
+        }
+    }
+}
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index d03511c0..730d6f37 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -16,11 +16,13 @@
 //! analogous to `arroyo-types` + `arroyo-rpc` in Arroyo.
 
 pub mod arrow_ext;
+pub mod connector_options;
 pub mod control;
 pub mod date;
 pub mod debezium;
 pub mod fs_schema;
 pub mod errors;
+pub mod format_from_opts;
 pub mod formats;
 pub mod hash;
 pub mod message;
@@ -46,8 +48,9 @@ pub use control::{
     ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError,
 };
 pub use fs_schema::{FsSchema, FsSchemaRef};
+pub use connector_options::{ConnectorOptions, FromOpts};
 pub use errors::DataflowError;
-pub use formats::{BadData, Format, Framing, JsonFormat};
+pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat};
 pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
 
 // ── Well-known column names ──
diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs
index 12cde08c..7ba16f7a 100644
--- a/src/sql/extensions/aggregate.rs
+++ b/src/sql/extensions/aggregate.rs
@@ -1,11 +1,24 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::fmt::Formatter;
 use std::sync::Arc;
 use std::time::Duration;
+
 use arrow_array::types::IntervalMonthDayNanoType;
 use datafusion::common::{Column, DFSchemaRef, Result, ScalarValue, internal_err};
-use datafusion::logical_expr;
 use datafusion::logical_expr::{
-    BinaryExpr, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, expr::ScalarFunction,
+    self, expr::ScalarFunction, BinaryExpr, Expr, Extension, LogicalPlan,
+    UserDefinedLogicalNodeCore,
 };
 use datafusion_common::{plan_err, DFSchema, DataFusionError};
 use datafusion_expr::Aggregate;
@@ -13,149 +26,164 @@ use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionC
 use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
 use datafusion_proto::protobuf::PhysicalPlanNode;
 use prost::Message;
-use protocol::grpc::api::{ SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator};
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use protocol::grpc::api::{
+    SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator,
+};
+
 use crate::multifield_partial_ord;
-use crate::sql::logical_planner::{window, FsPhysicalExtensionCodec};
-use crate::sql::extensions::{ NodeWithIncomingEdges, StreamExtension, TimestampAppendExtension};
+use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::{
+    CompiledTopologyNode, StreamingOperatorBlueprint, SystemTimestampInjectorNode,
+};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::sql::logical_planner::planner::{NamedNode, Planner, SplitPlanOutput};
+use crate::sql::logical_planner::{window, FsPhysicalExtensionCodec};
 use crate::sql::types::{
     DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers,
     schema_from_df_fields, schema_from_df_fields_with_metadata,
 };
-use crate::sql::common::{FsSchema, FsSchemaRef};
 
-pub(crate) const AGGREGATE_EXTENSION_NAME: &str = "AggregateExtension";
+pub(crate) const STREAM_AGG_EXTENSION_NAME: &str = "StreamWindowAggregateNode";
+const INTERNAL_TIMESTAMP_COL: &str = "_timestamp";
 
+/// Represents a streaming windowed aggregation node in the logical plan.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct AggregateExtension {
-    pub(crate) window_behavior: WindowBehavior,
-    pub(crate) aggregate: LogicalPlan,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) key_fields: Vec<usize>,
-    pub(crate) final_calculation: LogicalPlan,
+pub(crate) struct StreamWindowAggregateNode {
+    pub(crate) window_spec: WindowBehavior,
+    pub(crate) base_agg_plan: LogicalPlan,
+    pub(crate) output_schema: DFSchemaRef,
+    pub(crate) partition_keys: Vec<usize>,
+    pub(crate) post_aggregation_plan: LogicalPlan,
 }
 
-multifield_partial_ord!(AggregateExtension, aggregate, key_fields, final_calculation);
-
-impl AggregateExtension {
-    pub fn new(
-        window_behavior: WindowBehavior,
-        aggregate: LogicalPlan,
-        key_fields: Vec<usize>,
-    ) -> Self {
-        let final_calculation =
-            Self::final_projection(&aggregate, window_behavior.clone()).unwrap();
-
-        Self {
-            window_behavior,
-            aggregate,
-            schema: final_calculation.schema().clone(),
-            key_fields,
-            final_calculation,
-        }
+multifield_partial_ord!(
+    StreamWindowAggregateNode,
+    base_agg_plan,
+    partition_keys,
+    post_aggregation_plan
+);
+
+impl StreamWindowAggregateNode {
+    /// Safely constructs a new node, computing the final projection without panicking.
+    pub fn try_new(
+        window_spec: WindowBehavior,
+        base_agg_plan: LogicalPlan,
+        partition_keys: Vec<usize>,
+    ) -> Result<Self> {
+        let post_aggregation_plan =
+            WindowBoundaryMath::build_post_aggregation(&base_agg_plan, window_spec.clone())?;
+
+        Ok(Self {
+            window_spec,
+            base_agg_plan,
+            output_schema: post_aggregation_plan.schema().clone(),
+            partition_keys,
+            post_aggregation_plan,
+        })
     }
 
-    pub fn tumbling_window_config(
+    fn build_tumbling_operator(
         &self,
         planner: &Planner,
-        index: usize,
+        node_id: usize,
         input_schema: DFSchemaRef,
-        width: Duration,
+        duration: Duration,
     ) -> Result<LogicalNode> {
-        let binning_function_proto = planner.binning_function_proto(width, input_schema.clone())?;
+        let binning_expr = planner.binning_function_proto(duration, input_schema.clone())?;
+
         let SplitPlanOutput {
             partial_aggregation_plan,
             partial_schema,
             finish_plan,
-        } = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?;
+        } = planner.split_physical_plan(self.partition_keys.clone(), &self.base_agg_plan, true)?;
 
-        let final_physical_plan = planner.sync_plan(&self.final_calculation)?;
-        let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
-            final_physical_plan,
+        let final_physical = planner.sync_plan(&self.post_aggregation_plan)?;
+        let final_physical_proto = PhysicalPlanNode::try_from_physical_plan(
+            final_physical,
             &FsPhysicalExtensionCodec::default(),
         )?;
 
-        let config = TumblingWindowAggregateOperator {
+        let operator_config = TumblingWindowAggregateOperator {
             name: "TumblingWindow".to_string(),
-            width_micros: width.as_micros() as u64,
-            binning_function: binning_function_proto.encode_to_vec(),
+            width_micros: duration.as_micros() as u64,
+            binning_function: binning_expr.encode_to_vec(),
             input_schema: Some(
                 FsSchema::from_schema_keys(
                     Arc::new(input_schema.as_ref().into()),
-                    self.key_fields.clone(),
-                )?.into(),
+                    self.partition_keys.clone(),
+                )?
+                .into(),
             ),
             partial_schema: Some(partial_schema.into()),
             partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(),
             final_aggregation_plan: finish_plan.encode_to_vec(),
-            final_projection: Some(final_physical_plan_node.encode_to_vec()),
+            final_projection: Some(final_physical_proto.encode_to_vec()),
         };
 
         Ok(LogicalNode::single(
-            index as u32,
-            format!("tumbling_{index}"),
+            node_id as u32,
+            format!("tumbling_{node_id}"),
             OperatorName::TumblingWindowAggregate,
-            config.encode_to_vec(),
-            format!("TumblingWindow<{}>", config.name),
+            operator_config.encode_to_vec(),
+            format!("TumblingWindow<{}>", operator_config.name),
             1,
         ))
     }
 
-    pub fn sliding_window_config(
+    fn build_sliding_operator(
         &self,
         planner: &Planner,
-        index: usize,
+        node_id: usize,
         input_schema: DFSchemaRef,
-        width: Duration,
-        slide: Duration,
+        duration: Duration,
+        slide_interval: Duration,
     ) -> Result<LogicalNode> {
-        let binning_function_proto = planner.binning_function_proto(slide, input_schema.clone())?;
+        let binning_expr = planner.binning_function_proto(slide_interval, input_schema.clone())?;
 
         let SplitPlanOutput {
             partial_aggregation_plan,
             partial_schema,
             finish_plan,
-        } = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?;
+        } = planner.split_physical_plan(self.partition_keys.clone(), &self.base_agg_plan, true)?;
 
-        let final_physical_plan = planner.sync_plan(&self.final_calculation)?;
-        let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
-            final_physical_plan,
+        let final_physical = planner.sync_plan(&self.post_aggregation_plan)?;
+        let final_physical_proto = PhysicalPlanNode::try_from_physical_plan(
+            final_physical,
             &FsPhysicalExtensionCodec::default(),
         )?;
 
-        let config = SlidingWindowAggregateOperator {
-            name: format!("SlidingWindow<{width:?}>"),
-            width_micros: width.as_micros() as u64,
-            slide_micros: slide.as_micros() as u64,
-            binning_function: binning_function_proto.encode_to_vec(),
+        let operator_config = SlidingWindowAggregateOperator {
+            name: format!("SlidingWindow<{duration:?}>"),
+            width_micros: duration.as_micros() as u64,
+            slide_micros: slide_interval.as_micros() as u64,
+            binning_function: binning_expr.encode_to_vec(),
             input_schema: Some(
                 FsSchema::from_schema_keys(
                     Arc::new(input_schema.as_ref().into()),
-                    self.key_fields.clone(),
-                )?.into(),
+                    self.partition_keys.clone(),
+                )?
+                .into(),
             ),
             partial_schema: Some(partial_schema.into()),
             partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(),
             final_aggregation_plan: finish_plan.encode_to_vec(),
-            final_projection: final_physical_plan_node.encode_to_vec(),
-            // TODO add final aggregation.
+            final_projection: final_physical_proto.encode_to_vec(),
         };
 
         Ok(LogicalNode::single(
-            index as u32,
-            format!("sliding_window_{index}"),
+            node_id as u32,
+            format!("sliding_window_{node_id}"),
             OperatorName::SlidingWindowAggregate,
-            config.encode_to_vec(),
+            operator_config.encode_to_vec(),
             "sliding window".to_string(),
             1,
         ))
     }
 
-    pub fn session_window_config(
+    fn build_session_operator(
         &self,
         planner: &Planner,
-        index: usize,
+        node_id: usize,
         input_schema: DFSchemaRef,
     ) -> Result<LogicalNode> {
         let WindowBehavior::FromOperator {
@@ -163,426 +191,421 @@ impl AggregateExtension {
             window_index,
             window_field,
             is_nested: false,
-        } = &self.window_behavior
+        } = &self.window_spec
         else {
-            return plan_err!("expected sliding window");
+            return plan_err!("Expected standard session window configuration");
         };
-        let output_schema = fields_with_qualifiers(self.aggregate.schema());
-        let LogicalPlan::Aggregate(agg) = self.aggregate.clone() else {
-            return plan_err!("expected aggregate");
+
+        let output_fields = fields_with_qualifiers(self.base_agg_plan.schema());
+        let LogicalPlan::Aggregate(base_agg) = self.base_agg_plan.clone() else {
+            return plan_err!("Base plan must be an Aggregate node");
         };
-        let key_count = self.key_fields.len();
-        let unkeyed_aggregate_schema = Arc::new(schema_from_df_fields_with_metadata(
-            &output_schema[key_count..],
-            self.aggregate.schema().metadata().clone(),
+
+        let key_count = self.partition_keys.len();
+        let unkeyed_schema = Arc::new(schema_from_df_fields_with_metadata(
+            &output_fields[key_count..],
+            self.base_agg_plan.schema().metadata().clone(),
         )?);
 
-        let unkeyed_aggregate = Aggregate::try_new_with_schema(
-            agg.input.clone(),
+        let unkeyed_agg_node = Aggregate::try_new_with_schema(
+            base_agg.input.clone(),
             vec![],
-            agg.aggr_expr.clone(),
-            unkeyed_aggregate_schema.clone(),
+            base_agg.aggr_expr.clone(),
+            unkeyed_schema,
         )?;
-        let aggregate_plan = planner.sync_plan(&LogicalPlan::Aggregate(unkeyed_aggregate))?;
 
-        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
-            aggregate_plan,
+        let physical_agg = planner.sync_plan(&LogicalPlan::Aggregate(unkeyed_agg_node))?;
+        let physical_agg_proto = PhysicalPlanNode::try_from_physical_plan(
+            physical_agg,
             &FsPhysicalExtensionCodec::default(),
         )?;
-        let input_schema = FsSchema::from_schema_keys(
-            Arc::new(input_schema.as_ref().into()),
-            self.key_fields.clone(),
-        )?;
 
-        let config = SessionWindowAggregateOperator {
-            name: format!("session_window_{index}"),
+        let operator_config = SessionWindowAggregateOperator {
+            name: format!("session_window_{node_id}"),
             gap_micros: gap.as_micros() as u64,
             window_field_name: window_field.name().to_string(),
             window_index: *window_index as u64,
-            input_schema: Some(input_schema.into()),
+            input_schema: Some(
+                FsSchema::from_schema_keys(
+                    Arc::new(input_schema.as_ref().into()),
+                    self.partition_keys.clone(),
+                )?
+                .into(),
+            ),
             unkeyed_aggregate_schema: None,
             partial_aggregation_plan: vec![],
-            final_aggregation_plan: physical_plan_node.encode_to_vec(),
+            final_aggregation_plan: physical_agg_proto.encode_to_vec(),
         };
 
         Ok(LogicalNode::single(
-            index as u32,
+            node_id as u32,
             format!("SessionWindow<{gap:?}>"),
             OperatorName::SessionWindowAggregate,
-            config.encode_to_vec(),
-            config.name.clone(),
+            operator_config.encode_to_vec(),
+            operator_config.name.clone(),
             1,
         ))
     }
 
-    pub fn instant_window_config(
+    fn build_instant_operator(
         &self,
         planner: &Planner,
-        index: usize,
+        node_id: usize,
         input_schema: DFSchemaRef,
-        use_final_projection: bool,
+        apply_final_projection: bool,
     ) -> Result<LogicalNode> {
-        let binning_function = planner.create_physical_expr(
-            &Expr::Column(Column::new_unqualified("_timestamp".to_string())),
-            &input_schema,
-        )?;
-        let binning_function_proto =
-            serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})?;
-
-        let final_projection = use_final_projection
-            .then(|| {
-                let final_physical_plan = planner.sync_plan(&self.final_calculation)?;
-                let final_physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
-                    final_physical_plan,
-                    &FsPhysicalExtensionCodec::default(),
-                )?;
-                Ok::<Vec<u8>, DataFusionError>(final_physical_plan_node.encode_to_vec())
-            })
-            .transpose()?;
+        let ts_column_expr =
+            Expr::Column(Column::new_unqualified(INTERNAL_TIMESTAMP_COL.to_string()));
+        let binning_expr = planner.create_physical_expr(&ts_column_expr, &input_schema)?;
+        let binning_proto = serialize_physical_expr(&binning_expr, &DefaultPhysicalExtensionCodec {})?;
+
+        let final_projection_payload = if apply_final_projection {
+            let physical_plan = planner.sync_plan(&self.post_aggregation_plan)?;
+            let proto_node = PhysicalPlanNode::try_from_physical_plan(
+                physical_plan,
+                &FsPhysicalExtensionCodec::default(),
+            )?;
+            Some(proto_node.encode_to_vec())
+        } else {
+            None
+        };
 
         let SplitPlanOutput {
             partial_aggregation_plan,
             partial_schema,
             finish_plan,
-        } = planner.split_physical_plan(self.key_fields.clone(), &self.aggregate, true)?;
+        } = planner.split_physical_plan(self.partition_keys.clone(), &self.base_agg_plan, true)?;
 
-        let config = TumblingWindowAggregateOperator {
+        let operator_config = TumblingWindowAggregateOperator {
             name: "InstantWindow".to_string(),
             width_micros: 0,
-            binning_function: binning_function_proto.encode_to_vec(),
+            binning_function: binning_proto.encode_to_vec(),
             input_schema: Some(
                 FsSchema::from_schema_keys(
                     Arc::new(input_schema.as_ref().into()),
-                    self.key_fields.clone(),
-                )?.into(),
+                    self.partition_keys.clone(),
+                )?
+                .into(),
             ),
             partial_schema: Some(partial_schema.into()),
             partial_aggregation_plan: partial_aggregation_plan.encode_to_vec(),
             final_aggregation_plan: finish_plan.encode_to_vec(),
-            final_projection,
+            final_projection: final_projection_payload,
         };
 
         Ok(LogicalNode::single(
-            index as u32,
-            format!("instant_window_{index}"),
+            node_id as u32,
+            format!("instant_window_{node_id}"),
             OperatorName::TumblingWindowAggregate,
-            config.encode_to_vec(),
+            operator_config.encode_to_vec(),
             "instant window".to_string(),
             1,
         ))
     }
+}
+
+impl StreamingOperatorBlueprint for StreamWindowAggregateNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn compile_to_graph_node(
+        &self,
+        planner: &Planner,
+        node_id: usize,
+        mut input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        if input_schemas.len() != 1 {
+            return plan_err!("StreamWindowAggregateNode requires exactly one input schema");
+        }
+
+        let raw_schema = input_schemas.remove(0);
+        let df_schema = Arc::new(DFSchema::try_from(raw_schema.schema.as_ref().clone())?);
+
+        let logical_operator = match &self.window_spec {
+            WindowBehavior::FromOperator { window, is_nested, .. } => {
+                if *is_nested {
+                    self.build_instant_operator(planner, node_id, df_schema, true)?
+                } else {
+                    match window {
+                        WindowType::Tumbling { width } => {
+                            self.build_tumbling_operator(planner, node_id, df_schema, *width)?
+                        }
+                        WindowType::Sliding { width, slide } => {
+                            self.build_sliding_operator(planner, node_id, df_schema, *width, *slide)?
+                        }
+                        WindowType::Session { .. } => {
+                            self.build_session_operator(planner, node_id, df_schema)?
+                        }
+                        WindowType::Instant => {
+                            return plan_err!(
+                                "Instant window is invalid within standard operator context"
+                            );
+                        }
+                    }
+                }
+            }
+            WindowBehavior::InData => self
+                .build_instant_operator(planner, node_id, df_schema, false)
+                .map_err(|e| e.context("Failed compiling instant window"))?,
+        };
+
+        let link = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*raw_schema).clone());
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_operator,
+            routing_edges: vec![link],
+        })
+    }
+
+    fn yielded_schema(&self) -> FsSchema {
+        let schema_ref = (*self.output_schema).clone().into();
+        FsSchema::from_schema_unkeyed(Arc::new(schema_ref)).expect(
+            "StreamWindowAggregateNode output schema must contain timestamp column",
+        )
+    }
+}
+
+impl UserDefinedLogicalNodeCore for StreamWindowAggregateNode {
+    fn name(&self) -> &str {
+        STREAM_AGG_EXTENSION_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.base_agg_plan]
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.output_schema
+    }
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
 
-    // projection assuming that _timestamp has been populated with the start of the bin.
-    pub fn final_projection(
-        aggregate_plan: &LogicalPlan,
-        window_behavior: WindowBehavior,
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        let spec_desc = match &self.window_spec {
+            WindowBehavior::InData => "InData".to_string(),
+            WindowBehavior::FromOperator { window, .. } => format!("FromOperator({window:?})"),
+        };
+        write!(
+            f,
+            "StreamWindowAggregate: {} | spec: {}",
+            self.schema(),
+            spec_desc
+        )
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if inputs.len() != 1 {
+            return internal_err!("StreamWindowAggregateNode expects exactly 1 input");
+        }
+        Self::try_new(
+            self.window_spec.clone(),
+            inputs[0].clone(),
+            self.partition_keys.clone(),
+        )
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Dedicated boundary math for window bin / post-aggregation projection
+// -----------------------------------------------------------------------------
+
+struct WindowBoundaryMath;
+
+impl WindowBoundaryMath {
+    fn interval_nanos(nanos: i64) -> Expr {
+        Expr::Literal(
+            ScalarValue::IntervalMonthDayNano(Some(
+                IntervalMonthDayNanoType::make_value(0, 0, nanos),
+            )),
+            None,
+        )
+    }
+
+    fn build_post_aggregation(
+        agg_plan: &LogicalPlan,
+        window_spec: WindowBehavior,
     ) -> Result<LogicalPlan> {
-        let timestamp_field: DFField = aggregate_plan.inputs()[0]
+        let ts_field: DFField = agg_plan
+            .inputs()
+            .first()
+            .ok_or_else(|| DataFusionError::Plan("Aggregate has no inputs".into()))?
             .schema()
             .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)?
             .into();
-        let timestamp_append = LogicalPlan::Extension(Extension {
-            node: Arc::new(TimestampAppendExtension::new(
-                aggregate_plan.clone(),
-                timestamp_field.qualifier().cloned(),
-            )),
+
+        let plan_with_ts = LogicalPlan::Extension(Extension {
+            node: Arc::new(SystemTimestampInjectorNode::try_new(
+                agg_plan.clone(),
+                ts_field.qualifier().cloned(),
+            )?),
         });
-        let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema());
-        let mut aggregate_expressions: Vec<_> = aggregate_fields
-            .iter()
-            .map(|field| Expr::Column(field.qualified_column()))
-            .collect();
-        let (window_field, window_index, width, is_nested) = match window_behavior {
-            WindowBehavior::InData => return Ok(timestamp_append),
+
+        let (win_field, win_index, duration, is_nested) = match window_spec {
+            WindowBehavior::InData => return Ok(plan_with_ts),
             WindowBehavior::FromOperator {
                 window,
                 window_field,
                 window_index,
                 is_nested,
             } => match window {
-                WindowType::Tumbling { width, .. } | WindowType::Sliding { width, .. } => {
+                WindowType::Tumbling { width } | WindowType::Sliding { width, .. } => {
                     (window_field, window_index, width, is_nested)
                 }
                 WindowType::Session { .. } => {
                     return Ok(LogicalPlan::Extension(Extension {
-                        node: Arc::new(WindowAppendExtension::new(
-                            timestamp_append,
+                        node: Arc::new(InjectWindowFieldNode::try_new(
+                            plan_with_ts,
                             window_field,
                             window_index,
-                        )),
+                        )?),
                     }));
                 }
-                WindowType::Instant => return Ok(timestamp_append),
+                WindowType::Instant => return Ok(plan_with_ts),
             },
         };
+
         if is_nested {
-            return Self::nested_final_projection(
-                timestamp_append,
-                window_field,
-                window_index,
-                width,
-            );
+            return Self::build_nested_projection(plan_with_ts, win_field, win_index, duration);
         }
-        let timestamp_column =
-            Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name());
-        aggregate_fields.insert(window_index, window_field.clone());
 
-        let window_expression = Expr::ScalarFunction(ScalarFunction {
+        let mut output_fields = fields_with_qualifiers(agg_plan.schema());
+        let mut projections: Vec<_> = output_fields
+            .iter()
+            .map(|f| Expr::Column(f.qualified_column()))
+            .collect();
+
+        let ts_col_expr = Expr::Column(Column::new(ts_field.qualifier().cloned(), ts_field.name()));
+
+        output_fields.insert(win_index, win_field.clone());
+
+        let win_func_expr = Expr::ScalarFunction(ScalarFunction {
             func: window(),
             args: vec![
-                // copy bin_start as first argument
-                Expr::Column(timestamp_column.clone()),
-                // add width interval to _timestamp for bin end
+                ts_col_expr.clone(),
                 Expr::BinaryExpr(BinaryExpr {
-                    left: Box::new(Expr::Column(timestamp_column.clone())),
+                    left: Box::new(ts_col_expr.clone()),
                     op: logical_expr::Operator::Plus,
-                    right: Box::new(Expr::Literal(
-                        ScalarValue::IntervalMonthDayNano(Some(
-                            IntervalMonthDayNanoType::make_value(0, 0, width.as_nanos() as i64),
-                        )),
-                        None,
-                    )),
+                    right: Box::new(Self::interval_nanos(duration.as_nanos() as i64)),
                 }),
             ],
         });
-        aggregate_expressions.insert(
-            window_index,
-            window_expression
-                .alias_qualified(window_field.qualifier().cloned(), window_field.name()),
+
+        projections.insert(
+            win_index,
+            win_func_expr.alias_qualified(win_field.qualifier().cloned(), win_field.name()),
         );
-        aggregate_fields.push(timestamp_field);
-        let bin_end_calculation = Expr::BinaryExpr(BinaryExpr {
-            left: Box::new(Expr::Column(timestamp_column.clone())),
+
+        output_fields.push(ts_field);
+
+        let bin_end_expr = Expr::BinaryExpr(BinaryExpr {
+            left: Box::new(ts_col_expr),
             op: logical_expr::Operator::Plus,
-            right: Box::new(Expr::Literal(
-                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(
-                    0,
-                    0,
-                    (width.as_nanos() - 1) as i64,
-                ))),
-                None,
-            )),
+            right: Box::new(Self::interval_nanos((duration.as_nanos() - 1) as i64)),
         });
-        aggregate_expressions.push(bin_end_calculation);
-        Ok(LogicalPlan::Projection(
-            logical_expr::Projection::try_new_with_schema(
-                aggregate_expressions,
-                Arc::new(timestamp_append),
-                Arc::new(schema_from_df_fields(&aggregate_fields)?),
-            )?,
-        ))
+        projections.push(bin_end_expr);
+
+        Ok(LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema(
+            projections,
+            Arc::new(plan_with_ts),
+            Arc::new(schema_from_df_fields(&output_fields)?),
+        )?))
     }
 
-    fn nested_final_projection(
-        aggregate_plan: LogicalPlan,
-        window_field: DFField,
-        window_index: usize,
-        width: Duration,
+    fn build_nested_projection(
+        plan: LogicalPlan,
+        win_field: DFField,
+        win_index: usize,
+        duration: Duration,
     ) -> Result<LogicalPlan> {
-        let timestamp_field: DFField = aggregate_plan
+        let ts_field: DFField = plan
             .schema()
-            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)
-            .unwrap()
+            .qualified_field_with_unqualified_name(TIMESTAMP_FIELD)?
             .into();
-        let timestamp_column =
-            Column::new(timestamp_field.qualifier().cloned(), timestamp_field.name());
+        let ts_col_expr = Expr::Column(Column::new(ts_field.qualifier().cloned(), ts_field.name()));
 
-        let mut aggregate_fields = fields_with_qualifiers(aggregate_plan.schema());
-        let mut aggregate_expressions: Vec<_> = aggregate_fields
+        let mut output_fields = fields_with_qualifiers(plan.schema());
+        let mut projections: Vec<_> = output_fields
             .iter()
-            .map(|field| Expr::Column(field.qualified_column()))
+            .map(|f| Expr::Column(f.qualified_column()))
             .collect();
-        aggregate_fields.insert(window_index, window_field.clone());
-        let window_expression = Expr::ScalarFunction(ScalarFunction {
+
+        output_fields.insert(win_index, win_field.clone());
+
+        let win_func_expr = Expr::ScalarFunction(ScalarFunction {
             func: window(),
             args: vec![
-                // calculate the start of the bin
                 Expr::BinaryExpr(BinaryExpr {
-                    left: Box::new(Expr::Column(timestamp_column.clone())),
+                    left: Box::new(ts_col_expr.clone()),
                     op: logical_expr::Operator::Minus,
-                    right: Box::new(Expr::Literal(
-                        ScalarValue::IntervalMonthDayNano(Some(
-                            IntervalMonthDayNanoType::make_value(0, 0, width.as_nanos() as i64 - 1),
-                        )),
-                        None,
-                    )),
+                    right: Box::new(Self::interval_nanos(duration.as_nanos() as i64 - 1)),
                 }),
-                // add 1 nanosecond to the timestamp
                 Expr::BinaryExpr(BinaryExpr {
-                    left: Box::new(Expr::Column(timestamp_column.clone())),
+                    left: Box::new(ts_col_expr),
                     op: logical_expr::Operator::Plus,
-                    right: Box::new(Expr::Literal(
-                        ScalarValue::IntervalMonthDayNano(Some(
-                            IntervalMonthDayNanoType::make_value(0, 0, 1),
-                        )),
-                        None,
-                    )),
+                    right: Box::new(Self::interval_nanos(1)),
                 }),
             ],
         });
-        aggregate_expressions.insert(
-            window_index,
-            window_expression
-                .alias_qualified(window_field.qualifier().cloned(), window_field.name()),
-        );
-        Ok(LogicalPlan::Projection(
-            logical_expr::Projection::try_new_with_schema(
-                aggregate_expressions,
-                Arc::new(aggregate_plan),
-                Arc::new(schema_from_df_fields(&aggregate_fields).unwrap()),
-            )
-                .unwrap(),
-        ))
-    }
-}
-
-impl UserDefinedLogicalNodeCore for AggregateExtension {
-    fn name(&self) -> &str {
-        AGGREGATE_EXTENSION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.aggregate]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(
-            f,
-            "AggregateExtension: {} | window_behavior: {:?}",
-            self.schema(),
-            match &self.window_behavior {
-                WindowBehavior::InData => "InData".to_string(),
-                WindowBehavior::FromOperator { window, .. } => format!("FromOperator({window:?})"),
-            }
-        )
-    }
 
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
-        }
-
-        Ok(Self::new(
-            self.window_behavior.clone(),
-            inputs[0].clone(),
-            self.key_fields.clone(),
-        ))
-    }
-}
-
-impl StreamExtension for AggregateExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn plan_node(
-        &self,
-        planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        if input_schemas.len() != 1 {
-            return plan_err!("AggregateExtension should have exactly one input");
-        }
-        let input_schema = input_schemas[0].clone();
-        let input_df_schema =
-            Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone()).unwrap());
-        let logical_node = match &self.window_behavior {
-            WindowBehavior::FromOperator {
-                window,
-                window_field: _,
-                window_index: _,
-                is_nested,
-            } => {
-                if *is_nested {
-                    self.instant_window_config(planner, index, input_df_schema, true)?
-                } else {
-                    match window {
-                        WindowType::Tumbling { width } => {
-                            self.tumbling_window_config(planner, index, input_df_schema, *width)?
-                        }
-                        WindowType::Sliding { width, slide } => self.sliding_window_config(
-                            planner,
-                            index,
-                            input_df_schema,
-                            *width,
-                            *slide,
-                        )?,
-                        WindowType::Instant => {
-                            return plan_err!(
-                                "instant window not supported in aggregate extension"
-                            );
-                        }
-                        WindowType::Session { gap: _ } => {
-                            self.session_window_config(planner, index, input_df_schema)?
-                        }
-                    }
-                }
-            }
-            WindowBehavior::InData => self
-                .instant_window_config(planner, index, input_df_schema, false)
-                .map_err(|e| e.context("instant window"))?,
-        };
-        let edge = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone());
-        Ok(NodeWithIncomingEdges {
-            node: logical_node,
-            edges: vec![edge],
-        })
-    }
+        projections.insert(
+            win_index,
+            win_func_expr.alias_qualified(win_field.qualifier().cloned(), win_field.name()),
+        );
 
-    fn output_schema(&self) -> FsSchema {
-        let output_schema = (*self.schema).clone().into();
-        FsSchema::from_schema_keys(Arc::new(output_schema), vec![]).unwrap()
+        Ok(LogicalPlan::Projection(logical_expr::Projection::try_new_with_schema(
+            projections,
+            Arc::new(plan),
+            Arc::new(schema_from_df_fields(&output_fields)?),
+        )?))
     }
 }
 
-/*
-This is a plan used for appending a _timestamp field to an existing record batch.
- */
+// -----------------------------------------------------------------------------
+// Field injection node (session window column placement)
+// -----------------------------------------------------------------------------
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-struct WindowAppendExtension {
-    pub(crate) input: LogicalPlan,
-    pub(crate) window_field: DFField,
-    pub(crate) window_index: usize,
-    pub(crate) schema: DFSchemaRef,
+struct InjectWindowFieldNode {
+    pub(crate) upstream_plan: LogicalPlan,
+    pub(crate) target_field: DFField,
+    pub(crate) insertion_index: usize,
+    pub(crate) new_schema: DFSchemaRef,
 }
 
-multifield_partial_ord!(WindowAppendExtension, input, window_index);
-
-impl WindowAppendExtension {
-    fn new(input: LogicalPlan, window_field: DFField, window_index: usize) -> Self {
-        let mut fields = fields_with_qualifiers(input.schema());
-        fields.insert(window_index, window_field.clone());
-        let metadata = input.schema().metadata().clone();
-        Self {
-            input,
-            window_field,
-            window_index,
-            schema: Arc::new(schema_from_df_fields_with_metadata(&fields, metadata).unwrap()),
-        }
+multifield_partial_ord!(InjectWindowFieldNode, upstream_plan, insertion_index);
+
+impl InjectWindowFieldNode {
+    fn try_new(
+        upstream_plan: LogicalPlan,
+        target_field: DFField,
+        insertion_index: usize,
+    ) -> Result<Self> {
+        let mut fields = fields_with_qualifiers(upstream_plan.schema());
+        fields.insert(insertion_index, target_field.clone());
+        let meta = upstream_plan.schema().metadata().clone();
+
+        Ok(Self {
+            upstream_plan,
+            target_field,
+            insertion_index,
+            new_schema: Arc::new(schema_from_df_fields_with_metadata(&fields, meta)?),
+        })
     }
 }
 
-impl UserDefinedLogicalNodeCore for WindowAppendExtension {
+impl UserDefinedLogicalNodeCore for InjectWindowFieldNode {
     fn name(&self) -> &str {
-        "WindowAppendExtension"
+        "InjectWindowFieldNode"
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.new_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
@@ -592,16 +615,19 @@ impl UserDefinedLogicalNodeCore for WindowAppendExtension {
     fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         write!(
             f,
-            "WindowAppendExtension: field {:?} at {}",
-            self.window_field, self.window_index
+            "InjectWindowField: insert {:?} at offset {}",
+            self.target_field, self.insertion_index
         )
     }
 
     fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self::new(
+        if inputs.len() != 1 {
+            return internal_err!("InjectWindowFieldNode expects exactly 1 input");
+        }
+        Self::try_new(
             inputs[0].clone(),
-            self.window_field.clone(),
-            self.window_index,
-        ))
+            self.target_field.clone(),
+            self.insertion_index,
+        )
     }
 }
diff --git a/src/sql/extensions/async_udf.rs b/src/sql/extensions/async_udf.rs
index da0bdff1..147e0f90 100644
--- a/src/sql/extensions/async_udf.rs
+++ b/src/sql/extensions/async_udf.rs
@@ -18,170 +18,225 @@ use datafusion::common::{DFSchemaRef, Result};
 use datafusion::logical_expr::{
     Expr, LogicalPlan, UserDefinedLogicalNode, UserDefinedLogicalNodeCore,
 };
-use datafusion_common::internal_err;
+use datafusion_common::{internal_err, plan_err};
 use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
 use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
 use prost::Message;
 use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering};
 
 use crate::multifield_partial_ord;
-use crate::sql::extensions::constants::ASYNC_RESULT_FIELD;
-use crate::sql::extensions::stream_extension::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{
     DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName,
 };
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields};
-use crate::sql::common::{FsSchema, FsSchemaRef};
 
+pub(crate) const NODE_TYPE_NAME: &str = "AsyncFunctionExecutionNode";
+pub use crate::sql::extensions::constants::ASYNC_RESULT_FIELD; // re-export: keep one definition
+
+/// Represents a logical node that executes an external asynchronous function (UDF)
+/// and projects the final results into the streaming pipeline.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct AsyncUDFExtension {
-    pub(crate) input: Arc<LogicalPlan>,
-    pub(crate) name: String,
-    pub(crate) udf: DylibUdfConfig,
-    pub(crate) arg_exprs: Vec<Expr>,
-    pub(crate) final_exprs: Vec<Expr>,
-    pub(crate) ordered: bool,
-    pub(crate) max_concurrency: usize,
-    pub(crate) timeout: Duration,
-    pub(crate) final_schema: DFSchemaRef,
+pub(crate) struct AsyncFunctionExecutionNode {
+    pub(crate) upstream_plan: Arc<LogicalPlan>,
+    pub(crate) operator_name: String,
+    pub(crate) function_config: DylibUdfConfig,
+    pub(crate) invocation_args: Vec<Expr>,
+    pub(crate) result_projections: Vec<Expr>,
+    pub(crate) preserve_ordering: bool,
+    pub(crate) concurrency_limit: usize,
+    pub(crate) execution_timeout: Duration,
+    pub(crate) resolved_schema: DFSchemaRef,
 }
 
 multifield_partial_ord!(
-    AsyncUDFExtension,
-    input,
-    name,
-    udf,
-    arg_exprs,
-    final_exprs,
-    ordered,
-    max_concurrency,
-    timeout
+    AsyncFunctionExecutionNode,
+    upstream_plan,
+    operator_name,
+    function_config,
+    invocation_args,
+    result_projections,
+    preserve_ordering,
+    concurrency_limit,
+    execution_timeout
 );
 
-impl StreamExtension for AsyncUDFExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn plan_node(
+impl AsyncFunctionExecutionNode {
+    /// Compiles logical expressions into serialized physical protobuf bytes.
+    fn compile_physical_expressions(
         &self,
         planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        let arg_exprs = self
-            .arg_exprs
+        expressions: &[Expr],
+        schema_context: &DFSchemaRef,
+    ) -> Result<Vec<Vec<u8>>> {
+        expressions
             .iter()
-            .map(|e| {
-                let p = planner.create_physical_expr(e, self.input.schema())?;
-                Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec())
+            .map(|logical_expr| {
+                let physical_expr = planner.create_physical_expr(logical_expr, schema_context)?;
+                let serialized =
+                    serialize_physical_expr(&physical_expr, &DefaultPhysicalExtensionCodec {})?;
+                Ok(serialized.encode_to_vec())
             })
-            .collect::<Result<Vec<_>>>()?;
+            .collect()
+    }
+
+    /// Computes the intermediate schema which bridges the upstream output
+    /// and the raw asynchronous result injected by the UDF execution.
+    fn compute_intermediate_schema(&self) -> Result<DFSchemaRef> {
+        let mut fields = fields_with_qualifiers(self.upstream_plan.schema());
 
-        let mut final_fields = fields_with_qualifiers(self.input.schema());
-        final_fields.push(DFField::new(
+        let raw_result_field = DFField::new(
             None,
             ASYNC_RESULT_FIELD,
-            self.udf.return_type.clone(),
+            self.function_config.return_type.clone(),
             true,
-        ));
-        let post_udf_schema = schema_from_df_fields(&final_fields)?;
+        );
+        fields.push(raw_result_field);
 
-        let final_exprs = self
-            .final_exprs
-            .iter()
-            .map(|e| {
-                let p = planner.create_physical_expr(e, &post_udf_schema)?;
-                Ok(serialize_physical_expr(&p, &DefaultPhysicalExtensionCodec {})?.encode_to_vec())
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        let config = AsyncUdfOperator {
-            name: self.name.clone(),
-            udf: Some(self.udf.clone().into()),
-            arg_exprs,
-            final_exprs,
-            ordering: if self.ordered {
-                AsyncUdfOrdering::Ordered as i32
-            } else {
-                AsyncUdfOrdering::Unordered as i32
-            },
-            max_concurrency: self.max_concurrency as u32,
-            timeout_micros: self.timeout.as_micros() as u64,
+        Ok(Arc::new(schema_from_df_fields(&fields)?))
+    }
+
+    fn to_protobuf_config(
+        &self,
+        compiled_args: Vec<Vec<u8>>,
+        compiled_projections: Vec<Vec<u8>>,
+    ) -> AsyncUdfOperator {
+        let ordering_strategy = if self.preserve_ordering {
+            AsyncUdfOrdering::Ordered
+        } else {
+            AsyncUdfOrdering::Unordered
         };
 
-        let node = LogicalNode::single(
-            index as u32,
-            format!("async_udf_{index}"),
+        AsyncUdfOperator {
+            name: self.operator_name.clone(),
+            udf: Some(self.function_config.clone().into()),
+            arg_exprs: compiled_args,
+            final_exprs: compiled_projections,
+            ordering: ordering_strategy as i32,
+            max_concurrency: self.concurrency_limit as u32,
+            timeout_micros: self.execution_timeout.as_micros() as u64,
+        }
+    }
+}
+
+impl StreamingOperatorBlueprint for AsyncFunctionExecutionNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn compile_to_graph_node(
+        &self,
+        planner: &Planner,
+        node_index: usize,
+        mut input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        if input_schemas.len() != 1 {
+            return plan_err!("AsyncFunctionExecutionNode requires exactly one input schema");
+        }
+
+        let compiled_args = self.compile_physical_expressions(
+            planner,
+            &self.invocation_args,
+            self.upstream_plan.schema(),
+        )?;
+
+        let intermediate_schema = self.compute_intermediate_schema()?;
+        let compiled_projections = self.compile_physical_expressions(
+            planner,
+            &self.result_projections,
+            &intermediate_schema,
+        )?;
+
+        let operator_config = self.to_protobuf_config(compiled_args, compiled_projections);
+
+        let logical_node = LogicalNode::single(
+            node_index as u32,
+            format!("async_udf_{node_index}"),
             OperatorName::AsyncUdf,
-            config.encode_to_vec(),
-            format!("async_udf<{}>", self.name),
+            operator_config.encode_to_vec(),
+            format!("AsyncUdf<{}>", self.operator_name),
             1,
         );
 
-        let incoming_edge =
-            LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone());
-        Ok(NodeWithIncomingEdges {
-            node,
-            edges: vec![incoming_edge],
+        let upstream_schema = input_schemas.remove(0);
+        let data_edge =
+            LogicalEdge::project_all(LogicalEdgeType::Forward, (*upstream_schema).clone());
+
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_node,
+            routing_edges: vec![data_edge],
         })
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_fields(
-            self.final_schema
-                .fields()
-                .iter()
-                .map(|f| (**f).clone())
-                .collect(),
-        )
+    fn yielded_schema(&self) -> FsSchema {
+        let arrow_fields: Vec<_> = self
+            .resolved_schema
+            .fields()
+            .iter()
+            .map(|f| (**f).clone())
+            .collect();
+
+        FsSchema::from_fields(arrow_fields)
     }
 }
 
-impl UserDefinedLogicalNodeCore for AsyncUDFExtension {
+impl UserDefinedLogicalNodeCore for AsyncFunctionExecutionNode {
     fn name(&self) -> &str {
-        "AsyncUDFNode"
+        NODE_TYPE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.final_schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
-        self.arg_exprs
+        self.invocation_args
             .iter()
-            .chain(self.final_exprs.iter())
-            .map(|e| e.to_owned())
+            .chain(self.result_projections.iter())
+            .cloned()
             .collect()
     }
 
     fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "AsyncUdfExtension<{}>: {}", self.name, self.final_schema)
+        write!(
+            f,
+            "AsyncFunctionExecution<{}>: Concurrency={}, Ordered={}",
+            self.operator_name,
+            self.concurrency_limit,
+            self.preserve_ordering
+        )
     }
 
-    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, mut inputs: Vec<LogicalPlan>) -> Result<Self> {
         if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
+            return internal_err!(
+                "AsyncFunctionExecutionNode expects exactly 1 input, but received {}",
+                inputs.len()
+            );
         }
+
         if UserDefinedLogicalNode::expressions(self) != exprs {
-            return internal_err!("Tried to recreate async UDF node with different expressions");
+            return internal_err!(
+                "Attempted to mutate async UDF expressions during logical planning, which is not supported."
+            );
         }
 
         Ok(Self {
-            input: Arc::new(inputs[0].clone()),
-            name: self.name.clone(),
-            udf: self.udf.clone(),
-            arg_exprs: self.arg_exprs.clone(),
-            final_exprs: self.final_exprs.clone(),
-            ordered: self.ordered,
-            max_concurrency: self.max_concurrency,
-            timeout: self.timeout,
-            final_schema: self.final_schema.clone(),
+            upstream_plan: Arc::new(inputs.remove(0)),
+            operator_name: self.operator_name.clone(),
+            function_config: self.function_config.clone(),
+            invocation_args: self.invocation_args.clone(),
+            result_projections: self.result_projections.clone(),
+            preserve_ordering: self.preserve_ordering,
+            concurrency_limit: self.concurrency_limit,
+            execution_timeout: self.execution_timeout,
+            resolved_schema: self.resolved_schema.clone(),
         })
     }
 }
diff --git a/src/sql/extensions/constants.rs b/src/sql/extensions/constants.rs
index 4f90ca6e..489af179 100644
--- a/src/sql/extensions/constants.rs
+++ b/src/sql/extensions/constants.rs
@@ -10,4 +10,5 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+/// Column name substituted for an async UDF call after rewrite.
 pub const ASYNC_RESULT_FIELD: &str = "__async_result";
diff --git a/src/sql/extensions/debezium.rs b/src/sql/extensions/debezium.rs
index 84407ee4..612c0d79 100644
--- a/src/sql/extensions/debezium.rs
+++ b/src/sql/extensions/debezium.rs
@@ -1,188 +1,250 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
-use super::{ StreamExtension};
-use crate::sql::types::{StreamSchema, TIMESTAMP_FIELD};
 use std::sync::Arc;
 
-use arrow_schema::{DataType, Schema};
-
-use datafusion::common::{DFSchema, DFSchemaRef, Result, TableReference, internal_err, plan_err};
-use datafusion::error::DataFusionError;
+use arrow_schema::{DataType, Field, Schema};
+use datafusion::common::{
+    internal_err, plan_err, DFSchema, DFSchemaRef, DataFusionError, Result, TableReference,
+};
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 use datafusion::physical_plan::DisplayAs;
 
-use super::{NodeWithIncomingEdges};
 use crate::multifield_partial_ord;
-use crate::sql::logical_planner::updating_meta_field;
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::logical_planner::updating_meta_field;
+use crate::sql::types::TIMESTAMP_FIELD;
 
-pub(crate) const DEBEZIUM_UNROLLING_EXTENSION_NAME: &str = "DebeziumUnrollingExtension";
-pub(crate) const TO_DEBEZIUM_EXTENSION_NAME: &str = "ToDebeziumExtension";
+use super::{CompiledTopologyNode, StreamingOperatorBlueprint};
 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct DebeziumUnrollingExtension {
-    input: LogicalPlan,
-    schema: DFSchemaRef,
-    pub primary_keys: Vec<usize>,
-    primary_key_names: Arc<Vec<String>>,
-}
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
 
-multifield_partial_ord!(
-    DebeziumUnrollingExtension,
-    input,
-    primary_keys,
-    primary_key_names
-);
+pub(crate) const UNROLL_NODE_NAME: &str = "UnrollDebeziumPayloadNode";
+pub(crate) const PACK_NODE_NAME: &str = "PackDebeziumEnvelopeNode";
 
-impl DebeziumUnrollingExtension {
-    pub(crate) fn as_debezium_schema(
-        input_schema: &DFSchemaRef,
-        qualifier: Option<TableReference>,
+const CDC_FIELD_BEFORE: &str = "before";
+const CDC_FIELD_AFTER: &str = "after";
+const CDC_FIELD_OP: &str = "op";
+
+// -----------------------------------------------------------------------------
+// Core Schema Codec
+// -----------------------------------------------------------------------------
+
+/// Transforms between flat schemas and Debezium CDC envelopes.
+pub(crate) struct DebeziumSchemaCodec;
+
+impl DebeziumSchemaCodec {
+    /// Wraps a flat physical schema into a Debezium CDC envelope structure.
+    pub(crate) fn wrap_into_envelope(
+        flat_schema: &DFSchemaRef,
+        qualifier_override: Option<TableReference>,
     ) -> Result<DFSchemaRef> {
-        let timestamp_field = if input_schema.has_column_with_unqualified_name(TIMESTAMP_FIELD) {
-            Some(
-                input_schema
-                    .field_with_unqualified_name(TIMESTAMP_FIELD)?
-                    .clone(),
-            )
+        let ts_field = if flat_schema.has_column_with_unqualified_name(TIMESTAMP_FIELD) {
+            Some(flat_schema.field_with_unqualified_name(TIMESTAMP_FIELD)?.clone())
         } else {
             None
         };
-        let struct_schema: Vec<_> = input_schema
+
+        let payload_fields: Vec<_> = flat_schema
             .fields()
             .iter()
-            .filter(|field| field.name() != TIMESTAMP_FIELD && field.name() != UPDATING_META_FIELD)
+            .filter(|f| f.name() != TIMESTAMP_FIELD && f.name() != UPDATING_META_FIELD)
             .cloned()
             .collect();
 
-        let struct_type = DataType::Struct(struct_schema.into());
+        let payload_struct_type = DataType::Struct(payload_fields.into());
 
-        let before = Arc::new(arrow::datatypes::Field::new(
-            "before",
-            struct_type.clone(),
-            true,
-        ));
-        let after = Arc::new(arrow::datatypes::Field::new(
-            "after",
-            struct_type.clone(),
-            true,
-        ));
+        let mut envelope_fields = vec![
+            Arc::new(Field::new(
+                CDC_FIELD_BEFORE,
+                payload_struct_type.clone(),
+                true,
+            )),
+            Arc::new(Field::new(CDC_FIELD_AFTER, payload_struct_type, true)),
+            Arc::new(Field::new(CDC_FIELD_OP, DataType::Utf8, true)),
+        ];
 
-        let op = Arc::new(arrow::datatypes::Field::new("op", DataType::Utf8, true));
-        let mut fields = vec![before, after, op];
-
-        if let Some(timestamp_field) = timestamp_field {
-            fields.push(Arc::new(timestamp_field));
+        if let Some(ts) = ts_field {
+            envelope_fields.push(Arc::new(ts));
         }
 
-        let schema = match qualifier {
-            Some(qualifier) => {
-                DFSchema::try_from_qualified_schema(qualifier, &Schema::new(fields))?
-            }
-            None => DFSchema::try_from(Schema::new(fields))?,
+        let arrow_schema = Schema::new(envelope_fields);
+        let final_schema = match qualifier_override {
+            Some(qualifier) => DFSchema::try_from_qualified_schema(qualifier, &arrow_schema)?,
+            None => DFSchema::try_from(arrow_schema)?,
         };
-        Ok(Arc::new(schema))
+
+        Ok(Arc::new(final_schema))
     }
+}
 
-    pub fn try_new(input: LogicalPlan, primary_keys: Arc<Vec<String>>) -> Result<Self> {
-        let input_schema = input.schema();
+// -----------------------------------------------------------------------------
+// Logical Node: Unroll Debezium Payload
+// -----------------------------------------------------------------------------
 
-        // confirm that the input schema has before, after and op columns, and before and after match
-        let Some(before_index) = input_schema.index_of_column_by_name(None, "before") else {
-            return plan_err!("DebeziumUnrollingExtension requires a before column");
-        };
-        let Some(after_index) = input_schema.index_of_column_by_name(None, "after") else {
-            return plan_err!("DebeziumUnrollingExtension requires an after column");
-        };
-        let Some(op_index) = input_schema.index_of_column_by_name(None, "op") else {
-            return plan_err!("DebeziumUnrollingExtension requires an op column");
-        };
+/// Decodes an incoming Debezium envelope into a flat, updating stream representation.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct UnrollDebeziumPayloadNode {
+    upstream_plan: LogicalPlan,
+    resolved_schema: DFSchemaRef,
+    pub pk_indices: Vec<usize>,
+    pk_names: Arc<Vec<String>>,
+}
+
+multifield_partial_ord!(
+    UnrollDebeziumPayloadNode,
+    upstream_plan,
+    pk_indices,
+    pk_names
+);
+
+impl UnrollDebeziumPayloadNode {
+    pub fn try_new(upstream_plan: LogicalPlan, pk_names: Arc<Vec<String>>) -> Result<Self> {
+        let input_schema = upstream_plan.schema();
+
+        let (before_idx, after_idx) = Self::validate_envelope_structure(input_schema)?;
+
+        let payload_fields = Self::extract_payload_fields(input_schema, before_idx)?;
+
+        let pk_indices = Self::map_primary_keys(payload_fields, &pk_names)?;
+
+        let qualifier = Self::resolve_schema_qualifier(input_schema, before_idx, after_idx)?;
+
+        let resolved_schema =
+            Self::compile_unrolled_schema(input_schema, payload_fields, qualifier)?;
+
+        Ok(Self {
+            upstream_plan,
+            resolved_schema,
+            pk_indices,
+            pk_names,
+        })
+    }
+
+    fn validate_envelope_structure(schema: &DFSchemaRef) -> Result<(usize, usize)> {
+        let before_idx = schema.index_of_column_by_name(None, CDC_FIELD_BEFORE).ok_or_else(
+            || DataFusionError::Plan("Missing 'before' state column in CDC stream".into()),
+        )?;
+
+        let after_idx = schema.index_of_column_by_name(None, CDC_FIELD_AFTER).ok_or_else(
+            || DataFusionError::Plan("Missing 'after' state column in CDC stream".into()),
+        )?;
+
+        let op_idx = schema.index_of_column_by_name(None, CDC_FIELD_OP).ok_or_else(|| {
+            DataFusionError::Plan("Missing 'op' operation column in CDC stream".into())
+        })?;
+
+        let before_type = schema.field(before_idx).data_type();
+        let after_type = schema.field(after_idx).data_type();
 
-        let before_type = input_schema.field(before_index).data_type();
-        let after_type = input_schema.field(after_index).data_type();
         if before_type != after_type {
             return plan_err!(
-                "before and after columns must have the same type, not {} and {}",
-                before_type,
-                after_type
+                "State column type mismatch: 'before' is {before_type}, but 'after' is {after_type}"
             );
         }
 
-        // check that op is a string
-        let op_type = input_schema.field(op_index).data_type();
-        if *op_type != DataType::Utf8 {
-            return plan_err!("op column must be a string, not {}", op_type);
-        }
-
-        // create the output schema
-        let DataType::Struct(fields) = before_type else {
+        if *schema.field(op_idx).data_type() != DataType::Utf8 {
             return plan_err!(
-                "before and after columns must be structs, not {}",
-                before_type
+                "The '{}' column must be of type Utf8",
+                CDC_FIELD_OP
             );
-        };
+        }
+
+        Ok((before_idx, after_idx))
+    }
 
-        // get the primary keys
-        let primary_key_idx = primary_keys
+    fn extract_payload_fields<'a>(
+        schema: &'a DFSchemaRef,
+        state_idx: usize,
+    ) -> Result<&'a arrow_schema::Fields> {
+        match schema.field(state_idx).data_type() {
+            DataType::Struct(fields) => Ok(fields),
+            other => plan_err!("State columns must be of type Struct, found {other}"),
+        }
+    }
+
+    /// Resolves each primary-key name to its index within the payload struct `fields`.
+    ///
+    /// Returns a plan error naming the first key that is absent from `fields`.
+    fn map_primary_keys(fields: &arrow_schema::Fields, pk_names: &[String]) -> Result<Vec<usize>> {
+        pk_names
             .iter()
-            .map(|pk| fields.find(pk).map(|(i, _)| i))
-            .collect::<Option<Vec<_>>>()
-            .ok_or_else(|| {
-                DataFusionError::Plan("primary key field not found in Debezium schema".to_string())
-            })?;
+            .map(|pk| match fields.find(pk) {
+                Some((idx, _)) => Ok(idx),
+                None => plan_err!("Primary key '{pk}' not found in payload schema"),
+            })
+            .collect()
+    }
 
-        // determine the qualifier from the before and after columns
-        let qualifier = match (
-            input_schema.qualified_field(before_index).0,
-            input_schema.qualified_field(after_index).0,
-        ) {
-            (Some(before_qualifier), Some(after_qualifier)) => {
-                if before_qualifier != after_qualifier {
-                    return plan_err!("before and after columns must have the same alias");
-                }
-                Some(before_qualifier.clone())
-            }
-            (None, None) => None,
-            _ => return plan_err!("before and after columns must both have an alias or neither"),
-        };
+    fn resolve_schema_qualifier(
+        schema: &DFSchemaRef,
+        before_idx: usize,
+        after_idx: usize,
+    ) -> Result<Option<TableReference>> {
+        let before_qualifier = schema.qualified_field(before_idx).0;
+        let after_qualifier = schema.qualified_field(after_idx).0;
+
+        match (before_qualifier, after_qualifier) {
+            (Some(bq), Some(aq)) if bq == aq => Ok(Some(bq.clone())),
+            (None, None) => Ok(None),
+            _ => plan_err!(
+                "'before' and 'after' columns must share the same namespace/qualifier"
+            ),
+        }
+    }
 
-        let mut fields = fields.to_vec();
-        fields.push(updating_meta_field());
+    fn compile_unrolled_schema(
+        original_schema: &DFSchemaRef,
+        payload_fields: &arrow_schema::Fields,
+        qualifier: Option<TableReference>,
+    ) -> Result<DFSchemaRef> {
+        let mut flat_fields = payload_fields.to_vec();
 
-        let Some(input_timestamp_field) =
-            input_schema.index_of_column_by_name(None, TIMESTAMP_FIELD)
-        else {
-            return plan_err!("DebeziumUnrollingExtension requires a timestamp field");
-        };
+        flat_fields.push(updating_meta_field());
 
-        fields.push(Arc::new(input_schema.field(input_timestamp_field).clone()));
-        let arrow_schema = Schema::new(fields);
+        let ts_idx = original_schema
+            .index_of_column_by_name(None, TIMESTAMP_FIELD)
+            .ok_or_else(|| {
+                DataFusionError::Plan(format!(
+                    "Required event time field '{TIMESTAMP_FIELD}' is missing"
+                ))
+            })?;
 
-        let schema = match qualifier {
-            Some(qualifier) => DFSchema::try_from_qualified_schema(qualifier, &arrow_schema)?,
+        flat_fields.push(Arc::new(original_schema.field(ts_idx).clone()));
+
+        let arrow_schema = Schema::new(flat_fields);
+        let compiled_schema = match qualifier {
+            Some(q) => DFSchema::try_from_qualified_schema(q, &arrow_schema)?,
             None => DFSchema::try_from(arrow_schema)?,
         };
 
-        Ok(Self {
-            input,
-            schema: Arc::new(schema),
-            primary_keys: primary_key_idx,
-            primary_key_names: primary_keys,
-        })
+        Ok(Arc::new(compiled_schema))
     }
 }
 
-impl UserDefinedLogicalNodeCore for DebeziumUnrollingExtension {
+impl UserDefinedLogicalNodeCore for UnrollDebeziumPayloadNode {
     fn name(&self) -> &str {
-        DEBEZIUM_UNROLLING_EXTENSION_NAME
+        UNROLL_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
@@ -190,116 +252,136 @@ impl UserDefinedLogicalNodeCore for DebeziumUnrollingExtension {
     }
 
     fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "DebeziumUnrollingExtension")
+        write!(f, "UnrollDebeziumPayload")
     }
 
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Self::try_new(inputs[0].clone(), self.primary_key_names.clone())
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, mut inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if inputs.len() != 1 {
+            return internal_err!(
+                "UnrollDebeziumPayloadNode expects exactly 1 input, got {}",
+                inputs.len()
+            );
+        }
+        Self::try_new(inputs.remove(0), self.pk_names.clone())
     }
 }
 
-impl StreamExtension for DebeziumUnrollingExtension {
-    fn node_name(&self) -> Option<NamedNode> {
+impl StreamingOperatorBlueprint for UnrollDebeziumPayloadNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
         None
     }
 
-    fn plan_node(
-        &self,
-        _planner: &Planner,
-        _index: usize,
-        _input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        plan_err!("DebeziumUnrollingExtension should not be planned")
+    fn is_passthrough_boundary(&self) -> bool {
+        true
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    fn compile_to_graph_node(
+        &self,
+        _: &Planner,
+        _: usize,
+        _: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        plan_err!("UnrollDebeziumPayloadNode is a logical boundary and should not be physically planned")
     }
 
-    fn transparent(&self) -> bool {
-        true
+    /// Converts `resolved_schema` into an `FsSchema`; panics with the cause on failure.
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.resolved_schema.as_ref().into()))
+            .unwrap_or_else(|e| panic!("Failed to extract physical schema for {}: {:?}", UNROLL_NODE_NAME, e))
     }
 }
 
+// -----------------------------------------------------------------------------
+// Logical Node: Pack Debezium Envelope
+// -----------------------------------------------------------------------------
+
+/// Encodes a flat updating stream back into a Debezium CDC envelope representation.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct ToDebeziumExtension {
-    input: Arc<LogicalPlan>,
-    schema: DFSchemaRef,
+pub(crate) struct PackDebeziumEnvelopeNode {
+    upstream_plan: Arc<LogicalPlan>,
+    envelope_schema: DFSchemaRef,
 }
 
-multifield_partial_ord!(ToDebeziumExtension, input);
+multifield_partial_ord!(PackDebeziumEnvelopeNode, upstream_plan);
+
+impl PackDebeziumEnvelopeNode {
+    pub(crate) fn try_new(upstream_plan: LogicalPlan) -> Result<Self> {
+        let envelope_schema = DebeziumSchemaCodec::wrap_into_envelope(upstream_plan.schema(), None)
+            .map_err(|e| {
+                DataFusionError::Plan(format!("Failed to compile Debezium envelope schema: {e}"))
+            })?;
 
-impl ToDebeziumExtension {
-    pub(crate) fn try_new(input: LogicalPlan) -> Result<Self> {
-        let input_schema = input.schema();
-        let schema = DebeziumUnrollingExtension::as_debezium_schema(input_schema, None)
-            .expect("should be able to create ToDebeziumExtenison");
         Ok(Self {
-            input: Arc::new(input),
-            schema,
+            upstream_plan: Arc::new(upstream_plan),
+            envelope_schema,
         })
     }
 }
 
-impl DisplayAs for ToDebeziumExtension {
+impl DisplayAs for PackDebeziumEnvelopeNode {
     fn fmt_as(
         &self,
         _t: datafusion::physical_plan::DisplayFormatType,
         f: &mut std::fmt::Formatter,
     ) -> std::fmt::Result {
-        write!(f, "ToDebeziumExtension")
+        write!(f, "PackDebeziumEnvelope")
     }
 }
 
-impl UserDefinedLogicalNodeCore for ToDebeziumExtension {
+impl UserDefinedLogicalNodeCore for PackDebeziumEnvelopeNode {
     fn name(&self) -> &str {
-        TO_DEBEZIUM_EXTENSION_NAME
+        PACK_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.envelope_schema
     }
 
-    fn expressions(&self) -> Vec<datafusion::prelude::Expr> {
+    fn expressions(&self) -> Vec<Expr> {
         vec![]
     }
 
     fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "ToDebeziumExtension")
+        write!(f, "PackDebeziumEnvelope")
     }
 
-    fn with_exprs_and_inputs(
-        &self,
-        _exprs: Vec<datafusion::prelude::Expr>,
-        inputs: Vec<LogicalPlan>,
-    ) -> Result<Self> {
-        Self::try_new(inputs[0].clone())
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, mut inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if inputs.len() != 1 {
+            return internal_err!(
+                "PackDebeziumEnvelopeNode expects exactly 1 input, got {}",
+                inputs.len()
+            );
+        }
+        Self::try_new(inputs.remove(0))
     }
 }
 
-impl StreamExtension for ToDebeziumExtension {
-    fn node_name(&self) -> Option<NamedNode> {
+impl StreamingOperatorBlueprint for PackDebeziumEnvelopeNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
         None
     }
 
-    fn plan_node(
-        &self,
-        _planner: &Planner,
-        _index: usize,
-        _input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        internal_err!("ToDebeziumExtension should not be planned")
+    fn is_passthrough_boundary(&self) -> bool {
+        true
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into())).unwrap()
+    fn compile_to_graph_node(
+        &self,
+        _: &Planner,
+        _: usize,
+        _: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        internal_err!("PackDebeziumEnvelopeNode is a logical boundary and should not be physically planned")
     }
 
-    fn transparent(&self) -> bool {
-        true
+    /// Converts `envelope_schema` into an `FsSchema`; panics with the cause on failure.
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.envelope_schema.as_ref().into())).unwrap_or_else(
+            |e| panic!("Failed to extract physical schema for {}: {:?}", PACK_NODE_NAME, e),
+        )
     }
 }
diff --git a/src/sql/extensions/extension_try_from.rs b/src/sql/extensions/extension_try_from.rs
index eb042a90..a64ac9cf 100644
--- a/src/sql/extensions/extension_try_from.rs
+++ b/src/sql/extensions/extension_try_from.rs
@@ -15,53 +15,53 @@ use std::sync::Arc;
 use datafusion::common::{DataFusionError, Result};
 use datafusion::logical_expr::UserDefinedLogicalNode;
 
-use crate::sql::extensions::aggregate::AggregateExtension;
-use crate::sql::extensions::async_udf::AsyncUDFExtension;
-use crate::sql::extensions::debezium::{DebeziumUnrollingExtension, ToDebeziumExtension};
-use crate::sql::extensions::join::JoinExtension;
-use crate::sql::extensions::key_calculation::KeyCalculationExtension;
-use crate::sql::extensions::lookup::LookupJoin;
-use crate::sql::extensions::projection::ProjectionExtension;
-use crate::sql::extensions::remote_table::RemoteTableExtension;
-use crate::sql::extensions::sink::SinkExtension;
-use crate::sql::extensions::stream_extension::StreamExtension;
-use crate::sql::extensions::table_source::TableSourceExtension;
-use crate::sql::extensions::updating_aggregate::UpdatingAggregateExtension;
-use crate::sql::extensions::watermark_node::WatermarkNode;
-use crate::sql::extensions::window_fn::WindowFunctionExtension;
+use crate::sql::extensions::aggregate::StreamWindowAggregateNode;
+use crate::sql::extensions::async_udf::AsyncFunctionExecutionNode;
+use crate::sql::extensions::debezium::{PackDebeziumEnvelopeNode, UnrollDebeziumPayloadNode};
+use crate::sql::extensions::join::StreamingJoinNode;
+use crate::sql::extensions::key_calculation::KeyExtractionNode;
+use crate::sql::extensions::lookup::StreamReferenceJoinNode;
+use crate::sql::extensions::projection::StreamProjectionNode;
+use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
+use crate::sql::extensions::sink::StreamEgressNode;
+use crate::sql::extensions::streaming_operator_blueprint::StreamingOperatorBlueprint;
+use crate::sql::extensions::table_source::StreamIngestionNode;
+use crate::sql::extensions::updating_aggregate::ContinuousAggregateNode;
+use crate::sql::extensions::watermark_node::EventTimeWatermarkNode;
+use crate::sql::extensions::windows_function::StreamingWindowFunctionNode;
 
-fn try_from_t<T: StreamExtension + 'static>(
+fn try_from_t<T: StreamingOperatorBlueprint + 'static>(
     node: &dyn UserDefinedLogicalNode,
-) -> std::result::Result<&dyn StreamExtension, ()> {
+) -> std::result::Result<&dyn StreamingOperatorBlueprint, ()> {
     node.as_any()
         .downcast_ref::<T>()
-        .map(|t| t as &dyn StreamExtension)
+        .map(|t| t as &dyn StreamingOperatorBlueprint)
         .ok_or(())
 }
 
-impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamExtension {
+impl<'a> TryFrom<&'a dyn UserDefinedLogicalNode> for &'a dyn StreamingOperatorBlueprint {
     type Error = DataFusionError;
 
     fn try_from(node: &'a dyn UserDefinedLogicalNode) -> Result<Self, Self::Error> {
-        try_from_t::<TableSourceExtension>(node)
-            .or_else(|_| try_from_t::<WatermarkNode>(node))
-            .or_else(|_| try_from_t::<SinkExtension>(node))
-            .or_else(|_| try_from_t::<KeyCalculationExtension>(node))
-            .or_else(|_| try_from_t::<AggregateExtension>(node))
-            .or_else(|_| try_from_t::<RemoteTableExtension>(node))
-            .or_else(|_| try_from_t::<JoinExtension>(node))
-            .or_else(|_| try_from_t::<WindowFunctionExtension>(node))
-            .or_else(|_| try_from_t::<AsyncUDFExtension>(node))
-            .or_else(|_| try_from_t::<ToDebeziumExtension>(node))
-            .or_else(|_| try_from_t::<DebeziumUnrollingExtension>(node))
-            .or_else(|_| try_from_t::<UpdatingAggregateExtension>(node))
-            .or_else(|_| try_from_t::<LookupJoin>(node))
-            .or_else(|_| try_from_t::<ProjectionExtension>(node))
+        try_from_t::<StreamIngestionNode>(node)
+            .or_else(|_| try_from_t::<EventTimeWatermarkNode>(node))
+            .or_else(|_| try_from_t::<StreamEgressNode>(node))
+            .or_else(|_| try_from_t::<KeyExtractionNode>(node))
+            .or_else(|_| try_from_t::<StreamWindowAggregateNode>(node))
+            .or_else(|_| try_from_t::<RemoteTableBoundaryNode>(node))
+            .or_else(|_| try_from_t::<StreamingJoinNode>(node))
+            .or_else(|_| try_from_t::<StreamingWindowFunctionNode>(node))
+            .or_else(|_| try_from_t::<AsyncFunctionExecutionNode>(node))
+            .or_else(|_| try_from_t::<PackDebeziumEnvelopeNode>(node))
+            .or_else(|_| try_from_t::<UnrollDebeziumPayloadNode>(node))
+            .or_else(|_| try_from_t::<ContinuousAggregateNode>(node))
+            .or_else(|_| try_from_t::<StreamReferenceJoinNode>(node))
+            .or_else(|_| try_from_t::<StreamProjectionNode>(node))
             .map_err(|_| DataFusionError::Plan(format!("unexpected node: {}", node.name())))
     }
 }
 
-impl<'a> TryFrom<&'a Arc<dyn UserDefinedLogicalNode>> for &'a dyn StreamExtension {
+impl<'a> TryFrom<&'a Arc<dyn UserDefinedLogicalNode>> for &'a dyn StreamingOperatorBlueprint {
     type Error = DataFusionError;
 
     fn try_from(node: &'a Arc<dyn UserDefinedLogicalNode>) -> Result<Self, Self::Error> {
diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs
index 74dcfde6..70fbf3a3 100644
--- a/src/sql/extensions/join.rs
+++ b/src/sql/extensions/join.rs
@@ -1,120 +1,208 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
 use std::time::Duration;
 
 use datafusion::common::{DFSchemaRef, Result};
 use datafusion::logical_expr::expr::Expr;
 use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore};
-
-use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
-use crate::sql::types::StreamSchema;
-
-use std::sync::Arc;
 use datafusion_common::plan_err;
 use datafusion_proto::physical_plan::AsExecutionPlan;
 use datafusion_proto::protobuf::PhysicalPlanNode;
 use prost::Message;
 use protocol::grpc::api::JoinOperator;
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
-use crate::sql::logical_planner::FsPhysicalExtensionCodec;
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
+
 use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
+use crate::sql::logical_node::logical::{
+    LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName,
+};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+
+// -----------------------------------------------------------------------------
+// Constants
+// -----------------------------------------------------------------------------
 
-pub(crate) const JOIN_NODE_NAME: &str = "JoinNode";
+pub(crate) const STREAM_JOIN_NODE_TYPE: &str = "StreamingJoinNode";
 
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
+
+/// A logical plan node representing a streaming join operation.
+/// It bridges the DataFusion logical plan with the physical streaming execution engine.
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
-pub struct JoinExtension {
-    pub(crate) rewritten_join: LogicalPlan,
-    pub(crate) is_instant: bool,
-    pub(crate) ttl: Option<Duration>,
+pub struct StreamingJoinNode {
+    pub(crate) underlying_plan: LogicalPlan,
+    pub(crate) instant_execution_mode: bool,
+    pub(crate) state_retention_ttl: Option<Duration>,
 }
 
-impl StreamExtension for JoinExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
+impl StreamingJoinNode {
+    /// Creates a new instance of the streaming join node.
+    pub fn new(
+        underlying_plan: LogicalPlan,
+        instant_execution_mode: bool,
+        state_retention_ttl: Option<Duration>,
+    ) -> Self {
+        Self {
+            underlying_plan,
+            instant_execution_mode,
+            state_retention_ttl,
+        }
     }
 
-    fn plan_node(
+    /// Compiles the physical execution plan and serializes it into a Protobuf configuration payload.
+    fn compile_operator_config(
         &self,
         planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        if input_schemas.len() != 2 {
-            return plan_err!("join should have exactly two inputs");
-        }
-        let left_schema = input_schemas[0].clone();
-        let right_schema = input_schemas[1].clone();
-
-        let join_plan = planner.sync_plan(&self.rewritten_join)?;
-        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
-            join_plan.clone(),
+        node_identifier: &str,
+        left_schema: FsSchemaRef,
+        right_schema: FsSchemaRef,
+    ) -> Result<JoinOperator> {
+        let physical_plan = planner.sync_plan(&self.underlying_plan)?;
+
+        let proto_node = PhysicalPlanNode::try_from_physical_plan(
+            physical_plan,
             &FsPhysicalExtensionCodec::default(),
         )?;
 
-        let operator_name = if self.is_instant {
-            OperatorName::InstantJoin
-        } else {
-            OperatorName::Join
-        };
-
-        let config = JoinOperator {
-            name: format!("join_{index}"),
+        Ok(JoinOperator {
+            name: node_identifier.to_string(),
             left_schema: Some(left_schema.as_ref().clone().into()),
             right_schema: Some(right_schema.as_ref().clone().into()),
-            output_schema: Some(self.output_schema().into()),
-            join_plan: physical_plan_node.encode_to_vec(),
-            ttl_micros: self.ttl.map(|t| t.as_micros() as u64),
-        };
-
-        let logical_node = LogicalNode::single(
-            index as u32,
-            format!("join_{index}"),
-            operator_name,
-            config.encode_to_vec(),
-            "join".to_string(),
-            1,
-        );
-
-        let left_edge =
-            LogicalEdge::project_all(LogicalEdgeType::LeftJoin, left_schema.as_ref().clone());
-        let right_edge =
-            LogicalEdge::project_all(LogicalEdgeType::RightJoin, right_schema.as_ref().clone());
-        Ok(NodeWithIncomingEdges {
-            node: logical_node,
-            edges: vec![left_edge, right_edge],
+            output_schema: Some(self.extract_fs_schema().into()),
+            join_plan: proto_node.encode_to_vec(),
+            ttl_micros: self.state_retention_ttl.map(|ttl| ttl.as_micros() as u64),
         })
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_unkeyed(self.schema().inner().clone()).unwrap()
+    fn determine_operator_type(&self) -> OperatorName {
+        if self.instant_execution_mode {
+            OperatorName::InstantJoin
+        } else {
+            OperatorName::Join
+        }
+    }
+
+    fn extract_fs_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(self.underlying_plan.schema().inner().clone())
+            .expect("Fatal: Failed to convert internal join schema to FsSchema without keys")
     }
 }
 
-impl UserDefinedLogicalNodeCore for JoinExtension {
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Core Implementation
+// -----------------------------------------------------------------------------
+
+impl UserDefinedLogicalNodeCore for StreamingJoinNode {
     fn name(&self) -> &str {
-        JOIN_NODE_NAME
+        STREAM_JOIN_NODE_TYPE
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.rewritten_join]
+        vec![&self.underlying_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        self.rewritten_join.schema()
+        self.underlying_plan.schema()
     }
 
     fn expressions(&self) -> Vec<Expr> {
         vec![]
     }
 
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "JoinExtension: {}", self.schema())
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "StreamingJoinNode: Schema={}, InstantMode={}, TTL={:?}",
+            self.schema(),
+            self.instant_execution_mode,
+            self.state_retention_ttl
+        )
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, mut inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if inputs.len() != 1 {
+            return plan_err!(
+                "StreamingJoinNode expects exactly 1 underlying logical plan during recreation"
+            );
+        }
+
+        Ok(Self::new(
+            inputs.remove(0),
+            self.instant_execution_mode,
+            self.state_retention_ttl,
+        ))
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Streaming Graph Extension Implementation
+// -----------------------------------------------------------------------------
+
+impl StreamingOperatorBlueprint for StreamingJoinNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        None
     }
 
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self {
-            rewritten_join: inputs[0].clone(),
-            is_instant: self.is_instant,
-            ttl: self.ttl,
+    fn compile_to_graph_node(
+        &self,
+        planner: &Planner,
+        node_index: usize,
+        mut input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        if input_schemas.len() != 2 {
+            return plan_err!(
+                "Invalid topology: StreamingJoinNode requires exactly two upstream inputs, received {}",
+                input_schemas.len()
+            );
+        }
+
+        let right_schema = input_schemas.pop().unwrap();
+        let left_schema = input_schemas.pop().unwrap();
+
+        let node_identifier = format!("stream_join_{node_index}");
+
+        let operator_config = self.compile_operator_config(
+            planner,
+            &node_identifier,
+            left_schema.clone(),
+            right_schema.clone(),
+        )?;
+
+        let logical_node = LogicalNode::single(
+            node_index as u32,
+            node_identifier.clone(),
+            self.determine_operator_type(),
+            operator_config.encode_to_vec(),
+            "streaming_join".to_string(),
+            1,
+        );
+
+        let left_edge =
+            LogicalEdge::project_all(LogicalEdgeType::LeftJoin, left_schema.as_ref().clone());
+        let right_edge =
+            LogicalEdge::project_all(LogicalEdgeType::RightJoin, right_schema.as_ref().clone());
+
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_node,
+            routing_edges: vec![left_edge, right_edge],
         })
     }
+
+    fn yielded_schema(&self) -> FsSchema {
+        self.extract_fs_schema()
+    }
 }
diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs
index 3a94f592..484d464c 100644
--- a/src/sql/extensions/key_calculation.rs
+++ b/src/sql/extensions/key_calculation.rs
@@ -1,217 +1,263 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::fmt::Formatter;
 use std::sync::Arc;
 
 use datafusion::arrow::datatypes::{Field, Schema};
-use datafusion::common::{DFSchemaRef, Result, internal_err};
+use datafusion::common::{DFSchemaRef, Result, internal_err, plan_err};
 use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
-use datafusion_common::{plan_err, DFSchema};
+use datafusion_common::DFSchema;
 use datafusion_expr::col;
 use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec};
 use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
 use datafusion_proto::protobuf::PhysicalPlanNode;
 use itertools::Itertools;
 use prost::Message;
+
 use protocol::grpc::api::{KeyPlanOperator, ProjectionOperator};
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+
 use crate::multifield_partial_ord;
+use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::sql::logical_planner::FsPhysicalExtensionCodec;
-use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::types::{
-    StreamSchema, fields_with_qualifiers, schema_from_df_fields_with_metadata,
-};
-use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::types::{fields_with_qualifiers, schema_from_df_fields_with_metadata};
 
-pub(crate) const KEY_CALCULATION_NAME: &str = "KeyCalculationExtension";
+pub(crate) const EXTENSION_NODE_IDENTIFIER: &str = "KeyExtractionNode";
 
-/// Two ways of specifying keys — either as col indexes in the existing data or as a set of
-/// exprs to evaluate
+/// Routing strategy for shuffling data across the stream topology.
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
-pub enum KeysOrExprs {
-    Keys(Vec<usize>),
-    Exprs(Vec<Expr>),
+pub enum KeyExtractionStrategy {
+    ColumnIndices(Vec<usize>),
+    CalculatedExpressions(Vec<Expr>),
 }
 
-/// Calculation for computing keyed data, with a vec of keys
-/// that will be used for shuffling data to the correct nodes.
+/// Logical node that computes or extracts routing keys.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct KeyCalculationExtension {
-    pub(crate) name: Option<String>,
-    pub(crate) input: LogicalPlan,
-    pub(crate) keys: KeysOrExprs,
-    pub(crate) schema: DFSchemaRef,
+pub(crate) struct KeyExtractionNode {
+    pub(crate) operator_label: Option<String>,
+    pub(crate) upstream_plan: LogicalPlan,
+    pub(crate) extraction_strategy: KeyExtractionStrategy,
+    pub(crate) resolved_schema: DFSchemaRef,
 }
 
-multifield_partial_ord!(KeyCalculationExtension, name, input, keys);
-
-impl KeyCalculationExtension {
-    pub fn new_named_and_trimmed(input: LogicalPlan, keys: Vec<usize>, name: String) -> Self {
-        let output_fields: Vec<_> = fields_with_qualifiers(input.schema())
+multifield_partial_ord!(
+    KeyExtractionNode,
+    operator_label,
+    upstream_plan,
+    extraction_strategy
+);
+
+impl KeyExtractionNode {
+    /// Extracts keys and hides them from the downstream projection.
+    pub fn try_new_with_projection(
+        upstream_plan: LogicalPlan,
+        target_indices: Vec<usize>,
+        label: String,
+    ) -> Result<Self> {
+        let projected_fields: Vec<_> = fields_with_qualifiers(upstream_plan.schema())
             .into_iter()
             .enumerate()
-            .filter_map(|(index, field)| {
-                if !keys.contains(&index) {
-                    Some(field.clone())
-                } else {
-                    None
-                }
-            })
+            .filter(|(idx, _)| !target_indices.contains(idx))
+            .map(|(_, field)| field)
             .collect();
 
-        let schema =
-            schema_from_df_fields_with_metadata(&output_fields, input.schema().metadata().clone())
-                .unwrap();
-        Self {
-            name: Some(name),
-            input,
-            keys: KeysOrExprs::Keys(keys),
-            schema: Arc::new(schema),
-        }
+        let metadata = upstream_plan.schema().metadata().clone();
+        let resolved_schema = schema_from_df_fields_with_metadata(&projected_fields, metadata)?;
+
+        Ok(Self {
+            operator_label: Some(label),
+            upstream_plan,
+            extraction_strategy: KeyExtractionStrategy::ColumnIndices(target_indices),
+            resolved_schema: Arc::new(resolved_schema),
+        })
     }
-    pub fn new(input: LogicalPlan, keys: KeysOrExprs) -> Self {
-        let schema = input.schema().clone();
+
+    /// Creates a node using an explicit strategy without changing the visible schema.
+    pub fn new(upstream_plan: LogicalPlan, strategy: KeyExtractionStrategy) -> Self {
+        let resolved_schema = upstream_plan.schema().clone();
         Self {
-            name: None,
-            input,
-            keys,
-            schema,
+            operator_label: None,
+            upstream_plan,
+            extraction_strategy: strategy,
+            resolved_schema,
         }
     }
-}
 
-impl StreamExtension for KeyCalculationExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
+    fn compile_index_router(
+        &self,
+        physical_plan_proto: PhysicalPlanNode,
+        indices: &[usize],
+    ) -> (Vec<u8>, OperatorName) {
+        let operator_config = KeyPlanOperator {
+            name: "key".into(),
+            physical_plan: physical_plan_proto.encode_to_vec(),
+            key_fields: indices.iter().map(|&idx| idx as u64).collect(),
+        };
+
+        (operator_config.encode_to_vec(), OperatorName::ArrowKey)
     }
 
-    fn plan_node(
+    fn compile_expression_router(
         &self,
         planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        // check there's only one input
-        if input_schemas.len() != 1 {
-            return plan_err!("KeyCalculationExtension should have exactly one input");
+        expressions: &[Expr],
+        input_schema_ref: &FsSchemaRef,
+        input_df_schema: &DFSchemaRef,
+    ) -> Result<(Vec<u8>, OperatorName)> {
+        let mut target_exprs = expressions.to_vec();
+
+        for field in input_schema_ref.schema.fields.iter() {
+            target_exprs.push(col(field.name()));
         }
-        let input_schema = (*input_schemas[0]).clone();
-        let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?);
 
-        let physical_plan = planner.sync_plan(&self.input)?;
+        let output_fs_schema = self.generate_fs_schema()?;
+
+        for (compiled_expr, expected_field) in target_exprs
+            .iter()
+            .zip(output_fs_schema.schema.fields())
+        {
+            let (expr_type, expr_nullable) = compiled_expr.data_type_and_nullable(input_df_schema)?;
+            if expr_type != *expected_field.data_type() || expr_nullable != expected_field.is_nullable()
+            {
+                return plan_err!(
+                    "Type mismatch in key calculation: Expected {} (nullable: {}), got {} (nullable: {})",
+                    expected_field.data_type(),
+                    expected_field.is_nullable(),
+                    expr_type,
+                    expr_nullable
+                );
+            }
+        }
 
-        let physical_plan_node: PhysicalPlanNode = PhysicalPlanNode::try_from_physical_plan(
-            physical_plan,
-            &FsPhysicalExtensionCodec::default(),
-        )?;
+        let mut physical_expr_payloads = Vec::with_capacity(target_exprs.len());
+        for logical_expr in target_exprs {
+            let physical_expr = planner
+                .create_physical_expr(&logical_expr, input_df_schema)
+                .map_err(|e| e.context("Failed to physicalize PARTITION BY expression"))?;
 
-        let (config, name) = match &self.keys {
-            KeysOrExprs::Keys(keys) => (
-                KeyPlanOperator {
-                    name: "key".into(),
-                    physical_plan: physical_plan_node.encode_to_vec(),
-                    key_fields: keys.iter().map(|k| *k as u64).collect(),
-                }
-                    .encode_to_vec(),
-                OperatorName::ArrowKey,
-            ),
-            KeysOrExprs::Exprs(key_exprs) => {
-                let mut exprs = vec![];
-                for k in key_exprs {
-                    exprs.push(k.clone())
-                }
+            let serialized_expr =
+                serialize_physical_expr(&physical_expr, &DefaultPhysicalExtensionCodec {})?;
+            physical_expr_payloads.push(serialized_expr.encode_to_vec());
+        }
 
-                for f in input_schema.schema.fields.iter() {
-                    exprs.push(col(f.name()));
-                }
+        let operator_config = ProjectionOperator {
+            name: self.operator_label.as_deref().unwrap_or("key").to_string(),
+            input_schema: Some(input_schema_ref.as_ref().clone().into()),
+            output_schema: Some(output_fs_schema.into()),
+            exprs: physical_expr_payloads,
+        };
 
-                let output_schema = self.output_schema();
+        Ok((operator_config.encode_to_vec(), OperatorName::Projection))
+    }
 
-                // ensure that the exprs generate the output schema
-                for (expr, expected) in exprs.iter().zip(output_schema.schema.fields()) {
-                    let (data_type, nullable) = expr.data_type_and_nullable(&input_df_schema)?;
-                    assert_eq!(data_type, *expected.data_type());
-                    assert_eq!(nullable, expected.is_nullable());
-                }
+    fn generate_fs_schema(&self) -> Result<FsSchema> {
+        let base_arrow_schema = self.upstream_plan.schema().as_ref();
 
-                let mut physical_exprs = vec![];
+        match &self.extraction_strategy {
+            KeyExtractionStrategy::ColumnIndices(indices) => {
+                FsSchema::from_schema_keys(Arc::new(base_arrow_schema.into()), indices.clone())
+            }
+            KeyExtractionStrategy::CalculatedExpressions(expressions) => {
+                let mut composite_fields =
+                    Vec::with_capacity(expressions.len() + base_arrow_schema.fields().len());
 
-                for e in exprs {
-                    let phys = planner
-                        .create_physical_expr(&e, &input_df_schema)
-                        .map_err(|e| e.context("in PARTITION BY"))?;
-                    physical_exprs.push(
-                        serialize_physical_expr(&phys, &DefaultPhysicalExtensionCodec {})?
-                            .encode_to_vec(),
-                    );
+                for (idx, expr) in expressions.iter().enumerate() {
+                    let (data_type, nullable) = expr.data_type_and_nullable(base_arrow_schema)?;
+                    composite_fields.push(Field::new(format!("__key_{idx}"), data_type, nullable).into());
                 }
 
-                let config = ProjectionOperator {
-                    name: self.name.as_deref().unwrap_or("key").to_string(),
-                    input_schema: Some(input_schema.clone().into()),
-
-                    output_schema: Some(self.output_schema().into()),
-                    exprs: physical_exprs,
-                };
+                for field in base_arrow_schema.fields().iter() {
+                    composite_fields.push(field.clone());
+                }
 
-                (config.encode_to_vec(), OperatorName::Projection)
+                let final_schema = Arc::new(Schema::new(composite_fields));
+                let key_mapping = (1..=expressions.len()).collect_vec();
+                FsSchema::from_schema_keys(final_schema, key_mapping)
             }
-        };
+        }
+    }
+}
 
-        let node = LogicalNode::single(
-            index as u32,
-            format!("key_{index}"),
-            name,
-            config,
-            format!("ArrowKey<{}>", self.name.as_deref().unwrap_or("_")),
-            1,
-        );
-        let edge = LogicalEdge::project_all(LogicalEdgeType::Forward, input_schema);
-        Ok(NodeWithIncomingEdges {
-            node,
-            edges: vec![edge],
-        })
+impl StreamingOperatorBlueprint for KeyExtractionNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        None
     }
 
-    fn output_schema(&self) -> FsSchema {
-        let arrow_schema = self.input.schema().as_ref();
+    fn compile_to_graph_node(
+        &self,
+        planner: &Planner,
+        node_index: usize,
+        mut input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        if input_schemas.len() != 1 {
+            return plan_err!("KeyExtractionNode requires exactly one upstream input schema");
+        }
+
+        let input_schema_ref = input_schemas.remove(0);
+        let input_df_schema = Arc::new(DFSchema::try_from(input_schema_ref.schema.as_ref().clone())?);
 
-        match &self.keys {
-            KeysOrExprs::Keys(keys) => {
-                FsSchema::from_schema_keys(Arc::new(arrow_schema.into()), keys.clone()).unwrap()
+        let physical_plan = planner.sync_plan(&self.upstream_plan)?;
+        let physical_plan_proto = PhysicalPlanNode::try_from_physical_plan(
+            physical_plan,
+            &FsPhysicalExtensionCodec::default(),
+        )?;
+
+        let (protobuf_payload, engine_operator_name) = match &self.extraction_strategy {
+            KeyExtractionStrategy::ColumnIndices(indices) => {
+                self.compile_index_router(physical_plan_proto, indices)
+            }
+            KeyExtractionStrategy::CalculatedExpressions(exprs) => {
+                self.compile_expression_router(planner, exprs, &input_schema_ref, &input_df_schema)?
             }
-            KeysOrExprs::Exprs(exprs) => {
-                let mut fields = vec![];
+        };
 
-                for (i, e) in exprs.iter().enumerate() {
-                    let (dt, nullable) = e.data_type_and_nullable(arrow_schema).unwrap();
-                    fields.push(Field::new(format!("__key_{i}"), dt, nullable).into());
-                }
+        let logical_node = LogicalNode::single(
+            node_index as u32,
+            format!("key_{node_index}"),
+            engine_operator_name,
+            protobuf_payload,
+            format!("ArrowKey<{}>", self.operator_label.as_deref().unwrap_or("_")),
+            1,
+        );
 
-                for f in arrow_schema.fields().iter() {
-                    fields.push(f.clone());
-                }
+        let data_edge =
+            LogicalEdge::project_all(LogicalEdgeType::Forward, (*input_schema_ref).clone());
 
-                FsSchema::from_schema_keys(
-                    Arc::new(Schema::new(fields)),
-                    (1..=exprs.len()).collect_vec(),
-                )
-                    .unwrap()
-            }
-        }
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_node,
+            routing_edges: vec![data_edge],
+        })
+    }
+
+    fn yielded_schema(&self) -> FsSchema {
+        self.generate_fs_schema()
+            .expect("Fatal: Failed to generate output schema for KeyExtractionNode")
     }
 }
 
-impl UserDefinedLogicalNodeCore for KeyCalculationExtension {
+impl UserDefinedLogicalNodeCore for KeyExtractionNode {
     fn name(&self) -> &str {
-        KEY_CALCULATION_NAME
+        EXTENSION_NODE_IDENTIFIER
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
@@ -219,24 +265,33 @@ impl UserDefinedLogicalNodeCore for KeyCalculationExtension {
     }
 
     fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "KeyCalculationExtension: {}", self.schema())
+        write!(
+            f,
+            "KeyExtractionNode: Strategy={:?} | Schema={}",
+            self.extraction_strategy,
+            self.resolved_schema
+        )
     }
 
-    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, mut inputs: Vec<LogicalPlan>) -> Result<Self> {
         if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
+            return internal_err!("KeyExtractionNode requires exactly 1 input logical plan");
         }
 
-        let keys = match &self.keys {
-            KeysOrExprs::Keys(k) => KeysOrExprs::Keys(k.clone()),
-            KeysOrExprs::Exprs(_) => KeysOrExprs::Exprs(exprs),
+        let strategy = match &self.extraction_strategy {
+            KeyExtractionStrategy::ColumnIndices(indices) => {
+                KeyExtractionStrategy::ColumnIndices(indices.clone())
+            }
+            KeyExtractionStrategy::CalculatedExpressions(_) => {
+                KeyExtractionStrategy::CalculatedExpressions(exprs)
+            }
         };
 
         Ok(Self {
-            name: self.name.clone(),
-            input: inputs[0].clone(),
-            keys,
-            schema: self.schema.clone(),
+            operator_label: self.operator_label.clone(),
+            upstream_plan: inputs.remove(0),
+            extraction_strategy: strategy,
+            resolved_schema: self.resolved_schema.clone(),
         })
     }
 }
diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs
index c2ef8f28..c34c5b10 100644
--- a/src/sql/extensions/lookup.rs
+++ b/src/sql/extensions/lookup.rs
@@ -1,35 +1,59 @@
-use datafusion::common::{Column, DFSchemaRef, JoinType, internal_err, plan_err};
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::common::{Column, DFSchemaRef, JoinType, Result, internal_err, plan_err};
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 use datafusion::sql::TableReference;
 use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
 use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
 use prost::Message;
-use std::fmt::Formatter;
-use std::sync::Arc;
+
 use protocol::grpc::api;
 use protocol::grpc::api::{ConnectorOp, LookupJoinCondition, LookupJoinOperator};
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+
 use crate::multifield_partial_ord;
-use crate::sql::schema::ConnectorTable;
-use crate::sql::schema::utils::add_timestamp_field_arrow;
-use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::schema::SourceTable;
+use crate::sql::schema::utils::add_timestamp_field_arrow;
+
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
 
-pub const SOURCE_EXTENSION_NAME: &str = "LookupSource";
-pub const JOIN_EXTENSION_NAME: &str = "LookupJoin";
+pub const DICTIONARY_SOURCE_NODE_NAME: &str = "ReferenceTableSource";
+pub const STREAM_DICTIONARY_JOIN_NODE_NAME: &str = "StreamReferenceJoin";
 
+// -----------------------------------------------------------------------------
+// Logical Node: Reference Table Source
+// -----------------------------------------------------------------------------
+
+/// Static or periodically updated reference table used for lookups.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct LookupSource {
-    pub(crate) table: ConnectorTable,
-    pub(crate) schema: DFSchemaRef,
+pub struct ReferenceTableSourceNode {
+    pub(crate) source_definition: SourceTable,
+    pub(crate) resolved_schema: DFSchemaRef,
 }
 
-multifield_partial_ord!(LookupSource, table);
+multifield_partial_ord!(ReferenceTableSourceNode, source_definition);
 
-impl UserDefinedLogicalNodeCore for LookupSource {
+impl UserDefinedLogicalNodeCore for ReferenceTableSourceNode {
     fn name(&self) -> &str {
-        SOURCE_EXTENSION_NAME
+        DICTIONARY_SOURCE_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
@@ -37,7 +61,7 @@ impl UserDefinedLogicalNodeCore for LookupSource {
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
@@ -45,150 +69,206 @@ impl UserDefinedLogicalNodeCore for LookupSource {
     }
 
     fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "LookupSource: {}", self.schema)
+        write!(f, "ReferenceTableSource: Schema={}", self.resolved_schema)
     }
 
     fn with_exprs_and_inputs(
         &self,
         _exprs: Vec<Expr>,
         inputs: Vec<LogicalPlan>,
-    ) -> datafusion::common::Result<Self> {
+    ) -> Result<Self> {
         if !inputs.is_empty() {
-            return internal_err!("LookupSource cannot have inputs");
+            return internal_err!(
+                "ReferenceTableSource is a leaf node and cannot accept upstream inputs"
+            );
         }
 
         Ok(Self {
-            table: self.table.clone(),
-            schema: self.schema.clone(),
+            source_definition: self.source_definition.clone(),
+            resolved_schema: self.resolved_schema.clone(),
         })
     }
 }
 
+// -----------------------------------------------------------------------------
+// Logical Node: Stream to Reference Join
+// -----------------------------------------------------------------------------
+
+/// Join between an unbounded stream and a reference (lookup) table.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct LookupJoin {
-    pub(crate) input: LogicalPlan,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) connector: ConnectorTable,
-    pub(crate) on: Vec<(Expr, Column)>,
-    pub(crate) filter: Option<Expr>,
-    pub(crate) alias: Option<TableReference>,
-    pub(crate) join_type: JoinType,
+pub struct StreamReferenceJoinNode {
+    pub(crate) upstream_stream_plan: LogicalPlan,
+    pub(crate) output_schema: DFSchemaRef,
+    pub(crate) external_dictionary: SourceTable,
+    pub(crate) equijoin_conditions: Vec<(Expr, Column)>,
+    pub(crate) post_join_filter: Option<Expr>,
+    pub(crate) namespace_alias: Option<TableReference>,
+    pub(crate) join_semantics: JoinType,
 }
 
-multifield_partial_ord!(LookupJoin, input, connector, on, filter, alias);
+multifield_partial_ord!(
+    StreamReferenceJoinNode,
+    upstream_stream_plan,
+    external_dictionary,
+    equijoin_conditions,
+    post_join_filter,
+    namespace_alias
+);
 
-impl StreamExtension for LookupJoin {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
+impl StreamReferenceJoinNode {
+    fn compile_join_conditions(&self, planner: &Planner) -> Result<Vec<LookupJoinCondition>> {
+        self.equijoin_conditions
+            .iter()
+            .map(|(logical_left_expr, right_column)| {
+                let physical_expr =
+                    planner.create_physical_expr(logical_left_expr, &self.output_schema)?;
+                let serialized_expr =
+                    serialize_physical_expr(&physical_expr, &DefaultPhysicalExtensionCodec {})?;
+
+                Ok(LookupJoinCondition {
+                    left_expr: serialized_expr.encode_to_vec(),
+                    right_key: right_column.name.clone(),
+                })
+            })
+            .collect()
+    }
+
+    fn map_api_join_type(&self) -> Result<i32> {
+        match self.join_semantics {
+            JoinType::Inner => Ok(api::JoinType::Inner as i32),
+            JoinType::Left => Ok(api::JoinType::Left as i32),
+            unsupported => plan_err!(
+                "Unsupported join type '{unsupported}' for dictionary lookups. Only INNER and LEFT joins are permitted."
+            ),
+        }
     }
 
-    fn plan_node(
+    fn build_engine_operator(
         &self,
         planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> datafusion::common::Result<NodeWithIncomingEdges> {
-        let schema = FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_ref().into()))?;
-        let lookup_schema = FsSchema::from_schema_unkeyed(add_timestamp_field_arrow(
-            self.connector.physical_schema(),
+        _upstream_schema: &FsSchemaRef,
+    ) -> Result<LookupJoinOperator> {
+        let internal_input_schema = FsSchema::from_schema_unkeyed(Arc::new(
+            self.output_schema.as_ref().into(),
         ))?;
-        let join_config = LookupJoinOperator {
-            input_schema: Some(schema.into()),
-            lookup_schema: Some(lookup_schema.into()),
+        let dictionary_physical_schema = self.external_dictionary.produce_physical_schema();
+        let lookup_fs_schema =
+            FsSchema::from_schema_unkeyed(add_timestamp_field_arrow(dictionary_physical_schema))?;
+
+        Ok(LookupJoinOperator {
+            input_schema: Some(internal_input_schema.into()),
+            lookup_schema: Some(lookup_fs_schema.into()),
             connector: Some(ConnectorOp {
-                connector: self.connector.connector.clone(),
-                config: self.connector.config.clone(),
-                description: self.connector.description.clone(),
+                connector: self.external_dictionary.adapter_type.clone(),
+                config: self.external_dictionary.opaque_config.clone(),
+                description: self.external_dictionary.description.clone(),
             }),
-            key_exprs: self
-                .on
-                .iter()
-                .map(|(l, r)| {
-                    let expr = planner.create_physical_expr(l, &self.schema)?;
-                    let expr = serialize_physical_expr(&expr, &DefaultPhysicalExtensionCodec {})?;
-                    Ok(LookupJoinCondition {
-                        left_expr: expr.encode_to_vec(),
-                        right_key: r.name.clone(),
-                    })
-                })
-                .collect::<datafusion::error::Result<Vec<_>>>()?,
-            join_type: match self.join_type {
-                JoinType::Inner => api::JoinType::Inner as i32,
-                JoinType::Left => api::JoinType::Left as i32,
-                j => {
-                    return plan_err!(
-                        "unsupported join type '{j}' for lookup join; only inner and left joins are supported"
-                    );
-                }
-            },
+            key_exprs: self.compile_join_conditions(planner)?,
+            join_type: self.map_api_join_type()?,
             ttl_micros: self
-                .connector
+                .external_dictionary
                 .lookup_cache_ttl
                 .map(|t| t.as_micros() as u64),
-            max_capacity_bytes: self.connector.lookup_cache_max_bytes,
-        };
-
-        let incoming_edge =
-            LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schemas[0]).clone());
-
-        Ok(NodeWithIncomingEdges {
-            node: LogicalNode::single(
-                index as u32,
-                format!("lookupjoin_{index}"),
-                OperatorName::LookupJoin,
-                join_config.encode_to_vec(),
-                format!("LookupJoin<{}>", self.connector.name),
-                1,
-            ),
-            edges: vec![incoming_edge],
+            max_capacity_bytes: self.external_dictionary.lookup_cache_max_bytes,
+        })
+    }
+}
+
+impl StreamingOperatorBlueprint for StreamReferenceJoinNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        None
+    }
+
+    fn compile_to_graph_node(
+        &self,
+        planner: &Planner,
+        node_index: usize,
+        mut input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        if input_schemas.len() != 1 {
+            return plan_err!(
+                "StreamReferenceJoinNode requires exactly one upstream stream input"
+            );
+        }
+        let upstream_schema = input_schemas.remove(0);
+
+        let operator_config = self.build_engine_operator(planner, &upstream_schema)?;
+
+        let logical_node = LogicalNode::single(
+            node_index as u32,
+            format!("lookup_join_{node_index}"),
+            OperatorName::LookupJoin,
+            operator_config.encode_to_vec(),
+            format!("DictionaryJoin<{}>", self.external_dictionary.table_identifier),
+            1,
+        );
+
+        let incoming_edge = LogicalEdge::project_all(
+            LogicalEdgeType::Shuffle,
+            (*upstream_schema).clone(),
+        );
+
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_node,
+            routing_edges: vec![incoming_edge],
         })
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_unkeyed(self.schema.inner().clone()).unwrap()
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(self.output_schema.inner().clone())
+            .expect("Failed to convert lookup join output schema to FsSchema")
     }
 }
 
-impl UserDefinedLogicalNodeCore for LookupJoin {
+impl UserDefinedLogicalNodeCore for StreamReferenceJoinNode {
     fn name(&self) -> &str {
-        JOIN_EXTENSION_NAME
+        STREAM_DICTIONARY_JOIN_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_stream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.output_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
-        let mut e: Vec<_> = self.on.iter().map(|(l, _)| l.clone()).collect();
-
-        if let Some(filter) = &self.filter {
-            e.push(filter.clone());
+        let mut exprs: Vec<_> = self
+            .equijoin_conditions
+            .iter()
+            .map(|(l, _)| l.clone())
+            .collect();
+        if let Some(filter) = &self.post_join_filter {
+            exprs.push(filter.clone());
         }
-
-        e
+        exprs
     }
 
     fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "LookupJoinExtension: {}", self.schema)
+        write!(
+            f,
+            "StreamReferenceJoin: join_type={:?} | {}",
+            self.join_semantics,
+            self.output_schema
+        )
     }
 
-    fn with_exprs_and_inputs(
-        &self,
-        _: Vec<Expr>,
-        inputs: Vec<LogicalPlan>,
-    ) -> datafusion::common::Result<Self> {
+    fn with_exprs_and_inputs(&self, _: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if inputs.len() != 1 {
+            return internal_err!(
+                "StreamReferenceJoinNode expects exactly 1 upstream plan, got {}",
+                inputs.len()
+            );
+        }
         Ok(Self {
-            input: inputs[0].clone(),
-            schema: self.schema.clone(),
-            connector: self.connector.clone(),
-            on: self.on.clone(),
-            filter: self.filter.clone(),
-            alias: self.alias.clone(),
-            join_type: self.join_type,
+            upstream_stream_plan: inputs[0].clone(),
+            output_schema: self.output_schema.clone(),
+            external_dictionary: self.external_dictionary.clone(),
+            equijoin_conditions: self.equijoin_conditions.clone(),
+            post_join_filter: self.post_join_filter.clone(),
+            namespace_alias: self.namespace_alias.clone(),
+            join_semantics: self.join_semantics,
         })
     }
-}
\ No newline at end of file
+}
diff --git a/src/sql/extensions/mod.rs b/src/sql/extensions/mod.rs
index a78ca419..eab2443b 100644
--- a/src/sql/extensions/mod.rs
+++ b/src/sql/extensions/mod.rs
@@ -15,8 +15,8 @@ mod macros;
 pub(crate) mod constants;
 pub(crate) use constants::ASYNC_RESULT_FIELD;
 
-pub(crate) mod stream_extension;
-pub(crate) use stream_extension::{NodeWithIncomingEdges, StreamExtension};
+pub(crate) mod streaming_operator_blueprint;
+pub(crate) use streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint};
 
 pub(crate) mod aggregate;
 pub(crate) mod debezium;
@@ -29,13 +29,13 @@ pub(crate) mod sink;
 pub(crate) mod table_source;
 pub(crate) mod updating_aggregate;
 pub(crate) mod watermark_node;
-pub(crate) mod window_fn;
+pub(crate) mod windows_function;
 
 pub(crate) mod timestamp_append;
-pub(crate) use timestamp_append::TimestampAppendExtension;
+pub(crate) use timestamp_append::SystemTimestampInjectorNode;
 
 pub(crate) mod async_udf;
-pub(crate) use async_udf::AsyncUDFExtension;
+pub(crate) use async_udf::AsyncFunctionExecutionNode;
 
 pub(crate) mod is_retract;
 pub(crate) use is_retract::IsRetractExtension;
diff --git a/src/sql/extensions/projection.rs b/src/sql/extensions/projection.rs
index ff319d12..2175bddf 100644
--- a/src/sql/extensions/projection.rs
+++ b/src/sql/extensions/projection.rs
@@ -1,137 +1,213 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
+use std::sync::Arc;
 
 use datafusion::common::{DFSchema, DFSchemaRef, Result, internal_err};
-use std::{fmt::Formatter, sync::Arc};
-
-use super::{StreamExtension, NodeWithIncomingEdges};
 use datafusion::logical_expr::{Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
 use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
 use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
-use itertools::Itertools;
 use prost::Message;
+
 use protocol::grpc::api::ProjectionOperator;
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+
 use crate::multifield_partial_ord;
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::types::{schema_from_df_fields, DFField};
 use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::types::{DFField, schema_from_df_fields};
+
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
+
+pub(crate) const STREAM_PROJECTION_NODE_NAME: &str = "StreamProjectionNode";
+const DEFAULT_PROJECTION_LABEL: &str = "projection";
 
-pub(crate) const PROJECTION_NAME: &str = "ProjectionExtension";
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
 
-/// Projection operations
+/// Projection within a streaming execution topology.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct ProjectionExtension {
-    pub(crate) inputs: Vec<LogicalPlan>,
-    pub(crate) name: Option<String>,
-    pub(crate) exprs: Vec<Expr>,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) shuffle: bool,
+pub(crate) struct StreamProjectionNode {
+    pub(crate) upstream_plans: Vec<LogicalPlan>,
+    pub(crate) operator_label: Option<String>,
+    pub(crate) projection_exprs: Vec<Expr>,
+    pub(crate) resolved_schema: DFSchemaRef,
+    pub(crate) requires_shuffle: bool,
 }
 
-multifield_partial_ord!(ProjectionExtension, name, exprs);
+multifield_partial_ord!(StreamProjectionNode, operator_label, projection_exprs);
 
-impl ProjectionExtension {
-    pub(crate) fn new(inputs: Vec<LogicalPlan>, name: Option<String>, exprs: Vec<Expr>) -> Self {
-        let input_schema = inputs.first().unwrap().schema();
-        let fields = exprs
-            .iter()
-            .map(|e| DFField::from(e.to_field(input_schema).unwrap()))
-            .collect_vec();
-
-        let schema = Arc::new(schema_from_df_fields(&fields).unwrap());
+impl StreamProjectionNode {
+    pub(crate) fn try_new(
+        upstream_plans: Vec<LogicalPlan>,
+        operator_label: Option<String>,
+        projection_exprs: Vec<Expr>,
+    ) -> Result<Self> {
+        if upstream_plans.is_empty() {
+            return internal_err!("StreamProjectionNode requires at least one upstream plan");
+        }
+        let primary_input = &upstream_plans[0];
+        let upstream_schema = primary_input.schema();
 
-        Self {
-            inputs,
-            name,
-            exprs,
-            schema,
-            shuffle: false,
+        let mut projected_fields = Vec::with_capacity(projection_exprs.len());
+        for logical_expr in &projection_exprs {
+            let arrow_field = logical_expr.to_field(upstream_schema)?;
+            projected_fields.push(DFField::from(arrow_field));
         }
-    }
 
-    pub(crate) fn shuffled(mut self) -> Self {
-        self.shuffle = true;
-        self
+        let resolved_schema = Arc::new(schema_from_df_fields(&projected_fields)?);
+
+        Ok(Self {
+            upstream_plans,
+            operator_label,
+            projection_exprs,
+            resolved_schema,
+            requires_shuffle: false,
+        })
     }
-}
 
-impl StreamExtension for ProjectionExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
+    pub(crate) fn with_shuffle_routing(mut self) -> Self {
+        self.requires_shuffle = true;
+        self
     }
 
-    fn plan_node(
-        &self,
-        planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        let input_schema = (*input_schemas[0]).clone();
+    fn validate_uniform_schemas(input_schemas: &[FsSchemaRef]) -> Result<FsSchemaRef> {
+        if input_schemas.is_empty() {
+            return internal_err!("No input schemas provided to projection planner");
+        }
+        let primary_schema = input_schemas[0].clone();
 
-        // check that all inputs have the same schemas
-        for s in input_schemas.iter().skip(1) {
-            if **s != input_schema {
-                return internal_err!("all input schemas to a projection node must mast");
+        for schema in input_schemas.iter().skip(1) {
+            if **schema != *primary_schema {
+                return internal_err!(
+                    "Schema mismatch: All upstream inputs to a projection node must share the identical schema topology."
+                );
             }
         }
 
-        let input_df_schema = Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone())?);
-        let mut physical_exprs = vec![];
+        Ok(primary_schema)
+    }
 
-        for e in &self.exprs {
-            let phys = planner
-                .create_physical_expr(e, &input_df_schema)
-                .map_err(|e| e.context("projection"))?;
-            physical_exprs.push(
-                serialize_physical_expr(&phys, &DefaultPhysicalExtensionCodec {})?.encode_to_vec(),
-            );
-        }
+    fn compile_physical_expressions(
+        &self,
+        planner: &Planner,
+        input_df_schema: &DFSchemaRef,
+    ) -> Result<Vec<Vec<u8>>> {
+        self.projection_exprs
+            .iter()
+            .map(|logical_expr| {
+                let physical_expr = planner
+                    .create_physical_expr(logical_expr, input_df_schema)
+                    .map_err(|e| e.context("Failed to compile physical projection expression"))?;
+
+                let serialized_expr = serialize_physical_expr(
+                    &physical_expr,
+                    &DefaultPhysicalExtensionCodec {},
+                )?;
+
+                Ok(serialized_expr.encode_to_vec())
+            })
+            .collect()
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Stream Extension Trait Implementation
+// -----------------------------------------------------------------------------
 
-        let config = ProjectionOperator {
-            name: self.name.as_deref().unwrap_or("projection").to_string(),
-            input_schema: Some(input_schema.clone().into()),
+impl StreamingOperatorBlueprint for StreamProjectionNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        None
+    }
 
-            output_schema: Some(self.output_schema().into()),
-            exprs: physical_exprs,
+    fn compile_to_graph_node(
+        &self,
+        planner: &Planner,
+        node_index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        let unified_input_schema = Self::validate_uniform_schemas(&input_schemas)?;
+        let input_df_schema =
+            Arc::new(DFSchema::try_from(unified_input_schema.schema.as_ref().clone())?);
+
+        let compiled_expr_payloads = self.compile_physical_expressions(planner, &input_df_schema)?;
+
+        let operator_config = ProjectionOperator {
+            name: self
+                .operator_label
+                .as_deref()
+                .unwrap_or(DEFAULT_PROJECTION_LABEL)
+                .to_string(),
+            input_schema: Some(unified_input_schema.as_ref().clone().into()),
+            output_schema: Some(self.yielded_schema().into()),
+            exprs: compiled_expr_payloads,
         };
 
-        let node = LogicalNode::single(
-            index as u32,
-            format!("projection_{index}"),
+        let node_identifier = format!("projection_{node_index}");
+        let label = format!(
+            "ArrowProjection<{}>",
+            self.operator_label.as_deref().unwrap_or("_")
+        );
+
+        let logical_node = LogicalNode::single(
+            node_index as u32,
+            node_identifier,
             OperatorName::Projection,
-            config.encode_to_vec(),
-            format!("ArrowProjection<{}>", self.name.as_deref().unwrap_or("_")),
+            operator_config.encode_to_vec(),
+            label,
             1,
         );
 
-        let edge_type = if self.shuffle {
+        let routing_strategy = if self.requires_shuffle {
             LogicalEdgeType::Shuffle
         } else {
             LogicalEdgeType::Forward
         };
 
-        let edge = LogicalEdge::project_all(edge_type, input_schema);
-        Ok(NodeWithIncomingEdges {
-            node,
-            edges: vec![edge],
+        let outgoing_edge =
+            LogicalEdge::project_all(routing_strategy, (*unified_input_schema).clone());
+
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_node,
+            routing_edges: vec![outgoing_edge],
         })
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_unkeyed(Arc::new(self.schema.as_arrow().clone())).unwrap()
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.resolved_schema.as_arrow().clone()))
+            .expect("Fatal: Failed to generate unkeyed output schema for projection")
     }
 }
 
-impl UserDefinedLogicalNodeCore for ProjectionExtension {
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Hooks
+// -----------------------------------------------------------------------------
+
+impl UserDefinedLogicalNodeCore for StreamProjectionNode {
     fn name(&self) -> &str {
-        PROJECTION_NAME
+        STREAM_PROJECTION_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        self.inputs.iter().collect()
+        self.upstream_plans.iter().collect()
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
@@ -139,16 +215,25 @@ impl UserDefinedLogicalNodeCore for ProjectionExtension {
     }
 
     fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "KeyCalculationExtension: {}", self.schema())
+        write!(
+            f,
+            "StreamProjectionNode: RequiresShuffle={}, Schema={}",
+            self.requires_shuffle,
+            self.resolved_schema
+        )
     }
 
-    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self {
-            name: self.name.clone(),
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        let mut new_node = Self::try_new(
             inputs,
-            exprs,
-            schema: self.schema.clone(),
-            shuffle: self.shuffle,
-        })
+            self.operator_label.clone(),
+            self.projection_exprs.clone(),
+        )?;
+
+        if self.requires_shuffle {
+            new_node = new_node.with_shuffle_routing();
+        }
+
+        Ok(new_node)
     }
 }
diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs
index 0bd2706f..5011bb4c 100644
--- a/src/sql/extensions/remote_table.rs
+++ b/src/sql/extensions/remote_table.rs
@@ -1,144 +1,158 @@
-use std::{fmt::Formatter, sync::Arc};
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
+use std::sync::Arc;
 
 use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err, plan_err};
-
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
-use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode};
+use datafusion_proto::physical_plan::AsExecutionPlan;
+use datafusion_proto::protobuf::PhysicalPlanNode;
 use prost::Message;
+
 use protocol::grpc::api::ValuePlanOperator;
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+
 use crate::multifield_partial_ord;
+use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::sql::logical_planner::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::common::{FsSchema, FsSchemaRef};
-use super::{StreamExtension, NodeWithIncomingEdges};
 
-pub(crate) const REMOTE_TABLE_NAME: &str = "RemoteTableExtension";
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
 
-/* Lightweight extension that allows us to segment the graph and merge nodes with the same name.
-  An Extension Planner will be used to isolate computation to individual nodes.
-*/
+pub(crate) const REMOTE_TABLE_NODE_NAME: &str = "RemoteTableBoundaryNode";
+
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
+
+/// Segments the execution graph and merges nodes sharing the same identifier; acts as a boundary.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct RemoteTableExtension {
-    pub(crate) input: LogicalPlan,
-    pub(crate) name: TableReference,
-    pub(crate) schema: DFSchemaRef,
-    pub(crate) materialize: bool,
+pub(crate) struct RemoteTableBoundaryNode {
+    pub(crate) upstream_plan: LogicalPlan,
+    pub(crate) table_identifier: TableReference,
+    pub(crate) resolved_schema: DFSchemaRef,
+    pub(crate) requires_materialization: bool,
 }
 
-multifield_partial_ord!(RemoteTableExtension, input, name, materialize);
+multifield_partial_ord!(
+    RemoteTableBoundaryNode,
+    upstream_plan,
+    table_identifier,
+    requires_materialization
+);
 
-impl RemoteTableExtension {
-    fn plan_node_inlined(
-        planner: &Planner,
-        index: usize,
-        this: &RemoteTableExtension,
-    ) -> Result<NodeWithIncomingEdges> {
-        let physical_plan = planner.sync_plan(&this.input)?;
-        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
+impl RemoteTableBoundaryNode {
+    fn compile_engine_operator(&self, planner: &Planner) -> Result<Vec<u8>> {
+        let physical_plan = planner.sync_plan(&self.upstream_plan)?;
+
+        let physical_plan_proto = PhysicalPlanNode::try_from_physical_plan(
             physical_plan,
             &FsPhysicalExtensionCodec::default(),
         )?;
-        let config = ValuePlanOperator {
-            name: format!("value_calculation({})", this.name),
-            physical_plan: physical_plan_node.encode_to_vec(),
+
+        let operator_config = ValuePlanOperator {
+            name: format!("value_calculation({})", self.table_identifier),
+            physical_plan: physical_plan_proto.encode_to_vec(),
         };
-        let node = LogicalNode::single(
-            index as u32,
-            format!("value_{index}"),
-            OperatorName::ArrowValue,
-            config.encode_to_vec(),
-            this.name.to_string(),
-            1,
-        );
-        Ok(NodeWithIncomingEdges {
-            node,
-            edges: vec![],
-        })
+
+        Ok(operator_config.encode_to_vec())
     }
 
-    fn plan_node_with_edges(
-        planner: &Planner,
-        index: usize,
-        this: &RemoteTableExtension,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        let physical_plan = planner.sync_plan(&this.input)?;
-        let physical_plan_node = PhysicalPlanNode::try_from_physical_plan(
-            physical_plan,
-            &FsPhysicalExtensionCodec::default(),
-        )?;
-        let config = ValuePlanOperator {
-            name: format!("value_calculation({})", this.name),
-            physical_plan: physical_plan_node.encode_to_vec(),
-        };
-        let node = LogicalNode::single(
-            index as u32,
-            format!("value_{index}"),
-            OperatorName::ArrowValue,
-            config.encode_to_vec(),
-            this.name.to_string(),
-            1,
-        );
+    fn validate_uniform_schemas(input_schemas: &[FsSchemaRef]) -> Result<()> {
+        if input_schemas.len() <= 1 {
+            return Ok(());
+        }
 
-        let edges = input_schemas
-            .into_iter()
-            .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone()))
-            .collect();
-        Ok(NodeWithIncomingEdges { node, edges })
+        let primary_schema = &input_schemas[0];
+        for schema in input_schemas.iter().skip(1) {
+            if *schema != *primary_schema {
+                return plan_err!(
+                    "Topology error: Multiple input streams routed to the same remote table must share an identical schema structure."
+                );
+            }
+        }
+
+        Ok(())
     }
 }
 
-impl StreamExtension for RemoteTableExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        if self.materialize {
-            Some(NamedNode::RemoteTable(self.name.to_owned()))
+// -----------------------------------------------------------------------------
+// Stream Extension Trait Implementation
+// -----------------------------------------------------------------------------
+
+impl StreamingOperatorBlueprint for RemoteTableBoundaryNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        if self.requires_materialization {
+            Some(NamedNode::RemoteTable(self.table_identifier.clone()))
         } else {
             None
         }
     }
 
-    fn plan_node(
+    fn compile_to_graph_node(
         &self,
         planner: &Planner,
-        index: usize,
+        node_index: usize,
         input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        match input_schemas.len() {
-            0 => {
-                return Self::plan_node_inlined(planner, index, self);
-            }
-            1 => {}
-            _multiple_inputs => {
-                let first = input_schemas[0].clone();
-                for schema in input_schemas.iter().skip(1) {
-                    if *schema != first {
-                        return plan_err!(
-                            "If a node has multiple inputs, they must all have the same schema"
-                        );
-                    }
-                }
-            }
-        }
-        Self::plan_node_with_edges(planner, index, self, input_schemas)
+    ) -> Result<CompiledTopologyNode> {
+        Self::validate_uniform_schemas(&input_schemas)?;
+
+        let operator_payload = self.compile_engine_operator(planner)?;
+
+        let logical_node = LogicalNode::single(
+            node_index as u32,
+            format!("value_{node_index}"),
+            OperatorName::ArrowValue,
+            operator_payload,
+            self.table_identifier.to_string(),
+            1,
+        );
+
+        let routing_edges: Vec<LogicalEdge> = input_schemas
+            .into_iter()
+            .map(|schema| LogicalEdge::project_all(LogicalEdgeType::Forward, (*schema).clone()))
+            .collect();
+
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_node,
+            routing_edges: routing_edges,
+        })
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap()
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_schema_keys(Arc::new(self.resolved_schema.as_ref().into()), vec![])
+            .expect("Fatal: Failed to generate output schema for remote table boundary")
     }
 }
 
-impl UserDefinedLogicalNodeCore for RemoteTableExtension {
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Hooks
+// -----------------------------------------------------------------------------
+
+impl UserDefinedLogicalNodeCore for RemoteTableBoundaryNode {
     fn name(&self) -> &str {
-        REMOTE_TABLE_NAME
+        REMOTE_TABLE_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
@@ -146,19 +160,28 @@ impl UserDefinedLogicalNodeCore for RemoteTableExtension {
     }
 
     fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "RemoteTableExtension: {}", self.schema)
+        write!(
+            f,
+            "RemoteTableBoundaryNode: Identifier={}, Materialized={}, Schema={}",
+            self.table_identifier,
+            self.requires_materialization,
+            self.resolved_schema
+        )
     }
 
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, mut inputs: Vec<LogicalPlan>) -> Result<Self> {
         if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
+            return internal_err!(
+                "RemoteTableBoundaryNode expects exactly 1 upstream logical plan, but received {}",
+                inputs.len()
+            );
         }
 
         Ok(Self {
-            input: inputs[0].clone(),
-            name: self.name.clone(),
-            schema: self.schema.clone(),
-            materialize: self.materialize,
+            upstream_plan: inputs.remove(0),
+            table_identifier: self.table_identifier.clone(),
+            resolved_schema: self.resolved_schema.clone(),
+            requires_materialization: self.requires_materialization,
         })
     }
 }
diff --git a/src/sql/extensions/sink.rs b/src/sql/extensions/sink.rs
index a1112c4b..8fc31aac 100644
--- a/src/sql/extensions/sink.rs
+++ b/src/sql/extensions/sink.rs
@@ -1,168 +1,228 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
 use std::sync::Arc;
 
 use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err};
-
 use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
-
 use prost::Message;
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+
 use crate::multifield_partial_ord;
-use crate::sql::schema::Table;
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
-use super::{
-    StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension,
-    remote_table::RemoteTableExtension,
-};
+use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::schema::Table;
+
+use super::debezium::PackDebeziumEnvelopeNode;
+use super::remote_table::RemoteTableBoundaryNode;
 
-pub(crate) const SINK_NODE_NAME: &str = "SinkExtension";
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
 
+pub(crate) const STREAM_EGRESS_NODE_NAME: &str = "StreamEgressNode";
+
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
+
+/// Terminal node routing processed data into an external sink (e.g. Kafka, PostgreSQL).
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct SinkExtension {
-    pub(crate) name: TableReference,
-    pub(crate) table: Table,
-    pub(crate) schema: DFSchemaRef,
-    inputs: Arc<Vec<LogicalPlan>>,
+pub(crate) struct StreamEgressNode {
+    pub(crate) target_identifier: TableReference,
+    pub(crate) destination_table: Table,
+    pub(crate) egress_schema: DFSchemaRef,
+    upstream_plans: Arc<Vec<LogicalPlan>>,
 }
 
-multifield_partial_ord!(SinkExtension, name, inputs);
+multifield_partial_ord!(StreamEgressNode, target_identifier, upstream_plans);
 
-impl SinkExtension {
-    pub fn new(
-        name: TableReference,
-        table: Table,
-        mut schema: DFSchemaRef,
-        mut input: Arc<LogicalPlan>,
+impl StreamEgressNode {
+    pub fn try_new(
+        target_identifier: TableReference,
+        destination_table: Table,
+        initial_schema: DFSchemaRef,
+        upstream_plan: LogicalPlan,
     ) -> Result<Self> {
-        let input_is_updating = input
+        let (mut processed_plan, mut resolved_schema) = Self::apply_cdc_transformations(
+            upstream_plan,
+            initial_schema,
+            &destination_table,
+        )?;
+
+        Self::enforce_computational_boundary(&mut resolved_schema, &mut processed_plan);
+
+        Ok(Self {
+            target_identifier,
+            destination_table,
+            egress_schema: resolved_schema,
+            upstream_plans: Arc::new(vec![processed_plan]),
+        })
+    }
+
+    fn apply_cdc_transformations(
+        plan: LogicalPlan,
+        schema: DFSchemaRef,
+        destination: &Table,
+    ) -> Result<(LogicalPlan, DFSchemaRef)> {
+        let is_upstream_updating = plan
             .schema()
             .has_column_with_unqualified_name(UPDATING_META_FIELD);
-        match &table {
-            Table::ConnectorTable(connector_table) => {
-                match (input_is_updating, connector_table.is_updating()) {
+
+        match destination {
+            Table::ConnectorTable(connector) => {
+                let is_sink_updating = connector.is_updating();
+
+                match (is_upstream_updating, is_sink_updating) {
                     (_, true) => {
-                        let to_debezium_extension =
-                            ToDebeziumExtension::try_new(input.as_ref().clone())?;
-                        input = Arc::new(LogicalPlan::Extension(Extension {
-                            node: Arc::new(to_debezium_extension),
-                        }));
-                        schema = input.schema().clone();
+                        let debezium_encoder = PackDebeziumEnvelopeNode::try_new(plan)?;
+                        let wrapped_plan = LogicalPlan::Extension(Extension {
+                            node: Arc::new(debezium_encoder),
+                        });
+                        let new_schema = wrapped_plan.schema().clone();
+
+                        Ok((wrapped_plan, new_schema))
                     }
                     (true, false) => {
-                        return plan_err!(
-                            "input is updating, but sink is not configured as an updating sink (hint: use `format = 'debezium_json'`)"
-                        );
+                        plan_err!(
+                            "Topology Mismatch: The upstream is producing an updating stream (CDC), \
+                             but the target sink '{}' is not configured to accept updates. \
+                             Hint: set `format = 'debezium_json'` in the WITH clause.",
+                            connector.name()
+                        )
                     }
-                    (false, false) => {}
+                    (false, false) => Ok((plan, schema)),
                 }
             }
-            Table::LookupTable(..) => return plan_err!("cannot use a lookup table as a sink"),
-            Table::TableFromQuery { .. } => {}
-
-        }
-        Self::add_remote_if_necessary(&schema, &mut input);
-
-        let inputs = Arc::new(vec![(*input).clone()]);
-        Ok(Self {
-            name,
-            table,
-            schema,
-            inputs,
-        })
-    }
-
-    // The input to a sink needs to be a non-transparent logical plan extension.
-    // If it isn't, wrap the input in a RemoteTableExtension.
-    pub fn add_remote_if_necessary(schema: &DFSchemaRef, input: &mut Arc<LogicalPlan>) {
-        if let LogicalPlan::Extension(node) = input.as_ref() {
-            let arroyo_extension: &dyn StreamExtension = (&node.node).try_into().unwrap();
-            if !arroyo_extension.transparent() {
-                return;
+            Table::LookupTable(..) => {
+                plan_err!("Topology Violation: A Lookup Table cannot be used as a streaming data sink.")
             }
+            Table::TableFromQuery { .. } => Ok((plan, schema)),
         }
-        let remote_table_extension = RemoteTableExtension {
-            input: input.as_ref().clone(),
-            name: TableReference::bare("sink projection"),
-            schema: schema.clone(),
-            materialize: false,
-        };
-        *input = Arc::new(LogicalPlan::Extension(Extension {
-            node: Arc::new(remote_table_extension),
-        }));
-    }
-}
-
-impl UserDefinedLogicalNodeCore for SinkExtension {
-    fn name(&self) -> &str {
-        SINK_NODE_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        self.inputs.iter().collect()
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        &self.schema
     }
 
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
+    fn enforce_computational_boundary(schema: &mut DFSchemaRef, plan: &mut LogicalPlan) {
+        let requires_boundary = if let LogicalPlan::Extension(extension) = plan {
+            let stream_ext: &dyn StreamingOperatorBlueprint = (&extension.node)
+                .try_into()
+                .expect("Fatal: Egress node encountered an extension that does not implement StreamingOperatorBlueprint");
 
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "SinkExtension({:?}): {}", self.name, self.schema)
-    }
+            stream_ext.is_passthrough_boundary()
+        } else {
+            true
+        };
 
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self {
-            name: self.name.clone(),
-            table: self.table.clone(),
-            schema: self.schema.clone(),
-            inputs: Arc::new(inputs),
-        })
+        if requires_boundary {
+            let boundary_node = RemoteTableBoundaryNode {
+                upstream_plan: plan.clone(),
+                table_identifier: TableReference::bare("sink projection"),
+                resolved_schema: schema.clone(),
+                requires_materialization: false,
+            };
+
+            *plan = LogicalPlan::Extension(Extension {
+                node: Arc::new(boundary_node),
+            });
+        }
     }
 }
 
-impl StreamExtension for SinkExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        match &self.table {
-            _ => Some(NamedNode::Sink(self.name.clone())),
-        }
+// -----------------------------------------------------------------------------
+// Stream Extension Trait Implementation
+// -----------------------------------------------------------------------------
+
+impl StreamingOperatorBlueprint for StreamEgressNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        Some(NamedNode::Sink(self.target_identifier.clone()))
     }
 
-    fn plan_node(
+    fn compile_to_graph_node(
         &self,
         _planner: &Planner,
-        index: usize,
+        node_index: usize,
         input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        let operator_config = (self
-            .table
+    ) -> Result<CompiledTopologyNode> {
+        let connector_operator = self
+            .destination_table
             .connector_op()
-            .map_err(|e| e.context("connector op"))?)
-            .encode_to_vec();
+            .map_err(|e| e.context("Failed to generate connector operation payload"))?;
+
+        let operator_description = connector_operator.description.clone();
+        let operator_payload = connector_operator.encode_to_vec();
 
-        let node = LogicalNode::single(
-            index as u32,
-            format!("sink_{}_{}", self.name, index),
+        let logical_node = LogicalNode::single(
+            node_index as u32,
+            format!("sink_{}_{node_index}", self.target_identifier),
             OperatorName::ConnectorSink,
-            operator_config,
-            self.table.connector_op()?.description.clone(),
+            operator_payload,
+            operator_description,
             1,
         );
 
-        let edges = input_schemas
+        let routing_edges: Vec<LogicalEdge> = input_schemas
             .into_iter()
             .map(|input_schema| {
                 LogicalEdge::project_all(LogicalEdgeType::Forward, (*input_schema).clone())
             })
             .collect();
-        Ok(NodeWithIncomingEdges { node, edges })
+
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_node,
+            routing_edges: routing_edges,
+        })
     }
 
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_fields(vec![])
+    }
+}
 
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Hooks
+// -----------------------------------------------------------------------------
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_fields(vec![])
+impl UserDefinedLogicalNodeCore for StreamEgressNode {
+    fn name(&self) -> &str {
+        STREAM_EGRESS_NODE_NAME
+    }
+
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        self.upstream_plans.iter().collect()
+    }
+
+    fn schema(&self) -> &DFSchemaRef {
+        &self.egress_schema
     }
-}
\ No newline at end of file
+
+    fn expressions(&self) -> Vec<Expr> {
+        vec![]
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "StreamEgressNode({:?}): Schema={}",
+            self.target_identifier, self.egress_schema
+        )
+    }
+
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        Ok(Self {
+            target_identifier: self.target_identifier.clone(),
+            destination_table: self.destination_table.clone(),
+            egress_schema: self.egress_schema.clone(),
+            upstream_plans: Arc::new(inputs),
+        })
+    }
+}
diff --git a/src/sql/extensions/stream_extension.rs b/src/sql/extensions/stream_extension.rs
deleted file mode 100644
index 76954529..00000000
--- a/src/sql/extensions/stream_extension.rs
+++ /dev/null
@@ -1,38 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::fmt::Debug;
-
-use datafusion::common::Result;
-
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalNode};
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::common::{FsSchema, FsSchemaRef};
-
-pub(crate) trait StreamExtension: Debug {
-    fn node_name(&self) -> Option<NamedNode>;
-    fn plan_node(
-        &self,
-        planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges>;
-    fn output_schema(&self) -> FsSchema;
-    fn transparent(&self) -> bool {
-        false
-    }
-}
-
-pub(crate) struct NodeWithIncomingEdges {
-    pub node: LogicalNode,
-    pub edges: Vec<LogicalEdge>,
-}
diff --git a/src/sql/extensions/streaming_operator_blueprint.rs b/src/sql/extensions/streaming_operator_blueprint.rs
new file mode 100644
index 00000000..d3f9d459
--- /dev/null
+++ b/src/sql/extensions/streaming_operator_blueprint.rs
@@ -0,0 +1,65 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Debug;
+
+use datafusion::common::Result;
+
+use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalNode};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+
+// -----------------------------------------------------------------------------
+// Core Execution Blueprint
+// -----------------------------------------------------------------------------
+
+/// Atomic unit within a streaming execution topology: translates streaming SQL into graph nodes.
+pub(crate) trait StreamingOperatorBlueprint: Debug {
+    /// Canonical named identity for this operator, if any (sources, sinks, etc.).
+    fn operator_identity(&self) -> Option<NamedNode>;
+
+    /// Compiles this operator into a graph vertex and its incoming routing edges.
+    fn compile_to_graph_node(
+        &self,
+        compiler_context: &Planner,
+        node_id_sequence: usize,
+        upstream_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode>;
+
+    /// Schema of records this operator yields downstream.
+    fn yielded_schema(&self) -> FsSchema;
+
+    /// Logical passthrough boundary (no physical state change); default is stateful / materializing.
+    fn is_passthrough_boundary(&self) -> bool {
+        false
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Graph Topology Structures
+// -----------------------------------------------------------------------------
+
+/// Compiled vertex: execution unit plus upstream routing edges.
+#[derive(Debug, Clone)]
+pub(crate) struct CompiledTopologyNode {
+    pub execution_unit: LogicalNode,
+    pub routing_edges: Vec<LogicalEdge>,
+}
+
+impl CompiledTopologyNode {
+    pub fn new(execution_unit: LogicalNode, routing_edges: Vec<LogicalEdge>) -> Self {
+        Self {
+            execution_unit,
+            routing_edges,
+        }
+    }
+}
diff --git a/src/sql/extensions/table_source.rs b/src/sql/extensions/table_source.rs
index 0b069bbf..292284ba 100644
--- a/src/sql/extensions/table_source.rs
+++ b/src/sql/extensions/table_source.rs
@@ -1,65 +1,98 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
 use std::sync::Arc;
 
 use datafusion::common::{DFSchemaRef, Result, TableReference, plan_err};
-
-use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
-
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 use prost::Message;
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+
 use crate::multifield_partial_ord;
-use crate::sql::schema::{ConnectorTable, FieldSpec, Table};
-use crate::sql::schema::utils::add_timestamp_field;
-use crate::sql::extensions::debezium::DebeziumUnrollingExtension;
+use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::debezium::DebeziumSchemaCodec;
+use crate::sql::logical_node::logical::{LogicalNode, OperatorName};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::schema::SourceTable;
+use crate::sql::schema::utils::add_timestamp_field;
 use crate::sql::types::schema_from_df_fields;
-use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
-use super::{
-    StreamExtension, NodeWithIncomingEdges, debezium::ToDebeziumExtension,
-    remote_table::RemoteTableExtension,
-};
-pub(crate) const TABLE_SOURCE_NAME: &str = "TableSourceExtension";
 
+use super::{CompiledTopologyNode, StreamingOperatorBlueprint};
+
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
+
+pub(crate) const STREAM_INGESTION_NODE_NAME: &str = "StreamIngestionNode";
+
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
+
+/// Foundational ingestion point: connects to external systems and injects raw or CDC data.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct TableSourceExtension {
-    pub(crate) name: TableReference,
-    pub(crate) table: ConnectorTable,
-    pub(crate) schema: DFSchemaRef,
+pub(crate) struct StreamIngestionNode {
+    pub(crate) source_identifier: TableReference,
+    pub(crate) source_definition: SourceTable,
+    pub(crate) resolved_schema: DFSchemaRef,
 }
 
-multifield_partial_ord!(TableSourceExtension, name, table);
+multifield_partial_ord!(StreamIngestionNode, source_identifier, source_definition);
+
+impl StreamIngestionNode {
+    pub fn try_new(
+        source_identifier: TableReference,
+        source_definition: SourceTable,
+    ) -> Result<Self> {
+        let resolved_schema =
+            Self::build_ingestion_schema(&source_identifier, &source_definition)?;
+
+        Ok(Self {
+            source_identifier,
+            source_definition,
+            resolved_schema,
+        })
+    }
 
-impl TableSourceExtension {
-    pub fn new(name: TableReference, table: ConnectorTable) -> Self {
-        let physical_fields = table
-            .fields
+    fn build_ingestion_schema(
+        identifier: &TableReference,
+        definition: &SourceTable,
+    ) -> Result<DFSchemaRef> {
+        let physical_fields: Vec<_> = definition
+            .schema_specs
             .iter()
-            .filter_map(|field| match field {
-                FieldSpec::Struct(field) | FieldSpec::Metadata { field, .. } => {
-                    Some((Some(name.clone()), Arc::new(field.clone())).into())
-                }
-                FieldSpec::Virtual { .. } => None,
-            })
-            .collect::<Vec<_>>();
-        let base_schema = Arc::new(schema_from_df_fields(&physical_fields).unwrap());
-
-        let schema = if table.is_updating() {
-            DebeziumUnrollingExtension::as_debezium_schema(&base_schema, Some(name.clone()))
-                .unwrap()
+            .filter(|col| !col.is_computed())
+            .map(|col| (Some(identifier.clone()), Arc::new(col.arrow_field().clone())).into())
+            .collect();
+
+        let base_schema = Arc::new(schema_from_df_fields(&physical_fields)?);
+
+        let enveloped_schema = if definition.is_updating() {
+            DebeziumSchemaCodec::wrap_into_envelope(&base_schema, Some(identifier.clone()))?
         } else {
             base_schema
         };
-        let schema = add_timestamp_field(schema, Some(name.clone())).unwrap();
-        Self {
-            name,
-            table,
-            schema,
-        }
+
+        add_timestamp_field(enveloped_schema, Some(identifier.clone()))
     }
 }
 
-impl UserDefinedLogicalNodeCore for TableSourceExtension {
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Hooks
+// -----------------------------------------------------------------------------
+
+impl UserDefinedLogicalNodeCore for StreamIngestionNode {
     fn name(&self) -> &str {
-        TABLE_SOURCE_NAME
+        STREAM_INGESTION_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
@@ -67,56 +100,76 @@ impl UserDefinedLogicalNodeCore for TableSourceExtension {
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
         vec![]
     }
 
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "TableSourceExtension: {}", self.schema)
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "StreamIngestionNode({}): Schema={}",
+            self.source_identifier, self.resolved_schema
+        )
     }
 
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, _inputs: Vec<LogicalPlan>) -> Result<Self> {
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        if !inputs.is_empty() {
+            return plan_err!(
+                "StreamIngestionNode acts as a leaf boundary and cannot accept upstream inputs."
+            );
+        }
+
         Ok(Self {
-            name: self.name.clone(),
-            table: self.table.clone(),
-            schema: self.schema.clone(),
+            source_identifier: self.source_identifier.clone(),
+            source_definition: self.source_definition.clone(),
+            resolved_schema: self.resolved_schema.clone(),
         })
     }
 }
 
-impl StreamExtension for TableSourceExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        Some(NamedNode::Source(self.name.clone()))
+// -----------------------------------------------------------------------------
+// Core Execution Blueprint Implementation
+// -----------------------------------------------------------------------------
+
+impl StreamingOperatorBlueprint for StreamIngestionNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        Some(NamedNode::Source(self.source_identifier.clone()))
     }
 
-    fn plan_node(
+    fn compile_to_graph_node(
         &self,
-        _planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        if !input_schemas.is_empty() {
-            return plan_err!("TableSourceExtension should not have inputs");
+        _compiler_context: &Planner,
+        node_id_sequence: usize,
+        upstream_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        if !upstream_schemas.is_empty() {
+            return plan_err!(
+                "Topology Violation: StreamIngestionNode is a source origin and cannot process upstream routing edges."
+            );
         }
-        let sql_source = self.table.as_sql_source()?;
-        let node = LogicalNode::single(
-            index as u32,
-            format!("source_{}_{}", self.name, index),
+
+        let sql_source = self.source_definition.as_sql_source()?;
+        let connector_payload = sql_source.source.config.encode_to_vec();
+        let operator_description = sql_source.source.config.description.clone();
+
+        let execution_unit = LogicalNode::single(
+            node_id_sequence as u32,
+            format!("source_{}_{node_id_sequence}", self.source_identifier),
             OperatorName::ConnectorSource,
-            sql_source.source.config.encode_to_vec(),
-            sql_source.source.config.description.clone(),
+            connector_payload,
+            operator_description,
             1,
         );
-        Ok(NodeWithIncomingEdges {
-            node,
-            edges: vec![],
-        })
+
+        Ok(CompiledTopologyNode::new(execution_unit, vec![]))
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_keys(Arc::new(self.schema.as_ref().into()), vec![]).unwrap()
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_schema_keys(Arc::new(self.resolved_schema.as_ref().into()), vec![]).expect(
+            "Fatal: Failed to generate output schema for stream ingestion",
+        )
     }
 }
diff --git a/src/sql/extensions/timestamp_append.rs b/src/sql/extensions/timestamp_append.rs
index 069b288a..7a3a07e9 100644
--- a/src/sql/extensions/timestamp_append.rs
+++ b/src/sql/extensions/timestamp_append.rs
@@ -10,71 +10,107 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use datafusion::common::{DFSchemaRef, Result, TableReference};
+use std::fmt::Formatter;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err};
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 
 use crate::multifield_partial_ord;
 use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field};
 
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
+
+pub(crate) const TIMESTAMP_INJECTOR_NODE_NAME: &str = "SystemTimestampInjectorNode";
+
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
+
+/// Injects the mandatory system `_timestamp` field into the upstream streaming schema.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub(crate) struct TimestampAppendExtension {
-    pub(crate) input: LogicalPlan,
-    pub(crate) qualifier: Option<TableReference>,
-    pub(crate) schema: DFSchemaRef,
+pub(crate) struct SystemTimestampInjectorNode {
+    pub(crate) upstream_plan: LogicalPlan,
+    pub(crate) target_qualifier: Option<TableReference>,
+    pub(crate) resolved_schema: DFSchemaRef,
 }
 
-impl TimestampAppendExtension {
-    pub(crate) fn new(input: LogicalPlan, qualifier: Option<TableReference>) -> Self {
-        if has_timestamp_field(input.schema()) {
-            unreachable!(
-                "shouldn't be adding timestamp to a plan that already has it: plan :\n {:?}\n schema: {:?}",
-                input,
-                input.schema()
+multifield_partial_ord!(SystemTimestampInjectorNode, upstream_plan, target_qualifier);
+
+impl SystemTimestampInjectorNode {
+    pub(crate) fn try_new(
+        upstream_plan: LogicalPlan,
+        target_qualifier: Option<TableReference>,
+    ) -> Result<Self> {
+        let upstream_schema = upstream_plan.schema();
+
+        if has_timestamp_field(upstream_schema) {
+            return internal_err!(
+                "Topology Violation: Attempted to inject a system timestamp into an upstream plan \
+                 that already contains one. \
+                 \nPlan:\n {:?} \nSchema:\n {:?}",
+                upstream_plan,
+                upstream_schema
             );
         }
-        let schema = add_timestamp_field(input.schema().clone(), qualifier.clone()).unwrap();
-        Self {
-            input,
-            qualifier,
-            schema,
-        }
+
+        let resolved_schema =
+            add_timestamp_field(upstream_schema.clone(), target_qualifier.clone())?;
+
+        Ok(Self {
+            upstream_plan,
+            target_qualifier,
+            resolved_schema,
+        })
     }
 }
 
-multifield_partial_ord!(TimestampAppendExtension, input, qualifier);
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Hooks
+// -----------------------------------------------------------------------------
 
-impl UserDefinedLogicalNodeCore for TimestampAppendExtension {
+impl UserDefinedLogicalNodeCore for SystemTimestampInjectorNode {
     fn name(&self) -> &str {
-        "TimestampAppendExtension"
+        TIMESTAMP_INJECTOR_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
         vec![]
     }
 
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        let field_names = self
+            .resolved_schema
+            .fields()
+            .iter()
+            .map(|field| field.name().to_string())
+            .collect::<Vec<String>>()
+            .join(", ");
+
         write!(
             f,
-            "TimestampAppendExtension({:?}): {}",
-            self.qualifier,
-            self.schema
-                .fields()
-                .iter()
-                .map(|f| f.name().to_string())
-                .collect::<Vec<_>>()
-                .join(", ")
+            "SystemTimestampInjector(Qualifier={:?}): [{}]",
+            self.target_qualifier, field_names
         )
     }
 
-    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
-        Ok(Self::new(inputs[0].clone(), self.qualifier.clone()))
+    fn with_exprs_and_inputs(&self, _exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+        // Any input count other than one fails the array conversion and is reported below.
+        match <[LogicalPlan; 1]>::try_from(inputs) {
+            Ok([upstream_plan]) => Self::try_new(upstream_plan, self.target_qualifier.clone()),
+            Err(rejected) => internal_err!(
+                "SystemTimestampInjectorNode requires exactly 1 upstream logical plan, but received {}",
+                rejected.len()
+            ),
+        }
     }
 }
diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs
index 8220945b..1671fb13 100644
--- a/src/sql/extensions/updating_aggregate.rs
+++ b/src/sql/extensions/updating_aggregate.rs
@@ -1,165 +1,241 @@
-use datafusion::common::{DFSchemaRef, Result, TableReference, ToDFSchema, plan_err};
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, ToDFSchema, internal_err, plan_err};
 use datafusion::logical_expr::expr::ScalarFunction;
 use datafusion::logical_expr::{
-    Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore, col, lit,
+    col, lit, Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore,
 };
 use datafusion::prelude::named_struct;
 use datafusion::scalar::ScalarValue;
 use datafusion_proto::physical_plan::AsExecutionPlan;
 use datafusion_proto::protobuf::PhysicalPlanNode;
 use prost::Message;
-use std::sync::Arc;
-use std::time::Duration;
 use protocol::grpc::api::UpdatingAggregateOperator;
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+
+use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::{CompiledTopologyNode, IsRetractExtension, StreamingOperatorBlueprint};
 use crate::sql::functions::multi_hash;
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::sql::logical_planner::FsPhysicalExtensionCodec;
-use crate::sql::extensions::{IsRetractExtension, NodeWithIncomingEdges, StreamExtension};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::common::{FsSchema, FsSchemaRef};
 
-pub(crate) const UPDATING_AGGREGATE_EXTENSION_NAME: &str = "UpdatingAggregateExtension";
+// -----------------------------------------------------------------------------
+// Constants & Configuration
+// -----------------------------------------------------------------------------
+
+pub(crate) const CONTINUOUS_AGGREGATE_NODE_NAME: &str = "ContinuousAggregateNode";
+
+const DEFAULT_FLUSH_INTERVAL_MICROS: u64 = 10_000_000;
+
+const STATIC_HASH_SIZE_BYTES: i32 = 16;
 
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
+
+/// Stateful continuous aggregation: running aggregates with updating / retraction semantics.
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
-pub(crate) struct UpdatingAggregateExtension {
-    pub(crate) aggregate: LogicalPlan,
-    pub(crate) key_fields: Vec<usize>,
-    pub(crate) final_calculation: LogicalPlan,
-    pub(crate) timestamp_qualifier: Option<TableReference>,
-    pub(crate) ttl: Duration,
+pub(crate) struct ContinuousAggregateNode {
+    pub(crate) base_aggregate_plan: LogicalPlan,
+    pub(crate) partition_key_indices: Vec<usize>,
+    pub(crate) retract_injected_plan: LogicalPlan,
+    pub(crate) namespace_qualifier: Option<TableReference>,
+    pub(crate) state_retention_ttl: Duration,
 }
 
-impl UpdatingAggregateExtension {
-    pub fn new(
-        aggregate: LogicalPlan,
-        key_fields: Vec<usize>,
-        timestamp_qualifier: Option<TableReference>,
-        ttl: Duration,
+impl ContinuousAggregateNode {
+    pub fn try_new(
+        base_aggregate_plan: LogicalPlan,
+        partition_key_indices: Vec<usize>,
+        namespace_qualifier: Option<TableReference>,
+        state_retention_ttl: Duration,
     ) -> Result<Self> {
-        let final_calculation = LogicalPlan::Extension(Extension {
+        let retract_injected_plan = LogicalPlan::Extension(Extension {
             node: Arc::new(IsRetractExtension::new(
-                aggregate.clone(),
-                timestamp_qualifier.clone(),
+                base_aggregate_plan.clone(),
+                namespace_qualifier.clone(),
             )),
         });
 
         Ok(Self {
-            aggregate,
-            key_fields,
-            final_calculation,
-            timestamp_qualifier,
-            ttl,
+            base_aggregate_plan,
+            partition_key_indices,
+            retract_injected_plan,
+            namespace_qualifier,
+            state_retention_ttl,
+        })
+    }
+
+    /// Builds the `named_struct` expression holding the `is_retract` flag and the state `id`.
+    ///
+    /// `id` is `multi_hash` applied to the partition-key columns looked up by index in
+    /// `upstream_schema`; with no partition keys it is an all-zero fixed-size binary literal.
+    fn construct_state_metadata_expr(&self, upstream_schema: &FsSchemaRef) -> Expr {
+        let input_fields = &upstream_schema.schema;
+        let key_columns: Vec<Expr> = self
+            .partition_key_indices
+            .iter()
+            .map(|&position| col(input_fields.field(position).name()))
+            .collect();
+
+        let id_expr = if key_columns.is_empty() {
+            // Nothing to hash, so fall back to a constant zero-filled identifier.
+            let zero_bytes = vec![0; STATIC_HASH_SIZE_BYTES as usize];
+            Expr::Literal(
+                ScalarValue::FixedSizeBinary(STATIC_HASH_SIZE_BYTES, Some(zero_bytes)),
+                None,
+            )
+        } else {
+            Expr::ScalarFunction(ScalarFunction {
+                func: multi_hash(),
+                args: key_columns,
+            })
+        };
+
+        let metadata_fields = vec![lit("is_retract"), lit(false), lit("id"), id_expr];
+        named_struct(metadata_fields)
+    }
+
+    /// Assembles the `UpdatingAggregateOperator` config message for this aggregate node.
+    fn compile_operator_config(
+        &self,
+        planner: &Planner,
+        upstream_schema: &FsSchemaRef,
+    ) -> Result<UpdatingAggregateOperator> {
+        let upstream_df_schema = upstream_schema.schema.clone().to_dfschema()?;
+        let physical_agg_plan = planner.sync_plan(&self.base_aggregate_plan)?;
+        let compiled_agg_payload = PhysicalPlanNode::try_from_physical_plan(
+            physical_agg_plan,
+            &FsPhysicalExtensionCodec::default(),
+        )?
+        .encode_to_vec();
+
+        let meta_expr = self.construct_state_metadata_expr(upstream_schema);
+        let metadata_expr = planner.serialize_as_physical_expr(&meta_expr, &upstream_df_schema)?;
+
+        Ok(UpdatingAggregateOperator {
+            name: "UpdatingAggregate".to_string(),
+            input_schema: Some((**upstream_schema).clone().into()),
+            final_schema: Some(self.yielded_schema().into()),
+            aggregate_exec: compiled_agg_payload,
+            metadata_expr,
+            flush_interval_micros: DEFAULT_FLUSH_INTERVAL_MICROS,
+            // `as_micros()` is a u128; saturate so an oversized TTL cannot wrap to a short one.
+            ttl_micros: u64::try_from(self.state_retention_ttl.as_micros()).unwrap_or(u64::MAX),
         })
     }
 }
 
-impl UserDefinedLogicalNodeCore for UpdatingAggregateExtension {
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Hooks
+// -----------------------------------------------------------------------------
+
+impl UserDefinedLogicalNodeCore for ContinuousAggregateNode {
     fn name(&self) -> &str {
-        UPDATING_AGGREGATE_EXTENSION_NAME
+        CONTINUOUS_AGGREGATE_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.aggregate]
+        vec![&self.base_aggregate_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        self.final_calculation.schema()
+        self.retract_injected_plan.schema()
     }
 
-    fn expressions(&self) -> Vec<datafusion::prelude::Expr> {
+    fn expressions(&self) -> Vec<Expr> {
         vec![]
     }
 
     fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "UpdatingAggregateExtension")
+        write!(
+            f,
+            "ContinuousAggregateNode(TTL={:?})",
+            self.state_retention_ttl
+        )
     }
 
     fn with_exprs_and_inputs(
         &self,
-        _exprs: Vec<datafusion::prelude::Expr>,
-        inputs: Vec<LogicalPlan>,
+        _exprs: Vec<Expr>,
+        mut inputs: Vec<LogicalPlan>,
     ) -> Result<Self> {
-        Self::new(
-            inputs[0].clone(),
-            self.key_fields.clone(),
-            self.timestamp_qualifier.clone(),
-            self.ttl,
+        if inputs.len() != 1 {
+            return internal_err!(
+                "ContinuousAggregateNode requires exactly 1 upstream input, got {}",
+                inputs.len()
+            );
+        }
+
+        Self::try_new(
+            inputs.remove(0),
+            self.partition_key_indices.clone(),
+            self.namespace_qualifier.clone(),
+            self.state_retention_ttl,
         )
     }
 }
 
-impl StreamExtension for UpdatingAggregateExtension {
-    fn node_name(&self) -> Option<NamedNode> {
+// -----------------------------------------------------------------------------
+// Core Execution Blueprint Implementation
+// -----------------------------------------------------------------------------
+
+impl StreamingOperatorBlueprint for ContinuousAggregateNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
         None
     }
 
-    fn plan_node(
+    fn compile_to_graph_node(
         &self,
         planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        if input_schemas.len() != 1 {
+        node_index: usize,
+        mut upstream_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        if upstream_schemas.len() != 1 {
             return plan_err!(
-                "UpdatingAggregateExtension requires exactly one input schema, found {}",
-                input_schemas.len()
+                "Topology Violation: ContinuousAggregateNode requires exactly 1 upstream input, received {}",
+                upstream_schemas.len()
             );
         }
 
-        let input_schema = input_schemas[0].clone();
-        let input_dfschema = input_schema.schema.clone().to_dfschema()?;
+        let upstream_schema = upstream_schemas.remove(0);
 
-        let aggregate_exec = PhysicalPlanNode::try_from_physical_plan(
-            planner.sync_plan(&self.aggregate)?,
-            &FsPhysicalExtensionCodec::default(),
-        )?;
-
-        let key_exprs: Vec<Expr> = self
-            .key_fields
-            .iter()
-            .map(|&i| col(input_schema.schema.field(i).name()))
-            .collect();
-        let hash_expr = if key_exprs.is_empty() {
-            Expr::Literal(ScalarValue::FixedSizeBinary(16, Some(vec![0; 16])), None)
-        } else {
-            Expr::ScalarFunction(ScalarFunction {
-                func: multi_hash(),
-                args: key_exprs,
-            })
-        };
+        let operator_config = self.compile_operator_config(planner, &upstream_schema)?;
 
-        let updating_meta_expr =
-            named_struct(vec![lit("is_retract"), lit(false), lit("id"), hash_expr]);
-
-        let config = UpdatingAggregateOperator {
-            name: "UpdatingAggregate".to_string(),
-            input_schema: Some((*input_schema).clone().into()),
-            final_schema: Some(self.output_schema().into()),
-            aggregate_exec: aggregate_exec.encode_to_vec(),
-            metadata_expr: planner
-                .serialize_as_physical_expr(&updating_meta_expr, &input_dfschema)?,
-            flush_interval_micros: 10_000_000,
-            ttl_micros: self.ttl.as_micros() as u64,
-        };
-
-        let node = LogicalNode::single(
-            index as u32,
-            format!("updating_aggregate_{index}"),
+        let logical_node = LogicalNode::single(
+            node_index as u32,
+            format!("updating_aggregate_{node_index}"),
             OperatorName::UpdatingAggregate,
-            config.encode_to_vec(),
+            operator_config.encode_to_vec(),
             "UpdatingAggregate".to_string(),
             1,
         );
 
-        let edge = LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone());
+        let shuffle_edge =
+            LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*upstream_schema).clone());
 
-        Ok(NodeWithIncomingEdges {
-            node,
-            edges: vec![edge],
+        Ok(CompiledTopologyNode {
+            execution_unit: logical_node,
+            routing_edges: vec![shuffle_edge],
         })
     }
 
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).unwrap()
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().into())).expect(
+            "Fatal: Failed to generate unkeyed output schema for continuous aggregate",
+        )
     }
 }
diff --git a/src/sql/extensions/watermark_node.rs b/src/sql/extensions/watermark_node.rs
index 5ef8aa49..7cdb9b67 100644
--- a/src/sql/extensions/watermark_node.rs
+++ b/src/sql/extensions/watermark_node.rs
@@ -1,140 +1,231 @@
-use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err};
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err, plan_err};
 use datafusion::error::DataFusionError;
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
 use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
 use prost::Message;
-use std::fmt::Formatter;
-use std::sync::Arc;
 use protocol::grpc::api::ExpressionWatermarkConfig;
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+
 use crate::multifield_partial_ord;
-use crate::sql::schema::utils::add_timestamp_field;
-use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::schema::utils::add_timestamp_field;
+
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
+
+pub(crate) const EVENT_TIME_WATERMARK_NODE_NAME: &str = "EventTimeWatermarkNode";
+
+const INTERNAL_TIMESTAMP_COLUMN: &str = "_timestamp";
+
+const DEFAULT_WATERMARK_EMISSION_PERIOD_MICROS: u64 = 1_000_000;
+
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
 
-pub(crate) const WATERMARK_NODE_NAME: &str = "WatermarkNode";
+/// Event-time watermark from a user strategy; drives time progress in stateful operators.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct WatermarkNode {
-    pub input: LogicalPlan,
-    pub qualifier: TableReference,
-    pub watermark_expression: Expr,
-    pub schema: DFSchemaRef,
-    timestamp_index: usize,
+pub(crate) struct EventTimeWatermarkNode {
+    pub(crate) upstream_plan: LogicalPlan,
+    pub(crate) namespace_qualifier: TableReference,
+    pub(crate) watermark_strategy_expr: Expr,
+    pub(crate) resolved_schema: DFSchemaRef,
+    pub(crate) internal_timestamp_offset: usize,
 }
 
 multifield_partial_ord!(
-    WatermarkNode,
-    input,
-    qualifier,
-    watermark_expression,
-    timestamp_index
+    EventTimeWatermarkNode,
+    upstream_plan,
+    namespace_qualifier,
+    watermark_strategy_expr,
+    internal_timestamp_offset
 );
 
-impl UserDefinedLogicalNodeCore for WatermarkNode {
+impl EventTimeWatermarkNode {
+    /// Builds the node: runs the upstream schema through `add_timestamp_field` under
+    /// `namespace_qualifier`, then records where the `_timestamp` column sits in the result.
+    pub(crate) fn try_new(
+        upstream_plan: LogicalPlan,
+        namespace_qualifier: TableReference,
+        watermark_strategy_expr: Expr,
+    ) -> Result<Self> {
+        let qualifier = Some(namespace_qualifier.clone());
+        let resolved_schema = add_timestamp_field(upstream_plan.schema().clone(), qualifier)?;
+
+        // The lookup is unqualified: it matches the column by name alone.
+        let Some(internal_timestamp_offset) =
+            resolved_schema.index_of_column_by_name(None, INTERNAL_TIMESTAMP_COLUMN)
+        else {
+            return Err(DataFusionError::Plan(format!(
+                "Fatal: Failed to resolve mandatory temporal column '{}'",
+                INTERNAL_TIMESTAMP_COLUMN
+            )));
+        };
+
+        Ok(Self {
+            upstream_plan,
+            namespace_qualifier,
+            watermark_strategy_expr,
+            resolved_schema,
+            internal_timestamp_offset,
+        })
+    }
+
+    /// Returns an unkeyed `FsSchema` built from `resolved_schema` and the stored timestamp
+    /// column index.
+    pub(crate) fn generate_fs_schema(&self) -> FsSchema {
+        let timestamp_index = self.internal_timestamp_offset;
+        FsSchema::new_unkeyed(Arc::new(self.resolved_schema.as_ref().into()), timestamp_index)
+    }
+
+    /// Compiles the watermark expression against `resolved_schema` and packs its serialized
+    /// bytes, the emission period and this node's `FsSchema` into the operator config.
+    fn compile_operator_config(&self, planner: &Planner) -> Result<ExpressionWatermarkConfig> {
+        let codec = DefaultPhysicalExtensionCodec {};
+        let strategy = &self.watermark_strategy_expr;
+        let physical_expr = planner.create_physical_expr(strategy, &self.resolved_schema)?;
+        let expression = serialize_physical_expr(&physical_expr, &codec)?.encode_to_vec();
+
+        Ok(ExpressionWatermarkConfig {
+            period_micros: DEFAULT_WATERMARK_EMISSION_PERIOD_MICROS,
+            idle_time_micros: None,
+            expression,
+            input_schema: Some(self.generate_fs_schema().into()),
+        })
+    }
+}
+
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Hooks
+// -----------------------------------------------------------------------------
+
+impl UserDefinedLogicalNodeCore for EventTimeWatermarkNode {
     fn name(&self) -> &str {
-        WATERMARK_NODE_NAME
+        EVENT_TIME_WATERMARK_NODE_NAME
     }
 
     fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
+        vec![&self.upstream_plan]
     }
 
     fn schema(&self) -> &DFSchemaRef {
-        &self.schema
+        &self.resolved_schema
     }
 
     fn expressions(&self) -> Vec<Expr> {
-        vec![self.watermark_expression.clone()]
+        vec![self.watermark_strategy_expr.clone()]
     }
 
     fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "WatermarkNode({}): {}", self.qualifier, self.schema)
+        write!(
+            f,
+            "EventTimeWatermarkNode({}): Schema={}",
+            self.namespace_qualifier, self.resolved_schema
+        )
     }
 
-    fn with_exprs_and_inputs(&self, exprs: Vec<Expr>, inputs: Vec<LogicalPlan>) -> Result<Self> {
+    fn with_exprs_and_inputs(
+        &self,
+        mut exprs: Vec<Expr>,
+        mut inputs: Vec<LogicalPlan>,
+    ) -> Result<Self> {
         if inputs.len() != 1 {
-            return internal_err!("input size inconsistent");
+            return internal_err!(
+                "EventTimeWatermarkNode requires exactly 1 upstream logical plan, but received {}",
+                inputs.len()
+            );
         }
         if exprs.len() != 1 {
-            return internal_err!("expected one expression; found {}", exprs.len());
+            return internal_err!(
+                "EventTimeWatermarkNode requires exactly 1 watermark strategy expression, but received {}",
+                exprs.len()
+            );
         }
 
-        let timestamp_index = self
-            .schema
-            .index_of_column_by_name(Some(&self.qualifier), "_timestamp")
-            .ok_or_else(|| DataFusionError::Plan("missing timestamp column".to_string()))?;
+        let internal_timestamp_offset = self
+            .resolved_schema
+            .index_of_column_by_name(Some(&self.namespace_qualifier), INTERNAL_TIMESTAMP_COLUMN)
+            .ok_or_else(|| {
+                DataFusionError::Plan(format!(
+                    "Optimizer Error: Lost tracking of temporal column '{}'",
+                    INTERNAL_TIMESTAMP_COLUMN
+                ))
+            })?;
 
         Ok(Self {
-            input: inputs[0].clone(),
-            qualifier: self.qualifier.clone(),
-            watermark_expression: exprs.into_iter().next().unwrap(),
-            schema: self.schema.clone(),
-            timestamp_index,
+            upstream_plan: inputs.remove(0),
+            namespace_qualifier: self.namespace_qualifier.clone(),
+            watermark_strategy_expr: exprs.remove(0),
+            resolved_schema: self.resolved_schema.clone(),
+            internal_timestamp_offset,
         })
     }
 }
 
-impl StreamExtension for WatermarkNode {
-    fn node_name(&self) -> Option<NamedNode> {
-        Some(NamedNode::Watermark(self.qualifier.clone()))
+// -----------------------------------------------------------------------------
+// Core Execution Blueprint Implementation
+// -----------------------------------------------------------------------------
+
+impl StreamingOperatorBlueprint for EventTimeWatermarkNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        Some(NamedNode::Watermark(self.namespace_qualifier.clone()))
     }
 
-    fn plan_node(
+    fn compile_to_graph_node(
         &self,
         planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<NodeWithIncomingEdges> {
-        let expression = planner.create_physical_expr(&self.watermark_expression, &self.schema)?;
-        let expression = serialize_physical_expr(&expression, &DefaultPhysicalExtensionCodec {})?;
-        let node = LogicalNode::single(
-            index as u32,
-            format!("watermark_{index}"),
+        node_index: usize,
+        mut upstream_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        if upstream_schemas.len() != 1 {
+            return plan_err!(
+                "Topology Violation: EventTimeWatermarkNode requires exactly 1 upstream input, received {}",
+                upstream_schemas.len()
+            );
+        }
+
+        let operator_config = self.compile_operator_config(planner)?;
+
+        let execution_unit = LogicalNode::single(
+            node_index as u32,
+            format!("watermark_{node_index}"),
             OperatorName::ExpressionWatermark,
-            ExpressionWatermarkConfig {
-                period_micros: 1_000_000,
-                idle_time_micros: None,
-                expression: expression.encode_to_vec(),
-                input_schema: Some(self.arroyo_schema().into()),
-            }
-                .encode_to_vec(),
-            "watermark".to_string(),
+            operator_config.encode_to_vec(),
+            "watermark_generator".to_string(),
             1,
         );
 
-        let incoming_edge =
-            LogicalEdge::project_all(LogicalEdgeType::Forward, input_schemas[0].as_ref().clone());
-        Ok(NodeWithIncomingEdges {
-            node,
-            edges: vec![incoming_edge],
-        })
-    }
-    fn output_schema(&self) -> FsSchema {
-        self.arroyo_schema()
-    }
-}
+        let incoming_edge = LogicalEdge::project_all(
+            LogicalEdgeType::Forward,
+            (*upstream_schemas.remove(0)).clone(),
+        );
 
-impl WatermarkNode {
-    pub(crate) fn new(
-        input: LogicalPlan,
-        qualifier: TableReference,
-        watermark_expression: Expr,
-    ) -> Result<Self> {
-        let schema = add_timestamp_field(input.schema().clone(), Some(qualifier.clone()))?;
-        let timestamp_index = schema
-            .index_of_column_by_name(None, "_timestamp")
-            .ok_or_else(|| DataFusionError::Plan("missing _timestamp column".to_string()))?;
-        Ok(Self {
-            input,
-            qualifier,
-            watermark_expression,
-            schema,
-            timestamp_index,
+        Ok(CompiledTopologyNode {
+            execution_unit,
+            routing_edges: vec![incoming_edge],
         })
     }
-    pub(crate) fn arroyo_schema(&self) -> FsSchema {
-        FsSchema::new_unkeyed(Arc::new(self.schema.as_ref().into()), self.timestamp_index)
+
+    fn yielded_schema(&self) -> FsSchema {
+        self.generate_fs_schema()
     }
 }
diff --git a/src/sql/extensions/window_fn.rs b/src/sql/extensions/window_fn.rs
deleted file mode 100644
index c2594546..00000000
--- a/src/sql/extensions/window_fn.rs
+++ /dev/null
@@ -1,123 +0,0 @@
-use std::sync::Arc;
-use datafusion::common::{Column, DFSchema, DFSchemaRef, Result, plan_err};
-use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
-use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
-use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
-use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode};
-use prost::Message;
-use protocol::grpc::api::WindowFunctionOperator;
-use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
-use crate::sql::logical_planner::FsPhysicalExtensionCodec;
-use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::types::TIMESTAMP_FIELD;
-use crate::sql::common::{FsSchema, FsSchemaRef};
-use super::{ NodeWithIncomingEdges, StreamExtension};
-
-pub(crate) const WINDOW_FUNCTION_EXTENSION_NAME: &str = "WindowFunctionExtension";
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
-pub(crate) struct WindowFunctionExtension {
-    window_plan: LogicalPlan,
-    key_fields: Vec<usize>,
-}
-
-impl WindowFunctionExtension {
-    pub fn new(window_plan: LogicalPlan, key_fields: Vec<usize>) -> Self {
-        Self {
-            window_plan,
-            key_fields,
-        }
-    }
-}
-
-impl UserDefinedLogicalNodeCore for WindowFunctionExtension {
-    fn name(&self) -> &str {
-        WINDOW_FUNCTION_EXTENSION_NAME
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.window_plan]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        self.window_plan.schema()
-    }
-
-    fn expressions(&self) -> Vec<datafusion::prelude::Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "WindowFunction: {}", self.schema())
-    }
-
-    fn with_exprs_and_inputs(
-        &self,
-        _exprs: Vec<datafusion::prelude::Expr>,
-        inputs: Vec<LogicalPlan>,
-    ) -> Result<Self> {
-        Ok(Self::new(inputs[0].clone(), self.key_fields.clone()))
-    }
-}
-
-impl StreamExtension for WindowFunctionExtension {
-    fn node_name(&self) -> Option<NamedNode> {
-        None
-    }
-
-    fn plan_node(
-        &self,
-        planner: &Planner,
-        index: usize,
-        input_schemas: Vec<FsSchemaRef>,
-    ) -> Result<super::NodeWithIncomingEdges> {
-        if input_schemas.len() != 1 {
-            return plan_err!("WindowFunctionExtension requires exactly one input");
-        }
-        let input_schema = input_schemas[0].clone();
-        let input_df_schema =
-            Arc::new(DFSchema::try_from(input_schema.schema.as_ref().clone()).unwrap());
-
-        let binning_function = planner.create_physical_expr(
-            &Expr::Column(Column::new_unqualified(TIMESTAMP_FIELD.to_string())),
-            &input_df_schema,
-        )?;
-        let binning_function_proto =
-            serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})?;
-
-        let window_plan = planner.sync_plan(&self.window_plan)?;
-        let codec = FsPhysicalExtensionCodec::default();
-        let window_plan_proto = PhysicalPlanNode::try_from_physical_plan(window_plan, &codec)?;
-
-        let config = WindowFunctionOperator {
-            name: "WindowFunction".to_string(),
-            input_schema: Some(input_schema.as_ref().clone().into()),
-            binning_function: binning_function_proto.encode_to_vec(),
-            window_function_plan: window_plan_proto.encode_to_vec(),
-        };
-
-        let logical_node = LogicalNode::single(
-            index as u32,
-            format!("window_function_{index}"),
-            OperatorName::WindowFunction,
-            config.encode_to_vec(),
-            "window function".to_string(),
-            1,
-        );
-
-        let edge = LogicalEdge::project_all(
-            // TODO: detect when this shuffle is unnecessary
-            LogicalEdgeType::Shuffle,
-            input_schema.as_ref().clone(),
-        );
-
-        Ok(NodeWithIncomingEdges {
-            node: logical_node,
-            edges: vec![edge],
-        })
-    }
-
-    fn output_schema(&self) -> FsSchema {
-        FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).unwrap()
-    }
-}
diff --git a/src/sql/extensions/windows_function.rs b/src/sql/extensions/windows_function.rs
new file mode 100644
index 00000000..e53e2ee9
--- /dev/null
+++ b/src/sql/extensions/windows_function.rs
@@ -0,0 +1,197 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use datafusion::common::{Column, DFSchema, DFSchemaRef, Result, internal_err, plan_err};
+use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode};
+use prost::Message;
+use protocol::grpc::api::WindowFunctionOperator;
+
+use crate::sql::common::{FsSchema, FsSchemaRef};
+use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
+use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::logical_planner::planner::{NamedNode, Planner};
+use crate::sql::types::TIMESTAMP_FIELD;
+
+use super::{CompiledTopologyNode, StreamingOperatorBlueprint};
+
+// -----------------------------------------------------------------------------
+// Constants & Identifiers
+// -----------------------------------------------------------------------------
+
+pub(crate) const STREAMING_WINDOW_NODE_NAME: &str = "StreamingWindowFunctionNode";
+
+// -----------------------------------------------------------------------------
+// Logical Node Definition
+// -----------------------------------------------------------------------------
+
+/// Stateful streaming window: temporal binning plus underlying window evaluation plan.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
+pub(crate) struct StreamingWindowFunctionNode {
+    pub(crate) underlying_evaluation_plan: LogicalPlan,
+    pub(crate) partition_key_indices: Vec<usize>,
+}
+
+impl StreamingWindowFunctionNode {
+    /// Wraps a window evaluation plan together with its partition key indices.
+    pub fn new(
+        underlying_evaluation_plan: LogicalPlan,
+        partition_key_indices: Vec<usize>,
+    ) -> Self {
+        Self {
+            underlying_evaluation_plan,
+            partition_key_indices,
+        }
+    }
+
+    /// Plans the unqualified timestamp column against `input_df_schema` and returns
+    /// the resulting physical expression as encoded protobuf bytes.
+    ///
+    /// Fails if the expression cannot be planned or serialized.
+    fn compile_temporal_binning_function(
+        &self,
+        planner: &Planner,
+        input_df_schema: &DFSchema,
+    ) -> Result<Vec<u8>> {
+        let column = Column::new_unqualified(TIMESTAMP_FIELD.to_string());
+        let physical_expr = planner.create_physical_expr(&Expr::Column(column), input_df_schema)?;
+        let codec = DefaultPhysicalExtensionCodec {};
+
+        Ok(serialize_physical_expr(&physical_expr, &codec)?.encode_to_vec())
+    }
+
+    /// Builds the physical plan for the wrapped evaluation plan and returns it as
+    /// encoded protobuf bytes.
+    fn compile_physical_evaluation_plan(&self, planner: &Planner) -> Result<Vec<u8>> {
+        let physical_plan = planner.sync_plan(&self.underlying_evaluation_plan)?;
+        let codec = FsPhysicalExtensionCodec::default();
+        let proto_plan = PhysicalPlanNode::try_from_physical_plan(physical_plan, &codec)?;
+
+        Ok(proto_plan.encode_to_vec())
+    }
+}
+
+// -----------------------------------------------------------------------------
+// DataFusion Logical Node Hooks
+// -----------------------------------------------------------------------------
+
+impl UserDefinedLogicalNodeCore for StreamingWindowFunctionNode {
+    fn name(&self) -> &str {
+        STREAMING_WINDOW_NODE_NAME
+    }
+
+    /// The wrapped evaluation plan is the only input.
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        vec![&self.underlying_evaluation_plan]
+    }
+
+    /// The node reuses the schema of the wrapped evaluation plan.
+    fn schema(&self) -> &DFSchemaRef {
+        self.underlying_evaluation_plan.schema()
+    }
+
+    /// The node exposes no expressions of its own.
+    fn expressions(&self) -> Vec<Expr> {
+        Vec::new()
+    }
+
+    fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result {
+        write!(f, "StreamingWindowFunction: Schema={}", self.schema())
+    }
+
+    /// Rebuilds the node around a replacement input, keeping the partition key indices.
+    ///
+    /// Returns an internal error unless exactly one input is supplied.
+    fn with_exprs_and_inputs(
+        &self,
+        _exprs: Vec<Expr>,
+        inputs: Vec<LogicalPlan>,
+    ) -> Result<Self> {
+        let input_count = inputs.len();
+        let mut inputs = inputs.into_iter();
+        match (inputs.next(), inputs.next()) {
+            (Some(plan), None) => Ok(Self::new(plan, self.partition_key_indices.clone())),
+            _ => internal_err!(
+                "StreamingWindowFunctionNode requires exactly 1 upstream input, got {}",
+                input_count
+            ),
+        }
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Core Execution Blueprint Implementation
+// -----------------------------------------------------------------------------
+
+impl StreamingOperatorBlueprint for StreamingWindowFunctionNode {
+    fn operator_identity(&self) -> Option<NamedNode> {
+        None
+    }
+
+    /// Compiles this node into a `WindowFunction` operator fed by one shuffle edge.
+    ///
+    /// Returns a plan error unless exactly one input schema is supplied.
+    fn compile_to_graph_node(
+        &self,
+        planner: &Planner,
+        node_index: usize,
+        input_schemas: Vec<FsSchemaRef>,
+    ) -> Result<CompiledTopologyNode> {
+        let schema_count = input_schemas.len();
+        let mut schemas = input_schemas.into_iter();
+        let input_schema = match (schemas.next(), schemas.next()) {
+            (Some(only_schema), None) => only_schema,
+            _ => {
+                return plan_err!(
+                    "Topology Violation: StreamingWindowFunctionNode requires exactly 1 upstream input schema, received {}",
+                    schema_count
+                );
+            }
+        };
+        let input_df_schema = DFSchema::try_from(input_schema.schema.as_ref().clone())?;
+        let binning_function = self.compile_temporal_binning_function(planner, &input_df_schema)?;
+        let window_function_plan = self.compile_physical_evaluation_plan(planner)?;
+        let operator_config = WindowFunctionOperator {
+            name: "WindowFunction".to_string(),
+            input_schema: Some(input_schema.as_ref().clone().into()),
+            binning_function,
+            window_function_plan,
+        };
+        let execution_unit = LogicalNode::single(
+            node_index as u32,
+            format!("window_function_{node_index}"),
+            OperatorName::WindowFunction,
+            operator_config.encode_to_vec(),
+            "streaming_window_evaluator".to_string(),
+            1,
+        );
+        let shuffle_edge =
+            LogicalEdge::project_all(LogicalEdgeType::Shuffle, (*input_schema).clone());
+
+        Ok(CompiledTopologyNode {
+            execution_unit,
+            routing_edges: vec![shuffle_edge],
+        })
+    }
+
+    /// Builds the unkeyed output schema from this node's schema; panics if that fails.
+    fn yielded_schema(&self) -> FsSchema {
+        FsSchema::from_schema_unkeyed(Arc::new(self.schema().as_ref().clone().into())).expect(
+            "Fatal: Failed to generate unkeyed output schema for StreamingWindowFunctionNode",
+        )
+    }
+}
diff --git a/src/sql/frontend_sql_coverage_tests.rs b/src/sql/frontend_sql_coverage_tests.rs
index fa730614..cee4d82e 100644
--- a/src/sql/frontend_sql_coverage_tests.rs
+++ b/src/sql/frontend_sql_coverage_tests.rs
@@ -23,7 +23,7 @@ use crate::coordinator::Coordinator;
 use crate::sql::common::TIMESTAMP_FIELD;
 use crate::sql::parse::parse_sql;
 use crate::sql::rewrite_plan;
-use crate::sql::schema::optimizer::produce_optimized_plan;
+use crate::sql::logical_planner::optimizers::produce_optimized_plan;
 use crate::sql::schema::StreamSchemaProvider;
 
 fn assert_parses_as(sql: &str, type_prefix: &str) {
diff --git a/src/sql/logical_node/logical.rs b/src/sql/logical_node/logical.rs
deleted file mode 100644
index 9fa139d1..00000000
--- a/src/sql/logical_node/logical.rs
+++ /dev/null
@@ -1,378 +0,0 @@
-use itertools::Itertools;
-
-use datafusion::arrow::datatypes::DataType;
-use petgraph::Direction;
-use petgraph::dot::Dot;
-use petgraph::graph::DiGraph;
-use std::collections::{HashMap, HashSet};
-use std::fmt::{Debug, Display, Formatter};
-use std::sync::Arc;
-use datafusion_proto::protobuf::ArrowType;
-use prost::Message;
-use strum::{Display, EnumString};
-use protocol::grpc::api;
-use crate::sql::common::FsSchema;
-
-#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
-pub enum OperatorName {
-    ExpressionWatermark,
-    ArrowValue,
-    ArrowKey,
-    Projection,
-    AsyncUdf,
-    Join,
-    InstantJoin,
-    LookupJoin,
-    WindowFunction,
-    TumblingWindowAggregate,
-    SlidingWindowAggregate,
-    SessionWindowAggregate,
-    UpdatingAggregate,
-    ConnectorSource,
-    ConnectorSink,
-}
-
-#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
-pub enum LogicalEdgeType {
-    Forward,
-    Shuffle,
-    LeftJoin,
-    RightJoin,
-}
-
-impl Display for LogicalEdgeType {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            LogicalEdgeType::Forward => write!(f, "→"),
-            LogicalEdgeType::Shuffle => write!(f, "⤨"),
-            LogicalEdgeType::LeftJoin => write!(f, "-[left]⤨"),
-            LogicalEdgeType::RightJoin => write!(f, "-[right]⤨"),
-        }
-    }
-}
-
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct LogicalEdge {
-    pub edge_type: LogicalEdgeType,
-    pub schema: Arc<FsSchema>,
-}
-
-impl LogicalEdge {
-    pub fn new(edge_type: LogicalEdgeType, schema: FsSchema) -> Self {
-        LogicalEdge {
-            edge_type,
-            schema: Arc::new(schema),
-        }
-    }
-
-    pub fn project_all(edge_type: LogicalEdgeType, schema: FsSchema) -> Self {
-        LogicalEdge {
-            edge_type,
-            schema: Arc::new(schema),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ChainedLogicalOperator {
-    pub operator_id: String,
-    pub operator_name: OperatorName,
-    pub operator_config: Vec<u8>,
-}
-
-#[derive(Clone, Debug)]
-pub struct OperatorChain {
-    pub(crate) operators: Vec<ChainedLogicalOperator>,
-    pub(crate) edges: Vec<Arc<FsSchema>>,
-}
-
-impl OperatorChain {
-    pub fn new(operator: ChainedLogicalOperator) -> Self {
-        Self {
-            operators: vec![operator],
-            edges: vec![],
-        }
-    }
-
-    pub fn iter(
-        &self,
-    ) -> impl Iterator<Item = (&ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
-        self.operators
-            .iter()
-            .zip_longest(self.edges.iter())
-            .map(|e| e.left_and_right())
-            .map(|(l, r)| (l.unwrap(), r))
-    }
-
-    pub fn iter_mut(
-        &mut self,
-    ) -> impl Iterator<Item = (&mut ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
-        self.operators
-            .iter_mut()
-            .zip_longest(self.edges.iter())
-            .map(|e| e.left_and_right())
-            .map(|(l, r)| (l.unwrap(), r))
-    }
-
-    pub fn first(&self) -> &ChainedLogicalOperator {
-        &self.operators[0]
-    }
-
-    pub fn len(&self) -> usize {
-        self.operators.len()
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.operators.is_empty()
-    }
-
-    pub fn is_source(&self) -> bool {
-        self.operators[0].operator_name == OperatorName::ConnectorSource
-    }
-
-    pub fn is_sink(&self) -> bool {
-        self.operators[0].operator_name == OperatorName::ConnectorSink
-    }
-}
-
-#[derive(Clone)]
-pub struct LogicalNode {
-    pub node_id: u32,
-    pub description: String,
-    pub operator_chain: OperatorChain,
-    pub parallelism: usize,
-}
-
-impl LogicalNode {
-    pub fn single(
-        id: u32,
-        operator_id: String,
-        name: OperatorName,
-        config: Vec<u8>,
-        description: String,
-        parallelism: usize,
-    ) -> Self {
-        Self {
-            node_id: id,
-            description,
-            operator_chain: OperatorChain {
-                operators: vec![ChainedLogicalOperator {
-                    operator_id,
-                    operator_name: name,
-                    operator_config: config,
-                }],
-                edges: vec![],
-            },
-            parallelism,
-        }
-    }
-}
-
-impl Display for LogicalNode {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.description)
-    }
-}
-
-impl Debug for LogicalNode {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}[{}]",
-            self.operator_chain
-                .operators
-                .iter()
-                .map(|op| op.operator_id.clone())
-                .collect::<Vec<_>>()
-                .join(" -> "),
-            self.parallelism
-        )
-    }
-}
-
-pub type LogicalGraph = DiGraph<LogicalNode, LogicalEdge>;
-
-pub trait Optimizer {
-    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool;
-
-    fn optimize(&self, plan: &mut LogicalGraph) {
-        loop {
-            if !self.optimize_once(plan) {
-                break;
-            }
-        }
-    }
-}
-
-#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)]
-pub struct DylibUdfConfig {
-    pub dylib_path: String,
-    pub arg_types: Vec<DataType>,
-    pub return_type: DataType,
-    pub aggregate: bool,
-    pub is_async: bool,
-}
-
-#[derive(Clone, Debug, Eq, PartialEq, Hash)]
-pub struct PythonUdfConfig {
-    pub arg_types: Vec<DataType>,
-    pub return_type: DataType,
-    pub name: Arc<String>,
-    pub definition: Arc<String>,
-}
-
-#[derive(Clone, Debug, Default)]
-pub struct ProgramConfig {
-    pub udf_dylibs: HashMap<String, DylibUdfConfig>,
-    pub python_udfs: HashMap<String, PythonUdfConfig>,
-}
-
-#[derive(Clone, Debug, Default)]
-pub struct LogicalProgram {
-    pub graph: LogicalGraph,
-    pub program_config: ProgramConfig,
-}
-
-impl LogicalProgram {
-    pub fn new(graph: LogicalGraph, program_config: ProgramConfig) -> Self {
-        Self {
-            graph,
-            program_config,
-        }
-    }
-
-    pub fn optimize(&mut self, optimizer: &dyn Optimizer) {
-        optimizer.optimize(&mut self.graph);
-    }
-
-    pub fn update_parallelism(&mut self, overrides: &HashMap<u32, usize>) {
-        for node in self.graph.node_weights_mut() {
-            if let Some(p) = overrides.get(&node.node_id) {
-                node.parallelism = *p;
-            }
-        }
-    }
-
-    pub fn dot(&self) -> String {
-        format!("{:?}", Dot::with_config(&self.graph, &[]))
-    }
-
-    pub fn task_count(&self) -> usize {
-        self.graph.node_weights().map(|nw| nw.parallelism).sum()
-    }
-
-    pub fn sources(&self) -> HashSet<u32> {
-        self.graph
-            .externals(Direction::Incoming)
-            .map(|t| self.graph.node_weight(t).unwrap().node_id)
-            .collect()
-    }
-
-    pub fn tasks_per_operator(&self) -> HashMap<String, usize> {
-        let mut tasks_per_operator = HashMap::new();
-        for node in self.graph.node_weights() {
-            for op in &node.operator_chain.operators {
-                tasks_per_operator.insert(op.operator_id.clone(), node.parallelism);
-            }
-        }
-        tasks_per_operator
-    }
-
-    pub fn operator_names_by_id(&self) -> HashMap<String, String> {
-        let mut m = HashMap::new();
-        for node in self.graph.node_weights() {
-            for op in &node.operator_chain.operators {
-                m.insert(op.operator_id.clone(), op.operator_name.to_string());
-            }
-        }
-        m
-    }
-
-    pub fn tasks_per_node(&self) -> HashMap<u32, usize> {
-        let mut tasks_per_node = HashMap::new();
-        for node in self.graph.node_weights() {
-            tasks_per_node.insert(node.node_id, node.parallelism);
-        }
-        tasks_per_node
-    }
-
-    pub fn features(&self) -> HashSet<String> {
-        let mut s = HashSet::new();
-        for n in self.graph.node_weights() {
-            for t in &n.operator_chain.operators {
-                let feature = match &t.operator_name {
-                    OperatorName::AsyncUdf => "async-udf".to_string(),
-                    OperatorName::ExpressionWatermark
-                    | OperatorName::ArrowValue
-                    | OperatorName::ArrowKey
-                    | OperatorName::Projection => continue,
-                    OperatorName::Join => "join-with-expiration".to_string(),
-                    OperatorName::InstantJoin => "windowed-join".to_string(),
-                    OperatorName::WindowFunction => "sql-window-function".to_string(),
-                    OperatorName::LookupJoin => "lookup-join".to_string(),
-                    OperatorName::TumblingWindowAggregate => {
-                        "sql-tumbling-window-aggregate".to_string()
-                    }
-                    OperatorName::SlidingWindowAggregate => {
-                        "sql-sliding-window-aggregate".to_string()
-                    }
-                    OperatorName::SessionWindowAggregate => {
-                        "sql-session-window-aggregate".to_string()
-                    }
-                    OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(),
-                    OperatorName::ConnectorSource => "connector-source".to_string(),
-                    OperatorName::ConnectorSink => "connector-sink".to_string(),
-                };
-                s.insert(feature);
-            }
-        }
-        s
-    }
-}
-
-
-impl From<DylibUdfConfig> for api::DylibUdfConfig {
-    fn from(from: DylibUdfConfig) -> Self {
-        api::DylibUdfConfig {
-            dylib_path: from.dylib_path,
-            arg_types: from
-                .arg_types
-                .iter()
-                .map(|t| {
-                    ArrowType::try_from(t)
-                        .expect("unsupported data type")
-                        .encode_to_vec()
-                })
-                .collect(),
-            return_type: ArrowType::try_from(&from.return_type)
-                .expect("unsupported data type")
-                .encode_to_vec(),
-            aggregate: from.aggregate,
-            is_async: from.is_async,
-        }
-    }
-}
-
-impl From<api::DylibUdfConfig> for DylibUdfConfig {
-    fn from(from: api::DylibUdfConfig) -> Self {
-        DylibUdfConfig {
-            dylib_path: from.dylib_path,
-            arg_types: from
-                .arg_types
-                .iter()
-                .map(|t| {
-                    DataType::try_from(
-                        &ArrowType::decode(&mut t.as_slice()).expect("invalid arrow type"),
-                    )
-                        .expect("invalid arrow type")
-                })
-                .collect(),
-            return_type: DataType::try_from(
-                &ArrowType::decode(&mut from.return_type.as_slice()).unwrap(),
-            )
-                .expect("invalid arrow type"),
-            aggregate: from.aggregate,
-            is_async: from.is_async,
-        }
-    }
-}
\ No newline at end of file
diff --git a/src/sql/logical_node/logical/dylib_udf_config.rs b/src/sql/logical_node/logical/dylib_udf_config.rs
new file mode 100644
index 00000000..6c88054f
--- /dev/null
+++ b/src/sql/logical_node/logical/dylib_udf_config.rs
@@ -0,0 +1,71 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::arrow::datatypes::DataType;
+use datafusion_proto::protobuf::ArrowType;
+use prost::Message;
+use protocol::grpc::api;
+
+#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)]
+pub struct DylibUdfConfig {
+    pub dylib_path: String,
+    pub arg_types: Vec<DataType>,
+    pub return_type: DataType,
+    pub aggregate: bool,
+    pub is_async: bool,
+}
+
+impl From<DylibUdfConfig> for api::DylibUdfConfig {
+    /// Encodes the config into its protobuf form.
+    ///
+    /// # Panics
+    /// Panics with "unsupported data type" if an argument or return type cannot be
+    /// represented as a protobuf `ArrowType`.
+    fn from(from: DylibUdfConfig) -> Self {
+        let encode_type = |data_type: &DataType| -> Vec<u8> {
+            ArrowType::try_from(data_type)
+                .expect("unsupported data type")
+                .encode_to_vec()
+        };
+        api::DylibUdfConfig {
+            dylib_path: from.dylib_path,
+            arg_types: from.arg_types.iter().map(encode_type).collect(),
+            return_type: encode_type(&from.return_type),
+            aggregate: from.aggregate,
+            is_async: from.is_async,
+        }
+    }
+}
+
+impl From<api::DylibUdfConfig> for DylibUdfConfig {
+    /// Decodes the protobuf form back into a planner-side config.
+    ///
+    /// # Panics
+    /// Panics with "invalid arrow type" if an encoded type cannot be decoded or converted.
+    fn from(from: api::DylibUdfConfig) -> Self {
+        let decode_type = |bytes: &[u8]| -> DataType {
+            let proto = ArrowType::decode(bytes).expect("invalid arrow type");
+            DataType::try_from(&proto).expect("invalid arrow type")
+        };
+        DylibUdfConfig {
+            dylib_path: from.dylib_path,
+            arg_types: from
+                .arg_types
+                .iter()
+                .map(|t| decode_type(t.as_slice()))
+                .collect(),
+            return_type: decode_type(from.return_type.as_slice()),
+            aggregate: from.aggregate,
+            is_async: from.is_async,
+        }
+    }
+}
diff --git a/src/sql/logical_node/logical/logical_edge.rs b/src/sql/logical_node/logical/logical_edge.rs
new file mode 100644
index 00000000..2f850988
--- /dev/null
+++ b/src/sql/logical_node/logical/logical_edge.rs
@@ -0,0 +1,57 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::{Display, Formatter};
+use std::sync::Arc;
+
+use crate::sql::common::FsSchema;
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
+pub enum LogicalEdgeType {
+    Forward,
+    Shuffle,
+    LeftJoin,
+    RightJoin,
+}
+
+impl Display for LogicalEdgeType {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Forward => "→",
+            Self::Shuffle => "⤨",
+            Self::LeftJoin => "-[left]⤨",
+            Self::RightJoin => "-[right]⤨",
+        })
+    }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct LogicalEdge {
+    pub edge_type: LogicalEdgeType,
+    pub schema: Arc<FsSchema>,
+}
+
+impl LogicalEdge {
+    /// Builds an edge of the given type, wrapping `schema` in an [`Arc`].
+    pub fn new(edge_type: LogicalEdgeType, schema: FsSchema) -> Self {
+        Self {
+            edge_type,
+            schema: Arc::new(schema),
+        }
+    }
+
+    /// Builds an edge that carries `schema` unchanged.
+    /// Currently constructs exactly the same value as [`LogicalEdge::new`].
+    pub fn project_all(edge_type: LogicalEdgeType, schema: FsSchema) -> Self {
+        Self::new(edge_type, schema)
+    }
+}
diff --git a/src/sql/logical_node/logical/logical_graph.rs b/src/sql/logical_node/logical/logical_graph.rs
new file mode 100644
index 00000000..b877e2a0
--- /dev/null
+++ b/src/sql/logical_node/logical/logical_graph.rs
@@ -0,0 +1,30 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use petgraph::graph::DiGraph;
+
+use super::logical_edge::LogicalEdge;
+use super::logical_node::LogicalNode;
+
+pub type LogicalGraph = DiGraph<LogicalNode, LogicalEdge>;
+
+pub trait Optimizer {
+    /// Runs a single pass over `plan`; `optimize` stops once this returns `false`.
+    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool;
+
+    /// Calls `optimize_once` repeatedly until it returns `false`.
+    ///
+    /// Never returns if `optimize_once` keeps returning `true`.
+    fn optimize(&self, plan: &mut LogicalGraph) {
+        while self.optimize_once(plan) {}
+    }
+}
diff --git a/src/sql/logical_node/logical/logical_node.rs b/src/sql/logical_node/logical/logical_node.rs
new file mode 100644
index 00000000..492eae26
--- /dev/null
+++ b/src/sql/logical_node/logical/logical_node.rs
@@ -0,0 +1,71 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::{Debug, Display, Formatter};
+
+use super::operator_chain::{ChainedLogicalOperator, OperatorChain};
+use super::operator_name::OperatorName;
+
+#[derive(Clone)]
+pub struct LogicalNode {
+    pub node_id: u32,
+    pub description: String,
+    pub operator_chain: OperatorChain,
+    pub parallelism: usize,
+}
+
+impl LogicalNode {
+    pub fn single(
+        id: u32,
+        operator_id: String,
+        name: OperatorName,
+        config: Vec<u8>,
+        description: String,
+        parallelism: usize,
+    ) -> Self {
+        let operators = vec![ChainedLogicalOperator {
+            operator_id,
+            operator_name: name,
+            operator_config: config,
+        }];
+        let edges = Vec::new();
+        let operator_chain = OperatorChain { operators, edges };
+        Self {
+            node_id: id,
+            description,
+            operator_chain,
+            parallelism,
+        }
+    }
+}
+
+impl Display for LogicalNode {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str(&self.description)
+    }
+}
+
+impl Debug for LogicalNode {
+    /// Formats the node as its operator ids joined by " -> ", followed by the
+    /// parallelism in square brackets.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let operator_ids: Vec<&str> = self
+            .operator_chain
+            .operators
+            .iter()
+            .map(|op| op.operator_id.as_str())
+            .collect();
+        let chain = operator_ids.join(" -> ");
+
+        write!(f, "{chain}[{}]", self.parallelism)
+    }
+}
diff --git a/src/sql/logical_node/logical/logical_program.rs b/src/sql/logical_node/logical/logical_program.rs
new file mode 100644
index 00000000..db6883b8
--- /dev/null
+++ b/src/sql/logical_node/logical/logical_program.rs
@@ -0,0 +1,123 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::{HashMap, HashSet};
+
+use petgraph::Direction;
+use petgraph::dot::Dot;
+
+use super::logical_graph::{LogicalGraph, Optimizer};
+use super::operator_name::OperatorName;
+use super::program_config::ProgramConfig;
+
+#[derive(Clone, Debug, Default)]
+pub struct LogicalProgram {
+    pub graph: LogicalGraph, // nodes carry an operator chain and a parallelism
+    pub program_config: ProgramConfig, // program-level settings kept next to the graph
+}
+
+impl LogicalProgram {
+    pub fn new(graph: LogicalGraph, program_config: ProgramConfig) -> Self {
+        Self {
+            graph,
+            program_config,
+        }
+    }
+
+    pub fn optimize(&mut self, optimizer: &dyn Optimizer) {
+        optimizer.optimize(&mut self.graph);
+    }
+
+    /// Applies parallelism overrides keyed by `node_id`; nodes without an entry are unchanged.
+    pub fn update_parallelism(&mut self, overrides: &HashMap<u32, usize>) {
+        for node in self.graph.node_weights_mut() {
+            if let Some(&requested) = overrides.get(&node.node_id) {
+                node.parallelism = requested;
+            }
+        }
+    }
+
+    /// Renders the graph as Graphviz DOT text using the `Debug` form of nodes and edges.
+    pub fn dot(&self) -> String {
+        format!("{:?}", Dot::with_config(&self.graph, &[]))
+    }
+
+    /// Total task count: the sum of every node's parallelism.
+    pub fn task_count(&self) -> usize {
+        self.graph.node_weights().map(|n| n.parallelism).sum()
+    }
+
+    /// Ids of the nodes that have no incoming edges.
+    pub fn sources(&self) -> HashSet<u32> {
+        self.graph
+            .externals(Direction::Incoming)
+            .map(|idx| self.graph.node_weight(idx).unwrap().node_id)
+            .collect()
+    }
+
+    /// Maps every operator id to the parallelism of the node that holds it.
+    pub fn tasks_per_operator(&self) -> HashMap<String, usize> {
+        let mut tasks = HashMap::new();
+        for node in self.graph.node_weights() {
+            for op in &node.operator_chain.operators {
+                tasks.insert(op.operator_id.clone(), node.parallelism);
+            }
+        }
+        tasks
+    }
+
+    /// Maps every operator id to the string form of its `OperatorName`.
+    pub fn operator_names_by_id(&self) -> HashMap<String, String> {
+        let mut names = HashMap::new();
+        for node in self.graph.node_weights() {
+            for op in &node.operator_chain.operators {
+                names.insert(op.operator_id.clone(), op.operator_name.to_string());
+            }
+        }
+        names
+    }
+
+    pub fn tasks_per_node(&self) -> HashMap<u32, usize> {
+        self.graph
+            .node_weights()
+            .map(|node| (node.node_id, node.parallelism))
+            .collect()
+    }
+
+    /// Feature labels of all operators; watermark, Arrow key/value and projection add none.
+    pub fn features(&self) -> HashSet<String> {
+        let mut labels = HashSet::new();
+        for node in self.graph.node_weights() {
+            for op in &node.operator_chain.operators {
+                let label = match op.operator_name {
+                    OperatorName::ExpressionWatermark
+                    | OperatorName::ArrowValue
+                    | OperatorName::ArrowKey
+                    | OperatorName::Projection => continue,
+                    OperatorName::AsyncUdf => "async-udf",
+                    OperatorName::Join => "join-with-expiration",
+                    OperatorName::InstantJoin => "windowed-join",
+                    OperatorName::WindowFunction => "sql-window-function",
+                    OperatorName::LookupJoin => "lookup-join",
+                    OperatorName::TumblingWindowAggregate => "sql-tumbling-window-aggregate",
+                    OperatorName::SlidingWindowAggregate => "sql-sliding-window-aggregate",
+                    OperatorName::SessionWindowAggregate => "sql-session-window-aggregate",
+                    OperatorName::UpdatingAggregate => "sql-updating-aggregate",
+                    OperatorName::ConnectorSource => "connector-source",
+                    OperatorName::ConnectorSink => "connector-sink",
+                };
+                labels.insert(label.to_string());
+            }
+        }
+        labels
+    }
+}
diff --git a/src/sql/logical_node/logical/mod.rs b/src/sql/logical_node/logical/mod.rs
new file mode 100644
index 00000000..96dd2ce5
--- /dev/null
+++ b/src/sql/logical_node/logical/mod.rs
@@ -0,0 +1,30 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod dylib_udf_config;
+mod logical_edge;
+mod logical_graph;
+mod logical_node;
+mod logical_program;
+mod operator_chain;
+mod operator_name;
+mod program_config;
+mod python_udf_config;
+
+pub use dylib_udf_config::DylibUdfConfig;
+pub use logical_edge::{LogicalEdge, LogicalEdgeType};
+pub use logical_graph::{LogicalGraph, Optimizer};
+pub use logical_node::LogicalNode;
+pub use logical_program::LogicalProgram;
+pub use operator_name::OperatorName;
+pub use program_config::ProgramConfig;
+pub use python_udf_config::PythonUdfConfig;
diff --git a/src/sql/logical_node/logical/operator_chain.rs b/src/sql/logical_node/logical/operator_chain.rs
new file mode 100644
index 00000000..e3db96b2
--- /dev/null
+++ b/src/sql/logical_node/logical/operator_chain.rs
@@ -0,0 +1,80 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use itertools::Itertools;
+
+use super::operator_name::OperatorName;
+use crate::sql::common::FsSchema;
+
+#[derive(Clone, Debug)]
+pub struct ChainedLogicalOperator {
+    pub operator_id: String, // id string of this operator
+    pub operator_name: OperatorName, // which kind of operator this is
+    pub operator_config: Vec<u8>, // opaque config bytes; the encoding is not visible here
+}
+
+#[derive(Clone, Debug)]
+pub struct OperatorChain {
+    pub(crate) operators: Vec<ChainedLogicalOperator>, // chained operators; `first()` reads [0]
+    pub(crate) edges: Vec<Arc<FsSchema>>, // zipped index-by-index with `operators` in `iter()`
+}
+
+impl OperatorChain {
+    pub fn new(operator: ChainedLogicalOperator) -> Self {
+        let (operators, edges) = (vec![operator], vec![]);
+        Self { operators, edges }
+    }
+
+    /// Pairs every operator with the same-index edge schema, or `None` when there is none.
+    pub fn iter(
+        &self,
+    ) -> impl Iterator<Item = (&ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
+        let padded = self.operators.iter().zip_longest(self.edges.iter());
+        padded
+            .map(|pair| pair.left_and_right())
+            .map(|(op, edge)| (op.unwrap(), edge))
+    }
+
+    pub fn iter_mut(
+        &mut self,
+    ) -> impl Iterator<Item = (&mut ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
+        let padded = self.operators.iter_mut().zip_longest(self.edges.iter());
+        padded
+            .map(|pair| pair.left_and_right())
+            .map(|(op, edge)| (op.unwrap(), edge))
+    }
+
+    /// Returns the first operator. Panics if the chain is empty.
+    pub fn first(&self) -> &ChainedLogicalOperator {
+        &self.operators[0]
+    }
+
+    pub fn len(&self) -> usize {
+        self.operators.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.operators.is_empty()
+    }
+
+    /// True when the first operator is a `ConnectorSource`; `false` for an empty chain.
+    pub fn is_source(&self) -> bool {
+        self.operators.first().map(|op| op.operator_name) == Some(OperatorName::ConnectorSource)
+    }
+
+    /// True when the first operator is a `ConnectorSink`; `false` for an empty chain.
+    pub fn is_sink(&self) -> bool {
+        self.operators.first().map(|op| op.operator_name) == Some(OperatorName::ConnectorSink)
+    }
+}
diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs
new file mode 100644
index 00000000..057d8e82
--- /dev/null
+++ b/src/sql/logical_node/logical/operator_name.rs
@@ -0,0 +1,32 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use strum::{Display, EnumString};
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
+pub enum OperatorName {
+    ExpressionWatermark, // adds no feature label in `LogicalProgram::features`
+    ArrowValue, // adds no feature label in `LogicalProgram::features`
+    ArrowKey, // adds no feature label in `LogicalProgram::features`
+    Projection, // adds no feature label in `LogicalProgram::features`
+    AsyncUdf,
+    Join,
+    InstantJoin,
+    LookupJoin,
+    WindowFunction,
+    TumblingWindowAggregate,
+    SlidingWindowAggregate,
+    SessionWindowAggregate,
+    UpdatingAggregate,
+    ConnectorSource, // first-operator kind tested by `OperatorChain::is_source`
+    ConnectorSink, // first-operator kind tested by `OperatorChain::is_sink`
+}
diff --git a/src/sql/logical_node/logical/program_config.rs b/src/sql/logical_node/logical/program_config.rs
new file mode 100644
index 00000000..38c76e66
--- /dev/null
+++ b/src/sql/logical_node/logical/program_config.rs
@@ -0,0 +1,22 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use super::dylib_udf_config::DylibUdfConfig;
+use super::python_udf_config::PythonUdfConfig;
+
+#[derive(Clone, Debug, Default)]
+pub struct ProgramConfig {
+    pub udf_dylibs: HashMap<String, DylibUdfConfig>, // dylib UDFs by key (UDF name? confirm)
+    pub python_udfs: HashMap<String, PythonUdfConfig>, // Python UDFs by key (name? confirm)
+}
diff --git a/src/sql/logical_node/logical/python_udf_config.rs b/src/sql/logical_node/logical/python_udf_config.rs
new file mode 100644
index 00000000..6e7d5c66
--- /dev/null
+++ b/src/sql/logical_node/logical/python_udf_config.rs
@@ -0,0 +1,23 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::DataType;
+
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+pub struct PythonUdfConfig {
+    pub arg_types: Vec<DataType>, // Arrow data types of the arguments
+    pub return_type: DataType, // Arrow data type of the result
+    pub name: Arc<String>, // name of the UDF
+    pub definition: Arc<String>, // presumably the Python source of the UDF; confirm
+}
diff --git a/src/sql/logical_planner/optimizers.rs b/src/sql/logical_planner/optimizers/chaining.rs
similarity index 81%
rename from src/sql/logical_planner/optimizers.rs
rename to src/sql/logical_planner/optimizers/chaining.rs
index bdf32657..5935c985 100644
--- a/src/sql/logical_planner/optimizers.rs
+++ b/src/sql/logical_planner/optimizers/chaining.rs
@@ -1,7 +1,21 @@
-use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer};
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::mem;
+
 use petgraph::prelude::*;
 use petgraph::visit::NodeRef;
-use std::mem;
+
+use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer};
 
 pub struct ChainingOptimizer {}
 
diff --git a/src/sql/schema/optimizer.rs b/src/sql/logical_planner/optimizers/datafusion_logical.rs
similarity index 100%
rename from src/sql/schema/optimizer.rs
rename to src/sql/logical_planner/optimizers/datafusion_logical.rs
diff --git a/src/sql/logical_planner/optimizers/mod.rs b/src/sql/logical_planner/optimizers/mod.rs
new file mode 100644
index 00000000..0e0de6a2
--- /dev/null
+++ b/src/sql/logical_planner/optimizers/mod.rs
@@ -0,0 +1,20 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Logical planner optimizers: graph-level chaining ([`ChainingOptimizer`]) and
+//! DataFusion SQL logical-plan rules ([`produce_optimized_plan`]).
+
+mod chaining;
+mod datafusion_logical;
+
+pub use chaining::ChainingOptimizer;
+pub use datafusion_logical::produce_optimized_plan;
diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs
index 0f2075c1..bd25423c 100644
--- a/src/sql/logical_planner/planner.rs
+++ b/src/sql/logical_planner/planner.rs
@@ -34,11 +34,9 @@ use crate::sql::logical_node::logical::{LogicalEdge, LogicalGraph, LogicalNode};
 use crate::sql::logical_planner::{
     DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec,
 };
-use crate::sql::extensions::debezium::{
-    DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME,
-};
-use crate::sql::extensions::key_calculation::KeyCalculationExtension;
-use crate::sql::extensions::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::extensions::debezium::{PACK_NODE_NAME, UNROLL_NODE_NAME, UnrollDebeziumPayloadNode};
+use crate::sql::extensions::key_calculation::KeyExtractionNode;
+use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::schema::utils::add_timestamp_field_arrow;
 use crate::sql::schema::StreamSchemaProvider;
 use crate::sql::common::{FsSchema, FsSchemaRef};
@@ -238,21 +236,21 @@ impl ExtensionPlanner for FsExtensionPlanner {
         _session_state: &SessionState,
     ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
         let schema = node.schema().as_ref().into();
-        if let Ok::<&dyn StreamExtension, _>(stream_extension) = node.try_into() {
-            if stream_extension.transparent() {
+        if let Ok::<&dyn StreamingOperatorBlueprint, _>(stream_extension) = node.try_into() {
+            if stream_extension.is_passthrough_boundary() {
                 match node.name() {
-                    DEBEZIUM_UNROLLING_EXTENSION_NAME => {
+                    UNROLL_NODE_NAME => {
                         let node = node
                             .as_any()
-                            .downcast_ref::<DebeziumUnrollingExtension>()
+                            .downcast_ref::<UnrollDebeziumPayloadNode>()
                             .unwrap();
                         let input = physical_inputs[0].clone();
                         return Ok(Some(Arc::new(DebeziumUnrollingExec::try_new(
                             input,
-                            node.primary_keys.clone(),
+                            node.pk_indices.clone(),
                         )?)));
                     }
-                    TO_DEBEZIUM_EXTENSION_NAME => {
+                    PACK_NODE_NAME => {
                         let input = physical_inputs[0].clone();
                         return Ok(Some(Arc::new(ToDebeziumExec::try_new(input)?)));
                     }
@@ -261,8 +259,8 @@ impl ExtensionPlanner for FsExtensionPlanner {
             }
         };
         let name =
-            if let Some(key_extension) = node.as_any().downcast_ref::<KeyCalculationExtension>() {
-                key_extension.name.clone()
+            if let Some(key_extension) = node.as_any().downcast_ref::<KeyExtractionNode>() {
+                key_extension.operator_label.clone()
             } else {
                 None
             };
@@ -293,9 +291,9 @@ impl PlanToGraphVisitor<'_> {
     pub fn build_extension(
         &mut self,
         input_nodes: Vec<NodeIndex>,
-        extension: &dyn StreamExtension,
+        extension: &dyn StreamingOperatorBlueprint,
     ) -> Result<()> {
-        if let Some(node_name) = extension.node_name() {
+        if let Some(node_name) = extension.operator_identity() {
             if self.named_nodes.contains_key(&node_name) {
                 return plan_err!(
                     "extension {:?} has already been planned, shouldn't try again.",
@@ -315,21 +313,24 @@ impl PlanToGraphVisitor<'_> {
             })
             .collect::<Result<Vec<_>>>()?;
 
-        let NodeWithIncomingEdges { node, edges } = extension
-            .plan_node(&self.planner, self.graph.node_count(), input_schemas)
+        let CompiledTopologyNode {
+            execution_unit,
+            routing_edges,
+        } = extension
+            .compile_to_graph_node(&self.planner, self.graph.node_count(), input_schemas)
             .map_err(|e| e.context(format!("planning operator {extension:?}")))?;
 
-        let node_index = self.graph.add_node(node);
+        let node_index = self.graph.add_node(execution_unit);
         self.add_index_to_traversal(node_index);
 
-        for (source, edge) in input_nodes.into_iter().zip(edges.into_iter()) {
+        for (source, edge) in input_nodes.into_iter().zip(routing_edges.into_iter()) {
             self.graph.add_edge(source, node_index, edge);
         }
 
         self.output_schemas
-            .insert(node_index, extension.output_schema().into());
+            .insert(node_index, extension.yielded_schema().into());
 
-        if let Some(node_name) = extension.node_name() {
+        if let Some(node_name) = extension.operator_identity() {
             self.named_nodes.insert(node_name, node_index);
         }
         Ok(())
@@ -344,14 +345,14 @@ impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> {
             return Ok(TreeNodeRecursion::Continue);
         };
 
-        let stream_extension: &dyn StreamExtension = node
+        let stream_extension: &dyn StreamingOperatorBlueprint = node
             .try_into()
             .map_err(|e: DataFusionError| e.context("converting extension"))?;
-        if stream_extension.transparent() {
+        if stream_extension.is_passthrough_boundary() {
             return Ok(TreeNodeRecursion::Continue);
         }
 
-        if let Some(name) = stream_extension.node_name() {
+        if let Some(name) = stream_extension.operator_identity() {
             if let Some(node_index) = self.named_nodes.get(&name) {
                 self.add_index_to_traversal(*node_index);
                 return Ok(TreeNodeRecursion::Jump);
@@ -370,15 +371,15 @@ impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> {
             return Ok(TreeNodeRecursion::Continue);
         };
 
-        let stream_extension: &dyn StreamExtension = node
+        let stream_extension: &dyn StreamingOperatorBlueprint = node
             .try_into()
             .map_err(|e: DataFusionError| e.context("planning extension"))?;
 
-        if stream_extension.transparent() {
+        if stream_extension.is_passthrough_boundary() {
             return Ok(TreeNodeRecursion::Continue);
         }
 
-        if let Some(name) = stream_extension.node_name() {
+        if let Some(name) = stream_extension.operator_identity() {
             if self.named_nodes.contains_key(&name) {
                 return Ok(TreeNodeRecursion::Continue);
             }
@@ -389,7 +390,7 @@ impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> {
         } else {
             vec![]
         };
-        let stream_extension: &dyn StreamExtension = node
+        let stream_extension: &dyn StreamingOperatorBlueprint = node
             .try_into()
             .map_err(|e: DataFusionError| e.context("converting extension"))?;
         self.build_extension(input_nodes, stream_extension)?;
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index 6e17e0f2..fc89787a 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -22,7 +22,7 @@ pub mod analysis;
 pub(crate) mod extensions;
 pub mod types;
 
-pub use schema::StreamSchemaProvider;
+pub use schema::{StreamPlanningContext, StreamSchemaProvider};
 pub use parse::parse_sql;
 pub use analysis::rewrite_plan;
 pub use logical_planner::CompiledSql;
diff --git a/src/sql/schema/column_descriptor.rs b/src/sql/schema/column_descriptor.rs
new file mode 100644
index 00000000..941a7500
--- /dev/null
+++ b/src/sql/schema/column_descriptor.rs
@@ -0,0 +1,136 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::arrow::datatypes::{DataType, Field, TimeUnit};
+use datafusion::logical_expr::Expr;
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum ColumnDescriptor {
+    Physical(Field), // an Arrow field with nothing attached
+    SystemMeta {
+        field: Field,
+        meta_key: String, // key returned by `system_meta_key()`
+    },
+    Computed {
+        field: Field,
+        logic: Box<Expr>, // defining expression, returned by `computation_logic()`
+    },
+}
+
+impl ColumnDescriptor {
+    /// Wraps `field` as a `Physical` column.
+    #[inline]
+    pub fn new_physical(field: Field) -> Self {
+        Self::Physical(field)
+    }
+
+    /// Builds a `SystemMeta` column for `field`, tagged with `meta_key`.
+    #[inline]
+    pub fn new_system_meta(field: Field, meta_key: impl Into<String>) -> Self {
+        Self::SystemMeta {
+            field,
+            meta_key: meta_key.into(),
+        }
+    }
+
+    /// Builds a `Computed` column for `field`; the defining expression is boxed.
+    #[inline]
+    pub fn new_computed(field: Field, logic: Expr) -> Self {
+        Self::Computed {
+            field,
+            logic: Box::new(logic),
+        }
+    }
+
+    /// Borrows the Arrow field carried by whichever variant this is.
+    #[inline]
+    pub fn arrow_field(&self) -> &Field {
+        match self {
+            Self::Physical(field)
+            | Self::SystemMeta { field, .. }
+            | Self::Computed { field, .. } => field,
+        }
+    }
+
+    /// Consumes the descriptor, keeping only its Arrow field.
+    #[inline]
+    pub fn into_arrow_field(self) -> Field {
+        match self {
+            Self::Physical(field)
+            | Self::SystemMeta { field, .. }
+            | Self::Computed { field, .. } => field,
+        }
+    }
+
+    /// Returns `true` for the `Computed` variant.
+    #[inline]
+    pub fn is_computed(&self) -> bool {
+        matches!(self, Self::Computed { .. })
+    }
+
+    /// Returns `true` for the `Physical` variant.
+    #[inline]
+    pub fn is_physical(&self) -> bool {
+        matches!(self, Self::Physical(_))
+    }
+
+    /// Returns the metadata key of a `SystemMeta` column, `None` for other variants.
+    #[inline]
+    pub fn system_meta_key(&self) -> Option<&str> {
+        if let Self::SystemMeta { meta_key, .. } = self {
+            Some(meta_key.as_str())
+        } else {
+            None
+        }
+    }
+
+    /// Returns the defining expression of a `Computed` column, `None` for other variants.
+    #[inline]
+    pub fn computation_logic(&self) -> Option<&Expr> {
+        if let Self::Computed { logic, .. } = self {
+            Some(logic)
+        } else {
+            None
+        }
+    }
+
+    /// Arrow data type of the underlying field.
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        self.arrow_field().data_type()
+    }
+
+    /// Switches a timestamp column to the time unit `unit`, keeping its time zone.
+    ///
+    /// Columns whose type is not `DataType::Timestamp` are left untouched. The field is
+    /// updated in place, so its name, nullability and metadata all survive; rebuilding
+    /// it with `Field::new` would silently reset the metadata.
+    pub fn force_precision(&mut self, unit: TimeUnit) {
+        let field = match self {
+            Self::Physical(field)
+            | Self::SystemMeta { field, .. }
+            | Self::Computed { field, .. } => field,
+        };
+        if let DataType::Timestamp(_, tz) = field.data_type() {
+            // Build the new type first: this ends the shared borrow of `field` held by `tz`.
+            let retimed = DataType::Timestamp(unit, tz.clone());
+            field.set_data_type(retimed);
+        }
+    }
+}
+
+impl From<Field> for ColumnDescriptor {
+    #[inline]
+    fn from(arrow_field: Field) -> Self {
+        ColumnDescriptor::Physical(arrow_field) // a bare field becomes a `Physical` column
+    }
+}
diff --git a/src/sql/schema/connector.rs b/src/sql/schema/connection_type.rs
similarity index 100%
rename from src/sql/schema/connector.rs
rename to src/sql/schema/connection_type.rs
diff --git a/src/sql/schema/connector_table.rs b/src/sql/schema/connector_table.rs
deleted file mode 100644
index 25e37184..00000000
--- a/src/sql/schema/connector_table.rs
+++ /dev/null
@@ -1,205 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::sync::Arc;
-use std::time::Duration;
-
-use datafusion::arrow::datatypes::{FieldRef, Schema};
-use datafusion::common::{Result, plan_err};
-use datafusion::logical_expr::Expr;
-use protocol::grpc::api::ConnectorOp;
-use super::field_spec::FieldSpec;
-use crate::multifield_partial_ord;
-use crate::sql::schema::ConnectionType;
-use crate::sql::schema::table::SqlSource;
-use crate::sql::types::ProcessingMode;
-
-/// Represents a table backed by an external connector (e.g., Kafka, Pulsar, NATS).
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct ConnectorTable {
-    pub id: Option<i64>,
-    pub connector: String,
-    pub name: String,
-    pub connection_type: ConnectionType,
-    pub fields: Vec<FieldSpec>,
-    pub config: String,
-    pub description: String,
-    pub event_time_field: Option<String>,
-    pub watermark_field: Option<String>,
-    pub idle_time: Option<Duration>,
-    pub primary_keys: Arc<Vec<String>>,
-    pub inferred_fields: Option<Vec<FieldRef>>,
-    pub partition_exprs: Arc<Option<Vec<Expr>>>,
-    pub lookup_cache_max_bytes: Option<u64>,
-    pub lookup_cache_ttl: Option<Duration>,
-}
-
-multifield_partial_ord!(
-    ConnectorTable,
-    id,
-    connector,
-    name,
-    connection_type,
-    config,
-    description,
-    event_time_field,
-    watermark_field,
-    idle_time,
-    primary_keys
-);
-
-impl ConnectorTable {
-    pub fn new(
-        name: impl Into<String>,
-        connector: impl Into<String>,
-        connection_type: ConnectionType,
-    ) -> Self {
-        Self {
-            id: None,
-            connector: connector.into(),
-            name: name.into(),
-            connection_type,
-            fields: Vec::new(),
-            config: String::new(),
-            description: String::new(),
-            event_time_field: None,
-            watermark_field: None,
-            idle_time: None,
-            primary_keys: Arc::new(Vec::new()),
-            inferred_fields: None,
-            partition_exprs: Arc::new(None),
-            lookup_cache_max_bytes: None,
-            lookup_cache_ttl: None,
-        }
-    }
-
-    pub fn has_virtual_fields(&self) -> bool {
-        self.fields.iter().any(|f| f.is_virtual())
-    }
-
-    pub fn is_updating(&self) -> bool {
-        // TODO: check format for debezium/update mode
-        false
-    }
-
-    pub fn physical_schema(&self) -> Schema {
-        Schema::new(
-            self.fields
-                .iter()
-                .filter(|f| !f.is_virtual())
-                .map(|f| f.field().clone())
-                .collect::<Vec<_>>(),
-        )
-    }
-
-    pub fn connector_op(&self) -> ConnectorOp {
-        ConnectorOp {
-            connector: self.connector.clone(),
-            config: self.config.clone(),
-            description: self.description.clone(),
-        }
-    }
-
-    pub fn processing_mode(&self) -> ProcessingMode {
-        if self.is_updating() {
-            ProcessingMode::Update
-        } else {
-            ProcessingMode::Append
-        }
-    }
-
-    pub fn timestamp_override(&self) -> Result<Option<Expr>> {
-        if let Some(field_name) = &self.event_time_field {
-            if self.is_updating() {
-                return plan_err!("can't use event_time_field with update mode");
-            }
-            let _field = self.get_time_field(field_name)?;
-            Ok(Some(Expr::Column(datafusion::common::Column::from_name(
-                field_name,
-            ))))
-        } else {
-            Ok(None)
-        }
-    }
-
-    fn get_time_field(&self, field_name: &str) -> Result<&FieldSpec> {
-        self.fields
-            .iter()
-            .find(|f| {
-                f.field().name() == field_name
-                    && matches!(
-                        f.field().data_type(),
-                        datafusion::arrow::datatypes::DataType::Timestamp(..)
-                    )
-            })
-            .ok_or_else(|| {
-                datafusion::error::DataFusionError::Plan(format!(
-                    "field {field_name} not found or not a timestamp"
-                ))
-            })
-    }
-
-    pub fn watermark_column(&self) -> Result<Option<Expr>> {
-        if let Some(field_name) = &self.watermark_field {
-            let _field = self.get_time_field(field_name)?;
-            Ok(Some(Expr::Column(datafusion::common::Column::from_name(
-                field_name,
-            ))))
-        } else {
-            Ok(None)
-        }
-    }
-
-    pub fn as_sql_source(&self) -> Result<SourceOperator> {
-        match self.connection_type {
-            ConnectionType::Source => {}
-            ConnectionType::Sink | ConnectionType::Lookup => {
-                return plan_err!("cannot read from sink");
-            }
-        };
-
-        if self.is_updating() && self.has_virtual_fields() {
-            return plan_err!("can't read from a source with virtual fields and update mode.");
-        }
-
-        let timestamp_override = self.timestamp_override()?;
-        let watermark_column = self.watermark_column()?;
-
-        let source = SqlSource {
-            id: self.id,
-            struct_def: self
-                .fields
-                .iter()
-                .filter(|f| !f.is_virtual())
-                .map(|f| Arc::new(f.field().clone()))
-                .collect(),
-            config: self.connector_op(),
-            processing_mode: self.processing_mode(),
-            idle_time: self.idle_time,
-        };
-
-        Ok(SourceOperator {
-            name: self.name.clone(),
-            source,
-            timestamp_override,
-            watermark_column,
-        })
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct SourceOperator {
-    pub name: String,
-    pub source: SqlSource,
-    pub timestamp_override: Option<Expr>,
-    pub watermark_column: Option<Expr>,
-}
diff --git a/src/sql/schema/data_encoding_format.rs b/src/sql/schema/data_encoding_format.rs
new file mode 100644
index 00000000..5b93c90a
--- /dev/null
+++ b/src/sql/schema/data_encoding_format.rs
@@ -0,0 +1,105 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use datafusion::arrow::datatypes::{DataType, Field};
+use datafusion::common::{Result, plan_err};
+
+use super::column_descriptor::ColumnDescriptor;
+use crate::sql::common::Format;
+
+/// High-level payload encoding (orthogonal to `Format` wire details in `ConnectionSchema`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum DataEncodingFormat {
+    StandardJson,
+    DebeziumJson,
+    Avro,
+    Parquet,
+    Raw,
+}
+
+impl DataEncodingFormat {
+    /// Derives the encoding from connector options.
+    ///
+    /// Reads the `format` option (defaulting to `"json"`) together with the
+    /// `format.debezium` flag, falling back to `json.debezium` when the former is absent.
+    /// The flag counts as set only when its value is exactly `"true"`.
+    ///
+    /// Any format name other than `json`, `debezium_json`, `avro` or `parquet` maps to
+    /// [`Self::Raw`], so this function currently never returns `Err`.
+    pub fn extract_from_map(opts: &HashMap<String, String>) -> Result<Self> {
+        let format_str = opts.get("format").map_or("json", String::as_str);
+        let is_debezium = opts
+            .get("format.debezium")
+            .or_else(|| opts.get("json.debezium"))
+            .is_some_and(|flag| flag == "true");
+
+        Ok(match (format_str, is_debezium) {
+            ("json", true) | ("debezium_json", _) => Self::DebeziumJson,
+            ("json", false) => Self::StandardJson,
+            ("avro", _) => Self::Avro,
+            ("parquet", _) => Self::Parquet,
+            _ => Self::Raw,
+        })
+    }
+
+    /// Maps a connection-level [`Format`] onto the coarser encoding categories.
+    ///
+    /// JSON with the `debezium` flag set becomes [`Self::DebeziumJson`]; Protobuf and both
+    /// raw formats collapse into [`Self::Raw`].
+    pub fn from_connection_format(format: &Format) -> Self {
+        match format {
+            Format::Json(j) if j.debezium => Self::DebeziumJson,
+            Format::Json(_) => Self::StandardJson,
+            Format::Avro(_) => Self::Avro,
+            Format::Parquet(_) => Self::Parquet,
+            Format::Protobuf(_) | Format::RawString(_) | Format::RawBytes(_) => Self::Raw,
+        }
+    }
+
+    /// Returns `true` only for [`Self::DebeziumJson`], the one encoding treated as carrying
+    /// delta (CDC) updates.
+    pub fn supports_delta_updates(&self) -> bool {
+        matches!(self, Self::DebeziumJson)
+    }
+
+    /// Wraps `columns` in a `before` / `after` / `op` envelope for delta-capable encodings.
+    ///
+    /// Encodings without delta support, and empty column lists, are returned unchanged.
+    /// Otherwise all columns are folded into one struct type that is exposed twice, as the
+    /// nullable `before` and `after` fields, alongside a nullable UTF-8 `op` field.
+    ///
+    /// # Errors
+    ///
+    /// Returns a planning error when any column reports `is_computed()`.
+    pub fn apply_envelope(self, columns: Vec<ColumnDescriptor>) -> Result<Vec<ColumnDescriptor>> {
+        if !self.supports_delta_updates() {
+            return Ok(columns);
+        }
+        if columns.iter().any(|c| c.is_computed()) {
+            return plan_err!("Virtual fields are not supported with CDC envelope");
+        }
+        if columns.is_empty() {
+            return Ok(columns);
+        }
+        let fields: Vec<Field> = columns.into_iter().map(|c| c.into_arrow_field()).collect();
+        let struct_type = DataType::Struct(fields.into());
+
+        Ok(vec![
+            ColumnDescriptor::new_physical(Field::new("before", struct_type.clone(), true)),
+            // Last use of `struct_type`: move it instead of cloning a second time.
+            ColumnDescriptor::new_physical(Field::new("after", struct_type, true)),
+            ColumnDescriptor::new_physical(Field::new("op", DataType::Utf8, true)),
+        ])
+    }
+}
diff --git a/src/sql/schema/field_spec.rs b/src/sql/schema/field_spec.rs
deleted file mode 100644
index 2fe8a50e..00000000
--- a/src/sql/schema/field_spec.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use datafusion::arrow::datatypes::Field;
-use datafusion::logical_expr::Expr;
-
-/// Describes how a field in a connector table should be interpreted.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum FieldSpec {
-    /// A regular struct field that maps to a column in the data.
-    Struct(Field),
-    /// A metadata field extracted from message metadata (e.g., Kafka headers).
-    Metadata { field: Field, key: String },
-    /// A virtual field computed from an expression over other fields.
-    Virtual { field: Field, expression: Box<Expr> },
-}
-
-impl FieldSpec {
-    pub fn is_virtual(&self) -> bool {
-        matches!(self, FieldSpec::Virtual { .. })
-    }
-
-    pub fn field(&self) -> &Field {
-        match self {
-            FieldSpec::Struct(f) => f,
-            FieldSpec::Metadata { field, .. } => field,
-            FieldSpec::Virtual { field, .. } => field,
-        }
-    }
-
-    pub fn metadata_key(&self) -> Option<&str> {
-        match self {
-            FieldSpec::Metadata { key, .. } => Some(key.as_str()),
-            _ => None,
-        }
-    }
-}
-
-impl From<Field> for FieldSpec {
-    fn from(value: Field) -> Self {
-        FieldSpec::Struct(value)
-    }
-}
diff --git a/src/sql/schema/insert.rs b/src/sql/schema/insert.rs
deleted file mode 100644
index fe91325b..00000000
--- a/src/sql/schema/insert.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use datafusion::common::Result;
-use datafusion::logical_expr::{DmlStatement, LogicalPlan, WriteOp};
-use datafusion::sql::sqlparser::ast::Statement;
-
-use super::optimizer::produce_optimized_plan;
-use crate::sql::schema::StreamSchemaProvider;
-
-/// Represents an INSERT operation in a streaming SQL pipeline.
-#[derive(Debug)]
-pub enum Insert {
-    /// Insert into a named sink table.
-    InsertQuery {
-        sink_name: String,
-        logical_plan: LogicalPlan,
-    },
-    /// An anonymous query (no explicit INSERT target).
-    Anonymous { logical_plan: LogicalPlan },
-}
-
-impl Insert {
-    pub fn try_from_statement(
-        statement: &Statement,
-        schema_provider: &StreamSchemaProvider,
-    ) -> Result<Insert> {
-        let logical_plan = produce_optimized_plan(statement, schema_provider)?;
-
-        match &logical_plan {
-            LogicalPlan::Dml(DmlStatement {
-                table_name,
-                op: WriteOp::Insert(_),
-                input,
-                ..
-            }) => {
-                let sink_name = table_name.to_string();
-                Ok(Insert::InsertQuery {
-                    sink_name,
-                    logical_plan: (**input).clone(),
-                })
-            }
-            _ => Ok(Insert::Anonymous { logical_plan }),
-        }
-    }
-}
diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs
index 0bf7e4ea..cac86d52 100644
--- a/src/sql/schema/mod.rs
+++ b/src/sql/schema/mod.rs
@@ -10,18 +10,34 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-pub mod connector;
-pub mod connector_table;
-pub mod field_spec;
-pub mod insert;
-pub mod optimizer;
+pub mod column_descriptor;
+pub mod connection_type;
+pub mod source_table;
+pub mod data_encoding_format;
+pub mod schema_context;
 pub mod schema_provider;
 pub mod table;
+pub mod table_execution_unit;
+pub mod table_role;
+pub mod temporal_pipeline_config;
 pub mod utils;
 
-pub use connector::{ConnectionType};
-pub use connector_table::{ConnectorTable, SourceOperator};
-pub use field_spec::FieldSpec;
-pub use insert::Insert;
-pub use schema_provider::{LogicalBatchInput, StreamSchemaProvider, StreamTable};
+pub use column_descriptor::ColumnDescriptor;
+pub use connection_type::ConnectionType;
+pub use source_table::{SourceOperator, SourceTable};
+
+/// Back-compat alias for [`SourceTable`].
+pub type ConnectorTable = SourceTable;
+pub use data_encoding_format::DataEncodingFormat;
+pub use schema_context::{DfSchemaContext, SchemaContext};
+pub use schema_provider::{
+    FunctionCatalog, LogicalBatchInput, ObjectName, StreamPlanningContext,
+    StreamPlanningContextBuilder, StreamSchemaProvider, StreamTable, TableCatalog,
+};
 pub use table::Table;
+pub use table_execution_unit::{EngineDescriptor, SyncMode, TableExecutionUnit};
+pub use table_role::{
+    apply_adapter_specific_rules, deduce_role, serialize_backend_params, validate_adapter_availability,
+    TableRole,
+};
+pub use temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineConfig, TemporalSpec};
diff --git a/src/sql/schema/schema_context.rs b/src/sql/schema/schema_context.rs
new file mode 100644
index 00000000..232fd9e7
--- /dev/null
+++ b/src/sql/schema/schema_context.rs
@@ -0,0 +1,43 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::arrow::datatypes::{DataType, Schema};
+use datafusion::common::{DFSchema, Result};
+use datafusion::logical_expr::Expr;
+use datafusion_expr::ExprSchemable;
+
+/// Resolves and types planner expressions against an Arrow schema.
+pub trait SchemaContext {
+    /// Resolves `expr` against `schema`, returning the expression to use for planning.
+    fn resolve_expression(&self, expr: &Expr, schema: &Schema) -> Result<Expr>;
+    /// Returns the data type that `expr` has when evaluated against `schema`.
+    fn extract_datatype(&self, expr: &Expr, schema: &Schema) -> Result<DataType>;
+}
+
+/// [`SchemaContext`] backed by a [`DFSchema`] built from the physical Arrow schema.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct DfSchemaContext;
+
+impl SchemaContext for DfSchemaContext {
+    /// Type-checks `expr` against `schema` and returns an unmodified clone on success.
+    fn resolve_expression(&self, expr: &Expr, schema: &Schema) -> Result<Expr> {
+        // Only the success of the type lookup matters; the resulting type is discarded.
+        self.extract_datatype(expr, schema)?;
+        Ok(expr.clone())
+    }
+
+    /// Builds a [`DFSchema`] from a copy of `schema` and asks `expr` for its type.
+    fn extract_datatype(&self, expr: &Expr, schema: &Schema) -> Result<DataType> {
+        let df_schema = DFSchema::try_from(schema.clone())?;
+        expr.get_type(&df_schema)
+    }
+}
diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs
index 11c0d461..5e34991a 100644
--- a/src/sql/schema/schema_provider.rs
+++ b/src/sql/schema/schema_provider.rs
@@ -1,10 +1,21 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
 use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema};
-use datafusion::common::{Result, plan_err};
-use datafusion::datasource::DefaultTableSource;
-use datafusion::error::DataFusionError;
+use datafusion::common::{DataFusionError, Result};
+use datafusion::datasource::{DefaultTableSource, TableProvider, TableType};
 use datafusion::execution::{FunctionRegistry, SessionStateDefaults};
 use datafusion::logical_expr::expr_rewriter::FunctionRewrite;
 use datafusion::logical_expr::planner::ExprPlanner;
@@ -12,27 +23,20 @@ use datafusion::logical_expr::{
     AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF,
 };
 use datafusion::optimizer::Analyzer;
-use datafusion::sql::TableReference;
 use datafusion::sql::planner::ContextProvider;
+use datafusion::sql::TableReference;
 use unicase::UniCase;
+
 use crate::sql::logical_node::logical::DylibUdfConfig;
 use crate::sql::schema::table::Table as CatalogTable;
 use crate::sql::schema::utils::window_arrow_struct;
 use crate::sql::types::{PlaceholderUdf, PlanningOptions};
 
-#[derive(Clone, Default)]
-pub struct StreamSchemaProvider {
-    pub source_defs: HashMap<String, String>,
-    tables: HashMap<UniCase<String>, StreamTable>,
-    catalog_tables: HashMap<UniCase<String>, CatalogTable>,
-    pub functions: HashMap<String, Arc<ScalarUDF>>,
-    pub aggregate_functions: HashMap<String, Arc<AggregateUDF>>,
-    pub window_functions: HashMap<String, Arc<WindowUDF>>,
-    pub dylib_udfs: HashMap<String, DylibUdfConfig>,
-    config_options: datafusion::config::ConfigOptions,
-    pub expr_planners: Vec<Arc<dyn ExprPlanner>>,
-    pub planning_options: PlanningOptions,
-    pub analyzer: Analyzer,
+pub type ObjectName = UniCase<String>;
+
+#[inline]
+fn object_name(s: impl Into<String>) -> ObjectName {
+    UniCase::new(s.into())
 }
 
 #[derive(Clone, Debug)]
@@ -56,39 +60,36 @@ pub enum StreamTable {
 impl StreamTable {
     pub fn name(&self) -> &str {
         match self {
-            StreamTable::Source { name, .. } => name,
-            StreamTable::Sink { name, .. } => name,
-            StreamTable::Memory { name, .. } => name,
+            Self::Source { name, .. } | Self::Sink { name, .. } | Self::Memory { name, .. } => name,
         }
     }
 
-    pub fn get_fields(&self) -> Vec<Arc<Field>> {
+    pub fn schema(&self) -> Arc<Schema> {
         match self {
-            StreamTable::Source { schema, .. } => schema.fields().to_vec(),
-            StreamTable::Sink { schema, .. } => schema.fields().to_vec(),
-            StreamTable::Memory { .. } => vec![],
+            Self::Source { schema, .. } | Self::Sink { schema, .. } => Arc::clone(schema),
+            Self::Memory { .. } => Arc::new(Schema::empty()),
         }
     }
 }
 
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone)]
 pub struct LogicalBatchInput {
     pub table_name: String,
     pub schema: Arc<Schema>,
 }
 
 #[async_trait::async_trait]
-impl datafusion::datasource::TableProvider for LogicalBatchInput {
+impl TableProvider for LogicalBatchInput {
     fn as_any(&self) -> &dyn std::any::Any {
         self
     }
 
     fn schema(&self) -> Arc<Schema> {
-        self.schema.clone()
+        Arc::clone(&self.schema)
     }
 
-    fn table_type(&self) -> datafusion::datasource::TableType {
-        datafusion::datasource::TableType::Temporary
+    fn table_type(&self) -> TableType {
+        TableType::Temporary
     }
 
     async fn scan(
@@ -100,85 +101,96 @@ impl datafusion::datasource::TableProvider for LogicalBatchInput {
     ) -> Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>> {
         Ok(Arc::new(crate::sql::logical_planner::FsMemExec::new(
             self.table_name.clone(),
-            self.schema.clone(),
+            Arc::clone(&self.schema),
         )))
     }
 }
 
-fn create_table(table_name: String, schema: Arc<Schema>) -> Arc<dyn TableSource> {
-    let table_provider = LogicalBatchInput { table_name, schema };
-    let wrapped = Arc::new(table_provider);
-    let provider = DefaultTableSource::new(wrapped);
-    Arc::new(provider)
+#[derive(Clone, Default)]
+pub struct FunctionCatalog {
+    pub scalars: HashMap<String, Arc<ScalarUDF>>,
+    pub aggregates: HashMap<String, Arc<AggregateUDF>>,
+    pub windows: HashMap<String, Arc<WindowUDF>>,
+    pub planners: Vec<Arc<dyn ExprPlanner>>,
 }
 
-impl StreamSchemaProvider {
-    pub fn new() -> Self {
-        let mut registry = Self {
-            ..Default::default()
-        };
+#[derive(Clone, Default)]
+pub struct TableCatalog {
+    pub streams: HashMap<ObjectName, Arc<StreamTable>>,
+    pub catalogs: HashMap<ObjectName, Arc<CatalogTable>>,
+    pub source_defs: HashMap<String, String>,
+}
 
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "hop",
-                vec![
-                    DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
-                    DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
-                ],
-                window_arrow_struct(),
-            ))
-            .unwrap();
+#[derive(Clone)]
+pub struct StreamPlanningContext {
+    pub tables: TableCatalog,
+    pub functions: FunctionCatalog,
+    pub dylib_udfs: HashMap<String, DylibUdfConfig>,
+    pub config_options: datafusion::config::ConfigOptions,
+    pub planning_options: PlanningOptions,
+    pub analyzer: Analyzer,
+}
 
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "tumble",
-                vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
-                window_arrow_struct(),
-            ))
-            .unwrap();
+impl Default for StreamPlanningContext {
+    fn default() -> Self {
+        Self {
+            tables: TableCatalog::default(),
+            functions: FunctionCatalog::default(),
+            dylib_udfs: HashMap::new(),
+            config_options: datafusion::config::ConfigOptions::default(),
+            planning_options: PlanningOptions::default(),
+            analyzer: Analyzer::default(),
+        }
+    }
+}
 
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "session",
-                vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
-                window_arrow_struct(),
-            ))
-            .unwrap();
+/// Back-compat name for [`StreamPlanningContext`].
+pub type StreamSchemaProvider = StreamPlanningContext;
 
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "unnest",
-                vec![DataType::List(Arc::new(Field::new(
-                    "field",
-                    DataType::Utf8,
-                    true,
-                )))],
-                DataType::Utf8,
-            ))
-            .unwrap();
+impl StreamPlanningContext {
+    pub fn builder() -> StreamPlanningContextBuilder {
+        StreamPlanningContextBuilder::default()
+    }
 
-        registry
-            .register_udf(PlaceholderUdf::with_return(
-                "row_time",
-                vec![],
-                DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None),
-            ))
-            .unwrap();
+    /// Same registration order as the historical `StreamSchemaProvider::new` (placeholders, then DataFusion defaults).
+    pub fn new() -> Self {
+        Self::builder()
+            .with_streaming_extensions()
+            .expect("streaming extensions")
+            .with_default_functions()
+            .expect("default functions")
+            .build()
+    }
 
-        for p in SessionStateDefaults::default_scalar_functions() {
-            registry.register_udf(p).unwrap();
-        }
-        for p in SessionStateDefaults::default_aggregate_functions() {
-            registry.register_udaf(p).unwrap();
-        }
-        for p in SessionStateDefaults::default_window_functions() {
-            registry.register_udwf(p).unwrap();
-        }
-        for p in SessionStateDefaults::default_expr_planners() {
-            registry.register_expr_planner(p).unwrap();
-        }
+    pub fn register_stream_table(&mut self, table: StreamTable) {
+        let key = object_name(table.name().to_string());
+        self.tables.streams.insert(key, Arc::new(table));
+    }
+
+    pub fn get_stream_table(&self, name: &str) -> Option<Arc<StreamTable>> {
+        self.tables.streams.get(&object_name(name.to_string())).cloned()
+    }
+
+    pub fn register_catalog_table(&mut self, table: CatalogTable) {
+        let key = object_name(table.name().to_string());
+        self.tables.catalogs.insert(key, Arc::new(table));
+    }
 
-        registry
+    pub fn get_catalog_table(&self, table_name: impl AsRef<str>) -> Option<&CatalogTable> {
+        self.tables
+            .catalogs
+            .get(&object_name(table_name.as_ref().to_string()))
+            .map(|t| t.as_ref())
+    }
+
+    /// Mutable access to a catalog table, looked up case-insensitively.
+    pub fn get_catalog_table_mut(
+        &mut self,
+        table_name: impl AsRef<str>,
+    ) -> Option<&mut CatalogTable> {
+        let key = object_name(table_name.as_ref());
+        // Copy-on-write: if the `Arc` is shared, `make_mut` clones the table first.
+        self.tables.catalogs.get_mut(&key).map(Arc::make_mut)
     }
 
     pub fn add_source_table(
@@ -188,86 +200,70 @@ impl StreamSchemaProvider {
         event_time_field: Option<String>,
         watermark_field: Option<String>,
     ) {
-        self.tables.insert(
-            UniCase::new(name.clone()),
-            StreamTable::Source {
-                name,
-                schema,
-                event_time_field,
-                watermark_field,
-            },
-        );
+        self.register_stream_table(StreamTable::Source {
+            name,
+            schema,
+            event_time_field,
+            watermark_field,
+        });
     }
 
     pub fn add_sink_table(&mut self, name: String, schema: Arc<Schema>) {
-        self.tables.insert(
-            UniCase::new(name.clone()),
-            StreamTable::Sink { name, schema },
-        );
+        self.register_stream_table(StreamTable::Sink { name, schema });
     }
 
     pub fn insert_table(&mut self, table: StreamTable) {
-        self.tables
-            .insert(UniCase::new(table.name().to_string()), table);
-    }
-
-    pub fn get_table(&self, table_name: impl Into<String>) -> Option<&StreamTable> {
-        self.tables.get(&UniCase::new(table_name.into()))
-    }
-
-    pub fn get_table_mut(&mut self, table_name: impl Into<String>) -> Option<&mut StreamTable> {
-        self.tables.get_mut(&UniCase::new(table_name.into()))
+        self.register_stream_table(table);
     }
 
+    /// Alias for [`Self::register_catalog_table`].
     pub fn insert_catalog_table(&mut self, table: CatalogTable) {
-        self.catalog_tables
-            .insert(UniCase::new(table.name().to_string()), table);
+        self.register_catalog_table(table);
     }
 
-    pub fn get_catalog_table(&self, table_name: impl Into<String>) -> Option<&CatalogTable> {
-        self.catalog_tables.get(&UniCase::new(table_name.into()))
+    pub fn get_table(&self, table_name: impl AsRef<str>) -> Option<&StreamTable> {
+        self.tables
+            .streams
+            .get(&object_name(table_name.as_ref().to_string()))
+            .map(|a| a.as_ref())
     }
 
-    pub fn get_catalog_table_mut(
-        &mut self,
-        table_name: impl Into<String>,
-    ) -> Option<&mut CatalogTable> {
-        self.catalog_tables
-            .get_mut(&UniCase::new(table_name.into()))
+    /// Mutable access to a stream table, looked up case-insensitively.
+    pub fn get_table_mut(&mut self, table_name: impl AsRef<str>) -> Option<&mut StreamTable> {
+        let key = object_name(table_name.as_ref());
+        // Copy-on-write: if the `Arc` is shared, `make_mut` clones the table first.
+        self.tables.streams.get_mut(&key).map(Arc::make_mut)
     }
 
-    pub fn get_async_udf_options(
-        &self,
-        _name: &str,
-    ) -> Option<crate::sql::analysis::AsyncOptions> {
-        // TODO: implement async UDF lookup
+    pub fn get_async_udf_options(&self, _name: &str) -> Option<crate::sql::analysis::AsyncOptions> {
         None
     }
+
+    fn create_table_source(name: String, schema: Arc<Schema>) -> Arc<dyn TableSource> {
+        let provider = LogicalBatchInput { table_name: name, schema };
+        Arc::new(DefaultTableSource::new(Arc::new(provider)))
+    }
 }
 
-impl ContextProvider for StreamSchemaProvider {
+impl ContextProvider for StreamPlanningContext {
     fn get_table_source(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
         let table = self
-            .get_table(name.to_string())
-            .ok_or_else(|| DataFusionError::Plan(format!("Table {name} not found")))?;
+            .get_stream_table(name.table())
+            .ok_or_else(|| DataFusionError::Plan(format!("Table {} not found", name)))?;
 
-        let fields = table.get_fields();
-        let schema = Arc::new(Schema::new_with_metadata(
-            fields
-                .iter()
-                .map(|f| f.as_ref().clone())
-                .collect::<Vec<Field>>(),
-            HashMap::new(),
-        ));
-        Ok(create_table(name.to_string(), schema))
+        Ok(Self::create_table_source(name.to_string(), table.schema()))
     }
 
     fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
-        self.functions.get(name).cloned()
+        self.functions.scalars.get(name).cloned()
     }
 
     fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
-        self.aggregate_functions.get(name).cloned()
+        self.functions.aggregates.get(name).cloned()
+    }
+
+    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>> {
+        self.functions.windows.get(name).cloned()
     }
 
     fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
@@ -278,54 +274,50 @@ impl ContextProvider for StreamSchemaProvider {
         &self.config_options
     }
 
-    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>> {
-        self.window_functions.get(name).cloned()
-    }
-
     fn udf_names(&self) -> Vec<String> {
-        self.functions.keys().cloned().collect()
+        self.functions.scalars.keys().cloned().collect()
     }
 
     fn udaf_names(&self) -> Vec<String> {
-        self.aggregate_functions.keys().cloned().collect()
+        self.functions.aggregates.keys().cloned().collect()
     }
 
     fn udwf_names(&self) -> Vec<String> {
-        self.window_functions.keys().cloned().collect()
+        self.functions.windows.keys().cloned().collect()
     }
 
     fn get_expr_planners(&self) -> &[Arc<dyn ExprPlanner>] {
-        &self.expr_planners
+        &self.functions.planners
     }
 }
 
-impl FunctionRegistry for StreamSchemaProvider {
+impl FunctionRegistry for StreamPlanningContext {
     fn udfs(&self) -> HashSet<String> {
-        self.functions.keys().cloned().collect()
+        self.functions.scalars.keys().cloned().collect()
     }
 
     fn udf(&self, name: &str) -> Result<Arc<ScalarUDF>> {
-        if let Some(f) = self.functions.get(name) {
-            Ok(Arc::clone(f))
-        } else {
-            plan_err!("No UDF with name {name}")
-        }
+        self.functions
+            .scalars
+            .get(name)
+            .cloned()
+            .ok_or_else(|| DataFusionError::Plan(format!("No UDF with name {name}")))
     }
 
     fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>> {
-        if let Some(f) = self.aggregate_functions.get(name) {
-            Ok(Arc::clone(f))
-        } else {
-            plan_err!("No UDAF with name {name}")
-        }
+        self.functions
+            .aggregates
+            .get(name)
+            .cloned()
+            .ok_or_else(|| DataFusionError::Plan(format!("No UDAF with name {name}")))
     }
 
     fn udwf(&self, name: &str) -> Result<Arc<WindowUDF>> {
-        if let Some(f) = self.window_functions.get(name) {
-            Ok(Arc::clone(f))
-        } else {
-            plan_err!("No UDWF with name {name}")
-        }
+        self.functions
+            .windows
+            .get(name)
+            .cloned()
+            .ok_or_else(|| DataFusionError::Plan(format!("No UDWF with name {name}")))
     }
 
     fn register_function_rewrite(
@@ -337,25 +329,117 @@ impl FunctionRegistry for StreamSchemaProvider {
     }
 
     fn register_udf(&mut self, udf: Arc<ScalarUDF>) -> Result<Option<Arc<ScalarUDF>>> {
-        Ok(self.functions.insert(udf.name().to_string(), udf))
+        Ok(self.functions.scalars.insert(udf.name().to_string(), udf))
     }
 
     fn register_udaf(&mut self, udaf: Arc<AggregateUDF>) -> Result<Option<Arc<AggregateUDF>>> {
         Ok(self
-            .aggregate_functions
+            .functions
+            .aggregates
             .insert(udaf.name().to_string(), udaf))
     }
 
     fn register_udwf(&mut self, udwf: Arc<WindowUDF>) -> Result<Option<Arc<WindowUDF>>> {
-        Ok(self.window_functions.insert(udwf.name().to_string(), udwf))
+        Ok(self.functions.windows.insert(udwf.name().to_string(), udwf))
     }
 
     fn register_expr_planner(&mut self, expr_planner: Arc<dyn ExprPlanner>) -> Result<()> {
-        self.expr_planners.push(expr_planner);
+        self.functions.planners.push(expr_planner);
         Ok(())
     }
 
     fn expr_planners(&self) -> Vec<Arc<dyn ExprPlanner>> {
-        self.expr_planners.clone()
+        self.functions.planners.clone()
+    }
+}
+
+/// Incrementally assembles a [`StreamPlanningContext`].
+///
+/// Each `with_*` step registers functions through the context's `FunctionRegistry`
+/// methods, which store entries by name; registering a name twice replaces the
+/// earlier entry.
+#[derive(Default)]
+pub struct StreamPlanningContextBuilder {
+    context: StreamPlanningContext,
+}
+
+impl StreamPlanningContextBuilder {
+    /// Creates a builder around `StreamPlanningContext::default()`.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Registers DataFusion's default scalar, aggregate and window functions, followed by
+    /// its default expression planners.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error reported by a registration call.
+    pub fn with_default_functions(mut self) -> Result<Self> {
+        let ctx = &mut self.context;
+        for scalar in SessionStateDefaults::default_scalar_functions() {
+            ctx.register_udf(scalar)?;
+        }
+        for aggregate in SessionStateDefaults::default_aggregate_functions() {
+            ctx.register_udaf(aggregate)?;
+        }
+        for window in SessionStateDefaults::default_window_functions() {
+            ctx.register_udwf(window)?;
+        }
+        for planner in SessionStateDefaults::default_expr_planners() {
+            ctx.register_expr_planner(planner)?;
+        }
+        Ok(self)
+    }
+
+    /// Registers the streaming placeholder UDFs `hop`, `tumble`, `session`, `unnest` and
+    /// `row_time`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error reported by a registration call.
+    pub fn with_streaming_extensions(mut self) -> Result<Self> {
+        // The set is fixed, so a plain array is enough; no `Vec` allocation is needed.
+        let placeholders = [
+            PlaceholderUdf::with_return(
+                "hop",
+                vec![
+                    DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
+                    DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
+                ],
+                window_arrow_struct(),
+            ),
+            PlaceholderUdf::with_return(
+                "tumble",
+                vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
+                window_arrow_struct(),
+            ),
+            PlaceholderUdf::with_return(
+                "session",
+                vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
+                window_arrow_struct(),
+            ),
+            PlaceholderUdf::with_return(
+                "unnest",
+                vec![DataType::List(Arc::new(Field::new("field", DataType::Utf8, true)))],
+                DataType::Utf8,
+            ),
+            PlaceholderUdf::with_return(
+                "row_time",
+                vec![],
+                DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None),
+            ),
+        ];
+
+        for placeholder in placeholders {
+            self.context.register_udf(placeholder)?;
+        }
+
+        Ok(self)
+    }
+
+    /// Consumes the builder and returns the assembled context.
+    pub fn build(self) -> StreamPlanningContext {
+        self.context
     }
 }
diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs
new file mode 100644
index 00000000..dd962e34
--- /dev/null
+++ b/src/sql/schema/source_table.rs
@@ -0,0 +1,564 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+use std::time::Duration;
+
+use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema};
+use datafusion::common::{Column, DFSchema, Result, plan_datafusion_err, plan_err};
+use datafusion::error::DataFusionError;
+use datafusion::logical_expr::Expr;
+use datafusion_expr::ExprSchemable;
+use datafusion::sql::planner::{PlannerContext, SqlToRel};
+use datafusion::sql::sqlparser::ast;
+use datafusion::sql::TableReference;
+use protocol::grpc::api::ConnectorOp;
+use tracing::warn;
+
+use super::column_descriptor::ColumnDescriptor;
+use super::data_encoding_format::DataEncodingFormat;
+use super::schema_context::SchemaContext;
+use super::table_execution_unit::{EngineDescriptor, SyncMode, TableExecutionUnit};
+use super::table_role::{
+    apply_adapter_specific_rules, deduce_role, serialize_backend_params,
+    validate_adapter_availability, TableRole,
+};
+use super::temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineConfig, TemporalSpec};
+use super::StreamSchemaProvider;
+use crate::multifield_partial_ord;
+use crate::sql::api::{ConnectionProfile, ConnectionSchema, SourceField};
+use crate::sql::common::connector_options::ConnectorOptions;
+use crate::sql::common::{BadData, Format, Framing, JsonCompression, JsonFormat};
+use crate::sql::schema::ConnectionType;
+use crate::sql::schema::table::SqlSource;
+use crate::sql::types::ProcessingMode;
+
+/// Connector-backed catalog table (adapter / source-sink model).
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct SourceTable {
+    pub registry_id: Option<i64>, // passed on as `SqlSource::id`; constructors here leave it `None`
+    pub adapter_type: String, // connector name, as returned by `connector()`
+    pub table_identifier: String, // table name, as returned by `name()`
+    pub role: TableRole, // source / sink / lookup
+    pub schema_specs: Vec<ColumnDescriptor>, // every column, computed ones included
+    /// Serialized runtime payload (e.g. JSON: connector + `connection_schema` + options).
+    pub opaque_config: String,
+    pub temporal_config: TemporalPipelineConfig, // event-time, watermark and idle settings
+    pub key_constraints: Vec<String>, // primary-key column names
+    pub payload_format: Option<DataEncodingFormat>,
+    /// Wire [`Format`] when built from SQL `WITH` (updating mode, `ConnectionSchema`).
+    pub connection_format: Option<Format>,
+    pub description: String, // copied into `ConnectorOp::description`
+    pub partition_exprs: Arc<Option<Vec<Expr>>>,
+    pub lookup_cache_max_bytes: Option<u64>, // from the `lookup.cache.max_bytes` option
+    pub lookup_cache_ttl: Option<Duration>, // from the `lookup.cache.ttl` option
+    pub inferred_fields: Option<Vec<FieldRef>>, // if set, `Table::get_fields` returns these
+}
+// NOTE(review): presumably orders by these fields only (derived `PartialEq` uses all) — confirm.
+multifield_partial_ord!(
+    SourceTable,
+    registry_id,
+    adapter_type,
+    table_identifier,
+    role,
+    opaque_config,
+    description,
+    key_constraints,
+    connection_format
+);
+
+impl SourceTable {
+    #[inline]
+    pub fn name(&self) -> &str {
+        self.table_identifier.as_str() // also what `Table::name` returns for connector tables
+    }
+    /// Builds an otherwise empty table from a name, a connector name and a connection type.
+    pub fn new(
+        table_identifier: impl Into<String>,
+        connector: impl Into<String>,
+        connection_type: ConnectionType,
+    ) -> Self {
+        Self {
+            registry_id: None,
+            adapter_type: connector.into(),
+            table_identifier: table_identifier.into(),
+            role: connection_type.into(),
+            schema_specs: Vec::new(),
+            opaque_config: String::new(),
+            temporal_config: TemporalPipelineConfig::default(),
+            key_constraints: Vec::new(),
+            payload_format: None,
+            connection_format: None,
+            description: String::new(),
+            partition_exprs: Arc::new(None),
+            lookup_cache_max_bytes: None,
+            lookup_cache_ttl: None,
+            inferred_fields: None,
+        }
+    }
+    /// Connector name (`adapter_type`) as a string slice.
+    #[inline]
+    pub fn connector(&self) -> &str {
+        self.adapter_type.as_str()
+    }
+    /// The table's `role` converted to the equivalent [`ConnectionType`].
+    #[inline]
+    pub fn connection_type(&self) -> ConnectionType {
+        self.role.into()
+    }
+    /// Name of the configured event-time column (`temporal_config.event_column`), if any.
+    pub fn event_time_field(&self) -> Option<&str> {
+        self.temporal_config.event_column.as_deref()
+    }
+    /// Name of the watermark column (`temporal_config.watermark_strategy_column`), if any.
+    pub fn watermark_field(&self) -> Option<&str> {
+        self.temporal_config.watermark_strategy_column.as_deref()
+    }
+    /// Configured idle timeout (`temporal_config.liveness_timeout`), if any.
+    pub fn idle_time(&self) -> Option<Duration> {
+        self.temporal_config.liveness_timeout
+    }
+    /// Builds a table from an adapter name, columns, primary keys and a string option map.
+    pub fn initialize_from_params(
+        id: &str,
+        adapter: &str,
+        raw_columns: Vec<ColumnDescriptor>,
+        pk_list: Vec<String>,
+        time_meta: Option<TemporalSpec>,
+        options: &mut HashMap<String, String>,
+        _schema_ctx: &dyn SchemaContext,
+    ) -> Result<Self> {
+        validate_adapter_availability(adapter)?;
+
+        let encoding = DataEncodingFormat::extract_from_map(options)?;
+        // Adapter rules run first; the encoding's envelope is then applied to their result.
+        let mut refined_columns = apply_adapter_specific_rules(adapter, raw_columns);
+        refined_columns = encoding.apply_envelope(refined_columns)?;
+        // NOTE(review): a watermark expr yields `__watermark` here, yet no such column is added — confirm.
+        let temporal_settings = resolve_temporal_logic(&refined_columns, time_meta)?;
+        let finalized_config = serialize_backend_params(adapter, options)?;
+        let role = deduce_role(options)?;
+        // A source whose encoding supports delta updates must declare a primary key.
+        if role == TableRole::Ingestion && encoding.supports_delta_updates() && pk_list.is_empty() {
+            return plan_err!("CDC source requires at least one primary key");
+        }
+
+        Ok(Self {
+            registry_id: None,
+            adapter_type: adapter.to_string(),
+            table_identifier: id.to_string(),
+            role,
+            schema_specs: refined_columns,
+            opaque_config: finalized_config,
+            temporal_config: temporal_settings,
+            key_constraints: pk_list,
+            payload_format: Some(encoding),
+            connection_format: None,
+            description: String::new(),
+            partition_exprs: Arc::new(None),
+            lookup_cache_max_bytes: None,
+            lookup_cache_ttl: None,
+            inferred_fields: None,
+        })
+    }
+
+    /// Builds the Arrow schema of the table's non-computed columns.
+    ///
+    /// Columns are kept in `schema_specs` order; computed columns are dropped.
+    pub fn produce_physical_schema(&self) -> Schema {
+        let stored = self.schema_specs.iter().filter(|c| !c.is_computed());
+        let fields: Vec<_> = stored.map(|c| c.arrow_field().clone()).collect();
+
+        Schema::new(fields)
+    }
+    /// Alias for [`Self::produce_physical_schema`].
+    #[inline]
+    pub fn physical_schema(&self) -> Schema {
+        self.produce_physical_schema()
+    }
+
+    /// Describes this table as a readable [`TableExecutionUnit`].
+    ///
+    /// The sync mode is `Incremental` for CDC-enabled tables and `AppendOnly` otherwise.
+    /// Fails for `Egress` tables, and for CDC tables that also declare computed columns.
+    pub fn convert_to_execution_unit(&self) -> Result<TableExecutionUnit> {
+        if matches!(self.role, TableRole::Egress) {
+            return plan_err!("Target [{}] is write-only", self.table_identifier);
+        }
+
+        let cdc = self.is_cdc_enabled();
+        if cdc && self.schema_specs.iter().any(|c| c.is_computed()) {
+            return plan_err!("CDC cannot be mixed with computed columns natively");
+        }
+
+        let engine_meta = EngineDescriptor {
+            engine_type: self.adapter_type.clone(),
+            raw_payload: self.opaque_config.clone(),
+        };
+        Ok(TableExecutionUnit {
+            label: self.table_identifier.clone(),
+            engine_meta,
+            sync_mode: if cdc { SyncMode::Incremental } else { SyncMode::AppendOnly },
+            temporal_offset: self.temporal_config.clone(),
+        })
+    }
+    /// Alias for [`Self::convert_to_execution_unit`].
+    #[inline]
+    pub fn to_execution_unit(&self) -> Result<TableExecutionUnit> {
+        self.convert_to_execution_unit()
+    }
+
+    /// Whether `payload_format` is set and reports support for delta updates.
+    fn is_cdc_enabled(&self) -> bool {
+        let format = self.payload_format.as_ref();
+        format.is_some_and(|f| f.supports_delta_updates())
+    }
+
+    /// Builds a connector table from its declared columns and SQL `WITH` options.
+    ///
+    /// Recognized options are read from `options` (format, framing, bad-data policy, `type`,
+    /// event-time / watermark / idle settings, lookup cache limits); whatever
+    /// `drain_remaining_string_values` still returns is copied into `opaque_config`.
+    ///
+    /// Returns a plan error for an unknown connector, invalid options or an invalid watermark.
+    #[allow(clippy::too_many_arguments)]
+    pub fn from_options(
+        table_identifier: &str,
+        connector_name: &str,
+        temporary: bool,
+        fields: Vec<ColumnDescriptor>,
+        primary_keys: Vec<String>,
+        watermark: Option<(String, Option<ast::Expr>)>,
+        options: &mut ConnectorOptions,
+        connection_profile: Option<&ConnectionProfile>,
+        schema_provider: &StreamSchemaProvider,
+        connection_type_override: Option<ConnectionType>,
+        description: String,
+    ) -> Result<Self> {
+        let _ = connection_profile;
+
+        if let Some(c) = options.pull_opt_str("connector")? {
+            if c != connector_name {
+                return plan_err!(
+                    "WITH option `connector` is '{c}' but table uses connector '{connector_name}'"
+                );
+            }
+        }
+
+        validate_adapter_availability(connector_name)?;
+
+        let inferred_empty = fields.is_empty();
+        let mut columns = fields;
+        columns = apply_adapter_specific_rules(connector_name, columns);
+
+        let format = Format::from_opts(options)
+            .map_err(|e| DataFusionError::Plan(format!("invalid format: '{e}'")))?;
+
+        if let Some(Format::Json(JsonFormat { compression, .. })) = &format
+            && !matches!(compression, JsonCompression::Uncompressed)
+            && connector_name != "filesystem"
+        {
+            return plan_err!("'json.compression' is only supported for the filesystem connector");
+        }
+
+        let framing = Framing::from_opts(options)
+            .map_err(|e| DataFusionError::Plan(format!("invalid framing: '{e}'")))?;
+        // Temporary tables get `type = 'lookup'`; a different, previously set type is an error.
+        if temporary
+            && let Some(t) = options.insert_str("type", "lookup")?
+            && t != "lookup"
+        {
+            return plan_err!(
+                "Cannot have a temporary table with type '{t}'; temporary tables must be type 'lookup'"
+            );
+        }
+
+        let payload_format = format.as_ref().map(DataEncodingFormat::from_connection_format);
+        let encoding = payload_format.unwrap_or(DataEncodingFormat::Raw);
+        columns = encoding.apply_envelope(columns)?;
+
+        let schema_fields: Vec<SourceField> = columns
+            .iter()
+            .filter(|c| !c.is_computed())
+            .map(|c| {
+                let mut sf: SourceField = c.arrow_field().clone().try_into().map_err(|_| {
+                    DataFusionError::Plan(format!(
+                        "field '{}' has a type '{:?}' that cannot be used in a connection table",
+                        c.arrow_field().name(),
+                        c.arrow_field().data_type()
+                    ))
+                })?;
+                if let Some(key) = c.system_meta_key() {
+                    sf.metadata_key = Some(key.to_string());
+                }
+                Ok(sf)
+            })
+            .collect::<Result<_>>()?;
+
+        let bad_data = BadData::from_opts(options)
+            .map_err(|e| DataFusionError::Plan(format!("Invalid bad_data: '{e}'")))?;
+
+        let connection_schema = ConnectionSchema::try_new(
+            format.clone(),
+            Some(bad_data),
+            framing,
+            schema_fields,
+            None,
+            Some(inferred_empty),
+            primary_keys.iter().cloned().collect::<HashSet<_>>(),
+        )
+        .map_err(|e| DataFusionError::Plan(format!("could not create connection schema: {e}")))?;
+
+        let role = if let Some(t) = connection_type_override {
+            t.into()
+        } else {
+            match options.pull_opt_str("type")?.as_deref() {
+                None | Some("source") => TableRole::Ingestion,
+                Some("sink") => TableRole::Egress,
+                Some("lookup") => TableRole::Reference,
+                Some(other) => {
+                    return plan_err!("invalid connection type '{other}' in WITH options");
+                }
+            }
+        };
+
+        let mut table = SourceTable {
+            registry_id: None,
+            adapter_type: connector_name.to_string(),
+            table_identifier: table_identifier.to_string(),
+            role,
+            schema_specs: columns,
+            opaque_config: String::new(),
+            temporal_config: TemporalPipelineConfig::default(),
+            key_constraints: Vec::new(),
+            payload_format,
+            connection_format: format.clone(),
+            description,
+            partition_exprs: Arc::new(None),
+            lookup_cache_max_bytes: None,
+            lookup_cache_ttl: None,
+            inferred_fields: None,
+        };
+
+        if let Some(event_time_field) = options.pull_opt_field("event_time_field")? {
+            warn!("`event_time_field` WITH option is deprecated; use WATERMARK FOR syntax");
+            table.temporal_config.event_column = Some(event_time_field);
+        }
+
+        if let Some(watermark_field) = options.pull_opt_field("watermark_field")? {
+            warn!("`watermark_field` WITH option is deprecated; use WATERMARK FOR syntax");
+            table.temporal_config.watermark_strategy_column = Some(watermark_field);
+        }
+
+        if let Some((time_field, watermark_expr)) = watermark {
+            let table_ref = TableReference::bare(table.table_identifier.as_str());
+            let df_schema =
+                DFSchema::try_from_qualified_schema(table_ref, &table.produce_physical_schema())?;
+
+            let field = table
+                .schema_specs
+                .iter()
+                .find(|c| c.arrow_field().name().as_str() == time_field.as_str())
+                .ok_or_else(|| {
+                    plan_datafusion_err!(
+                        "WATERMARK FOR field `{}` does not exist in table",
+                        time_field
+                    )
+                })?;
+
+            if !matches!(field.arrow_field().data_type(), DataType::Timestamp(_, None)) {
+                return plan_err!(
+                    "WATERMARK FOR field `{time_field}` has type {}, but expected TIMESTAMP",
+                    field.arrow_field().data_type()
+                );
+            }
+
+            table.temporal_config.event_column = Some(time_field.clone());
+
+            if let Some(expr) = watermark_expr {
+                let logical_expr = plan_generating_expr(&expr, &df_schema, schema_provider)
+                    .map_err(|e| {
+                        DataFusionError::Plan(format!("could not plan watermark expression: {e}"))
+                    })?;
+
+                let (data_type, nullable) = logical_expr.data_type_and_nullable(&df_schema)?;
+                if !matches!(data_type, DataType::Timestamp(_, _)) {
+                    return plan_err!(
+                        "the type of the WATERMARK FOR expression must be TIMESTAMP, but was {data_type}"
+                    );
+                }
+                if nullable {
+                    return plan_err!("the type of the WATERMARK FOR expression must be NOT NULL");
+                }
+                // Reuse the type resolved above rather than resolving it a second time.
+                table.schema_specs.push(ColumnDescriptor::new_computed(
+                    Field::new("__watermark", data_type, false),
+                    logical_expr,
+                ));
+                table.temporal_config.watermark_strategy_column = Some("__watermark".to_string());
+            } else {
+                table.temporal_config.watermark_strategy_column = Some(time_field);
+            }
+        }
+
+        let idle_from_micros = options
+            .pull_opt_i64("idle_micros")?
+            .filter(|t| *t > 0)
+            .map(|t| Duration::from_micros(t as u64));
+        let idle_from_duration = options.pull_opt_duration("idle_time")?;
+        table.temporal_config.liveness_timeout = idle_from_micros.or(idle_from_duration);
+
+        table.lookup_cache_max_bytes = options.pull_opt_u64("lookup.cache.max_bytes")?;
+        table.lookup_cache_ttl = options.pull_opt_duration("lookup.cache.ttl")?;
+        // Leftover options must not override the reserved `connector` / `connection_schema` keys.
+        let extra_opts = options.drain_remaining_string_values()?;
+        let mut config_root = serde_json::json!({
+            "connector": connector_name,
+            "connection_schema": connection_schema,
+        });
+        if let serde_json::Value::Object(ref mut map) = config_root {
+            for (k, v) in extra_opts {
+                map.entry(k).or_insert(serde_json::Value::String(v));
+            }
+        }
+        table.opaque_config = serde_json::to_string(&config_root).map_err(|e| {
+            DataFusionError::Plan(format!("failed to serialize connector config: {e}"))
+        })?;
+
+        if role == TableRole::Ingestion && encoding.supports_delta_updates() && primary_keys.is_empty()
+        {
+            return plan_err!("Debezium source must have at least one PRIMARY KEY field");
+        }
+
+        table.key_constraints = primary_keys;
+
+        Ok(table)
+    }
+    /// Whether any column in `schema_specs` is computed.
+    pub fn has_virtual_fields(&self) -> bool {
+        self.schema_specs.iter().any(|c| c.is_computed())
+    }
+
+    /// True when the wire format (`connection_format`) reports updating mode, or when the
+    /// payload format is Debezium JSON.
+    pub fn is_updating(&self) -> bool {
+        let wire_updating = self.connection_format.as_ref().is_some_and(|f| f.is_updating());
+        wire_updating || self.payload_format == Some(DataEncodingFormat::DebeziumJson)
+    }
+    /// Packs the connector name, serialized config and description into a [`ConnectorOp`].
+    pub fn connector_op(&self) -> ConnectorOp {
+        ConnectorOp {
+            connector: self.adapter_type.clone(),
+            config: self.opaque_config.clone(),
+            description: self.description.clone(),
+        }
+    }
+
+    /// Processing mode implied by the table's format.
+    ///
+    /// `Update` when [`Self::is_updating`] holds, `Append` otherwise.
+    pub fn processing_mode(&self) -> ProcessingMode {
+        let updating = self.is_updating();
+        if updating { ProcessingMode::Update } else { ProcessingMode::Append }
+    }
+
+    /// Column expression for the configured event-time column, or `None` when none is set.
+    pub fn timestamp_override(&self) -> Result<Option<Expr>> {
+        let Some(field_name) = self.temporal_config.event_column.as_deref() else {
+            return Ok(None);
+        };
+        if self.is_updating() {
+            return plan_err!("can't use event_time_field with update mode");
+        }
+        self.get_time_column(field_name)?;
+        Ok(Some(Expr::Column(Column::from_name(field_name))))
+    }
+
+    /// Finds the first column called `field_name` whose type is a timestamp.
+    fn get_time_column(&self, field_name: &str) -> Result<&ColumnDescriptor> {
+        for column in &self.schema_specs {
+            let field = column.arrow_field();
+            if field.name() == field_name && matches!(field.data_type(), DataType::Timestamp(..)) {
+                return Ok(column);
+            }
+        }
+
+        Err(DataFusionError::Plan(format!(
+            "field {field_name} not found or not a timestamp"
+        )))
+    }
+
+    /// Column expression for the watermark column, or `None` when no watermark is configured.
+    pub fn watermark_column(&self) -> Result<Option<Expr>> {
+        let Some(field_name) = self.temporal_config.watermark_strategy_column.as_deref() else {
+            return Ok(None);
+        };
+        self.get_time_column(field_name)?;
+        Ok(Some(Expr::Column(Column::from_name(field_name))))
+    }
+
+    /// Builds the [`SourceOperator`] used to read from this table.
+    ///
+    /// # Errors
+    /// Fails when the table is a sink or a lookup table, when it is updating and has computed
+    /// columns, or when the configured event-time / watermark column cannot be resolved.
+    pub fn as_sql_source(&self) -> Result<SourceOperator> {
+        match self.role {
+            TableRole::Ingestion => {}
+            TableRole::Egress => return plan_err!("cannot read from sink"),
+            // A lookup table is not a sink, so it gets its own message.
+            TableRole::Reference => return plan_err!("cannot read from lookup table"),
+        }
+
+        if self.is_updating() && self.has_virtual_fields() {
+            return plan_err!("can't read from a source with virtual fields and update mode.");
+        }
+
+        let timestamp_override = self.timestamp_override()?;
+        let watermark_column = self.watermark_column()?;
+
+        let physical = self.schema_specs.iter().filter(|c| !c.is_computed());
+        let source = SqlSource {
+            id: self.registry_id,
+            struct_def: physical.map(|c| Arc::new(c.arrow_field().clone())).collect(),
+            config: self.connector_op(),
+            processing_mode: self.processing_mode(),
+            idle_time: self.temporal_config.liveness_timeout,
+        };
+        Ok(SourceOperator {
+            name: self.table_identifier.clone(),
+            source,
+            timestamp_override,
+            watermark_column,
+        })
+    }
+}
+
+/// Plans the SQL scalar expression `ast` into a logical [`Expr`], resolving columns against
+/// `df_schema` (used for the watermark `AS` clause).
+fn plan_generating_expr(
+    ast: &ast::Expr,
+    df_schema: &DFSchema,
+    schema_provider: &StreamSchemaProvider,
+) -> Result<Expr> {
+    let mut planner_ctx = PlannerContext::new();
+    SqlToRel::new(schema_provider).sql_to_expr(ast.clone(), df_schema, &mut planner_ctx)
+}
+/// Result of `SourceTable::as_sql_source`: the source plus its optional time-column expressions.
+#[derive(Debug, Clone)]
+pub struct SourceOperator {
+    pub name: String, // the table identifier
+    pub source: SqlSource,
+    pub timestamp_override: Option<Expr>, // column expr for the event-time column, if configured
+    pub watermark_column: Option<Expr>, // column expr for the watermark column, if configured
+}
diff --git a/src/sql/schema/table.rs b/src/sql/schema/table.rs
index 21f064fe..156e8ffd 100644
--- a/src/sql/schema/table.rs
+++ b/src/sql/schema/table.rs
@@ -17,10 +17,10 @@ use datafusion::common::{Result, plan_err};
 use datafusion::logical_expr::{Extension, LogicalPlan};
 use datafusion::sql::sqlparser::ast::Statement;
 use protocol::grpc::api::ConnectorOp;
-use super::connector_table::ConnectorTable;
-use super::optimizer::produce_optimized_plan;
+use super::source_table::SourceTable;
+use crate::sql::logical_planner::optimizers::produce_optimized_plan;
 use crate::sql::schema::StreamSchemaProvider;
-use crate::sql::extensions::remote_table::RemoteTableExtension;
+use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
 use crate::sql::analysis::rewrite_plan;
 use crate::sql::types::{DFField, ProcessingMode};
 
@@ -29,9 +29,9 @@ use crate::sql::types::{DFField, ProcessingMode};
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum Table {
     /// A lookup table backed by an external connector.
-    LookupTable(ConnectorTable),
+    LookupTable(SourceTable),
     /// A source/sink table backed by an external connector.
-    ConnectorTable(ConnectorTable),
+    ConnectorTable(SourceTable),
     /// A table defined by a query (CREATE VIEW / CREATE TABLE AS SELECT).
     TableFromQuery {
         name: String,
@@ -63,11 +63,11 @@ impl Table {
             }))) => {
                 let rewritten = rewrite_plan(input.as_ref().clone(), schema_provider)?;
                 let schema = rewritten.schema().clone();
-                let remote = RemoteTableExtension {
-                    input: rewritten,
-                    name: name.to_owned(),
-                    schema,
-                    materialize: true,
+                let remote = RemoteTableBoundaryNode {
+                    upstream_plan: rewritten,
+                    table_identifier: name.to_owned(),
+                    resolved_schema: schema,
+                    requires_materialization: true,
                 };
                 Ok(Some(Table::TableFromQuery {
                     name: name.to_string(),
@@ -83,25 +83,25 @@ impl Table {
     pub fn name(&self) -> &str {
         match self {
             Table::TableFromQuery { name, .. } => name.as_str(),
-            Table::ConnectorTable(c) | Table::LookupTable(c) => c.name.as_str(),
+            Table::ConnectorTable(c) | Table::LookupTable(c) => c.name(),
         }
     }
 
     pub fn get_fields(&self) -> Vec<FieldRef> {
         match self {
-            Table::ConnectorTable(ConnectorTable {
-                fields,
+            Table::ConnectorTable(SourceTable {
+                schema_specs,
                 inferred_fields,
                 ..
             })
-            | Table::LookupTable(ConnectorTable {
-                fields,
+            | Table::LookupTable(SourceTable {
+                schema_specs,
                 inferred_fields,
                 ..
             }) => inferred_fields.clone().unwrap_or_else(|| {
-                fields
+                schema_specs
                     .iter()
-                    .map(|field| field.field().clone().into())
+                    .map(|c| Arc::new(c.arrow_field().clone()))
                     .collect()
             }),
             Table::TableFromQuery { logical_plan, .. } => {
@@ -115,7 +115,7 @@ impl Table {
             return Ok(());
         };
 
-        if !t.fields.is_empty() {
+        if !t.schema_specs.is_empty() {
             return Ok(());
         }
 
diff --git a/src/sql/schema/table_execution_unit.rs b/src/sql/schema/table_execution_unit.rs
new file mode 100644
index 00000000..c23dda7a
--- /dev/null
+++ b/src/sql/schema/table_execution_unit.rs
@@ -0,0 +1,33 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::temporal_pipeline_config::TemporalPipelineConfig;
+
+#[derive(Debug, Clone)]
+pub struct EngineDescriptor {
+    pub engine_type: String, // filled from `SourceTable::adapter_type`
+    pub raw_payload: String, // filled from `SourceTable::opaque_config`
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SyncMode {
+    AppendOnly, // chosen by `convert_to_execution_unit` when the table is not CDC-enabled
+    Incremental, // chosen when the payload format supports delta updates
+}
+
+#[derive(Debug, Clone)]
+pub struct TableExecutionUnit {
+    pub label: String, // the table identifier
+    pub engine_meta: EngineDescriptor, // connector name and serialized config
+    pub sync_mode: SyncMode,
+    pub temporal_offset: TemporalPipelineConfig, // copy of the table's temporal config
+}
diff --git a/src/sql/schema/table_role.rs b/src/sql/schema/table_role.rs
new file mode 100644
index 00000000..31629ad8
--- /dev/null
+++ b/src/sql/schema/table_role.rs
@@ -0,0 +1,110 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use datafusion::arrow::datatypes::{DataType, TimeUnit};
+use datafusion::common::{Result, plan_err};
+use datafusion::error::DataFusionError;
+
+use super::column_descriptor::ColumnDescriptor;
+use super::connection_type::ConnectionType;
+
+/// Role of a connector-backed table in the pipeline (ingest / egress / lookup).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum TableRole {
+    Ingestion, // `type = 'source'` (also the default); maps to `ConnectionType::Source`
+    Egress, // `type = 'sink'`; maps to `ConnectionType::Sink`
+    Reference, // `type = 'lookup'`; maps to `ConnectionType::Lookup`
+}
+/// Maps `Ingestion` to `Source`, `Egress` to `Sink` and `Reference` to `Lookup`.
+impl From<TableRole> for ConnectionType {
+    fn from(r: TableRole) -> Self {
+        match r {
+            TableRole::Ingestion => ConnectionType::Source,
+            TableRole::Egress => ConnectionType::Sink,
+            TableRole::Reference => ConnectionType::Lookup,
+        }
+    }
+}
+/// Maps `Source` to `Ingestion`, `Sink` to `Egress` and `Lookup` to `Reference`.
+impl From<ConnectionType> for TableRole {
+    fn from(c: ConnectionType) -> Self {
+        match c {
+            ConnectionType::Source => TableRole::Ingestion,
+            ConnectionType::Sink => TableRole::Egress,
+            ConnectionType::Lookup => TableRole::Reference,
+        }
+    }
+}
+
+/// Checks `adapter` against the fixed list of supported connector names and returns a
+/// plan error naming the adapter when it is not in that list.
+pub fn validate_adapter_availability(adapter: &str) -> Result<()> {
+    const SUPPORTED: [&str; 16] = [
+        "kafka",
+        "kinesis",
+        "filesystem",
+        "delta",
+        "iceberg",
+        "pulsar",
+        "nats",
+        "redis",
+        "mqtt",
+        "websocket",
+        "sse",
+        "nexmark",
+        "blackhole",
+        "lookup",
+        "memory",
+        "postgres",
+    ];
+    let known = SUPPORTED.contains(&adapter);
+    known.then_some(()).ok_or_else(|| DataFusionError::Plan(format!("Unknown adapter '{adapter}'")))
+}
+
+/// Applies connector-specific column adjustments and returns the columns.
+///
+/// For `delta` and `iceberg`, every timestamp column is forced to microsecond precision;
+/// all other adapters get their columns back unchanged.
+pub fn apply_adapter_specific_rules(adapter: &str, mut cols: Vec<ColumnDescriptor>) -> Vec<ColumnDescriptor> {
+    if matches!(adapter, "delta" | "iceberg") {
+        let timestamps = cols
+            .iter_mut()
+            .filter(|c| matches!(c.data_type(), DataType::Timestamp(_, _)));
+        timestamps.for_each(|c| { c.force_precision(TimeUnit::Microsecond); });
+    }
+    cols
+}
+/// Reads the table role from the `type` option; a missing `type` means `Ingestion`.
+pub fn deduce_role(options: &HashMap<String, String>) -> Result<TableRole> {
+    Ok(match options.get("type").map(String::as_str) {
+        None | Some("source") => TableRole::Ingestion,
+        Some("sink") => TableRole::Egress,
+        Some("lookup") => TableRole::Reference,
+        Some(other) => return plan_err!("Invalid role '{other}'"),
+    })
+}
+
+/// Serializes `adapter` (under the `adapter` key) and all `options` into one JSON object string.
+pub fn serialize_backend_params(adapter: &str, options: &HashMap<String, String>) -> Result<String> {
+    let mut payload = serde_json::Map::new();
+    payload.insert(
+        "adapter".to_string(),
+        serde_json::Value::String(adapter.to_string()),
+    );
+    // Insert only missing keys so an option named `adapter` cannot replace the reserved entry.
+    for (k, v) in options {
+        payload.entry(k.as_str()).or_insert_with(|| serde_json::Value::String(v.clone()));
+    }
+    serde_json::to_string(&payload).map_err(|e| DataFusionError::Plan(e.to_string()))
+}
diff --git a/src/sql/schema/temporal_pipeline_config.rs b/src/sql/schema/temporal_pipeline_config.rs
new file mode 100644
index 00000000..eb29f71c
--- /dev/null
+++ b/src/sql/schema/temporal_pipeline_config.rs
@@ -0,0 +1,57 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::time::Duration;
+
+use datafusion::common::{Result, plan_err};
+use datafusion::logical_expr::Expr;
+
+use super::column_descriptor::ColumnDescriptor;
+
+/// Event-time and watermark configuration for streaming tables.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
+pub struct TemporalPipelineConfig {
+    pub event_column: Option<String>, // name of the event-time column
+    pub watermark_strategy_column: Option<String>, // name of the column carrying the watermark
+    pub liveness_timeout: Option<Duration>, // idle timeout, exposed as `SourceTable::idle_time`
+}
+
+#[derive(Debug, Clone)]
+pub struct TemporalSpec {
+    pub time_field: String, // must name an existing column (checked by `resolve_temporal_logic`)
+    pub watermark_expr: Option<Expr>, // when set, the watermark column becomes `__watermark`
+}
+
+/// Derives the temporal configuration for `columns` from an optional [`TemporalSpec`].
+///
+/// Without a spec the default (empty) configuration is returned. With one, the time field
+/// becomes the event column, and the watermark column is `__watermark` when a watermark
+/// expression is present, otherwise the time field itself. Fails with a plan error when
+/// no column carries the time field's name.
+pub fn resolve_temporal_logic(
+    columns: &[ColumnDescriptor],
+    time_meta: Option<TemporalSpec>,
+) -> Result<TemporalPipelineConfig> {
+    let Some(TemporalSpec { time_field, watermark_expr }) = time_meta else {
+        return Ok(TemporalPipelineConfig::default());
+    };
+    if !columns.iter().any(|c| c.arrow_field().name() == time_field.as_str()) {
+        return plan_err!("Temporal field {} does not exist", time_field);
+    }
+    let mut config = TemporalPipelineConfig::default();
+    config.watermark_strategy_column = Some(match watermark_expr {
+        Some(_) => "__watermark".to_string(),
+        None => time_field.clone(),
+    });
+    config.event_column = Some(time_field);
+    Ok(config)
+}

From c830cbb0f037f6b6545054e2862ee75ed09e59fe Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 22 Mar 2026 02:25:22 +0800
Subject: [PATCH 11/44] update

---
 src/common/fs_schema.rs              | 444 +++++++++++++++++++++++++++
 src/common/mod.rs                    |  72 +++++
 src/sql/datastream/logical.rs        | 378 +++++++++++++++++++++++
 src/sql/datastream/mod.rs            |   1 +
 src/sql/physical/physical_planner.rs | 406 ++++++++++++++++++++++++
 5 files changed, 1301 insertions(+)
 create mode 100644 src/common/fs_schema.rs
 create mode 100644 src/common/mod.rs
 create mode 100644 src/sql/datastream/logical.rs
 create mode 100644 src/sql/datastream/mod.rs
 create mode 100644 src/sql/physical/physical_planner.rs

diff --git a/src/common/fs_schema.rs b/src/common/fs_schema.rs
new file mode 100644
index 00000000..e9ce6586
--- /dev/null
+++ b/src/common/fs_schema.rs
@@ -0,0 +1,444 @@
+//! FunctionStream table/stream schema: Arrow [`Schema`] plus timestamp index and optional key columns.
+//!
+//! [`Schema`]: datafusion::arrow::datatypes::Schema
+
+use datafusion::arrow::array::builder::{ArrayBuilder, make_builder};
+use datafusion::arrow::array::{RecordBatch, TimestampNanosecondArray};
+use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit};
+use datafusion::arrow::error::ArrowError;
+use datafusion::common::{DataFusionError, Result as DFResult};
+use std::sync::Arc;
+use std::time::SystemTime;
+use arrow::compute::{filter_record_batch, lexsort_to_indices, partition, take, SortColumn};
+use arrow::compute::kernels::cmp::gt_eq;
+use arrow::compute::kernels::numeric::div;
+use arrow::row::SortField;
+use arrow_array::{PrimitiveArray, UInt64Array};
+use arrow_array::types::UInt64Type;
+use protocol::grpc::api;
+use super::{to_nanos, TIMESTAMP_FIELD};
+use std::ops::Range;
+use crate::common::converter::Converter;
+
+pub type FsSchemaRef = Arc<FsSchema>;
+
+/// An Arrow schema annotated with the position of its timestamp column and, optionally,
+/// the columns that act as keys.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub struct FsSchema {
+    /// The underlying Arrow schema.
+    pub schema: Arc<Schema>,
+    /// Index into `schema` of the timestamp column.
+    pub timestamp_index: usize,
+    /// Indices of the key columns; `None` for an unkeyed schema. Exposed via `storage_keys`.
+    key_indices: Option<Vec<usize>>,
+    /// If defined, these indices are used for routing (i.e., which subtask gets which piece of data)
+    /// `routing_keys` falls back to `key_indices` when this is `None`.
+    routing_key_indices: Option<Vec<usize>>,
+}
+
+impl TryFrom<api::FsSchema> for FsSchema {
+    type Error = DataFusionError;
+
+    /// Decodes the wire representation of a schema.
+    ///
+    /// `key_indices` / `routing_key_indices` are only honoured when the matching
+    /// `has_keys` / `has_routing_keys` flag is set; otherwise the field becomes `None`.
+    ///
+    /// # Errors
+    /// Returns [`DataFusionError::Plan`] when `arrow_schema` cannot be deserialized, or
+    /// when the timestamp index or any honoured key index does not refer to a field of
+    /// the decoded schema.
+    fn try_from(schema_proto: api::FsSchema) -> Result<Self, DataFusionError> {
+        let schema: Schema = serde_json::from_str(&schema_proto.arrow_schema)
+            .map_err(|e| DataFusionError::Plan(format!("Invalid arrow schema: {e}")))?;
+        let field_count = schema.fields().len();
+
+        // Indices arrive over the wire; reject out-of-range values here rather than
+        // letting them panic later when a column or field is indexed.
+        let checked = |kind: &str, index: usize| -> Result<usize, DataFusionError> {
+            if index < field_count {
+                Ok(index)
+            } else {
+                Err(DataFusionError::Plan(format!(
+                    "Invalid {kind} index {index}: schema has only {field_count} fields"
+                )))
+            }
+        };
+
+        let timestamp_index = checked("timestamp", schema_proto.timestamp_index as usize)?;
+
+        let key_indices = schema_proto
+            .has_keys
+            .then(|| {
+                schema_proto
+                    .key_indices
+                    .iter()
+                    .map(|index| checked("key", *index as usize))
+                    .collect::<Result<Vec<_>, _>>()
+            })
+            .transpose()?;
+
+        let routing_key_indices = schema_proto
+            .has_routing_keys
+            .then(|| {
+                schema_proto
+                    .routing_key_indices
+                    .iter()
+                    .map(|index| checked("routing key", *index as usize))
+                    .collect::<Result<Vec<_>, _>>()
+            })
+            .transpose()?;
+
+        Ok(Self {
+            schema: Arc::new(schema),
+            timestamp_index,
+            key_indices,
+            routing_key_indices,
+        })
+    }
+}
+
+impl From<FsSchema> for api::FsSchema {
+    /// Encodes a schema for the wire: the Arrow schema as JSON, plus each optional index
+    /// list as a presence flag and a (possibly empty) vector.
+    ///
+    /// # Panics
+    /// Panics if the Arrow schema cannot be serialized to JSON.
+    fn from(schema: FsSchema) -> Self {
+        let FsSchema {
+            schema: arrow,
+            timestamp_index,
+            key_indices,
+            routing_key_indices,
+        } = schema;
+
+        let arrow_schema = serde_json::to_string(arrow.as_ref()).unwrap();
+
+        // An absent list is sent as `(false, [])`; a present one as `(true, indices)`.
+        let to_wire = |indices: Option<Vec<usize>>| -> (bool, Vec<u32>) {
+            match indices {
+                Some(list) => (true, list.into_iter().map(|index| index as u32).collect()),
+                None => (false, Vec::new()),
+            }
+        };
+        let (has_keys, key_indices) = to_wire(key_indices);
+        let (has_routing_keys, routing_key_indices) = to_wire(routing_key_indices);
+
+        Self {
+            arrow_schema,
+            timestamp_index: timestamp_index as u32,
+            key_indices,
+            has_keys,
+            routing_key_indices,
+            has_routing_keys,
+        }
+    }
+}
+
+impl FsSchema {
+    /// Assembles a schema from its parts without any validation: the indices are stored
+    /// exactly as given.
+    pub fn new(
+        schema: Arc<Schema>,
+        timestamp_index: usize,
+        key_indices: Option<Vec<usize>>,
+        routing_key_indices: Option<Vec<usize>>,
+    ) -> Self {
+        Self {
+            schema,
+            timestamp_index,
+            key_indices,
+            routing_key_indices,
+        }
+    }
+    /// Creates a schema with neither key nor routing-key columns.
+    pub fn new_unkeyed(schema: Arc<Schema>, timestamp_index: usize) -> Self {
+        Self::new(schema, timestamp_index, None, None)
+    }
+    /// Creates a schema keyed on `key_indices`, with no separate routing keys.
+    pub fn new_keyed(schema: Arc<Schema>, timestamp_index: usize, key_indices: Vec<usize>) -> Self {
+        Self::new(schema, timestamp_index, Some(key_indices), None)
+    }
+
+    /// Builds a schema from plain fields, appending a non-nullable nanosecond
+    /// [`TIMESTAMP_FIELD`] column when no field already carries that name.
+    ///
+    /// The result is created through [`Self::from_schema_keys`] with an empty key list,
+    /// so its key set is `Some(vec![])` rather than `None`.
+    pub fn from_fields(mut fields: Vec<Field>) -> Self {
+        let has_timestamp = fields.iter().any(|field| field.name() == TIMESTAMP_FIELD);
+        if !has_timestamp {
+            let timestamp_type = DataType::Timestamp(TimeUnit::Nanosecond, None);
+            fields.push(Field::new(TIMESTAMP_FIELD, timestamp_type, false));
+        }
+
+        let schema = Arc::new(Schema::new(fields));
+        // Cannot fail: a timestamp column is guaranteed to be present at this point.
+        Self::from_schema_keys(schema, vec![]).unwrap()
+    }
+
+    /// Wraps `schema` as an unkeyed schema, locating the timestamp column by name.
+    ///
+    /// # Errors
+    /// Returns a planning error when `schema` has no [`TIMESTAMP_FIELD`] column.
+    pub fn from_schema_unkeyed(schema: Arc<Schema>) -> DFResult<Self> {
+        let Some((timestamp_index, _)) = schema.column_with_name(TIMESTAMP_FIELD) else {
+            return Err(DataFusionError::Plan(format!(
+                "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}"
+            )));
+        };
+
+        Ok(Self::new_unkeyed(schema, timestamp_index))
+    }
+
+    /// Wraps `schema` as a schema keyed on `key_indices`, locating the timestamp column
+    /// by name. The key indices themselves are stored as given.
+    ///
+    /// # Errors
+    /// Returns a planning error when `schema` has no [`TIMESTAMP_FIELD`] column.
+    pub fn from_schema_keys(schema: Arc<Schema>, key_indices: Vec<usize>) -> DFResult<Self> {
+        let Some((timestamp_index, _)) = schema.column_with_name(TIMESTAMP_FIELD) else {
+            return Err(DataFusionError::Plan(format!(
+                "no {TIMESTAMP_FIELD} field in schema, schema is {schema:?}"
+            )));
+        };
+
+        Ok(Self::new_keyed(schema, timestamp_index, key_indices))
+    }
+
+    /// Returns a new Arrow schema made of this schema's fields minus the one at
+    /// `timestamp_index`.
+    pub fn schema_without_timestamp(&self) -> Schema {
+        // The builder is seeded from the field list only.
+        let mut builder = SchemaBuilder::from(self.schema.fields());
+        builder.remove(self.timestamp_index);
+        builder.finish()
+    }
+
+    /// Removes the column at `timestamp_index` from `batch` in place.
+    pub fn remove_timestamp_column(&self, batch: &mut RecordBatch) {
+        batch.remove_column(self.timestamp_index);
+    }
+
+    /// Creates one array builder per field, in schema order.
+    pub fn builders(&self) -> Vec<Box<dyn ArrayBuilder>> {
+        self.schema
+            .fields
+            .iter()
+            // 8 is the initial capacity handed to each builder.
+            .map(|f| make_builder(f.data_type(), 8))
+            .collect()
+    }
+
+    /// Borrows the timestamp column of `batch` as a nanosecond timestamp array.
+    ///
+    /// # Panics
+    /// Panics if the column at `timestamp_index` is not a [`TimestampNanosecondArray`].
+    pub fn timestamp_column<'a>(&self, batch: &'a RecordBatch) -> &'a TimestampNanosecondArray {
+        batch
+            .column(self.timestamp_index)
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .unwrap()
+    }
+
+    /// Reports whether [`Self::routing_keys`] yields a non-empty list of indices.
+    pub fn has_routing_keys(&self) -> bool {
+        self.routing_keys().is_some_and(|keys| !keys.is_empty())
+    }
+
+    /// Returns the indices used for routing: the dedicated routing keys when set,
+    /// otherwise the storage keys (which may themselves be absent).
+    pub fn routing_keys(&self) -> Option<&Vec<usize>> {
+        match &self.routing_key_indices {
+            Some(routing) => Some(routing),
+            None => self.key_indices.as_ref(),
+        }
+    }
+
+    /// Returns the key column indices, ignoring any routing keys; `None` when unkeyed.
+    pub fn storage_keys(&self) -> Option<&Vec<usize>> {
+        self.key_indices.as_ref()
+    }
+
+    /// Drops late rows: keeps only the rows of `batch` whose timestamp is greater than or
+    /// equal to `cutoff`. With no cutoff the batch is returned untouched.
+    ///
+    /// # Errors
+    /// Fails when the timestamp column is not a nanosecond timestamp array, or when the
+    /// comparison / filter kernels fail.
+    pub fn filter_by_time(
+        &self,
+        batch: RecordBatch,
+        cutoff: Option<SystemTime>,
+    ) -> Result<RecordBatch, ArrowError> {
+        match cutoff {
+            // No watermark yet, so nothing can be late.
+            None => Ok(batch),
+            Some(watermark) => {
+                let timestamps = batch
+                    .column(self.timestamp_index)
+                    .as_any()
+                    .downcast_ref::<TimestampNanosecondArray>();
+                let Some(timestamps) = timestamps else {
+                    return Err(ArrowError::CastError(
+                        format!("failed to downcast column {} of {:?} to timestamp. Schema is supposed to be {:?}",
+                                self.timestamp_index, batch, self.schema)));
+                };
+
+                let threshold = TimestampNanosecondArray::new_scalar(to_nanos(watermark) as i64);
+                let is_on_time = gt_eq(timestamps, &threshold)?;
+                filter_record_batch(&batch, &is_on_time)
+            }
+        }
+    }
+
+    /// Collects the columns of `batch` to sort by: the key columns in key order,
+    /// followed by the timestamp column when `with_timestamp` is set. All use default
+    /// sort options.
+    pub fn sort_columns(&self, batch: &RecordBatch, with_timestamp: bool) -> Vec<SortColumn> {
+        let key_columns = self.key_indices.iter().flatten().copied();
+        let timestamp_column = with_timestamp.then_some(self.timestamp_index);
+
+        key_columns
+            .chain(timestamp_column)
+            .map(|index| SortColumn {
+                values: batch.column(index).clone(),
+                options: None,
+            })
+            .collect()
+    }
+
+    /// Returns the row-format sort fields for the key columns, followed by the
+    /// timestamp column when `with_timestamp` is set.
+    pub fn sort_fields(&self, with_timestamp: bool) -> Vec<SortField> {
+        let mut indices: Vec<usize> = self.key_indices.clone().unwrap_or_default();
+        if with_timestamp {
+            indices.push(self.timestamp_index);
+        }
+        self.sort_fields_by_indices(&indices)
+    }
+
+    /// Maps each schema index to a [`SortField`] of that column's data type,
+    /// preserving the order of `indices`.
+    fn sort_fields_by_indices(&self, indices: &[usize]) -> Vec<SortField> {
+        let mut sort_fields = Vec::with_capacity(indices.len());
+        for &index in indices {
+            let data_type = self.schema.field(index).data_type().clone();
+            sort_fields.push(SortField::new(data_type));
+        }
+        sort_fields
+    }
+
+    /// Builds a [`Converter`] over the fields returned by `sort_fields(with_timestamp)`,
+    /// i.e. the key columns and optionally the timestamp.
+    pub fn converter(&self, with_timestamp: bool) -> Result<Converter, ArrowError> {
+        Converter::new(self.sort_fields(with_timestamp))
+    }
+
+    /// Builds a [`Converter`] over the value columns: every column that is not a key and
+    /// not the column at `generation_index`; the timestamp column is also left out
+    /// unless `with_timestamp` is set.
+    ///
+    /// The unkeyed case used to remove entries from an index vector by position, twice
+    /// in a row. After the first removal the positions shift, so with
+    /// `generation_index <= timestamp_index` the wrong column was dropped (or the call
+    /// panicked when the timestamp was the last column). Both cases now filter by value.
+    ///
+    /// # Errors
+    /// Propagates any error from [`Converter::new`].
+    pub fn value_converter(
+        &self,
+        with_timestamp: bool,
+        generation_index: usize,
+    ) -> Result<Converter, ArrowError> {
+        // An unkeyed schema simply has no key columns to exclude.
+        let keys = self.key_indices.as_deref().unwrap_or(&[]);
+        let indices: Vec<usize> = (0..self.schema.fields().len())
+            .filter(|index| {
+                *index != generation_index
+                    && !keys.contains(index)
+                    && (with_timestamp || *index != self.timestamp_index)
+            })
+            .collect();
+        Converter::new(self.sort_fields_by_indices(&indices))
+    }
+
+    /// Lists the indices of the value columns: all columns that are not keys, minus the
+    /// timestamp column unless `with_timestamp` is set.
+    pub fn value_indices(&self, with_timestamp: bool) -> Vec<usize> {
+        let all_columns = 0..self.schema.fields().len();
+
+        let Some(keys) = &self.key_indices else {
+            let mut indices: Vec<usize> = all_columns.collect();
+            if !with_timestamp {
+                // Position equals value here, so removing by position drops the timestamp.
+                indices.remove(self.timestamp_index);
+            }
+            return indices;
+        };
+
+        all_columns
+            .filter(|index| {
+                let is_hidden_timestamp = !with_timestamp && *index == self.timestamp_index;
+                !(keys.contains(index) || is_hidden_timestamp)
+            })
+            .collect()
+    }
+
+    /// Sorts the rows of `batch` by the key columns and, when `with_timestamp` is set,
+    /// by timestamp as the last sort column.
+    ///
+    /// When there is nothing to sort by (no keys, or an empty key list, and no
+    /// timestamp) the batch is returned unchanged.
+    ///
+    /// # Errors
+    /// Propagates failures of the sort and take kernels and of rebuilding the batch;
+    /// earlier versions panicked on the first two.
+    pub fn sort(
+        &self,
+        batch: RecordBatch,
+        with_timestamp: bool,
+    ) -> Result<RecordBatch, ArrowError> {
+        let sort_columns = self.sort_columns(&batch, with_timestamp);
+        // `lexsort_to_indices` rejects an empty column list, so short-circuit here.
+        if sort_columns.is_empty() {
+            return Ok(batch);
+        }
+
+        let sort_indices = lexsort_to_indices(&sort_columns, None)?;
+        let columns = batch
+            .columns()
+            .iter()
+            .map(|c| take(c, &sort_indices, None))
+            .collect::<Result<Vec<_>, _>>()?;
+
+        RecordBatch::try_new(batch.schema(), columns)
+    }
+
+    /// Splits an already-sorted `batch` into contiguous row ranges that share the same
+    /// routing-key values (and the same timestamp when `with_timestamp` is set).
+    ///
+    /// The whole batch is returned as a single range when the schema has no storage
+    /// keys and `with_timestamp` is false, or when there turn out to be no columns to
+    /// partition by (e.g. an empty key list). The latter used to be forwarded to the
+    /// partition kernel, which rejects an empty column list.
+    ///
+    /// # Errors
+    /// Propagates any error from the Arrow partition kernel.
+    pub fn partition(
+        &self,
+        batch: &RecordBatch,
+        with_timestamp: bool,
+    ) -> Result<Vec<Range<usize>>, ArrowError> {
+        #[allow(clippy::single_range_in_vec_init)]
+        let whole_batch = vec![0..batch.num_rows()];
+
+        if self.key_indices.is_none() && !with_timestamp {
+            return Ok(whole_batch);
+        }
+
+        let mut partition_columns = vec![];
+
+        if let Some(keys) = &self.routing_keys() {
+            partition_columns.extend(keys.iter().map(|index| batch.column(*index).clone()));
+        }
+        if with_timestamp {
+            partition_columns.push(batch.column(self.timestamp_index).clone());
+        }
+
+        // Nothing to compare rows by: every row belongs to the same partition.
+        if partition_columns.is_empty() {
+            return Ok(whole_batch);
+        }
+
+        Ok(partition(&partition_columns)?.ranges())
+    }
+
+    /// Returns `batch` projected onto its non-key columns; an unkeyed schema yields a
+    /// clone of the batch as is.
+    pub fn unkeyed_batch(&self, batch: &RecordBatch) -> Result<RecordBatch, ArrowError> {
+        let Some(keys) = self.key_indices.as_ref() else {
+            return Ok(batch.clone());
+        };
+
+        let kept: Vec<usize> = (0..batch.num_columns())
+            .filter(|index| !keys.contains(index))
+            .collect();
+        batch.project(&kept)
+    }
+
+    /// Returns an unkeyed schema containing only the non-key fields, with the timestamp
+    /// index recomputed for the reduced field list. An already unkeyed schema is cloned.
+    ///
+    /// # Errors
+    /// Fails when the remaining fields contain no [`TIMESTAMP_FIELD`] column.
+    pub fn schema_without_keys(&self) -> Result<Self, ArrowError> {
+        let Some(keys) = &self.key_indices else {
+            return Ok(self.clone());
+        };
+
+        let remaining: Vec<Field> = self
+            .schema
+            .fields()
+            .iter()
+            .enumerate()
+            .filter_map(|(index, field)| (!keys.contains(&index)).then(|| field.as_ref().clone()))
+            .collect();
+        let unkeyed_schema = Schema::new(remaining);
+        let timestamp_index = unkeyed_schema.index_of(TIMESTAMP_FIELD)?;
+
+        Ok(Self::new_unkeyed(Arc::new(unkeyed_schema), timestamp_index))
+    }
+
+    /// Returns a copy of this schema whose field list is replaced by `fields`, keeping
+    /// the Arrow metadata and the key / routing-key indices.
+    ///
+    /// # Errors
+    /// Fails when `fields` has no [`TIMESTAMP_FIELD`] column, or when it is too short
+    /// for the largest key or routing-key index to remain valid.
+    pub fn with_fields(&self, fields: Vec<FieldRef>) -> Result<Self, ArrowError> {
+        let metadata = self.schema.metadata.clone();
+        let schema = Arc::new(Schema::new_with_metadata(fields, metadata));
+        let timestamp_index = schema.index_of(TIMESTAMP_FIELD)?;
+
+        // Largest index referenced by either key set; 0 when neither has any entry.
+        let max_index = self
+            .key_indices
+            .iter()
+            .chain(self.routing_key_indices.iter())
+            .flatten()
+            .copied()
+            .max()
+            .unwrap_or(0);
+
+        // `index_of` succeeded, so there is at least one field and this comparison is
+        // the same as `len - 1 < max_index`.
+        let field_count = schema.fields.len();
+        if field_count <= max_index {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "expected at least {} fields, but were only {}",
+                max_index + 1,
+                field_count
+            )));
+        }
+
+        Ok(Self::new(
+            schema,
+            timestamp_index,
+            self.key_indices.clone(),
+            self.routing_key_indices.clone(),
+        ))
+    }
+
+    /// Returns a copy of this schema with `new_fields` appended after the existing
+    /// fields; see [`Self::with_fields`] for the validation applied.
+    pub fn with_additional_fields(
+        &self,
+        new_fields: impl Iterator<Item = Field>,
+    ) -> Result<Self, ArrowError> {
+        let combined: Vec<FieldRef> = self
+            .schema
+            .fields
+            .iter()
+            .cloned()
+            .chain(new_fields.map(Arc::new))
+            .collect();
+
+        self.with_fields(combined)
+    }
+}
+
+/// Maps every 64-bit hash in `hash` to the index of one of `n` servers by dividing the
+/// `u64` range into `n` equally sized, contiguous slices.
+///
+/// With a single server every hash maps to index 0. That case is handled separately
+/// because the slice size `u64::MAX / 1 + 1` does not fit in a `u64`.
+///
+/// # Errors
+/// Returns [`ArrowError::InvalidArgumentError`] when `n` is zero, and propagates any
+/// error from the division kernel.
+pub fn server_for_hash_array(
+    hash: &PrimitiveArray<UInt64Type>,
+    n: usize,
+) -> Result<PrimitiveArray<UInt64Type>, ArrowError> {
+    match n {
+        0 => Err(ArrowError::InvalidArgumentError(
+            "cannot assign hashes to zero servers".to_string(),
+        )),
+        // `unary` keeps the null buffer of the input, like the division below does.
+        1 => Ok(hash.unary::<_, UInt64Type>(|_| 0u64)),
+        _ => {
+            let range_size = u64::MAX / (n as u64) + 1;
+            let range_scalar = UInt64Array::new_scalar(range_size);
+            let division = div(hash, &range_scalar)?;
+            let result: &PrimitiveArray<UInt64Type> = division
+                .as_any()
+                .downcast_ref()
+                .expect("dividing UInt64 by UInt64 yields UInt64");
+            Ok(result.clone())
+        }
+    }
+}
diff --git a/src/common/mod.rs b/src/common/mod.rs
new file mode 100644
index 00000000..e3c103a2
--- /dev/null
+++ b/src/common/mod.rs
@@ -0,0 +1,72 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Shared core types and constants for FunctionStream (`crate::common`).
+//!
+//! Used by the runtime, SQL planner, coordinator, and other subsystems —
+//! analogous to `arroyo-types` + `arroyo-rpc` in Arroyo.
+
+pub mod arrow_ext;
+pub mod control;
+pub mod date;
+pub mod debezium;
+pub mod fs_schema;
+pub mod errors;
+pub mod formats;
+pub mod hash;
+pub mod message;
+pub mod operator_config;
+pub mod task_info;
+pub mod time_utils;
+pub mod worker;
+mod converter;
+
+// ── Re-exports from existing modules ──
+pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType};
+pub use date::{DatePart, DateTruncPrecision};
+pub use debezium::{Debezium, DebeziumOp, UpdatingData};
+pub use hash::{range_for_server, server_for_hash, HASH_SEEDS};
+pub use message::{ArrowMessage, CheckpointBarrier, SignalMessage, Watermark};
+pub use task_info::{ChainInfo, TaskInfo};
+pub use time_utils::{from_micros, from_millis, from_nanos, to_micros, to_millis, to_nanos};
+pub use worker::{MachineId, WorkerId};
+
+// ── Re-exports from new modules ──
+pub use control::{
+    CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp,
+    ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError,
+};
+pub use fs_schema::{FsSchema, FsSchemaRef};
+pub use errors::DataflowError;
+pub use formats::{BadData, Format, Framing, JsonFormat};
+pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
+
+// ── Well-known column names ──
+/// Name of the timestamp column; `FsSchema::from_schema_keys` and
+/// `FsSchema::from_schema_unkeyed` look this field up to locate the timestamp index.
+pub const TIMESTAMP_FIELD: &str = "_timestamp";
+/// NOTE(review): presumably the metadata column carried by updating streams — confirm
+/// at the use sites.
+pub const UPDATING_META_FIELD: &str = "_updating_meta";
+
+// ── Environment variables ──
+pub const JOB_ID_ENV: &str = "JOB_ID";
+pub const RUN_ID_ENV: &str = "RUN_ID";
+
+// ── Metric names ──
+pub const MESSAGES_RECV: &str = "fs_worker_messages_recv";
+pub const MESSAGES_SENT: &str = "fs_worker_messages_sent";
+pub const BYTES_RECV: &str = "fs_worker_bytes_recv";
+pub const BYTES_SENT: &str = "fs_worker_bytes_sent";
+pub const BATCHES_RECV: &str = "fs_worker_batches_recv";
+pub const BATCHES_SENT: &str = "fs_worker_batches_sent";
+pub const TX_QUEUE_SIZE: &str = "fs_worker_tx_queue_size";
+pub const TX_QUEUE_REM: &str = "fs_worker_tx_queue_rem";
+pub const DESERIALIZATION_ERRORS: &str = "fs_worker_deserialization_errors";
+
+pub const LOOKUP_KEY_INDEX_FIELD: &str = "__lookup_key_index";
diff --git a/src/sql/datastream/logical.rs b/src/sql/datastream/logical.rs
new file mode 100644
index 00000000..13560a3e
--- /dev/null
+++ b/src/sql/datastream/logical.rs
@@ -0,0 +1,378 @@
+use itertools::Itertools;
+
+use datafusion::arrow::datatypes::DataType;
+use petgraph::Direction;
+use petgraph::dot::Dot;
+use petgraph::graph::DiGraph;
+use std::collections::{HashMap, HashSet};
+use std::fmt::{Debug, Display, Formatter};
+use std::sync::Arc;
+use datafusion_proto::protobuf::ArrowType;
+use prost::Message;
+use strum::{Display, EnumString};
+use protocol::grpc::api;
+use crate::types::FsSchema;
+
+/// Kinds of operator that can appear in a logical operator chain.
+///
+/// The `strum` derives provide `Display` and `FromStr` implementations for the
+/// variants; `LogicalProgram::operator_names_by_id` relies on the former.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
+pub enum OperatorName {
+    ExpressionWatermark,
+    ArrowValue,
+    ArrowKey,
+    Projection,
+    AsyncUdf,
+    Join,
+    InstantJoin,
+    LookupJoin,
+    WindowFunction,
+    TumblingWindowAggregate,
+    SlidingWindowAggregate,
+    SessionWindowAggregate,
+    UpdatingAggregate,
+    // `OperatorChain::is_source` checks the first operator against this variant.
+    ConnectorSource,
+    // `OperatorChain::is_sink` checks the first operator against this variant.
+    ConnectorSink,
+}
+
+/// Kind of connection between two nodes of the logical graph.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
+pub enum LogicalEdgeType {
+    Forward,
+    Shuffle,
+    LeftJoin,
+    RightJoin,
+}
+
+impl Display for LogicalEdgeType {
+    /// Writes the arrow glyph that stands for this edge type.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let glyph = match self {
+            Self::Forward => "→",
+            Self::Shuffle => "⤨",
+            Self::LeftJoin => "-[left]⤨",
+            Self::RightJoin => "-[right]⤨",
+        };
+        f.write_str(glyph)
+    }
+}
+
+/// An edge of the logical graph: how records travel and the schema they carry.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct LogicalEdge {
+    pub edge_type: LogicalEdgeType,
+    pub schema: Arc<FsSchema>,
+}
+
+impl LogicalEdge {
+    /// Creates an edge of the given type, taking shared ownership of `schema`.
+    pub fn new(edge_type: LogicalEdgeType, schema: FsSchema) -> Self {
+        let schema = Arc::new(schema);
+        Self { edge_type, schema }
+    }
+
+    /// Currently builds exactly the same edge as [`Self::new`].
+    pub fn project_all(edge_type: LogicalEdgeType, schema: FsSchema) -> Self {
+        Self::new(edge_type, schema)
+    }
+}
+
+/// One operator inside an [`OperatorChain`].
+#[derive(Clone, Debug)]
+pub struct ChainedLogicalOperator {
+    /// Identifier of the operator; used as the key in the per-operator maps built by
+    /// `LogicalProgram`.
+    pub operator_id: String,
+    /// Which kind of operator this is.
+    pub operator_name: OperatorName,
+    /// Opaque, serialized operator configuration.
+    pub operator_config: Vec<u8>,
+}
+
+/// A sequence of operators that make up a single logical node.
+///
+/// `edges` holds the schemas paired with the operators by position; see [`Self::iter`].
+#[derive(Clone, Debug)]
+pub struct OperatorChain {
+    pub(crate) operators: Vec<ChainedLogicalOperator>,
+    pub(crate) edges: Vec<Arc<FsSchema>>,
+}
+
+impl OperatorChain {
+    /// Creates a chain holding a single operator and no edges.
+    pub fn new(operator: ChainedLogicalOperator) -> Self {
+        Self {
+            operators: vec![operator],
+            edges: vec![],
+        }
+    }
+
+    /// Iterates over the operators, each paired with the edge schema at the same
+    /// position, or `None` once the edges run out.
+    ///
+    /// # Panics
+    /// Panics during iteration if there are more edges than operators.
+    pub fn iter(
+        &self,
+    ) -> impl Iterator<Item = (&ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
+        self.operators
+            .iter()
+            .zip_longest(self.edges.iter())
+            .map(|e| e.left_and_right())
+            .map(|(l, r)| (l.unwrap(), r))
+    }
+
+    /// Same pairing as [`Self::iter`], but with mutable access to the operators.
+    ///
+    /// # Panics
+    /// Panics during iteration if there are more edges than operators.
+    pub fn iter_mut(
+        &mut self,
+    ) -> impl Iterator<Item = (&mut ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
+        self.operators
+            .iter_mut()
+            .zip_longest(self.edges.iter())
+            .map(|e| e.left_and_right())
+            .map(|(l, r)| (l.unwrap(), r))
+    }
+
+    /// Returns the first operator of the chain.
+    ///
+    /// # Panics
+    /// Panics if the chain is empty.
+    pub fn first(&self) -> &ChainedLogicalOperator {
+        &self.operators[0]
+    }
+
+    /// Number of operators in the chain.
+    pub fn len(&self) -> usize {
+        self.operators.len()
+    }
+
+    /// Whether the chain holds no operators.
+    pub fn is_empty(&self) -> bool {
+        self.operators.is_empty()
+    }
+
+    /// Whether the chain starts with a connector source. An empty chain is not a
+    /// source (this used to panic).
+    pub fn is_source(&self) -> bool {
+        self.starts_with(OperatorName::ConnectorSource)
+    }
+
+    /// Whether the chain starts with a connector sink. An empty chain is not a sink
+    /// (this used to panic).
+    pub fn is_sink(&self) -> bool {
+        self.starts_with(OperatorName::ConnectorSink)
+    }
+
+    /// Checks the kind of the first operator without indexing into a possibly empty chain.
+    fn starts_with(&self, name: OperatorName) -> bool {
+        self.operators
+            .first()
+            .is_some_and(|op| op.operator_name == name)
+    }
+}
+
+/// A node of the logical graph: a chain of operators run with a given parallelism.
+#[derive(Clone)]
+pub struct LogicalNode {
+    pub node_id: u32,
+    pub description: String,
+    pub operator_chain: OperatorChain,
+    pub parallelism: usize,
+}
+
+impl LogicalNode {
+    /// Creates a node whose chain consists of exactly one operator and no edges.
+    pub fn single(
+        id: u32,
+        operator_id: String,
+        name: OperatorName,
+        config: Vec<u8>,
+        description: String,
+        parallelism: usize,
+    ) -> Self {
+        let only_operator = ChainedLogicalOperator {
+            operator_id,
+            operator_name: name,
+            operator_config: config,
+        };
+
+        Self {
+            node_id: id,
+            description,
+            operator_chain: OperatorChain::new(only_operator),
+            parallelism,
+        }
+    }
+}
+
+impl Display for LogicalNode {
+    /// Prints the node's human-readable description.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str(&self.description)
+    }
+}
+
+impl Debug for LogicalNode {
+    /// Prints the operator ids joined by ` -> `, followed by the parallelism in brackets.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let operator_ids: Vec<&str> = self
+            .operator_chain
+            .operators
+            .iter()
+            .map(|op| op.operator_id.as_str())
+            .collect();
+        let chain = operator_ids.join(" -> ");
+
+        write!(f, "{}[{}]", chain, self.parallelism)
+    }
+}
+
+pub type LogicalGraph = DiGraph<LogicalNode, LogicalEdge>;
+
+/// A rewrite pass over the logical graph.
+pub trait Optimizer {
+    /// Performs one rewrite step; the return value tells [`Self::optimize`] whether to
+    /// run another step.
+    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool;
+
+    /// Repeats [`Self::optimize_once`] until it returns `false`.
+    fn optimize(&self, plan: &mut LogicalGraph) {
+        while self.optimize_once(plan) {}
+    }
+}
+
+/// Description of a UDF backed by a dynamic library; convertible to and from
+/// `api::DylibUdfConfig`.
+#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd)]
+pub struct DylibUdfConfig {
+    /// Location of the dynamic library.
+    pub dylib_path: String,
+    /// Arrow types of the arguments, in order.
+    pub arg_types: Vec<DataType>,
+    /// Arrow type of the result.
+    pub return_type: DataType,
+    /// NOTE(review): presumably marks an aggregate (as opposed to scalar) UDF — confirm.
+    pub aggregate: bool,
+    /// NOTE(review): presumably marks a UDF that runs asynchronously — confirm.
+    pub is_async: bool,
+}
+
+/// Description of a UDF defined in Python.
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+pub struct PythonUdfConfig {
+    /// Arrow types of the arguments, in order.
+    pub arg_types: Vec<DataType>,
+    /// Arrow type of the result.
+    pub return_type: DataType,
+    /// Name of the function.
+    pub name: Arc<String>,
+    /// NOTE(review): presumably the Python source of the function — confirm.
+    pub definition: Arc<String>,
+}
+
+/// Program-wide configuration: the UDFs available to a [`LogicalProgram`].
+#[derive(Clone, Debug, Default)]
+pub struct ProgramConfig {
+    /// Dynamic-library UDFs by string key (presumably the UDF name — confirm).
+    pub udf_dylibs: HashMap<String, DylibUdfConfig>,
+    /// Python UDFs by string key (presumably the UDF name — confirm).
+    pub python_udfs: HashMap<String, PythonUdfConfig>,
+}
+
+/// A complete logical dataflow: the operator graph plus its program-wide configuration.
+#[derive(Clone, Debug, Default)]
+pub struct LogicalProgram {
+    /// Directed graph of [`LogicalNode`]s connected by [`LogicalEdge`]s.
+    pub graph: LogicalGraph,
+    /// UDF definitions referenced by the program.
+    pub program_config: ProgramConfig,
+}
+
+impl LogicalProgram {
+    /// Bundles a graph with its program configuration.
+    pub fn new(graph: LogicalGraph, program_config: ProgramConfig) -> Self {
+        Self {
+            graph,
+            program_config,
+        }
+    }
+
+    /// Runs `optimizer` on the graph until it reports no further change.
+    pub fn optimize(&mut self, optimizer: &dyn Optimizer) {
+        optimizer.optimize(&mut self.graph);
+    }
+
+    /// Overrides the parallelism of every node whose id appears in `overrides`.
+    pub fn update_parallelism(&mut self, overrides: &HashMap<u32, usize>) {
+        self.graph.node_weights_mut().for_each(|node| {
+            if let Some(&parallelism) = overrides.get(&node.node_id) {
+                node.parallelism = parallelism;
+            }
+        });
+    }
+
+    /// Renders the graph in Graphviz DOT syntax.
+    pub fn dot(&self) -> String {
+        let rendered = Dot::with_config(&self.graph, &[]);
+        format!("{:?}", rendered)
+    }
+
+    /// Total number of tasks, i.e. the sum of all node parallelisms.
+    pub fn task_count(&self) -> usize {
+        self.graph
+            .node_weights()
+            .fold(0, |total, node| total + node.parallelism)
+    }
+
+    /// Ids of the nodes that have no incoming edge.
+    pub fn sources(&self) -> HashSet<u32> {
+        self.graph
+            .externals(Direction::Incoming)
+            .map(|index| self.graph[index].node_id)
+            .collect()
+    }
+
+    /// Parallelism of the enclosing node, keyed by operator id.
+    pub fn tasks_per_operator(&self) -> HashMap<String, usize> {
+        self.graph
+            .node_weights()
+            .flat_map(|node| {
+                node.operator_chain
+                    .operators
+                    .iter()
+                    .map(move |op| (op.operator_id.clone(), node.parallelism))
+            })
+            .collect()
+    }
+
+    /// Operator name (as rendered by `Display`), keyed by operator id.
+    pub fn operator_names_by_id(&self) -> HashMap<String, String> {
+        self.graph
+            .node_weights()
+            .flat_map(|node| node.operator_chain.operators.iter())
+            .map(|op| (op.operator_id.clone(), op.operator_name.to_string()))
+            .collect()
+    }
+
+    /// Parallelism keyed by node id.
+    pub fn tasks_per_node(&self) -> HashMap<u32, usize> {
+        self.graph
+            .node_weights()
+            .map(|node| (node.node_id, node.parallelism))
+            .collect()
+    }
+
+    /// Set of feature labels for the operators used by the program. Watermark,
+    /// key/value and projection operators contribute no label.
+    pub fn features(&self) -> HashSet<String> {
+        self.graph
+            .node_weights()
+            .flat_map(|node| node.operator_chain.operators.iter())
+            .filter_map(|op| {
+                let label = match op.operator_name {
+                    OperatorName::ExpressionWatermark
+                    | OperatorName::ArrowValue
+                    | OperatorName::ArrowKey
+                    | OperatorName::Projection => return None,
+                    OperatorName::AsyncUdf => "async-udf",
+                    OperatorName::Join => "join-with-expiration",
+                    OperatorName::InstantJoin => "windowed-join",
+                    OperatorName::WindowFunction => "sql-window-function",
+                    OperatorName::LookupJoin => "lookup-join",
+                    OperatorName::TumblingWindowAggregate => "sql-tumbling-window-aggregate",
+                    OperatorName::SlidingWindowAggregate => "sql-sliding-window-aggregate",
+                    OperatorName::SessionWindowAggregate => "sql-session-window-aggregate",
+                    OperatorName::UpdatingAggregate => "sql-updating-aggregate",
+                    OperatorName::ConnectorSource => "connector-source",
+                    OperatorName::ConnectorSink => "connector-sink",
+                };
+                Some(label.to_string())
+            })
+            .collect()
+    }
+}
+
+
+impl From<DylibUdfConfig> for api::DylibUdfConfig {
+    /// Encodes the config for the wire; every Arrow type becomes protobuf-encoded bytes.
+    ///
+    /// # Panics
+    /// Panics if a data type has no protobuf representation.
+    fn from(from: DylibUdfConfig) -> Self {
+        let encode = |data_type: &DataType| -> Vec<u8> {
+            ArrowType::try_from(data_type)
+                .expect("unsupported data type")
+                .encode_to_vec()
+        };
+
+        let arg_types = from.arg_types.iter().map(&encode).collect();
+        let return_type = encode(&from.return_type);
+
+        api::DylibUdfConfig {
+            dylib_path: from.dylib_path,
+            arg_types,
+            return_type,
+            aggregate: from.aggregate,
+            is_async: from.is_async,
+        }
+    }
+}
+
+impl From<api::DylibUdfConfig> for DylibUdfConfig {
+    /// Decodes the wire form of the config; every Arrow type is read from its
+    /// protobuf-encoded bytes.
+    ///
+    /// # Panics
+    /// Panics with "invalid arrow type" if any argument type or the return type cannot
+    /// be decoded. The return type previously used a bare `unwrap`, which gave a panic
+    /// message unlike the one produced for argument types.
+    fn from(from: api::DylibUdfConfig) -> Self {
+        // One decoding path for argument and return types keeps the failure mode uniform.
+        let decode = |bytes: &[u8]| -> DataType {
+            let proto = ArrowType::decode(bytes).expect("invalid arrow type");
+            DataType::try_from(&proto).expect("invalid arrow type")
+        };
+
+        let arg_types = from
+            .arg_types
+            .iter()
+            .map(|bytes| decode(bytes.as_slice()))
+            .collect();
+        let return_type = decode(from.return_type.as_slice());
+
+        DylibUdfConfig {
+            dylib_path: from.dylib_path,
+            arg_types,
+            return_type,
+            aggregate: from.aggregate,
+            is_async: from.is_async,
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/sql/datastream/mod.rs b/src/sql/datastream/mod.rs
new file mode 100644
index 00000000..82d25f24
--- /dev/null
+++ b/src/sql/datastream/mod.rs
@@ -0,0 +1 @@
+pub mod logical;
diff --git a/src/sql/physical/physical_planner.rs b/src/sql/physical/physical_planner.rs
new file mode 100644
index 00000000..963fa76f
--- /dev/null
+++ b/src/sql/physical/physical_planner.rs
@@ -0,0 +1,406 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::thread;
+use std::time::Duration;
+
+use datafusion::arrow::datatypes::IntervalMonthDayNanoType;
+use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor};
+use datafusion::common::{
+    DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, Spans, plan_err,
+};
+use datafusion::execution::context::SessionState;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::functions::datetime::date_bin;
+use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNode};
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner};
+use datafusion_proto::protobuf::{PhysicalExprNode, PhysicalPlanNode};
+use datafusion_proto::{
+    physical_plan::AsExecutionPlan,
+    protobuf::{AggregateMode, physical_plan_node::PhysicalPlanType},
+};
+use petgraph::graph::{DiGraph, NodeIndex};
+use prost::Message;
+use tokio::runtime::Builder;
+use tokio::sync::oneshot;
+
+use async_trait::async_trait;
+use datafusion_common::TableReference;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
+
+use crate::sql::datastream::logical::{LogicalEdge, LogicalGraph, LogicalNode};
+use crate::sql::physical::{
+    DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec,
+};
+use crate::sql::logical_node::debezium::{
+    DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME,
+};
+use crate::sql::logical_node::key_calculation::KeyCalculationExtension;
+use crate::sql::logical_node::{NodeWithIncomingEdges, StreamExtension};
+use crate::sql::schema::utils::add_timestamp_field_arrow;
+use crate::sql::schema::StreamSchemaProvider;
+use crate::types::{FsSchema, FsSchemaRef};
+
+/// Name of a graph node: a table reference tagged with the node's role.
+#[derive(Debug, Eq, Hash, PartialEq)]
+pub(crate) enum NamedNode {
+    Source(TableReference),
+    Watermark(TableReference),
+    RemoteTable(TableReference),
+    Sink(TableReference),
+}
+
+pub(crate) struct PlanToGraphVisitor<'a> {
+    graph: DiGraph<LogicalNode, LogicalEdge>, // graph being built; returned by `into_graph`
+    output_schemas: HashMap<NodeIndex, FsSchemaRef>, // output schema recorded per planned node
+    named_nodes: HashMap<NamedNode, NodeIndex>, // named extensions that were already planned
+    traversal: Vec<Vec<NodeIndex>>, // stack of frames collecting the inputs of open extensions
+    planner: Planner<'a>, // handed to each extension's `plan_node`
+}
+
+impl<'a> PlanToGraphVisitor<'a> {
+    pub fn new(schema_provider: &'a StreamSchemaProvider, session_state: &'a SessionState) -> Self {
+        Self {
+            planner: Planner::new(schema_provider, session_state),
+            traversal: Vec::new(), // no frame is open before a plan is visited
+            named_nodes: HashMap::new(),
+            output_schemas: HashMap::new(),
+            graph: DiGraph::default(), // empty graph; nodes are added while visiting plans
+        }
+    }
+}
+
+pub(crate) struct Planner<'a> {
+    schema_provider: &'a StreamSchemaProvider, // passed along when decoding physical plan nodes
+    planner: DefaultPhysicalPlanner, // DataFusion planner with `FsExtensionPlanner` registered
+    session_state: &'a SessionState, // session used for every planning call
+}
+
+impl<'a> Planner<'a> {
+    pub(crate) fn new(
+        schema_provider: &'a StreamSchemaProvider,
+        session_state: &'a SessionState,
+    ) -> Self {
+        let extensions: Vec<Arc<dyn ExtensionPlanner + Send + Sync>> =
+            vec![Arc::new(FsExtensionPlanner {})]; // the only registered extension planner
+        Self {
+            schema_provider,
+            planner: DefaultPhysicalPlanner::with_extension_planners(extensions),
+            session_state,
+        }
+    }
+
+    /// Plans `plan` synchronously: the async planner future is driven to completion on a
+    /// scoped thread with its own current-thread Tokio runtime; the scope joins it first.
+    pub(crate) fn sync_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
+        let fut = self.planner.create_physical_plan(plan, self.session_state);
+        let (tx, mut rx) = oneshot::channel();
+        let spawned = thread::scope(|s| {
+            let builder = thread::Builder::new();
+            let builder = if cfg!(debug_assertions) {
+                builder.stack_size(10_000_000)
+            } else {
+                builder
+            };
+            let handle = builder.spawn_scoped(s, move || {
+                let runtime = Builder::new_current_thread().enable_all().build();
+                let plan = runtime.map_err(DataFusionError::from).and_then(|rt| rt.block_on(fut));
+                // `rx` outlives the scope, so the send cannot fail; nothing to handle.
+                let _ = tx.send(plan);
+            });
+            handle.map(|_| ())
+        });
+        spawned.map_err(DataFusionError::from)?;
+        rx.try_recv()
+            .map_err(|e| DataFusionError::Execution(format!("planner thread failed: {e}")))?
+    }
+
+    /// Creates a physical expression for `expr` resolved against `input_dfschema`.
+    pub(crate) fn create_physical_expr(
+        &self,
+        expr: &Expr,
+        input_dfschema: &DFSchema,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        self.planner.create_physical_expr(expr, input_dfschema, self.session_state)
+    }
+
+    pub(crate) fn serialize_as_physical_expr(
+        &self,
+        expr: &Expr,
+        schema: &DFSchema,
+    ) -> Result<Vec<u8>> {
+        let codec = DefaultPhysicalExtensionCodec {};
+        let node = serialize_physical_expr(&self.create_physical_expr(expr, schema)?, &codec)?;
+        Ok(node.encode_to_vec()) // bytes of the serialized physical expression
+    }
+
+    /// Plans `aggregate` and splits the resulting final aggregate from its partial
+    /// input; the final half is re-rooted on an `FsMemExec` named "partial".
+    pub(crate) fn split_physical_plan(
+        &self,
+        key_indices: Vec<usize>,
+        aggregate: &LogicalPlan,
+        add_timestamp_field: bool,
+    ) -> Result<SplitPlanOutput> {
+        let physical_plan = self.sync_plan(aggregate)?;
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::Planning,
+        };
+        let mut plan_node = PhysicalPlanNode::try_from_physical_plan(physical_plan, &codec)?;
+        let PhysicalPlanType::Aggregate(mut final_aggregate_proto) = plan_node
+            .physical_plan_type
+            .take()
+            .ok_or_else(|| DataFusionError::Plan("missing physical plan type".to_string()))?
+        else {
+            return plan_err!("unexpected physical plan type");
+        };
+        let AggregateMode::Final = final_aggregate_proto.mode() else {
+            return plan_err!("unexpected physical plan type");
+        };
+
+        let partial_aggregation_plan = *final_aggregate_proto
+            .input
+            .take()
+            .ok_or_else(|| DataFusionError::Plan("missing input".to_string()))?;
+        let partial_aggregation_exec_plan = partial_aggregation_plan.try_into_physical_plan(
+            self.schema_provider,
+            &RuntimeEnvBuilder::new().build()?,
+            &codec,
+        )?;
+
+        let partial_schema = partial_aggregation_exec_plan.schema();
+        let final_input_table_provider = FsMemExec::new("partial".into(), partial_schema.clone());
+        final_aggregate_proto.input = Some(Box::new(PhysicalPlanNode::try_from_physical_plan(
+            Arc::new(final_input_table_provider),
+            &codec,
+        )?));
+        let finish_plan = PhysicalPlanNode {
+            physical_plan_type: Some(PhysicalPlanType::Aggregate(final_aggregate_proto)),
+        };
+
+        let (partial_schema, timestamp_index) = if add_timestamp_field {
+            (
+                add_timestamp_field_arrow((*partial_schema).clone()),
+                partial_schema.fields().len(),
+            )
+        } else {
+            // The last column is used as the timestamp; an empty schema must not underflow.
+            let Some(last) = partial_schema.fields().len().checked_sub(1) else {
+                return plan_err!("partial aggregate schema has no fields");
+            };
+            (partial_schema.clone(), last)
+        };
+        let partial_schema = FsSchema::new_keyed(partial_schema, timestamp_index, key_indices);
+        Ok(SplitPlanOutput {
+            partial_aggregation_plan,
+            partial_schema,
+            finish_plan,
+        })
+    }
+
+    /// Builds the serialized physical expression for `date_bin(width, _timestamp)`.
+    /// Returns a plan error when `width` in nanoseconds does not fit in an `i64`.
+    pub fn binning_function_proto(
+        &self,
+        width: Duration,
+        input_schema: DFSchemaRef,
+    ) -> Result<PhysicalExprNode> {
+        // `as i64` would silently wrap for widths above `i64::MAX` nanoseconds.
+        let Ok(width_nanos) = i64::try_from(width.as_nanos()) else {
+            return plan_err!("bin width {:?} does not fit in i64 nanoseconds", width);
+        };
+        let interval = IntervalMonthDayNanoType::make_value(0, 0, width_nanos);
+        let date_bin = date_bin().call(vec![
+            Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(interval)), None),
+            Expr::Column(datafusion::common::Column {
+                relation: None,
+                name: "_timestamp".into(),
+                spans: Spans::new(),
+            }),
+        ]);
+
+        let binning_function = self.create_physical_expr(&date_bin, &input_schema)?;
+        serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})
+    }
+}
+
+struct FsExtensionPlanner {} // stateless extension planner registered by `Planner::new`
+
+#[async_trait]
+impl ExtensionPlanner for FsExtensionPlanner {
+    /// Transparent Debezium extensions get their dedicated exec, other transparent
+    /// extensions yield `None`, and all remaining nodes become an `FsMemExec`.
+    async fn plan_extension(
+        &self,
+        _planner: &dyn PhysicalPlanner,
+        node: &dyn UserDefinedLogicalNode,
+        _logical_inputs: &[&LogicalPlan],
+        physical_inputs: &[Arc<dyn ExecutionPlan>],
+        _session_state: &SessionState,
+    ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
+        if let Ok::<&dyn StreamExtension, _>(stream_extension) = node.try_into() {
+            if stream_extension.transparent() {
+                // A missing input is reported as a plan error instead of an index panic.
+                let first_input = || match physical_inputs.first() {
+                    Some(input) => Ok(Arc::clone(input)),
+                    None => plan_err!("extension {} has no physical input", node.name()),
+                };
+                return match node.name() {
+                    DEBEZIUM_UNROLLING_EXTENSION_NAME => {
+                        let Some(unrolling) =
+                            node.as_any().downcast_ref::<DebeziumUnrollingExtension>()
+                        else {
+                            return plan_err!("{} has an unexpected node type", node.name());
+                        };
+                        let keys = unrolling.primary_keys.clone();
+                        Ok(Some(Arc::new(DebeziumUnrollingExec::try_new(first_input()?, keys)?)))
+                    }
+                    TO_DEBEZIUM_EXTENSION_NAME => {
+                        Ok(Some(Arc::new(ToDebeziumExec::try_new(first_input()?)?)))
+                    }
+                    _ => Ok(None),
+                };
+            }
+        }
+        let name = node
+            .as_any()
+            .downcast_ref::<KeyCalculationExtension>()
+            .and_then(|key_extension| key_extension.name.clone())
+            .unwrap_or_else(|| "memory".to_string());
+        let schema = node.schema().as_ref().into();
+        Ok(Some(Arc::new(FsMemExec::new(name, Arc::new(schema)))))
+    }
+}
+
+impl PlanToGraphVisitor<'_> {
+    /// Appends `index` to the innermost traversal frame; no-op when none is open.
+    fn add_index_to_traversal(&mut self, index: NodeIndex) {
+        let innermost = self.traversal.last_mut();
+        innermost.into_iter().for_each(|frame| frame.push(index));
+    }
+
+    /// Clears the traversal stack, then visits `plan` with this visitor.
+    pub(crate) fn add_plan(&mut self, plan: LogicalPlan) -> Result<()> {
+        self.traversal.clear();
+        plan.visit(self).map(|_recursion| ())
+    }
+
+    pub fn into_graph(self) -> LogicalGraph {
+        self.graph // consumes the visitor; only the built graph is returned
+    }
+
+    /// Plans `extension` as a new graph node wired to `input_nodes`, recording its
+    /// output schema and, when it has one, its name.
+    pub fn build_extension(
+        &mut self,
+        input_nodes: Vec<NodeIndex>,
+        extension: &dyn StreamExtension,
+    ) -> Result<()> {
+        // A named extension may only be planned once.
+        match extension.node_name() {
+            Some(node_name) if self.named_nodes.contains_key(&node_name) => {
+                return plan_err!(
+                    "extension {:?} has already been planned, shouldn't try again.",
+                    node_name
+                );
+            }
+            _ => {}
+        }
+
+        let mut input_schemas = Vec::with_capacity(input_nodes.len());
+        for index in &input_nodes {
+            let Some(schema) = self.output_schemas.get(index) else {
+                return Err(DataFusionError::Plan("missing input node".to_string()));
+            };
+            input_schemas.push(schema.clone());
+        }
+
+        let planned = extension.plan_node(&self.planner, self.graph.node_count(), input_schemas);
+        let NodeWithIncomingEdges { node, edges } =
+            planned.map_err(|e| e.context(format!("planning operator {extension:?}")))?;
+
+        let node_index = self.graph.add_node(node);
+        self.add_index_to_traversal(node_index);
+        // `zip` pairs each input with its edge and stops at the shorter sequence.
+        input_nodes.into_iter().zip(edges).for_each(|(source, edge)| {
+            self.graph.add_edge(source, node_index, edge);
+        });
+
+        let output_schema = extension.output_schema().into();
+        self.output_schemas.insert(node_index, output_schema);
+        if let Some(node_name) = extension.node_name() {
+            self.named_nodes.insert(node_name, node_index);
+        }
+        Ok(())
+    }
+}
+
+impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> {
+    type Node = LogicalPlan;
+
+    /// Pre-order hook: reuses already-planned named nodes (skipping their subtree)
+    /// and opens a traversal frame for extensions that have inputs.
+    fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        let LogicalPlan::Extension(Extension { node }) = node else {
+            return Ok(TreeNodeRecursion::Continue);
+        };
+        let stream_extension: &dyn StreamExtension = node
+            .try_into()
+            .map_err(|e: DataFusionError| e.context("converting extension"))?;
+        if stream_extension.transparent() {
+            return Ok(TreeNodeRecursion::Continue);
+        }
+
+        let planned_index = stream_extension
+            .node_name()
+            .and_then(|name| self.named_nodes.get(&name).copied());
+        if let Some(node_index) = planned_index {
+            self.add_index_to_traversal(node_index);
+            return Ok(TreeNodeRecursion::Jump);
+        }
+        if !node.inputs().is_empty() {
+            self.traversal.push(Vec::new());
+        }
+        Ok(TreeNodeRecursion::Continue)
+    }
+
+    /// Post-order hook: plans each non-transparent extension that is not already a
+    /// planned named node, wiring it to the inputs collected in its traversal frame.
+    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
+        let LogicalPlan::Extension(Extension { node }) = node else {
+            return Ok(TreeNodeRecursion::Continue);
+        };
+
+        // Converted once and reused below; a second conversion would be redundant.
+        let stream_extension: &dyn StreamExtension = node
+            .try_into()
+            .map_err(|e: DataFusionError| e.context("planning extension"))?;
+
+        if stream_extension.transparent() {
+            return Ok(TreeNodeRecursion::Continue);
+        }
+
+        if let Some(name) = stream_extension.node_name() {
+            if self.named_nodes.contains_key(&name) {
+                return Ok(TreeNodeRecursion::Continue);
+            }
+        }
+
+        // Only extensions with inputs opened a frame in `f_down`, so only they pop one.
+        let input_nodes = if !node.inputs().is_empty() {
+            self.traversal.pop().unwrap_or_default()
+        } else {
+            vec![]
+        };
+        self.build_extension(input_nodes, stream_extension)?;
+        Ok(TreeNodeRecursion::Continue)
+    }
+}
+
+pub(crate) struct SplitPlanOutput {
+    pub(crate) partial_aggregation_plan: PhysicalPlanNode, // input taken from the final aggregate
+    pub(crate) partial_schema: FsSchema, // keyed schema built from the partial plan's output
+    pub(crate) finish_plan: PhysicalPlanNode, // final aggregate reading from a "partial" `FsMemExec`
+}

From e768a48979b56667154d554776074c93ff1d7bc6 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 22 Mar 2026 14:43:45 +0800
Subject: [PATCH 12/44] update

---
 protocol/build.rs                            |  11 +-
 protocol/proto/storage.proto                 |  91 ++++
 protocol/src/lib.rs                          |   9 +
 src/coordinator/analyze/analyzer.rs          |  14 +-
 src/coordinator/coordinator.rs               | 383 ++++++++---------
 src/coordinator/execution/executor.rs        | 185 +++++---
 src/coordinator/mod.rs                       |   6 +-
 src/coordinator/plan/create_table_plan.rs    |  29 +-
 src/coordinator/plan/drop_table_plan.rs      |  34 ++
 src/coordinator/plan/logical_plan_visitor.rs | 420 ++++++++++++-------
 src/coordinator/plan/mod.rs                  |   4 +-
 src/coordinator/plan/visitor.rs              |   8 +-
 src/coordinator/runtime_context.rs           |  64 +++
 src/coordinator/statement/create_table.rs    |   4 +
 src/coordinator/statement/drop_table.rs      |  41 ++
 src/coordinator/statement/mod.rs             |  14 +
 src/coordinator/statement/streaming_table.rs |   4 +
 src/coordinator/statement/visitor.rs         |  10 +-
 src/main.rs                                  |   4 +-
 src/server/handler.rs                        | 398 +++++++-----------
 src/server/initializer.rs                    | 142 +++----
 src/server/mod.rs                            |   2 +-
 src/sql/parse.rs                             |  74 +++-
 src/storage/mod.rs                           |   1 +
 src/storage/stream_catalog/codec.rs          |  57 +++
 src/storage/stream_catalog/manager.rs        | 333 +++++++++++++++
 src/storage/stream_catalog/meta_store.rs     |  70 ++++
 src/storage/stream_catalog/mod.rs            |  23 +
 src/storage/task/mod.rs                      |   1 +
 src/storage/task/proto_codec.rs              | 271 ++++++++++++
 src/storage/task/rocksdb_storage.rs          |  83 ++--
 src/storage/task/storage.rs                  |   2 +-
 32 files changed, 2003 insertions(+), 789 deletions(-)
 create mode 100644 protocol/proto/storage.proto
 create mode 100644 src/coordinator/plan/drop_table_plan.rs
 create mode 100644 src/coordinator/runtime_context.rs
 create mode 100644 src/coordinator/statement/drop_table.rs
 create mode 100644 src/storage/stream_catalog/codec.rs
 create mode 100644 src/storage/stream_catalog/manager.rs
 create mode 100644 src/storage/stream_catalog/meta_store.rs
 create mode 100644 src/storage/stream_catalog/mod.rs
 create mode 100644 src/storage/task/proto_codec.rs

diff --git a/protocol/build.rs b/protocol/build.rs
index e258f456..d3943f53 100644
--- a/protocol/build.rs
+++ b/protocol/build.rs
@@ -39,7 +39,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .build_server(true)
         .compile_protos(&["proto/function_stream.proto"], &["proto"])?;
 
-    // 2. fs_api.proto → with file descriptor set + serde for REST/JSON
     let api_dir = out_dir.join("api");
     std::fs::create_dir_all(&api_dir)?;
 
@@ -56,10 +55,20 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .build_server(false)
         .compile_protos(&["proto/fs_api.proto"], &["proto"])?;
 
+    let storage_dir = out_dir.join("storage");
+    std::fs::create_dir_all(&storage_dir)?;
+    tonic_build::configure()
+        .out_dir(&storage_dir)
+        .protoc_arg("--experimental_allow_proto3_optional")
+        .build_client(false)
+        .build_server(false)
+        .compile_protos(&["proto/storage.proto"], &["proto"])?;
+
     log::info!("Protocol Buffers code generated successfully");
     println!("cargo:rustc-env=PROTO_GEN_DIR={}", out_dir.display());
     println!("cargo:rerun-if-changed=proto/function_stream.proto");
     println!("cargo:rerun-if-changed=proto/fs_api.proto");
+    println!("cargo:rerun-if-changed=proto/storage.proto");
 
     Ok(())
 }
diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto
new file mode 100644
index 00000000..b11037a2
--- /dev/null
+++ b/protocol/proto/storage.proto
@@ -0,0 +1,91 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+//
+// All durable / persisted payloads for FunctionStream (single source of truth for storage wire format).
+// - Stream table catalog (MetaStore KV)
+// - Task rows (RocksDB task_meta / task_payload; values may be prefixed — see runtime codec)
+
+syntax = "proto3";
+
+package function_stream.storage;
+
+// =============================================================================
+// Stream catalog (coordinator stream tables: source / sink / memory)
+// =============================================================================
+
+// Top-level persisted record for one stream table.
+message TableDefinition {
+  string table_name = 1;
+  int64 updated_at_millis = 2;
+  oneof table_type {
+    StreamSource source = 3;
+    StreamSink sink = 4;
+    StreamMemory memory = 5;
+  }
+}
+
+message StreamSource {
+  bytes arrow_schema_ipc = 1; // presumably the table's Arrow schema in IPC encoding — TODO confirm
+  optional string event_time_field = 2; // assumed unset when no event-time column is declared
+  optional string watermark_field = 3; // assumed unset when no watermark column is declared
+}
+
+message StreamSink {
+  bytes arrow_schema_ipc = 1; // presumably the sink's Arrow schema in IPC encoding — TODO confirm
+}
+
+message StreamMemory {
+  optional bytes logical_plan_bytes = 1; // NOTE(review): plan encoding is not stated here — confirm
+}
+
+// =============================================================================
+// Task storage (RocksDB metadata + module payload)
+// =============================================================================
+
+// Lifecycle state persisted for task recovery. New enum values MUST be appended
+// with new numbers (never renumber) for forward compatibility.
+enum ComponentStateKind {
+  COMPONENT_STATE_KIND_UNSPECIFIED = 0;
+  UNINITIALIZED = 1;
+  INITIALIZED = 2;
+  STARTING = 3;
+  RUNNING = 4;
+  CHECKPOINTING = 5;
+  STOPPING = 6;
+  STOPPED = 7;
+  CLOSING = 8;
+  CLOSED = 9;
+  ERROR = 10;
+}
+
+message ComponentStateProto {
+  ComponentStateKind kind = 1;
+  // Set when kind == ERROR
+  string error_message = 2;
+}
+
+// Stored in CF task_meta (after magic prefix FSP1).
+message TaskMetadataProto {
+  string task_type = 1;
+  ComponentStateProto state = 2;
+  uint64 created_at = 3;
+  optional uint64 checkpoint_id = 4;
+}
+
+message TaskModuleWasm {
+  bytes wasm_binary = 1; // presumably the raw module bytes of a WASM task — TODO confirm
+}
+
+message TaskModulePython {
+  string class_name = 1;
+  string module_path = 2;
+  optional bytes embedded_code = 3; // NOTE(review): relation to `module_path` is unstated — confirm
+}
+
+// Stored in CF task_payload (after magic prefix FSP1).
+message TaskModulePayloadProto {
+  oneof payload {
+    TaskModuleWasm wasm = 1;
+    TaskModulePython python = 2;
+  }
+}
diff --git a/protocol/src/lib.rs b/protocol/src/lib.rs
index f924a5c6..d1bdfff9 100644
--- a/protocol/src/lib.rs
+++ b/protocol/src/lib.rs
@@ -37,3 +37,12 @@ pub mod grpc {
 /// File descriptor set for fs_api.proto (for gRPC reflection / REST gateway).
 pub const FS_API_FILE_DESCRIPTOR_SET: &[u8] =
     tonic::include_file_descriptor_set!("fs_api_descriptor");
+
+// ─────────────── Durable storage (storage.proto: catalog + task rows) ───────────────
+
+/// Prost types for persisted stream catalog and task storage (`proto/storage.proto`).
+pub mod storage {
+    #![allow(clippy::all)] // the included file is generated, so lints there are not actionable
+    #![allow(warnings)]
+    include!("../generated/storage/function_stream.storage.rs");
+}
diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs
index c351f3ae..3889431e 100644
--- a/src/coordinator/analyze/analyzer.rs
+++ b/src/coordinator/analyze/analyzer.rs
@@ -13,9 +13,9 @@
 use super::Analysis;
 use crate::coordinator::execution_context::ExecutionContext;
 use crate::coordinator::statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction,
-    Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction,
-    StreamingTableStatement,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
+    ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext,
+    StatementVisitorResult, StopFunction, StreamingTableStatement,
 };
 use std::fmt;
 
@@ -134,4 +134,12 @@ impl StatementVisitor for Analyzer<'_> {
             stmt.statement.clone(),
         )))
     }
+
+    fn visit_drop_table_statement(
+        &self,
+        stmt: &DropTableStatement,
+        _context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Analyze(Box::new(DropTableStatement::new(stmt.statement.clone()))) // re-wraps a clone of the statement
+    }
 }
diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs
index 0ddca660..ec81132a 100644
--- a/src/coordinator/coordinator.rs
+++ b/src/coordinator/coordinator.rs
@@ -10,252 +10,248 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::sync::Arc;
 use std::time::Instant;
 
 use anyhow::{Context, Result};
 
-use crate::coordinator::analyze::{Analysis, Analyzer};
+use crate::coordinator::analyze::Analyzer;
 use crate::coordinator::dataset::ExecuteResult;
 use crate::coordinator::execution::Executor;
 use crate::coordinator::plan::{LogicalPlanVisitor, LogicalPlanner, PlanNode};
 use crate::coordinator::statement::Statement;
-use crate::runtime::taskexecutor::TaskManager;
 use crate::sql::schema::StreamSchemaProvider;
 
 use super::execution_context::ExecutionContext;
+use super::runtime_context::CoordinatorRuntimeContext;
 
+#[derive(Default)]
 pub struct Coordinator {}
 
-impl Default for Coordinator {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 impl Coordinator {
     pub fn new() -> Self {
         Self {}
     }
 
-    pub fn compile_plan(
-        &self,
-        stmt: &dyn Statement,
-        schema_provider: StreamSchemaProvider,
-    ) -> Result<Box<dyn PlanNode>, anyhow::Error> {
-        let context = ExecutionContext::new();
-        let analysis = self.step_analyze(&context, stmt)?;
-        let plan = self.step_build_logical_plan(&analysis, schema_provider)?;
-        self.step_optimize(&analysis, plan)
-    }
+    // ========================================================================
+    // Plan compilation
+    // ========================================================================
 
-    /// Same as [`Self::execute`], but uses the provided catalog / stream tables (e.g. tests).
-    pub fn execute_with_schema_provider(
+    pub fn compile_plan(
         &self,
         stmt: &dyn Statement,
         schema_provider: StreamSchemaProvider,
-    ) -> ExecuteResult {
-        let start_time = Instant::now();
-        let context = ExecutionContext::new();
-        let execution_id = context.execution_id;
-
-        match self.execute_pipeline(&context, stmt, schema_provider) {
-            Ok(result) => {
-                log::debug!(
-                    "[{}] Execution completed in {}ms",
-                    execution_id,
-                    start_time.elapsed().as_millis()
-                );
-                result
-            }
-            Err(e) => {
-                log::error!(
-                    "[{}] Execution failed after {}ms. Error: {:#}",
-                    execution_id,
-                    start_time.elapsed().as_millis(),
-                    e
-                );
-                ExecuteResult::err(format!("Execution failed: {:#}", e))
-            }
-        }
-    }
-
-    pub fn execute(&self, stmt: &dyn Statement) -> ExecuteResult {
-        self.execute_with_schema_provider(stmt, StreamSchemaProvider::new())
+    ) -> Result<Box<dyn PlanNode>> {
+        self.compile_plan_internal(&ExecutionContext::new(), stmt, schema_provider)
     }
 
-    fn execute_pipeline(
+    /// Internal pipeline: Analyze → build logical plan → optimize.
+    fn compile_plan_internal(
         &self,
         context: &ExecutionContext,
         stmt: &dyn Statement,
         schema_provider: StreamSchemaProvider,
-    ) -> Result<ExecuteResult> {
-        let analysis = self.step_analyze(context, stmt)?;
-        let plan = self.step_build_logical_plan(&analysis, schema_provider)?;
-        let optimized_plan = self.step_optimize(&analysis, plan)?;
-        self.step_execute(optimized_plan)
-    }
-
-    fn step_analyze(&self, context: &ExecutionContext, stmt: &dyn Statement) -> Result<Analysis> {
+    ) -> Result<Box<dyn PlanNode>> {
+        let exec_id = context.execution_id;
         let start = Instant::now();
-        let analyzer = Analyzer::new(context);
-        let result = analyzer
+
+        let analysis = Analyzer::new(context)
             .analyze(stmt)
             .map_err(|e| anyhow::anyhow!(e))
-            .context("Analyzer phase failed");
-
+            .context("Analyzer phase failed")?;
         log::debug!(
             "[{}] Analyze phase finished in {}ms",
-            context.execution_id,
+            exec_id,
             start.elapsed().as_millis()
         );
-        result
-    }
 
-    fn step_build_logical_plan(
-        &self,
-        analysis: &Analysis,
-        schema_provider: StreamSchemaProvider,
-    ) -> Result<Box<dyn PlanNode>> {
-        let visitor = LogicalPlanVisitor::new(schema_provider);
-        let plan = visitor.visit(analysis);
-        Ok(plan)
-    }
-
-    fn step_optimize(
-        &self,
-        analysis: &Analysis,
-        plan: Box<dyn PlanNode>,
-    ) -> Result<Box<dyn PlanNode>> {
-        let start = Instant::now();
-        let planner = LogicalPlanner::new();
-        let optimized = planner.optimize(plan, analysis);
+        let plan = LogicalPlanVisitor::new(schema_provider).visit(&analysis);
 
+        let opt_start = Instant::now();
+        let optimized = LogicalPlanner::new().optimize(plan, &analysis);
         log::debug!(
-            "Optimizer phase finished in {}ms",
-            start.elapsed().as_millis()
+            "[{}] Optimizer phase finished in {}ms",
+            exec_id,
+            opt_start.elapsed().as_millis()
         );
+
         Ok(optimized)
     }
 
-    fn step_execute(&self, plan: Box<dyn PlanNode>) -> Result<ExecuteResult> {
+    // ========================================================================
+    // Execution
+    // ========================================================================
+
+    pub fn execute(&self, stmt: &dyn Statement) -> ExecuteResult {
+        match CoordinatorRuntimeContext::try_from_globals() {
+            Ok(ctx) => self.execute_with_runtime_context(stmt, &ctx),
+            Err(e) => ExecuteResult::err(e.to_string()), // no runtime context: reported as an error result
+        }
+    }
+
+    pub async fn execute_with_stream_catalog(&self, stmt: &dyn Statement) -> ExecuteResult {
+        self.execute(stmt) // NOTE(review): nothing is awaited; this runs `execute` synchronously
+    }
+
+    /// Same as [`Self::execute`], but uses an explicit [`CoordinatorRuntimeContext`] (e.g. tests or custom wiring).
+    pub fn execute_with_runtime_context(
+        &self,
+        stmt: &dyn Statement,
+        runtime: &CoordinatorRuntimeContext,
+    ) -> ExecuteResult {
         let start = Instant::now();
-        let task_manager = match TaskManager::get() {
-            Ok(tm) => tm,
-            Err(e) => {
-                return Ok(ExecuteResult::err(format!(
-                    "Failed to get TaskManager: {}",
-                    e
-                )));
-            }
-        };
-        let executor = Executor::new(task_manager.clone());
-        let result = executor
+        let context = ExecutionContext::new();
+        let exec_id = context.execution_id;
+        let schema_provider = runtime.planning_schema_provider();
+
+        let result = (|| -> Result<ExecuteResult> {
+            let plan = self.compile_plan_internal(&context, stmt, schema_provider)?;
+
+            let exec_start = Instant::now();
+            let res = Executor::new(
+                Arc::clone(&runtime.task_manager),
+                runtime.catalog_manager.clone(),
+            )
             .execute(plan.as_ref())
             .map_err(|e| anyhow::anyhow!(e))
-            .context("Executor phase failed");
+            .context("Executor phase failed")?;
 
-        log::debug!(
-            "Executor phase finished in {}ms",
-            start.elapsed().as_millis()
-        );
-        result
+            log::debug!(
+                "[{}] Executor phase finished in {}ms",
+                exec_id,
+                exec_start.elapsed().as_millis()
+            );
+            Ok(res)
+        })();
+
+        match result {
+            Ok(res) => {
+                log::debug!(
+                    "[{}] Execution completed in {}ms",
+                    exec_id,
+                    start.elapsed().as_millis()
+                );
+                res
+            }
+            Err(e) => {
+                log::error!(
+                    "[{}] Execution failed after {}ms. Error: {:#}",
+                    exec_id,
+                    start.elapsed().as_millis(),
+                    e
+                );
+                ExecuteResult::err(format!("Execution failed: {:#}", e))
+            }
+        }
     }
 }
 
+// ---------------------------------------------------------------------------
+// Test-only helpers (used by `create_streaming_table_coordinator_tests` below)
+// ---------------------------------------------------------------------------
+
 #[cfg(test)]
-mod create_streaming_table_coordinator_tests {
-    use std::sync::Arc;
+use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+
+#[cfg(test)]
+use crate::sql::common::TIMESTAMP_FIELD;
+#[cfg(test)]
+use crate::sql::parse::parse_sql;
 
-    use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+#[cfg(test)]
+fn fake_stream_schema_provider() -> StreamSchemaProvider { // test fixture: one source table "src"
+    let mut provider = StreamSchemaProvider::new();
+    let schema = Arc::new(Schema::new(vec![ // two non-nullable columns
+        Field::new("id", DataType::Int64, false),
+        Field::new(
+            TIMESTAMP_FIELD,
+            DataType::Timestamp(TimeUnit::Nanosecond, None), // nanosecond unit, no time zone
+            false,
+        ),
+    ]));
+    provider.add_source_table(
+        "src".to_string(),
+        schema,
+        Some(TIMESTAMP_FIELD.to_string()), // presumably the event-time column; confirm in add_source_table
+        None, // NOTE(review): meaning of this argument is not visible from here
+    );
+    provider
+}
 
-    use crate::sql::common::TIMESTAMP_FIELD;
-    use crate::sql::parse::parse_sql;
-    use crate::sql::schema::StreamSchemaProvider;
-
-    use super::Coordinator;
-
-    fn fake_stream_schema_provider() -> StreamSchemaProvider {
-        let mut provider = StreamSchemaProvider::new();
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int64, false),
-            Field::new(
-                TIMESTAMP_FIELD,
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            ),
-        ]));
-        provider.add_source_table(
-            "src".to_string(),
-            schema,
-            Some(TIMESTAMP_FIELD.to_string()),
-            None,
-        );
-        provider
-    }
+#[cfg(test)]
+fn fake_stream_schema_provider_with_v() -> StreamSchemaProvider { // test fixture: "src" with an extra "v" column
+    let mut provider = StreamSchemaProvider::new();
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int64, false),
+        Field::new("v", DataType::Utf8, true), // the only nullable column in this schema
+        Field::new(
+            TIMESTAMP_FIELD,
+            DataType::Timestamp(TimeUnit::Nanosecond, None), // nanosecond unit, no time zone
+            false,
+        ),
+    ]));
+    provider.add_source_table(
+        "src".to_string(),
+        schema,
+        Some(TIMESTAMP_FIELD.to_string()), // presumably the event-time column; confirm in add_source_table
+        None, // NOTE(review): meaning of this argument is not visible from here
+    );
+    provider
+}
 
-    fn fake_stream_schema_provider_with_v() -> StreamSchemaProvider {
-        let mut provider = StreamSchemaProvider::new();
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int64, false),
-            Field::new("v", DataType::Utf8, true),
-            Field::new(
-                TIMESTAMP_FIELD,
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            ),
-        ]));
-        provider.add_source_table(
-            "src".to_string(),
-            schema,
-            Some(TIMESTAMP_FIELD.to_string()),
-            None,
-        );
-        provider
-    }
+#[cfg(test)]
+fn fake_src_dim_provider() -> StreamSchemaProvider { // test fixture: tables "src" and "dim"
+    let mut provider = fake_stream_schema_provider_with_v(); // starts from the "src" (id, v, timestamp) fixture
+    let dim = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int64, false),
+        Field::new("name", DataType::Utf8, true), // nullable
+        Field::new("amt", DataType::Float64, true), // nullable
+        Field::new(
+            TIMESTAMP_FIELD,
+            DataType::Timestamp(TimeUnit::Nanosecond, None), // nanosecond unit, no time zone
+            false,
+        ),
+    ]));
+    provider.add_source_table(
+        "dim".to_string(),
+        dim,
+        Some(TIMESTAMP_FIELD.to_string()), // presumably the event-time column; confirm in add_source_table
+        None, // NOTE(review): meaning of this argument is not visible from here
+    );
+    provider
+}
 
-    fn fake_src_dim_provider() -> StreamSchemaProvider {
-        let mut provider = fake_stream_schema_provider_with_v();
-        let dim = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int64, false),
-            Field::new("name", DataType::Utf8, true),
-            Field::new("amt", DataType::Float64, true),
-            Field::new(
-                TIMESTAMP_FIELD,
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            ),
-        ]));
-        provider.add_source_table(
-            "dim".to_string(),
-            dim,
-            Some(TIMESTAMP_FIELD.to_string()),
-            None,
-        );
-        provider
-    }
+#[cfg(test)]
+fn assert_coordinator_streaming_build_ok(
+    sql: &str,
+    provider: StreamSchemaProvider,
+    expect_sink_substring: &str,
+    expect_connector_substring: &str,
+) { // test helper: parse `sql`, compile it, then substring-check the plan's Debug text
+    let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
+    assert_eq!(stmts.len(), 1); // the helper only handles single-statement input
+    let plan = Coordinator::new()
+        .compile_plan(stmts[0].as_ref(), provider)
+        .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}"));
+    let rendered = format!("{plan:?}"); // every assertion below matches against this rendering
+    assert!(rendered.contains("StreamingTable"), "{rendered}");
+    assert!(
+        rendered.contains(expect_sink_substring),
+        "expected sink name fragment {expect_sink_substring:?} in:\n{rendered}"
+    );
+    assert!(
+        rendered.contains(expect_connector_substring),
+        "expected connector fragment {expect_connector_substring:?} in:\n{rendered}"
+    );
+}
 
-    fn assert_coordinator_streaming_build_ok(
-        sql: &str,
-        provider: StreamSchemaProvider,
-        expect_sink_substring: &str,
-        expect_connector_substring: &str,
-    ) {
-        let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
-        assert_eq!(stmts.len(), 1);
-        let plan = Coordinator::new()
-            .compile_plan(stmts[0].as_ref(), provider)
-            .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}"));
-        let rendered = format!("{plan:?}");
-        assert!(rendered.contains("StreamingTable"), "{rendered}");
-        assert!(
-            rendered.contains(expect_sink_substring),
-            "expected sink name fragment {expect_sink_substring:?} in:\n{rendered}"
-        );
-        assert!(
-            rendered.contains(expect_connector_substring),
-            "expected connector fragment {expect_connector_substring:?} in:\n{rendered}"
-        );
-    }
+#[cfg(test)]
+mod create_streaming_table_coordinator_tests {
+    use super::{
+        assert_coordinator_streaming_build_ok, fake_src_dim_provider,
+        fake_stream_schema_provider, fake_stream_schema_provider_with_v,
+    };
+    use crate::sql::common::TIMESTAMP_FIELD;
 
     #[test]
     fn coordinator_build_create_streaming_table_select_star_kafka() {
@@ -333,7 +329,12 @@ mod create_streaming_table_coordinator_tests {
             let sql = format!(
                 "CREATE STREAMING TABLE sink_w_{label} WITH ('connector'='kafka') AS {body}"
             );
-            assert_coordinator_streaming_build_ok(&sql, p.clone(), &format!("sink_w_{label}"), "kafka");
+            assert_coordinator_streaming_build_ok(
+                &sql,
+                p.clone(),
+                &format!("sink_w_{label}"),
+                "kafka",
+            );
         }
     }
 
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 4dae91d5..b8fbb3a5 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -10,20 +10,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use crate::coordinator::dataset::{ExecuteResult, ShowFunctionsResult, empty_record_batch};
+use std::sync::Arc;
+
+use thiserror::Error;
+use tracing::{debug, info};
+
+use crate::coordinator::dataset::{empty_record_batch, ExecuteResult, ShowFunctionsResult};
 use crate::coordinator::plan::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
-    LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult,
-    ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, CreateTablePlanBody,
+    DropFunctionPlan, DropTablePlan, LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext,
+    PlanVisitorResult, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
     StreamingTableConnectorPlan,
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::taskexecutor::TaskManager;
-use crate::sql::schema::table::Table as CatalogTable;
-use crate::sql::analysis::{ StreamSchemaProvider};
-use std::sync::Arc;
-use thiserror::Error;
-use tracing::{debug, info};
+use crate::sql::schema::StreamTable;
+use crate::storage::stream_catalog::CatalogManager;
 
 #[derive(Error, Debug)]
 pub enum ExecuteError {
@@ -39,11 +41,15 @@ pub enum ExecuteError {
 
 pub struct Executor {
     task_manager: Arc<TaskManager>,
+    catalog_manager: Arc<CatalogManager>, // catalog handle used by the CREATE/DROP TABLE visitors
 }
 
 impl Executor {
-    pub fn new(task_manager: Arc<TaskManager>) -> Self {
-        Self { task_manager }
+    pub fn new(task_manager: Arc<TaskManager>, catalog_manager: Arc<CatalogManager>) -> Self {
+        Self { // both shared handles are stored as given
+            task_manager,
+            catalog_manager,
+        }
     }
 
     pub fn execute(&self, plan: &dyn PlanNode) -> Result<ExecuteResult, ExecuteError> {
@@ -54,8 +60,11 @@ impl Executor {
 
         match visitor_result {
             PlanVisitorResult::Execute(result) => {
-                let elapsed = timer.elapsed();
-                debug!(target: "executor", elapsed_ms = elapsed.as_millis(), "Execution completed");
+                debug!(
+                    target: "executor",
+                    elapsed_ms = timer.elapsed().as_millis(),
+                    "Execution completed"
+                );
                 result
             }
         }
@@ -63,23 +72,22 @@ impl Executor {
 }
 
 impl PlanVisitor for Executor {
-    #[allow(clippy::redundant_closure_call)]
     fn visit_create_function(
         &self,
         plan: &CreateFunctionPlan,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let result = (|| -> Result<ExecuteResult, ExecuteError> {
+        let execute = || -> Result<ExecuteResult, ExecuteError> {
             let function_bytes = match &plan.function_source {
                 FunctionSource::Path(path) => std::fs::read(path).map_err(|e| {
-                    ExecuteError::Validation(format!("Failed to read function at {}: {}", path, e))
+                    ExecuteError::Validation(format!("Failed to read function at {path}: {e}"))
                 })?,
                 FunctionSource::Bytes(bytes) => bytes.clone(),
             };
 
             let config_bytes = match &plan.config_source {
                 Some(ConfigSource::Path(path)) => std::fs::read(path).map_err(|e| {
-                    ExecuteError::Validation(format!("Failed to read config at {}: {}", path, e))
+                    ExecuteError::Validation(format!("Failed to read config at {path}: {e}"))
                 })?,
                 Some(ConfigSource::Bytes(bytes)) => bytes.clone(),
                 None => {
@@ -92,35 +100,34 @@ impl PlanVisitor for Executor {
             info!(config_size = config_bytes.len(), "Registering Wasm task");
             self.task_manager
                 .register_task(&config_bytes, &function_bytes)
-                .map_err(|e| ExecuteError::Task(format!("Registration failed: {:?}", e)))?;
+                .map_err(|e| ExecuteError::Task(format!("Registration failed: {e:?}")))?;
 
             Ok(ExecuteResult::ok_with_data(
                 "Function registered successfully",
                 empty_record_batch(),
             ))
-        })();
+        };
 
-        PlanVisitorResult::Execute(result)
+        PlanVisitorResult::Execute(execute())
     }
 
-    #[allow(clippy::redundant_closure_call)]
     fn visit_drop_function(
         &self,
         plan: &DropFunctionPlan,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let result = (|| -> Result<ExecuteResult, ExecuteError> {
+        let execute = || -> Result<ExecuteResult, ExecuteError> { // closure so `?` can short-circuit into a Result
             self.task_manager
                 .remove_task(&plan.name)
-                .map_err(|e| ExecuteError::Task(format!("Removal failed: {}", e)))?;
+                .map_err(|e| ExecuteError::Task(format!("Removal failed: {e}")))?;
 
             Ok(ExecuteResult::ok_with_data(
                 format!("Function '{}' dropped", plan.name),
                 empty_record_batch(),
             ))
-        })();
+        }; // bound to a name instead of being invoked in place
 
-        PlanVisitorResult::Execute(result)
+        PlanVisitorResult::Execute(execute()) // run once and wrap the outcome
     }
 
     fn visit_start_function(
@@ -142,48 +149,43 @@ impl PlanVisitor for Executor {
         PlanVisitorResult::Execute(result)
     }
 
-    #[allow(clippy::redundant_closure_call)]
     fn visit_show_functions(
         &self,
         _plan: &ShowFunctionsPlan,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let result = {
-            let functions = self.task_manager.list_all_functions();
-
-            Ok(ExecuteResult::ok_with_data(
-                format!("Found {} task(s)", functions.len()),
-                ShowFunctionsResult::new(functions),
-            ))
-        };
+        let functions = self.task_manager.list_all_functions(); // listing comes straight from the task manager
+        let result = ExecuteResult::ok_with_data(
+            format!("Found {} task(s)", functions.len()), // count is read before `functions` is moved below
+            ShowFunctionsResult::new(functions),
+        );
 
-        PlanVisitorResult::Execute(result)
+        PlanVisitorResult::Execute(Ok(result)) // this visitor has no error path
     }
 
-    #[allow(clippy::redundant_closure_call)]
     fn visit_create_python_function(
         &self,
         plan: &CreatePythonFunctionPlan,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let result = (|| -> Result<ExecuteResult, ExecuteError> {
-            let modules: Vec<(String, Vec<u8>)> = plan
+        let execute = || -> Result<ExecuteResult, ExecuteError> { // closure so `?` can short-circuit into a Result
+            let modules = plan // owned (name, bytes) pairs cloned out of the plan
                 .modules
                 .iter()
                 .map(|m| (m.name.clone(), m.bytes.clone()))
-                .collect();
+                .collect::<Vec<_>>();
 
             self.task_manager
                 .register_python_task(plan.config_content.as_bytes(), &modules)
-                .map_err(|e| ExecuteError::Task(format!("Python registration failed: {}", e)))?;
+                .map_err(|e| ExecuteError::Task(format!("Python registration failed: {e}")))?;
 
             Ok(ExecuteResult::ok_with_data(
                 format!("Python function '{}' deployed", plan.class_name),
                 empty_record_batch(),
             ))
-        })();
+        }; // bound to a name instead of being invoked in place
 
-        PlanVisitorResult::Execute(result)
+        PlanVisitorResult::Execute(execute()) // run once and wrap the outcome
     }
 
     fn visit_stop_function(
@@ -210,12 +212,50 @@ impl PlanVisitor for Executor {
         plan: &CreateTablePlan,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        // TODO: register table in catalog and execute DDL
-        let result = Err(ExecuteError::Internal(format!(
-            "CREATE TABLE execution not yet implemented. LogicalPlan:\n{}",
-            plan.logical_plan.display_indent()
-        )));
-        PlanVisitorResult::Execute(result)
+        let execute = || -> Result<ExecuteResult, ExecuteError> {
+            let (table_name, if_not_exists, stream_table) = match &plan.body {
+                CreateTablePlanBody::ConnectorSource {
+                    source_table,
+                    if_not_exists,
+                } => {
+                    let table_name = source_table.name().to_string();
+                    let schema = Arc::new(source_table.produce_physical_schema());
+                    let table_instance = StreamTable::Source {
+                        name: table_name.clone(),
+                        schema,
+                        event_time_field: source_table.event_time_field().map(str::to_string),
+                        watermark_field: source_table.watermark_field().map(str::to_string),
+                    };
+                    (table_name, *if_not_exists, table_instance)
+                }
+                CreateTablePlanBody::DataFusion(_) => {
+                    return Err(ExecuteError::Internal(
+                        "Operation not supported: Currently, the system strictly supports creating tables backed by an external Connector Source (e.g., Kafka, Postgres). In-memory tables, Views, or CTAS (Create Table As Select) are not supported."
+                            .into(),
+                    ));
+                }
+            };
+
+            if if_not_exists && self.catalog_manager.has_stream_table(&table_name) {
+                return Ok(ExecuteResult::ok(format!(
+                    "Table '{table_name}' already exists (skipped)"
+                )));
+            }
+
+            self.catalog_manager
+                .add_table(stream_table)
+                .map_err(|e| {
+                    ExecuteError::Internal(format!(
+                        "Failed to register connector source table '{table_name}': {e}"
+                    ))
+                })?;
+
+            Ok(ExecuteResult::ok(format!(
+                "Created connector source table '{table_name}'"
+            )))
+        };
+
+        PlanVisitorResult::Execute(execute())
     }
 
     fn visit_streaming_table(
@@ -223,19 +263,23 @@ impl PlanVisitor for Executor {
         plan: &StreamingTable,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let result = (|| -> Result<ExecuteResult, ExecuteError> {
-            let catalog_table =
-                CatalogTable::ConnectorTable(plan.source_table.clone());
-            let mut schema_provider = StreamSchemaProvider::new();
-            schema_provider.insert_catalog_table(catalog_table.clone());
+        let execute = || -> Result<ExecuteResult, ExecuteError> {
+            let sink = StreamTable::Sink {
+                name: plan.name.clone(),
+                schema: Arc::new(plan.logical_plan.schema().as_arrow().clone()),
+            };
 
+            self.catalog_manager
+                .add_table(sink)
+                .map_err(|e| ExecuteError::Internal(e.to_string()))?;
 
             Ok(ExecuteResult::ok_with_data(
-                format!("Streaming table '{}' compiled successfully", plan.name),
+                format!("Registered streaming table '{}'", plan.name),
                 empty_record_batch(),
             ))
-        })();
-        PlanVisitorResult::Execute(result)
+        };
+
+        PlanVisitorResult::Execute(execute())
     }
 
     fn visit_lookup_table(
@@ -243,10 +287,9 @@ impl PlanVisitor for Executor {
         _plan: &LookupTablePlan,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let result = Err(ExecuteError::Internal(
+        PlanVisitorResult::Execute(Err(ExecuteError::Internal(
             "LookupTable execution not yet implemented".to_string(),
-        ));
-        PlanVisitorResult::Execute(result)
+        )))
     }
 
     fn visit_streaming_connector_table(
@@ -254,9 +297,27 @@ impl PlanVisitor for Executor {
         _plan: &StreamingTableConnectorPlan,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let result = Err(ExecuteError::Internal(
+        PlanVisitorResult::Execute(Err(ExecuteError::Internal(
             "StreamingTableConnector execution not yet implemented".to_string(),
-        ));
-        PlanVisitorResult::Execute(result)
+        )))
+    }
+
+    fn visit_drop_table_plan(
+        &self,
+        plan: &DropTablePlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        let execute = || -> Result<ExecuteResult, ExecuteError> { // closure so `?` can short-circuit into a Result
+            self.catalog_manager
+                .drop_table(&plan.table_name, plan.if_exists) // `if_exists` is forwarded; missing-table handling is the catalog's
+                .map_err(|e| ExecuteError::Internal(e.to_string()))?; // catalog errors surface as Internal
+
+            Ok(ExecuteResult::ok(format!( // same message for every Ok returned by drop_table
+                "Dropped table '{}'",
+                plan.table_name
+            )))
+        };
+
+        PlanVisitorResult::Execute(execute()) // run once and wrap the outcome
+    }
 }
diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs
index 7791e8a8..a781f1e1 100644
--- a/src/coordinator/mod.rs
+++ b/src/coordinator/mod.rs
@@ -17,12 +17,14 @@ mod dataset;
 mod execution;
 mod execution_context;
 mod plan;
+mod runtime_context;
 mod statement;
 mod tool;
 
 pub use coordinator::Coordinator;
+pub use runtime_context::CoordinatorRuntimeContext;
 pub use dataset::{DataSet, ShowFunctionsResult};
 pub use statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, PythonModule, ShowFunctions,
-    StartFunction, Statement, StopFunction, StreamingTableStatement,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
+    PythonModule, ShowFunctions, StartFunction, Statement, StopFunction, StreamingTableStatement,
 };
diff --git a/src/coordinator/plan/create_table_plan.rs b/src/coordinator/plan/create_table_plan.rs
index 450c8813..7ad82bb3 100644
--- a/src/coordinator/plan/create_table_plan.rs
+++ b/src/coordinator/plan/create_table_plan.rs
@@ -12,16 +12,39 @@
 
 use datafusion::logical_expr::LogicalPlan;
 
+use crate::sql::schema::SourceTable;
+
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 
-#[derive(Debug)]
+/// Payload for [`CreateTablePlan`]: either a DataFusion DDL plan or a connector `CREATE TABLE` (no `AS SELECT`).
+#[derive(Debug, Clone)]
+pub enum CreateTablePlanBody {
+    DataFusion(LogicalPlan), // produced by `CreateTablePlan::new`
+    ConnectorSource { // produced by `CreateTablePlan::connector_source`
+        source_table: SourceTable, // connector-backed table definition
+        if_not_exists: bool, // presumably mirrors the statement's IF NOT EXISTS clause; confirm at the call site
+    },
+}
+
+#[derive(Debug, Clone)]
 pub struct CreateTablePlan {
-    pub logical_plan: LogicalPlan,
+    pub body: CreateTablePlanBody, // which flavour of CREATE TABLE this plan carries
 }
 
 impl CreateTablePlan {
     pub fn new(logical_plan: LogicalPlan) -> Self {
-        Self { logical_plan }
+        Self {
+            body: CreateTablePlanBody::DataFusion(logical_plan), // `new` keeps its old signature and wraps the plan
+        }
+    }
+
+    pub fn connector_source(source_table: SourceTable, if_not_exists: bool) -> Self {
+        Self { // builds the connector-backed variant; both arguments are stored unchanged
+            body: CreateTablePlanBody::ConnectorSource {
+                source_table,
+                if_not_exists,
+            },
+        }
     }
 }
 
diff --git a/src/coordinator/plan/drop_table_plan.rs b/src/coordinator/plan/drop_table_plan.rs
new file mode 100644
index 00000000..7d80a7b7
--- /dev/null
+++ b/src/coordinator/plan/drop_table_plan.rs
@@ -0,0 +1,34 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+#[derive(Debug, Clone)]
+pub struct DropTablePlan { // plan node describing a table drop
+    pub table_name: String, // name of the table to drop
+    pub if_exists: bool, // presumably mirrors the statement's IF EXISTS clause; confirm at the call site
+}
+
+impl DropTablePlan {
+    pub fn new(table_name: String, if_exists: bool) -> Self { // plain constructor, no validation
+        Self {
+            table_name,
+            if_exists,
+        }
+    }
+}
+
+impl PlanNode for DropTablePlan {
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_drop_table_plan(self, context) // dispatch to the visitor's drop-table hook
+    }
+}
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 4a747fdf..aa8364ef 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -12,54 +12,45 @@
 
 use std::sync::Arc;
 
-use datafusion::common::{Result, plan_datafusion_err, plan_err};
+use datafusion::common::{plan_datafusion_err, plan_err, Result};
 use datafusion::execution::SessionStateBuilder;
-use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement};
+use datafusion::sql::sqlparser::ast::{
+    CreateTable as SqlCreateTable, Expr as SqlExpr, ObjectType, SqlOption, Statement as DFStatement,
+    TableConstraint,
+};
 use datafusion_common::TableReference;
 use datafusion_execution::config::SessionConfig;
-use datafusion_expr::{Expr, Extension, LogicalPlan, col};
+use datafusion_expr::{col, Extension, Expr, LogicalPlan};
 use sqlparser::ast::Statement;
 use tracing::debug;
 
 use crate::coordinator::analyze::analysis::Analysis;
 use crate::coordinator::plan::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, PlanNode,
-    ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan,
+    PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
 };
 use crate::coordinator::statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction,
-    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction,
-    StreamingTableStatement,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
+    ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext,
+    StatementVisitorResult, StopFunction, StreamingTableStatement,
 };
 use crate::coordinator::tool::ConnectorOptions;
-use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig};
-use crate::sql::logical_planner::optimizers::{ChainingOptimizer, produce_optimized_plan};
-use crate::sql::schema::Table;
-use crate::sql::schema::ConnectionType;
-use crate::sql::schema::source_table::SourceTable;
-use crate::sql::schema::ColumnDescriptor;
-use crate::sql::functions::{is_json_union, serialize_outgoing_json};
+use crate::sql::analysis::{
+    maybe_add_key_extension_to_sink, rewrite_sinks, StreamSchemaProvider,
+};
 use crate::sql::extensions::sink::StreamEgressNode;
-use crate::sql::logical_planner::planner;
-use crate::sql::analysis::{StreamSchemaProvider, maybe_add_key_extension_to_sink, rewrite_sinks};
+use crate::sql::functions::{is_json_union, serialize_outgoing_json};
+use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig};
+use crate::sql::logical_planner::optimizers::{produce_optimized_plan, ChainingOptimizer};
+use crate::sql::logical_planner::planner::PlanToGraphVisitor;
 use crate::sql::rewrite_plan;
+use crate::sql::schema::source_table::SourceTable;
+use crate::sql::schema::{ColumnDescriptor, ConnectionType, Table};
 
-const CONNECTOR: &str = "connector";
-const PARTITION_BY: &str = "partition_by";
-
-fn with_options_to_map(options: &[SqlOption]) -> std::collections::HashMap<String, String> {
-    options
-        .iter()
-        .filter_map(|opt| match opt {
-            SqlOption::KeyValue { key, value } => Some((
-                key.value.clone(),
-                value.to_string().trim_matches('\'').to_string(),
-            )),
-            _ => None,
-        })
-        .collect()
-}
+const OPT_CONNECTOR: &str = "connector";
+const OPT_PARTITION_BY: &str = "partition_by";
 
+#[derive(Clone)] // cloning the visitor clones the StreamSchemaProvider it holds
 pub struct LogicalPlanVisitor {
     schema_provider: StreamSchemaProvider,
 }
@@ -70,20 +61,26 @@ impl LogicalPlanVisitor {
     }
 
     pub fn visit(&self, analysis: &Analysis) -> Box<dyn PlanNode> {
-        let context = StatementVisitorContext::Empty;
         let stmt = analysis.statement();
+        let context = StatementVisitorContext::Empty; // the visitor is run with the empty context variant
 
-        let result = stmt.accept(self, &context);
-
-        match result {
+        match stmt.accept(self, &context) { // matched directly, no intermediate binding
             StatementVisitorResult::Plan(plan) => plan,
-            _ => panic!("LogicalPlanVisitor should return Plan"),
+            _ => panic!("Fatal: LogicalPlanVisitor must yield a PlanNode variant"), // any non-Plan result aborts
         }
     }
-    fn build_create_streaming_table_plan(
+
+    pub fn build_streaming_table( // associated fn: compile a streaming sink without an existing visitor
+        schema_provider: &StreamSchemaProvider,
+        stmt: &StreamingTableStatement,
+    ) -> Result<StreamingTable> {
+        Self::new(schema_provider.clone()).compile_streaming_sink(stmt) // provider is cloned into a temporary visitor
+    }
+
+    fn compile_streaming_sink(
         &self,
         stmt: &StreamingTableStatement,
-    ) -> Result<Box<dyn PlanNode>> {
+    ) -> Result<StreamingTable> {
         let DFStatement::CreateStreamingTable {
             name,
             with_options,
@@ -91,123 +88,233 @@ impl LogicalPlanVisitor {
             query,
         } = &stmt.statement
         else {
-            return plan_err!("Only CREATE STREAMING TABLE is supported in this context");
+            return plan_err!("Statement mismatch: Expected CREATE STREAMING TABLE AST node");
         };
 
-        let table_name = name.to_string();
-        debug!("Compiling Streaming Table Sink for: {}", table_name);
+        let target_name = name.to_string();
+        debug!(
+            "Initiating streaming sink compilation for identifier: {}",
+            target_name
+        );
 
-        let mut opts = ConnectorOptions::new(with_options, &None)?;
-        let connector = opts.pull_opt_str(CONNECTOR)?.ok_or_else(|| {
+        let mut connector_options = ConnectorOptions::new(with_options, &None)?;
+        let adapter_type = connector_options.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| {
             plan_datafusion_err!(
-                "Streaming Table '{}' must specify the '{}' option",
-                table_name,
-                CONNECTOR
+                "Validation Error: Streaming table '{}' requires the '{}' property",
+                target_name,
+                OPT_CONNECTOR
             )
         })?;
 
-        let partition_exprs = self.resolve_partition_expressions(&mut opts)?;
+        let routing_exprs = Self::extract_partitioning_keys(&mut connector_options)?;
 
-        let base_plan =
-            produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?;
-        let mut plan = rewrite_plan(base_plan, &self.schema_provider)?;
+        let mut logical_plan = rewrite_plan(
+            produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?,
+            &self.schema_provider,
+        )?;
 
-        if plan
+        if logical_plan
             .schema()
             .fields()
             .iter()
             .any(|f| is_json_union(f.data_type()))
         {
-            plan = serialize_outgoing_json(&self.schema_provider, Arc::new(plan));
+            logical_plan = serialize_outgoing_json(&self.schema_provider, Arc::new(logical_plan));
         }
 
-        let fields: Vec<ColumnDescriptor> = plan
+        let output_descriptors = logical_plan
             .schema()
             .fields()
             .iter()
             .map(|f| ColumnDescriptor::from((**f).clone()))
-            .collect();
+            .collect::<Vec<_>>();
 
-        let mut source_table = SourceTable::from_options(
-            &table_name,
-            &connector,
+        let mut source_definition = SourceTable::from_options(
+            &target_name,
+            &adapter_type,
             false,
-            fields,
+            output_descriptors,
             vec![],
             None,
-            &mut opts,
+            &mut connector_options,
             None,
             &self.schema_provider,
             Some(ConnectionType::Sink),
             comment.clone().unwrap_or_default(),
         )?;
-        source_table.partition_exprs = Arc::new(partition_exprs);
-
-        let sink_extension = StreamEgressNode::try_new(
-            TableReference::bare(table_name.clone()),
-            Table::ConnectorTable(source_table.clone()),
-            plan.schema().clone(),
-            plan,
+        source_definition.partition_exprs = Arc::new(routing_exprs);
+
+        let sink_schema = logical_plan.schema().clone();
+        let egress_node = StreamEgressNode::try_new(
+            TableReference::bare(target_name.clone()),
+            Table::ConnectorTable(source_definition.clone()),
+            sink_schema,
+            logical_plan,
         )?;
 
-        let plan_with_keys = maybe_add_key_extension_to_sink(LogicalPlan::Extension(Extension {
-            node: Arc::new(sink_extension),
-        }))?;
+        let mut plan_topology = rewrite_sinks(vec![maybe_add_key_extension_to_sink(
+            LogicalPlan::Extension(Extension {
+                node: Arc::new(egress_node),
+            }),
+        )?])?;
 
-        let final_extensions = rewrite_sinks(vec![plan_with_keys])?;
-        let final_plan = final_extensions.into_iter().next().unwrap();
+        let final_execution_plan = plan_topology.remove(0);
 
+        self.validate_graph_topology(&final_execution_plan)?;
 
+        Ok(StreamingTable {
+            name: target_name,
+            comment: comment.clone(),
+            source_table: source_definition,
+            logical_plan: final_execution_plan,
+        })
+    }
 
-        let mut config = SessionConfig::new();
-        config
-            .options_mut()
-            .optimizer
-            .enable_round_robin_repartition = false;
-        config.options_mut().optimizer.repartition_aggregations = false;
-        config.options_mut().optimizer.repartition_windows = false;
-        config.options_mut().optimizer.repartition_sorts = false;
-        config.options_mut().optimizer.repartition_joins = false;
-        config.options_mut().execution.target_partitions = 1;
+    fn validate_graph_topology(&self, logical_plan: &LogicalPlan) -> Result<()> {
+        let mut session_config = SessionConfig::new();
+        let opts = session_config.options_mut();
+        opts.optimizer.enable_round_robin_repartition = false;
+        opts.optimizer.repartition_aggregations = false;
+        opts.optimizer.repartition_windows = false;
+        opts.optimizer.repartition_sorts = false;
+        opts.optimizer.repartition_joins = false;
+        opts.execution.target_partitions = 1;
 
         let session_state = SessionStateBuilder::new()
-            .with_config(config)
+            .with_config(session_config)
             .with_default_features()
             .with_physical_optimizer_rules(vec![])
             .build();
 
-        let mut plan_to_graph_visitor =
-            planner::PlanToGraphVisitor::new(&self.schema_provider, &session_state);
+        let mut graph_compiler = PlanToGraphVisitor::new(&self.schema_provider, &session_state);
+        graph_compiler.add_plan(logical_plan.clone())?;
 
-        plan_to_graph_visitor.add_plan(final_plan.clone())?;
+        let mut executable_program =
+            LogicalProgram::new(graph_compiler.into_graph(), ProgramConfig::default());
+        executable_program.optimize(&ChainingOptimizer {});
 
-        let graph = plan_to_graph_visitor.into_graph();
+        Ok(())
+    }
 
-        let mut program = LogicalProgram::new(graph, ProgramConfig::default());
+    fn extract_partitioning_keys(
+        options: &mut ConnectorOptions,
+    ) -> Result<Option<Vec<Expr>>> {
+        // `partition_by` is optional; when present it is split on commas into column exprs.
+        let raw_cols = options.pull_opt_str(OPT_PARTITION_BY)?;
+        Ok(raw_cols.map(|list| {
+            list.split(',').map(|name| col(name.trim())).collect()
+        }))
+    }
 
-        program.optimize(&ChainingOptimizer {});
+    fn contains_connector_property(options: &[SqlOption]) -> bool {
+        // Only `key = value` entries are inspected; the key match ignores ASCII case.
+        let is_connector = |opt: &SqlOption| matches!(opt, SqlOption::KeyValue { key, .. }
+            if key.value.eq_ignore_ascii_case(OPT_CONNECTOR));
+        options.iter().any(is_connector)
+    }
 
+    fn parse_primary_keys(constraints: &[TableConstraint]) -> Result<Vec<String>> {
+        // Zero PRIMARY KEY clauses yields an empty key list; more than one is rejected.
+        let mut pk_clauses = constraints.iter().filter_map(|c| match c {
+            TableConstraint::PrimaryKey { columns, .. } => Some(columns),
+            _ => None,
+        });
+        let first = pk_clauses.next();
+        if first.is_some() && pk_clauses.next().is_some() {
+            return plan_err!(
+                "Constraint Violation: Multiple PRIMARY KEY constraints are forbidden"
+            );
+        }
+        Ok(first.into_iter().flat_map(|c| c.iter()).map(|id| id.value.clone()).collect())
+    }
 
-        Ok(Box::new(StreamingTable {
-            name: table_name,
-            comment: comment.clone(),
-            source_table,
-            logical_plan: final_plan,
-        }))
+    fn parse_watermark_strategy(
+        constraints: &[TableConstraint],
+    ) -> Result<Option<(String, Option<SqlExpr>)>> {
+        // Walk every WATERMARK FOR clause; zero or one is valid, more is rejected.
+        let mut clauses = constraints.iter().filter_map(|c| match c {
+            TableConstraint::Watermark {
+                column_name,
+                watermark_expr,
+            } => Some((column_name, watermark_expr)),
+            _ => None,
+        });
+        let first = clauses.next();
+        if first.is_some() && clauses.next().is_some() {
+            return plan_err!(
+                "Constraint Violation: Only a single WATERMARK FOR clause is permitted"
+            );
+        }
+        // Clone only the surviving clause into the owned result.
+        Ok(first.map(|(name, expr)| (name.value.clone(), expr.clone())))
     }
 
-    fn resolve_partition_expressions(
+    fn compile_connector_source_plan(
         &self,
-        opts: &mut ConnectorOptions,
-    ) -> Result<Option<Vec<Expr>>> {
-        opts.pull_opt_str(PARTITION_BY)?
-            .map(|cols| {
-                cols.split(',')
-                    .map(|c| col(c.trim()))
-                    .collect::<Vec<Expr>>()
-            })
-            .map(Ok)
-            .transpose()
+        stmt: &SqlCreateTable,
+    ) -> Result<CreateTablePlan> {
+        if stmt.query.is_some() {
+            return plan_err!("Syntax Error: CREATE TABLE ... AS SELECT combined with WITH ('connector'=...) is invalid. Use CREATE STREAMING TABLE instead.");
+        }
+        if stmt.or_replace {
+            return plan_err!(
+                "Syntax Error: OR REPLACE is not supported for external connector tables."
+            );
+        }
+        if stmt.temporary {
+            return plan_err!(
+                "Syntax Error: TEMPORARY is not supported for external connector tables."
+            );
+        }
+        if stmt.external {
+            return plan_err!("Syntax Error: EXTERNAL keyword is redundant and unsupported for connector configurations.");
+        }
+
+        let target_name = stmt.name.to_string();
+        let table_description = stmt
+            .comment
+            .clone()
+            .map(|c| c.to_string())
+            .unwrap_or_default();
+
+        let schema_compiler = datafusion::sql::planner::SqlToRel::new(&self.schema_provider);
+        let arrow_schema = schema_compiler.build_schema(stmt.columns.clone())?;
+
+        let schema_descriptors = arrow_schema
+            .fields()
+            .iter()
+            .map(|f| ColumnDescriptor::from((**f).clone()))
+            .collect::<Vec<_>>();
+
+        let mut connector_options = ConnectorOptions::new(&stmt.with_options, &None)?;
+        let adapter_type = connector_options.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| {
+            plan_datafusion_err!(
+                "Configuration Error: Missing required property '{}' in WITH clause",
+                OPT_CONNECTOR
+            )
+        })?;
+
+        let pk_constraints = Self::parse_primary_keys(&stmt.constraints)?;
+        let watermark_strategy = Self::parse_watermark_strategy(&stmt.constraints)?;
+
+        let source_definition = SourceTable::from_options(
+            &target_name,
+            &adapter_type,
+            false,
+            schema_descriptors,
+            pk_constraints,
+            watermark_strategy,
+            &mut connector_options,
+            None,
+            &self.schema_provider,
+            Some(ConnectionType::Source),
+            table_description,
+        )?;
+
+        Ok(CreateTablePlan::connector_source(
+            source_definition,
+            stmt.if_not_exists,
+        ))
     }
 }
 
@@ -215,23 +322,19 @@ impl StatementVisitor for LogicalPlanVisitor {
     fn visit_create_function(
         &self,
         stmt: &CreateFunction,
-        _context: &StatementVisitorContext,
+        _ctx: &StatementVisitorContext,
     ) -> StatementVisitorResult {
-        let function_source = stmt.get_function_source().clone();
-        let config_source = stmt.get_config_source().cloned();
-        let extra_props = stmt.get_extra_properties().clone();
-
         StatementVisitorResult::Plan(Box::new(CreateFunctionPlan::new(
-            function_source,
-            config_source,
-            extra_props,
+            stmt.get_function_source().clone(),
+            stmt.get_config_source().cloned(),
+            stmt.get_extra_properties().clone(),
         )))
     }
 
     fn visit_drop_function(
         &self,
         stmt: &DropFunction,
-        _context: &StatementVisitorContext,
+        _ctx: &StatementVisitorContext,
     ) -> StatementVisitorResult {
         StatementVisitorResult::Plan(Box::new(DropFunctionPlan::new(stmt.name.clone())))
     }
@@ -239,7 +342,7 @@ impl StatementVisitor for LogicalPlanVisitor {
     fn visit_start_function(
         &self,
         stmt: &StartFunction,
-        _context: &StatementVisitorContext,
+        _ctx: &StatementVisitorContext,
     ) -> StatementVisitorResult {
         StatementVisitorResult::Plan(Box::new(StartFunctionPlan::new(stmt.name.clone())))
     }
@@ -247,7 +350,7 @@ impl StatementVisitor for LogicalPlanVisitor {
     fn visit_stop_function(
         &self,
         stmt: &StopFunction,
-        _context: &StatementVisitorContext,
+        _ctx: &StatementVisitorContext,
     ) -> StatementVisitorResult {
         StatementVisitorResult::Plan(Box::new(StopFunctionPlan::new(stmt.name.clone())))
     }
@@ -255,7 +358,7 @@ impl StatementVisitor for LogicalPlanVisitor {
     fn visit_show_functions(
         &self,
         _stmt: &ShowFunctions,
-        _context: &StatementVisitorContext,
+        _ctx: &StatementVisitorContext,
     ) -> StatementVisitorResult {
         StatementVisitorResult::Plan(Box::new(ShowFunctionsPlan::new()))
     }
@@ -263,46 +366,83 @@ impl StatementVisitor for LogicalPlanVisitor {
     fn visit_create_python_function(
         &self,
         stmt: &CreatePythonFunction,
-        _context: &StatementVisitorContext,
+        _ctx: &StatementVisitorContext,
     ) -> StatementVisitorResult {
-        let class_name = stmt.get_class_name().to_string();
-        let modules = stmt.get_modules().to_vec();
-        let config_content = stmt.get_config_content().to_string();
-
         StatementVisitorResult::Plan(Box::new(CreatePythonFunctionPlan::new(
-            class_name,
-            modules,
-            config_content,
+            stmt.get_class_name().to_string(),
+            stmt.get_modules().to_vec(),
+            stmt.get_config_content().to_string(),
         )))
     }
 
     fn visit_create_table(
         &self,
         stmt: &CreateTable,
-        _context: &StatementVisitorContext,
+        _ctx: &StatementVisitorContext,
     ) -> StatementVisitorResult {
-        let sql_to_rel = datafusion::sql::planner::SqlToRel::new(&self.schema_provider);
-
-        match sql_to_rel.sql_statement_to_plan(stmt.statement.clone()) {
-            Ok(plan) => {
-                debug!("Create table plan:\n{}", plan.display_graphviz());
-                StatementVisitorResult::Plan(Box::new(CreateTablePlan::new(plan)))
+        if let Statement::CreateTable(ast_node) = &stmt.statement {
+            if ast_node.query.is_none()
+                && Self::contains_connector_property(&ast_node.with_options)
+            {
+                let execution_plan = self.compile_connector_source_plan(ast_node).unwrap_or_else(
+                    |err| {
+                        panic!("Fatal Compiler Error: Connector source resolution failed - {err:#}");
+                    },
+                );
+                return StatementVisitorResult::Plan(Box::new(execution_plan));
             }
-            Err(e) => {
-                panic!("Failed to convert CREATE TABLE to logical plan: {e}");
+        }
+
+        let schema_compiler = datafusion::sql::planner::SqlToRel::new(&self.schema_provider);
+        match schema_compiler.sql_statement_to_plan(stmt.statement.clone()) {
+            Ok(logical_plan) => {
+                debug!(
+                    "Successfully compiled logical DDL topology:\n{}",
+                    logical_plan.display_graphviz()
+                );
+                StatementVisitorResult::Plan(Box::new(CreateTablePlan::new(logical_plan)))
             }
+            Err(err) => panic!("Fatal Compiler Error: Logical plan translation failed - {err}"),
         }
     }
 
     fn visit_streaming_table_statement(
         &self,
         stmt: &StreamingTableStatement,
-        _context: &StatementVisitorContext,
+        _ctx: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        let execution_plan = self.compile_streaming_sink(stmt).unwrap_or_else(|err| {
+            panic!("Fatal Compiler Error: Streaming sink compilation aborted - {err}");
+        });
+        StatementVisitorResult::Plan(Box::new(execution_plan))
+    }
+
+    fn visit_drop_table_statement(
+        &self,
+        stmt: &DropTableStatement,
+        _ctx: &StatementVisitorContext,
     ) -> StatementVisitorResult {
-        match self.build_create_streaming_table_plan(stmt) {
-            Ok(plan) => StatementVisitorResult::Plan(plan),
-            Err(e) => panic!("Failed to build CreateStreamingTable plan: {e}"),
+        let DFStatement::Drop {
+            object_type,
+            if_exists,
+            names,
+            ..
+        } = &stmt.statement
+        else {
+            panic!("Fatal Compiler Error: AST mismatch on DropTableStatement");
+        };
+
+        if *object_type != ObjectType::Table {
+            panic!("Fatal Compiler Error: Drop target must be of type TABLE");
+        }
+        if names.len() != 1 {
+            panic!("Fatal Compiler Error: Bulk drop operations are not supported. Specify exactly one table.");
         }
+
+        StatementVisitorResult::Plan(Box::new(DropTablePlan::new(
+            names[0].to_string(),
+            *if_exists,
+        )))
     }
 }
 
@@ -316,8 +456,8 @@ mod create_streaming_table_tests {
     use datafusion::sql::sqlparser::parser::Parser;
 
     use crate::sql::common::TIMESTAMP_FIELD;
-    use crate::sql::rewrite_plan;
     use crate::sql::logical_planner::optimizers::produce_optimized_plan;
+    use crate::sql::rewrite_plan;
     use crate::sql::schema::StreamSchemaProvider;
 
     fn schema_provider_with_src() -> StreamSchemaProvider {
diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs
index d68320d8..2dbbab77 100644
--- a/src/coordinator/plan/mod.rs
+++ b/src/coordinator/plan/mod.rs
@@ -14,6 +14,7 @@ mod create_function_plan;
 mod create_python_function_plan;
 mod create_table_plan;
 mod drop_function_plan;
+mod drop_table_plan;
 mod logical_plan_visitor;
 mod lookup_table_plan;
 mod optimizer;
@@ -26,8 +27,9 @@ mod visitor;
 
 pub use create_function_plan::CreateFunctionPlan;
 pub use create_python_function_plan::CreatePythonFunctionPlan;
-pub use create_table_plan::CreateTablePlan;
+pub use create_table_plan::{CreateTablePlan, CreateTablePlanBody};
 pub use drop_function_plan::DropFunctionPlan;
+pub use drop_table_plan::DropTablePlan;
 pub use logical_plan_visitor::LogicalPlanVisitor;
 pub use lookup_table_plan::LookupTablePlan;
 pub use optimizer::LogicalPlanner;
diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs
index fc764b2b..e8efcf32 100644
--- a/src/coordinator/plan/visitor.rs
+++ b/src/coordinator/plan/visitor.rs
@@ -11,7 +11,7 @@
 // limitations under the License.
 
 use super::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan,
     LookupTablePlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
     StreamingTableConnectorPlan,
 };
@@ -109,4 +109,10 @@ pub trait PlanVisitor {
         plan: &StreamingTableConnectorPlan,
         context: &PlanVisitorContext,
     ) -> PlanVisitorResult;
+
+    fn visit_drop_table_plan(
+        &self,
+        plan: &DropTablePlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
 }
diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs
new file mode 100644
index 00000000..7b1d82dc
--- /dev/null
+++ b/src/coordinator/runtime_context.rs
@@ -0,0 +1,64 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Runtime resources for a single coordinator run: [`TaskManager`] and [`CatalogManager`].
+
+use std::sync::Arc;
+
+use anyhow::Result;
+
+use crate::runtime::taskexecutor::TaskManager;
+use crate::sql::schema::StreamSchemaProvider;
+use crate::storage::stream_catalog::CatalogManager;
+
+/// Dependencies shared by the analyze / plan / execute stages, bundled in one cloneable value
+/// as a counterpart to the global [`TaskManager`] and [`CatalogManager`] accessors.
+#[derive(Clone)]
+pub struct CoordinatorRuntimeContext {
+    pub task_manager: Arc<TaskManager>,
+    pub catalog_manager: Arc<CatalogManager>,
+    /// When set (e.g. unit tests), used for SQL planning instead of a catalog snapshot.
+    planning_schema_override: Option<StreamSchemaProvider>,
+}
+
+impl CoordinatorRuntimeContext {
+    /// Builds a context from the [`TaskManager::get`] and [`CatalogManager::global`] accessors.
+    pub fn try_from_globals() -> Result<Self> {
+        let task_manager = TaskManager::get()
+            .map_err(|e| anyhow::anyhow!("Failed to get TaskManager: {}", e))?;
+        let catalog_manager = CatalogManager::global()
+            .map_err(|e| anyhow::anyhow!("Failed to get CatalogManager: {}", e))?;
+        Ok(Self::new(task_manager, catalog_manager, None))
+    }
+
+    /// Assembles a context from explicitly supplied managers and an optional planning
+    /// schema override.
+    pub fn new(
+        task_manager: Arc<TaskManager>,
+        catalog_manager: Arc<CatalogManager>,
+        planning_schema_override: Option<StreamSchemaProvider>,
+    ) -> Self {
+        Self {
+            task_manager,
+            catalog_manager,
+            planning_schema_override,
+        }
+    }
+
+    /// Schema provider used for SQL planning: the override if set, else a catalog snapshot.
+    pub fn planning_schema_provider(&self) -> StreamSchemaProvider {
+        match &self.planning_schema_override {
+            Some(provider) => provider.clone(),
+            None => self.catalog_manager.acquire_planning_context(),
+        }
+    }
+}
diff --git a/src/coordinator/statement/create_table.rs b/src/coordinator/statement/create_table.rs
index 8aa16bf0..67a500d1 100644
--- a/src/coordinator/statement/create_table.rs
+++ b/src/coordinator/statement/create_table.rs
@@ -37,4 +37,8 @@ impl Statement for CreateTable {
     ) -> StatementVisitorResult {
         visitor.visit_create_table(self, context)
     }
+
+    fn as_create_table(&self) -> Option<&CreateTable> {
+        Some(self)
+    }
 }
diff --git a/src/coordinator/statement/drop_table.rs b/src/coordinator/statement/drop_table.rs
new file mode 100644
index 00000000..fa547dca
--- /dev/null
+++ b/src/coordinator/statement/drop_table.rs
@@ -0,0 +1,41 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use datafusion::sql::sqlparser::ast::Statement as DFStatement;
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// `DROP TABLE` / `DROP TABLE IF EXISTS` (and `DROP STREAMING TABLE`, normalized at parse time).
+#[derive(Debug, Clone)]
+pub struct DropTableStatement {
+    pub statement: DFStatement,
+}
+
+impl DropTableStatement {
+    pub fn new(statement: DFStatement) -> Self {
+        Self { statement }
+    }
+}
+
+impl Statement for DropTableStatement {
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_drop_table_statement(self, context)
+    }
+
+    fn as_drop_table_statement(&self) -> Option<&DropTableStatement> {
+        Some(self)
+    }
+}
diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs
index 15880284..7b39787d 100644
--- a/src/coordinator/statement/mod.rs
+++ b/src/coordinator/statement/mod.rs
@@ -14,6 +14,7 @@ mod create_function;
 mod create_python_function;
 mod create_table;
 mod drop_function;
+mod drop_table;
 mod show_functions;
 mod start_function;
 mod stop_function;
@@ -24,6 +25,7 @@ pub use create_function::{ConfigSource, CreateFunction, FunctionSource};
 pub use create_python_function::{CreatePythonFunction, PythonModule};
 pub use create_table::CreateTable;
 pub use drop_function::DropFunction;
+pub use drop_table::DropTableStatement;
 pub use show_functions::ShowFunctions;
 pub use start_function::StartFunction;
 pub use stop_function::StopFunction;
@@ -38,4 +40,16 @@ pub trait Statement: fmt::Debug + Send + Sync {
         visitor: &dyn StatementVisitor,
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
+
+    fn as_create_table(&self) -> Option<&CreateTable> {
+        None
+    }
+
+    fn as_drop_table_statement(&self) -> Option<&DropTableStatement> {
+        None
+    }
+
+    fn as_streaming_table_statement(&self) -> Option<&StreamingTableStatement> {
+        None
+    }
 }
diff --git a/src/coordinator/statement/streaming_table.rs b/src/coordinator/statement/streaming_table.rs
index 86ec1a85..bfef3503 100644
--- a/src/coordinator/statement/streaming_table.rs
+++ b/src/coordinator/statement/streaming_table.rs
@@ -37,4 +37,8 @@ impl Statement for StreamingTableStatement {
     ) -> StatementVisitorResult {
         visitor.visit_streaming_table_statement(self, context)
     }
+
+    fn as_streaming_table_statement(&self) -> Option<&StreamingTableStatement> {
+        Some(self)
+    }
 }
diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs
index 1867b603..641abf98 100644
--- a/src/coordinator/statement/visitor.rs
+++ b/src/coordinator/statement/visitor.rs
@@ -11,8 +11,8 @@
 // limitations under the License.
 
 use super::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, ShowFunctions, StartFunction,
-    StopFunction, StreamingTableStatement,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
+    ShowFunctions, StartFunction, StopFunction, StreamingTableStatement,
 };
 use crate::coordinator::plan::PlanNode;
 use crate::coordinator::statement::Statement;
@@ -100,4 +100,10 @@ pub trait StatementVisitor {
         stmt: &StreamingTableStatement,
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
+
+    fn visit_drop_table_statement(
+        &self,
+        stmt: &DropTableStatement,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
 }
diff --git a/src/main.rs b/src/main.rs
index 562b1526..1faf45f1 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -179,9 +179,7 @@ fn main() -> Result<()> {
     );
 
     // 2. Component Initialization
-    let registry = server::register_components();
-    registry
-        .initialize_all(&config)
+    server::bootstrap_system(&config)
         .context("Component initialization failed")?;
 
     // 3. Server Startup
diff --git a/src/server/handler.rs b/src/server/handler.rs
index 1920680c..8ed484d6 100644
--- a/src/server/handler.rs
+++ b/src/server/handler.rs
@@ -14,20 +14,19 @@ use std::sync::Arc;
 use std::time::Instant;
 
 use arrow_ipc::writer::StreamWriter;
-use log::{error, info};
 use tonic::{Request, Response as TonicResponse, Status};
+use tracing::{debug, error, info, warn};
 
 use protocol::service::FunctionInfo as ProtoFunctionInfo;
 use protocol::service::{
-    CreateFunctionRequest, CreatePythonFunctionRequest, DropFunctionRequest, Response,
-    ShowFunctionsRequest, ShowFunctionsResponse, SqlRequest, StartFunctionRequest, StatusCode,
-    StopFunctionRequest, function_stream_service_server::FunctionStreamService,
+    function_stream_service_server::FunctionStreamService, CreateFunctionRequest,
+    CreatePythonFunctionRequest, DropFunctionRequest, Response, ShowFunctionsRequest,
+    ShowFunctionsResponse, SqlRequest, StartFunctionRequest, StatusCode, StopFunctionRequest,
 };
 
-use crate::coordinator::Coordinator;
 use crate::coordinator::{
-    CreateFunction, CreatePythonFunction, DataSet, DropFunction, ShowFunctions,
-    ShowFunctionsResult, StartFunction, Statement, StopFunction,
+    Coordinator, CreateFunction, CreatePythonFunction, DataSet, DropFunction, PythonModule,
+    ShowFunctions, ShowFunctionsResult, StartFunction, Statement, StopFunction,
 };
 use crate::sql::parse::parse_sql;
 
@@ -40,23 +39,66 @@ impl FunctionStreamServiceImpl {
         Self { coordinator }
     }
 
-    fn build_response(status_code: StatusCode, message: String, data: Option<Vec<u8>>) -> Response {
+    /// Encodes the dataset's record batch as an Arrow IPC stream and returns the raw bytes.
+    fn serialize_dataset(ds: &dyn DataSet) -> Result<Vec<u8>, String> {
+        let record_batch = ds.to_record_batch();
+        let mut ipc_bytes = Vec::new();
+        {
+            // Scoped so the writer's mutable borrow of `ipc_bytes` ends before it is returned.
+            let mut ipc_writer = StreamWriter::try_new(&mut ipc_bytes, &record_batch.schema())
+                .map_err(|e| format!("IPC writer initialization failed: {e}"))?;
+            ipc_writer
+                .write(&record_batch)
+                .map_err(|e| format!("IPC write failed: {e}"))?;
+            ipc_writer
+                .finish()
+                .map_err(|e| format!("IPC finish failed: {e}"))?;
+        }
+        Ok(ipc_bytes)
+    }
+
+    fn build_success_response(
+        status: StatusCode,
+        message: String,
+        data: Option<Arc<dyn DataSet>>,
+    ) -> Response {
+        let payload = match data {
+            Some(ds) => match Self::serialize_dataset(ds.as_ref()) {
+                Ok(bytes) => Some(bytes),
+                Err(e) => {
+                    error!("Data serialization error: {}", e);
+                    return Self::build_error_response(
+                        StatusCode::InternalServerError,
+                        "Internal data serialization error".to_string(),
+                    );
+                }
+            },
+            None => None,
+        };
+
         Response {
-            status_code: status_code as i32,
+            status_code: status as i32,
             message,
-            data,
+            data: payload,
         }
     }
 
-    fn data_set_to_ipc_bytes(ds: &dyn DataSet) -> Option<Vec<u8>> {
-        let batch = ds.to_record_batch();
-        let mut buf = Vec::new();
-        {
-            let mut writer = StreamWriter::try_new(&mut buf, &batch.schema()).ok()?;
-            writer.write(&batch).ok()?;
-            writer.finish().ok()?;
+    fn build_error_response(status: StatusCode, message: String) -> Response {
+        Response {
+            status_code: status as i32,
+            message,
+            data: None,
+        }
+    }
+
+    async fn execute_statement(&self, stmt: &dyn Statement, success_status: StatusCode) -> Response {
+        let result = self.coordinator.execute_with_stream_catalog(stmt).await;
+
+        if result.success {
+            Self::build_success_response(success_status, result.message, result.data)
+        } else {
+            Self::build_error_response(StatusCode::InternalServerError, result.message)
         }
-        Some(buf)
     }
 }
 
@@ -66,236 +108,133 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
         &self,
         request: Request<SqlRequest>,
     ) -> Result<TonicResponse<Response>, Status> {
-        let start_time = Instant::now();
+        let timer = Instant::now();
         let req = request.into_inner();
 
-        let parse_start = Instant::now();
-        let statements = match parse_sql(&req.sql) {
-            Ok(stmts) => {
-                log::debug!(
-                    "SQL parsed {} statement(s) in {}ms",
-                    stmts.len(),
-                    parse_start.elapsed().as_millis()
-                );
-                stmts
-            }
-            Err(e) => {
-                return Ok(TonicResponse::new(Self::build_response(
-                    StatusCode::BadRequest,
-                    format!("Parse error: {}", e),
-                    None,
+        let statements = parse_sql(&req.sql).map_err(|e| {
+            warn!("SQL parse rejection: {}", e);
+            Status::invalid_argument("Provided SQL syntax is invalid")
+        })?;
+
+        if statements.is_empty() {
+            return Ok(TonicResponse::new(Self::build_success_response(
+                StatusCode::Ok,
+                "No statements executed".to_string(),
+                None,
+            )));
+        }
+
+        let mut final_response = None;
+
+        for stmt in statements {
+            let result = self
+                .coordinator
+                .execute_with_stream_catalog(stmt.as_ref())
+                .await;
+
+            if !result.success {
+                error!("SQL execution aborted: {}", result.message);
+                return Ok(TonicResponse::new(Self::build_error_response(
+                    StatusCode::InternalServerError,
+                    result.message,
                 )));
             }
-        };
 
-        let exec_start = Instant::now();
-        let mut last_result = self.coordinator.execute(statements[0].as_ref());
-        for stmt in &statements[1..] {
-            if !last_result.success {
-                break;
-            }
-            last_result = self.coordinator.execute(stmt.as_ref());
+            final_response = Some(result);
         }
-        let result = last_result;
-        log::debug!(
-            "Coordinator execution finished in {}ms",
-            exec_start.elapsed().as_millis()
-        );
 
-        let status_code = if result.success {
-            StatusCode::Ok
-        } else {
-            error!("Execution failed: {}", result.message);
-            StatusCode::InternalServerError
-        };
-
-        log::debug!(
-            "Total SQL request cost: {}ms",
-            start_time.elapsed().as_millis()
-        );
+        let result = final_response.unwrap();
+        let response = Self::build_success_response(StatusCode::Ok, result.message, result.data);
 
-        Ok(TonicResponse::new(Self::build_response(
-            status_code,
-            result.message,
-            result
-                .data
-                .as_ref()
-                .and_then(|ds| Self::data_set_to_ipc_bytes(ds.as_ref())),
-        )))
+        debug!("execute_sql completed in {}ms", timer.elapsed().as_millis());
+        Ok(TonicResponse::new(response))
     }
 
     async fn create_function(
         &self,
         request: Request<CreateFunctionRequest>,
     ) -> Result<TonicResponse<Response>, Status> {
-        let start_time = Instant::now();
+        let timer = Instant::now();
         let req = request.into_inner();
-        info!(
-            "Received CreateFunction request. Config size: {}, Function size: {}",
-            req.config_bytes.len(),
-            req.function_bytes.len()
-        );
-
-        let config_bytes = if !req.config_bytes.is_empty() {
-            Some(req.config_bytes)
-        } else {
-            None
-        };
 
+        let config_bytes = (!req.config_bytes.is_empty()).then_some(req.config_bytes);
         let stmt = CreateFunction::from_bytes(req.function_bytes, config_bytes);
 
-        let exec_start = Instant::now();
-        let result = self.coordinator.execute(&stmt as &dyn Statement);
-        info!(
-            "Coordinator execution finished in {}ms",
-            exec_start.elapsed().as_millis()
-        );
-
-        let status_code = if result.success {
-            StatusCode::Created
-        } else {
-            error!("CreateFunction failed: {}", result.message);
-            StatusCode::InternalServerError
-        };
-
-        info!(
-            "Total CreateFunction request cost: {}ms",
-            start_time.elapsed().as_millis()
-        );
+        let response = self.execute_statement(&stmt, StatusCode::Created).await;
 
-        Ok(TonicResponse::new(Self::build_response(
-            status_code,
-            result.message,
-            result
-                .data
-                .as_ref()
-                .and_then(|ds| Self::data_set_to_ipc_bytes(ds.as_ref())),
-        )))
+        info!("create_function completed in {}ms", timer.elapsed().as_millis());
+        Ok(TonicResponse::new(response))
     }
 
     async fn create_python_function(
         &self,
         request: Request<CreatePythonFunctionRequest>,
     ) -> Result<TonicResponse<Response>, Status> {
-        let start_time = Instant::now();
+        let timer = Instant::now();
         let req = request.into_inner();
-        info!(
-            "Received CreatePythonFunction request. Class name: {}, Modules: {}",
-            req.class_name,
-            req.modules.len()
-        );
 
-        // Convert proto modules to PythonModule
-        let modules: Vec<crate::coordinator::PythonModule> = req
+        if req.modules.is_empty() {
+            return Ok(TonicResponse::new(Self::build_error_response(
+                StatusCode::BadRequest,
+                "Python function creation requires at least one module".to_string(),
+            )));
+        }
+
+        let modules: Vec<PythonModule> = req
             .modules
             .into_iter()
-            .map(|m| crate::coordinator::PythonModule {
+            .map(|m| PythonModule {
                 name: m.module_name,
                 bytes: m.module_bytes,
             })
             .collect();
 
-        if modules.is_empty() {
-            return Ok(TonicResponse::new(Self::build_response(
-                StatusCode::BadRequest,
-                "At least one module is required".to_string(),
-                None,
-            )));
-        }
-
         let stmt = CreatePythonFunction::new(req.class_name, modules, req.config_content);
+        let response = self.execute_statement(&stmt, StatusCode::Created).await;
 
-        let exec_start = Instant::now();
-        let result = self.coordinator.execute(&stmt as &dyn Statement);
         info!(
-            "Coordinator execution finished in {}ms",
-            exec_start.elapsed().as_millis()
+            "create_python_function completed in {}ms",
+            timer.elapsed().as_millis()
         );
-
-        let status_code = if result.success {
-            StatusCode::Created
-        } else {
-            error!("CreatePythonFunction failed: {}", result.message);
-            StatusCode::InternalServerError
-        };
-
-        info!(
-            "Total CreatePythonFunction request cost: {}ms",
-            start_time.elapsed().as_millis()
-        );
-
-        Ok(TonicResponse::new(Self::build_response(
-            status_code,
-            result.message,
-            result
-                .data
-                .as_ref()
-                .and_then(|ds| Self::data_set_to_ipc_bytes(ds.as_ref())),
-        )))
+        Ok(TonicResponse::new(response))
     }
 
     async fn drop_function(
         &self,
         request: Request<DropFunctionRequest>,
     ) -> Result<TonicResponse<Response>, Status> {
-        let start_time = Instant::now();
+        let timer = Instant::now();
         let req = request.into_inner();
-        info!(
-            "Received DropFunction request: function_name={}",
-            req.function_name
-        );
 
         let stmt = DropFunction::new(req.function_name);
-        let exec_start = Instant::now();
-        let result = self.coordinator.execute(&stmt as &dyn Statement);
-        info!(
-            "Coordinator execution finished in {}ms",
-            exec_start.elapsed().as_millis()
-        );
+        let response = self.execute_statement(&stmt, StatusCode::Ok).await;
 
-        let status_code = if result.success {
-            StatusCode::Ok
-        } else {
-            error!("DropFunction failed: {}", result.message);
-            StatusCode::InternalServerError
-        };
-
-        info!(
-            "Total DropFunction request cost: {}ms",
-            start_time.elapsed().as_millis()
-        );
-
-        Ok(TonicResponse::new(Self::build_response(
-            status_code,
-            result.message,
-            None,
-        )))
+        info!("drop_function completed in {}ms", timer.elapsed().as_millis());
+        Ok(TonicResponse::new(response))
     }
 
     async fn show_functions(
         &self,
-        request: Request<ShowFunctionsRequest>,
+        _request: Request<ShowFunctionsRequest>,
     ) -> Result<TonicResponse<ShowFunctionsResponse>, Status> {
-        let start_time = Instant::now();
-        let _req = request.into_inner();
-        info!("Received ShowFunctions request");
-
+        let timer = Instant::now();
         let stmt = ShowFunctions::new();
-        let exec_start = Instant::now();
-        let result = self.coordinator.execute(&stmt as &dyn Statement);
-        info!(
-            "Coordinator execution finished in {}ms",
-            exec_start.elapsed().as_millis()
-        );
 
-        let (status_code, message) = if result.success {
-            (StatusCode::Ok as i32, result.message)
-        } else {
-            error!("ShowFunctions failed: {}", result.message);
-            (StatusCode::InternalServerError as i32, result.message)
-        };
+        let result = self
+            .coordinator
+            .execute_with_stream_catalog(&stmt)
+            .await;
+
+        if !result.success {
+            error!("show_functions execution failed: {}", result.message);
+            return Ok(TonicResponse::new(ShowFunctionsResponse {
+                status_code: StatusCode::InternalServerError as i32,
+                message: "Failed to retrieve function definitions".to_string(),
+                functions: vec![],
+            }));
+        }
 
-        let functions: Vec<ProtoFunctionInfo> = result
+        let functions = result
             .data
             .as_ref()
             .and_then(|arc_ds| {
@@ -313,15 +252,10 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
             })
             .unwrap_or_default();
 
-        info!(
-            "Total ShowFunctions request cost: {}ms, count={}",
-            start_time.elapsed().as_millis(),
-            functions.len()
-        );
-
+        info!("show_functions completed in {}ms", timer.elapsed().as_millis());
         Ok(TonicResponse::new(ShowFunctionsResponse {
-            status_code,
-            message,
+            status_code: StatusCode::Ok as i32,
+            message: result.message,
             functions,
         }))
     }
@@ -330,76 +264,28 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
         &self,
         request: Request<StartFunctionRequest>,
     ) -> Result<TonicResponse<Response>, Status> {
-        let start_time = Instant::now();
+        let timer = Instant::now();
         let req = request.into_inner();
-        info!(
-            "Received StartFunction request: function_name={}",
-            req.function_name
-        );
 
         let stmt = StartFunction::new(req.function_name);
-        let exec_start = Instant::now();
-        let result = self.coordinator.execute(&stmt as &dyn Statement);
-        info!(
-            "Coordinator execution finished in {}ms",
-            exec_start.elapsed().as_millis()
-        );
-
-        let status_code = if result.success {
-            StatusCode::Ok
-        } else {
-            error!("StartFunction failed: {}", result.message);
-            StatusCode::InternalServerError
-        };
+        let response = self.execute_statement(&stmt, StatusCode::Ok).await;
 
-        info!(
-            "Total StartFunction request cost: {}ms",
-            start_time.elapsed().as_millis()
-        );
-
-        Ok(TonicResponse::new(Self::build_response(
-            status_code,
-            result.message,
-            None,
-        )))
+        info!("start_function completed in {}ms", timer.elapsed().as_millis());
+        Ok(TonicResponse::new(response))
     }
 
     async fn stop_function(
         &self,
         request: Request<StopFunctionRequest>,
     ) -> Result<TonicResponse<Response>, Status> {
-        let start_time = Instant::now();
+        let timer = Instant::now();
         let req = request.into_inner();
-        info!(
-            "Received StopFunction request: function_name={}",
-            req.function_name
-        );
 
         let stmt = StopFunction::new(req.function_name);
-        let exec_start = Instant::now();
-        let result = self.coordinator.execute(&stmt as &dyn Statement);
-        info!(
-            "Coordinator execution finished in {}ms",
-            exec_start.elapsed().as_millis()
-        );
-
-        let status_code = if result.success {
-            StatusCode::Ok
-        } else {
-            error!("StopFunction failed: {}", result.message);
-            StatusCode::InternalServerError
-        };
-
-        info!(
-            "Total StopFunction request cost: {}ms",
-            start_time.elapsed().as_millis()
-        );
+        let response = self.execute_statement(&stmt, StatusCode::Ok).await;
 
-        Ok(TonicResponse::new(Self::build_response(
-            status_code,
-            result.message,
-            None,
-        )))
+        info!("stop_function completed in {}ms", timer.elapsed().as_millis());
+        Ok(TonicResponse::new(response))
     }
 }
 
diff --git a/src/server/initializer.rs b/src/server/initializer.rs
index ccb02788..46eca375 100644
--- a/src/server/initializer.rs
+++ b/src/server/initializer.rs
@@ -10,15 +10,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use crate::config::GlobalConfig;
+use std::time::Instant;
+
 use anyhow::{Context, Result};
+use tracing::{debug, info, warn};
+
+use crate::config::GlobalConfig;
 
-type InitializerFn = fn(&GlobalConfig) -> Result<()>;
+pub type InitializerFn = fn(&GlobalConfig) -> Result<()>;
 
 #[derive(Clone)]
-struct Component {
-    name: &'static str,
-    initializer: InitializerFn,
+pub struct Component {
+    pub name: &'static str,
+    pub initializer: InitializerFn,
+}
+
+pub struct ComponentRegistry {
+    components: Vec<Component>,
 }
 
 #[derive(Default)]
@@ -27,25 +35,17 @@ pub struct ComponentRegistryBuilder {
 }
 
 impl ComponentRegistryBuilder {
-    #[inline]
     pub fn new() -> Self {
-        Self::with_capacity(8)
-    }
-
-    #[inline]
-    pub fn with_capacity(capacity: usize) -> Self {
         Self {
-            components: Vec::with_capacity(capacity),
+            components: Vec::with_capacity(8),
         }
     }
 
-    #[inline]
     pub fn register(mut self, name: &'static str, initializer: InitializerFn) -> Self {
         self.components.push(Component { name, initializer });
         self
     }
 
-    #[inline]
     pub fn build(self) -> ComponentRegistry {
         ComponentRegistry {
             components: self.components,
@@ -53,57 +53,69 @@ impl ComponentRegistryBuilder {
     }
 }
 
-pub struct ComponentRegistry {
-    components: Vec<Component>,
-}
-
 impl ComponentRegistry {
     pub fn initialize_all(&self, config: &GlobalConfig) -> Result<()> {
         if self.components.is_empty() {
-            log::warn!("No components registered for initialization");
+            warn!("Component registry is empty; no components to initialize");
             return Ok(());
         }
 
-        log::info!("Initializing {} components...", self.components.len());
+        let total = self.components.len();
+        info!(total_components = total, "Commencing system initialization sequence");
+
+        for (index, component) in self.components.iter().enumerate() {
+            let start_time = Instant::now();
 
-        for (idx, component) in self.components.iter().enumerate() {
-            let start = std::time::Instant::now();
-            log::debug!(
-                "[{}/{}] Initializing component: {}",
-                idx + 1,
-                self.components.len(),
-                component.name
+            debug!(
+                component = component.name,
+                step = format!("{}/{}", index + 1, total),
+                "Initializing component"
             );
 
-            (component.initializer)(config)
-                .with_context(|| format!("Component '{}' initialization failed", component.name))?;
+            (component.initializer)(config).with_context(|| {
+                format!("Fatal error initializing component: {}", component.name)
+            })?;
 
-            let elapsed = start.elapsed();
-            log::debug!(
-                "[{}/{}] Component '{}' initialized successfully in {:?}",
-                idx + 1,
-                self.components.len(),
-                component.name,
-                elapsed
+            debug!(
+                component = component.name,
+                elapsed_ms = start_time.elapsed().as_millis(),
+                "Component initialized successfully"
             );
         }
 
-        log::info!(
-            "All {} components initialized successfully",
-            self.components.len()
-        );
+        info!("System initialization sequence completed successfully");
         Ok(())
     }
+}
 
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.components.len()
-    }
+pub fn build_core_registry() -> ComponentRegistry {
+    let builder = {
+        let b = ComponentRegistryBuilder::new()
+            .register("WasmCache", initialize_wasm_cache)
+            .register("TaskManager", initialize_task_manager);
+        #[cfg(feature = "python")]
+        let b = b.register("PythonService", initialize_python_service);
+        b
+    };
 
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.components.is_empty()
-    }
+    builder
+        .register(
+            "StreamCatalog",
+            crate::storage::stream_catalog::initialize_stream_catalog,
+        )
+        .register("Coordinator", initialize_coordinator)
+        .build()
+}
+
+/// Initializes every core component in registration order, then restores the stream catalog.
+///
+/// Returns the first initializer error; the catalog restore only runs after all succeed.
+pub fn bootstrap_system(config: &GlobalConfig) -> Result<()> {
+    build_core_registry().initialize_all(config)?;
+    crate::storage::stream_catalog::restore_global_catalog_from_store();
+
+    info!("System bootstrap finished. Node is ready to accept traffic.");
+    Ok(())
 }
 
 fn initialize_wasm_cache(config: &GlobalConfig) -> Result<()> {
@@ -114,18 +126,20 @@ fn initialize_wasm_cache(config: &GlobalConfig) -> Result<()> {
             max_size: config.wasm.max_cache_size,
         },
     );
-    log::info!(
-        "WASM cache configuration: enabled={}, dir={}, max_size={} bytes",
-        config.wasm.enable_cache,
-        config.wasm.cache_dir,
-        config.wasm.max_cache_size
+
+    debug!(
+        enabled = config.wasm.enable_cache,
+        dir = %config.wasm.cache_dir,
+        max_size = config.wasm.max_cache_size,
+        "WASM cache configured"
     );
+
     Ok(())
 }
 
 fn initialize_task_manager(config: &GlobalConfig) -> Result<()> {
     crate::runtime::taskexecutor::TaskManager::init(config)
-        .context("TaskManager initialization failed")?;
+        .context("TaskManager service failed to start")?;
     Ok(())
 }
 
@@ -138,22 +152,10 @@ fn initialize_python_service(config: &GlobalConfig) -> Result<()> {
 
 fn initialize_coordinator(_config: &GlobalConfig) -> Result<()> {
     crate::runtime::taskexecutor::TaskManager::get()
-        .context("Coordinator requires TaskManager to be initialized first")?;
-    log::info!("Coordinator verified and ready");
-    Ok(())
-}
+        .context("Dependency violation: Coordinator requires TaskManager")?;
 
-pub fn register_components() -> ComponentRegistry {
-    let builder = {
-        let b = ComponentRegistryBuilder::new()
-            .register("WasmCache", initialize_wasm_cache)
-            .register("TaskManager", initialize_task_manager);
-        #[cfg(feature = "python")]
-        let b = b.register("PythonService", initialize_python_service);
-        b
-    };
+    crate::storage::stream_catalog::CatalogManager::global()
+        .context("Dependency violation: Coordinator requires StreamCatalog")?;
 
-    builder
-        .register("Coordinator", initialize_coordinator)
-        .build()
+    Ok(())
 }
diff --git a/src/server/mod.rs b/src/server/mod.rs
index 03254af3..7795f29b 100644
--- a/src/server/mod.rs
+++ b/src/server/mod.rs
@@ -17,5 +17,5 @@ mod initializer;
 mod service;
 
 pub use handler::FunctionStreamServiceImpl;
-pub use initializer::register_components;
+pub use initializer::{bootstrap_system, build_core_registry};
 pub use service::start_server_with_shutdown;
diff --git a/src/sql/parse.rs b/src/sql/parse.rs
index 78c8bac0..1feff64a 100644
--- a/src/sql/parse.rs
+++ b/src/sql/parse.rs
@@ -14,8 +14,11 @@
 //!
 //! **Data-definition / pipeline shape (this entry point)**  
 //! Only these table-related forms are supported:
-//! - **`CREATE TABLE ...`** (including `CREATE TABLE ... AS SELECT` where the planner accepts it)
+//! - **`CREATE TABLE ... (cols [, WATERMARK FOR ...]) WITH ('connector' = '...', 'format' = '...', ...)`**  
+//!   connector-backed **source** DDL (no `AS SELECT`; `connector` in `WITH` selects this path)
+//! - **`CREATE TABLE ...`** other forms (including `CREATE TABLE ... AS SELECT` where DataFusion accepts it)
 //! - **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (streaming sink DDL)
+//! - **`DROP TABLE`** / **`DROP TABLE IF EXISTS`** / **`DROP STREAMING TABLE`** (alias for `DROP TABLE` on the stream catalog)
 //!
 //! **`INSERT` is not supported** here — use `CREATE TABLE ... AS SELECT` or
 //! `CREATE STREAMING TABLE ... AS SELECT` to define the query shape instead.
@@ -26,15 +29,30 @@ use std::collections::HashMap;
 
 use datafusion::common::{Result, plan_err};
 use datafusion::error::DataFusionError;
-use datafusion::sql::sqlparser::ast::{SqlOption, Statement as DFStatement};
+use datafusion::sql::sqlparser::ast::{ObjectType, SqlOption, Statement as DFStatement};
 use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
 use datafusion::sql::sqlparser::parser::Parser;
 
 use crate::coordinator::{
-    CreateFunction, CreateTable, DropFunction, ShowFunctions, StartFunction,
+    CreateFunction, CreateTable, DropFunction, DropTableStatement, ShowFunctions, StartFunction,
     Statement as CoordinatorStatement, StopFunction, StreamingTableStatement,
 };
 
+/// `DROP STREAMING TABLE t` is accepted as sugar for `DROP TABLE t` against the same catalog.
+/// Everything after the three keywords is copied verbatim, so `--` comments keep their newline.
+fn rewrite_drop_streaming_table(sql: &str) -> String {
+    let mut rest = sql;
+    for keyword in ["drop", "streaming", "table"] {
+        let text = rest.trim_start();
+        let (word, tail) = text.split_at(text.find(char::is_whitespace).unwrap_or(text.len()));
+        if !word.eq_ignore_ascii_case(keyword) || tail.trim_start().is_empty() {
+            return sql.to_string();
+        }
+        rest = tail.trim_start();
+    }
+    format!("DROP TABLE {rest}")
+}
+
 pub fn parse_sql(query: &str) -> Result<Vec<Box<dyn CoordinatorStatement>>> {
     let trimmed = query.trim();
     if trimmed.is_empty() {
@@ -42,7 +60,8 @@ pub fn parse_sql(query: &str) -> Result<Vec<Box<dyn CoordinatorStatement>>> {
     }
 
     let dialect = FunctionStreamDialect {};
-    let statements = Parser::parse_sql(&dialect, trimmed)
+    let to_parse = rewrite_drop_streaming_table(trimmed);
+    let statements = Parser::parse_sql(&dialect, &to_parse)
         .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?;
 
     if statements.is_empty() {
@@ -74,6 +93,25 @@ fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>
         s @ DFStatement::CreateStreamingTable { .. } => {
             Ok(Box::new(StreamingTableStatement::new(s)))
         }
+        stmt @ DFStatement::Drop { .. } => {
+            {
+                let DFStatement::Drop {
+                    object_type,
+                    names,
+                    ..
+                } = &stmt
+                else {
+                    unreachable!()
+                };
+                if *object_type != ObjectType::Table {
+                    return plan_err!("Only DROP TABLE is supported in this SQL frontend");
+                }
+                if names.len() != 1 {
+                    return plan_err!("DROP TABLE supports exactly one table name per statement");
+                }
+            }
+            Ok(Box::new(DropTableStatement::new(stmt)))
+        }
         DFStatement::Insert { .. } => plan_err!(
             "INSERT is not supported; only CREATE TABLE and CREATE STREAMING TABLE (with AS SELECT) \
              are supported for defining table/query pipelines in this SQL frontend"
@@ -158,6 +196,34 @@ mod tests {
         assert!(is_type(stmt.as_ref(), "CreateTable"));
     }
 
+    #[test]
+    fn test_parse_create_table_connector_source_ddl() {
+        let sql = concat!(
+            "CREATE TABLE kafka_src (id BIGINT, ts TIMESTAMP NOT NULL, WATERMARK FOR ts) ",
+            "WITH ('connector' = 'kafka', 'format' = 'json', 'topic' = 'events')",
+        );
+        let stmt = first_stmt(sql);
+        assert!(is_type(stmt.as_ref(), "CreateTable"));
+    }
+
+    #[test]
+    fn test_parse_drop_table() {
+        let stmt = first_stmt("DROP TABLE foo");
+        assert!(is_type(stmt.as_ref(), "DropTableStatement"));
+    }
+
+    #[test]
+    fn test_parse_drop_table_if_exists() {
+        let stmt = first_stmt("DROP TABLE IF EXISTS foo");
+        assert!(is_type(stmt.as_ref(), "DropTableStatement"));
+    }
+
+    #[test]
+    fn test_parse_drop_streaming_table_rewritten() {
+        let stmt = first_stmt("DROP STREAMING TABLE my_sink");
+        assert!(is_type(stmt.as_ref(), "DropTableStatement"));
+    }
+
     /// `CREATE STREAMING TABLE` is the sink DDL supported by FunctionStream (not `CREATE STREAM TABLE`).
     #[test]
     fn test_parse_create_streaming_table() {
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index a4898619..823425d2 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -11,4 +11,5 @@
 // limitations under the License.
 
 pub mod state_backend;
+pub mod stream_catalog;
 pub mod task;
diff --git a/src/storage/stream_catalog/codec.rs b/src/storage/stream_catalog/codec.rs
new file mode 100644
index 00000000..dacaebf8
--- /dev/null
+++ b/src/storage/stream_catalog/codec.rs
@@ -0,0 +1,57 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Arrow Schema IPC and DataFusion logical plan serialization.
+
+use std::io::Cursor;
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::Schema;
+use datafusion::arrow::ipc::reader::StreamReader;
+use datafusion::arrow::ipc::writer::StreamWriter;
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::common::{DataFusionError, Result};
+use datafusion::execution::context::SessionContext;
+use datafusion::logical_expr::LogicalPlan;
+
+/// Stateless helpers that (de)serialize Arrow schemas and DataFusion logical plans.
+pub struct CatalogCodec;
+
+impl CatalogCodec {
+    /// Writes `schema` followed by one zero-row batch as an Arrow IPC stream; returns the bytes.
+    pub fn encode_schema(schema: &Arc<Schema>) -> Result<Vec<u8>> {
+        let mut bytes = Vec::new();
+        let mut ipc = StreamWriter::try_new(&mut bytes, schema.as_ref())
+            .map_err(|err| DataFusionError::External(Box::new(err)))?;
+        ipc.write(&RecordBatch::new_empty(Arc::clone(schema)))
+            .map_err(|err| DataFusionError::External(Box::new(err)))?;
+        ipc.finish().map_err(|err| DataFusionError::External(Box::new(err)))?;
+        Ok(bytes)
+    }
+
+    /// Opens `bytes` as an Arrow IPC stream and returns the schema that stream declares.
+    pub fn decode_schema(bytes: &[u8]) -> Result<Arc<Schema>> {
+        let ipc = StreamReader::try_new(Cursor::new(bytes), None)
+            .map_err(|err| DataFusionError::External(Box::new(err)))?;
+        Ok(ipc.schema())
+    }
+
+    /// Encodes `plan` via `datafusion_proto`, copying the encoder output into a `Vec<u8>`.
+    pub fn encode_logical_plan(plan: &LogicalPlan) -> Result<Vec<u8>> {
+        Ok(datafusion_proto::bytes::logical_plan_to_bytes(plan)?.to_vec())
+    }
+
+    /// Decodes a plan from `datafusion_proto` bytes; `ctx` is handed to the decoder unchanged.
+    pub fn decode_logical_plan(bytes: &[u8], ctx: &SessionContext) -> Result<LogicalPlan> {
+        datafusion_proto::bytes::logical_plan_from_bytes(bytes, ctx)
+    }
+}
diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs
new file mode 100644
index 00000000..7e61b20e
--- /dev/null
+++ b/src/storage/stream_catalog/manager.rs
@@ -0,0 +1,333 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::sync::{Arc, OnceLock};
+
+use anyhow::{anyhow, bail, Context};
+use datafusion::common::{internal_err, plan_err, Result as DFResult};
+use datafusion::execution::context::SessionContext;
+use parking_lot::RwLock;
+use prost::Message;
+use protocol::storage::{self as pb, table_definition};
+use tracing::warn;
+use unicase::UniCase;
+
+use crate::sql::schema::{ObjectName, StreamPlanningContext, StreamTable};
+
+use super::codec::CatalogCodec;
+use super::meta_store::MetaStore;
+
+const CATALOG_KEY_PREFIX: &str = "catalog:stream_table:";
+
+#[derive(Clone, Default, Debug)]
+pub struct StreamTableCatalogCache {
+    pub streams: HashMap<ObjectName, Arc<StreamTable>>, // tables keyed by `UniCase` name
+}
+
+pub struct CatalogManager {
+    store: Arc<dyn MetaStore>, // persistent backend; `add_table` writes here first
+    cache: RwLock<StreamTableCatalogCache>, // in-memory tables copied into planning contexts
+    session_ctx: Arc<SessionContext>, // handed to the logical-plan decoder
+}
+
+static GLOBAL_CATALOG: OnceLock<Arc<CatalogManager>> = OnceLock::new();
+
+impl CatalogManager {
+    /// Creates a manager over `store`; the cache starts empty until `restore_from_store`.
+    pub fn new(store: Arc<dyn MetaStore>, session_ctx: Arc<SessionContext>) -> Self {
+        let cache = RwLock::new(StreamTableCatalogCache::default());
+        Self {
+            store,
+            cache,
+            session_ctx,
+        }
+    }
+
+    /// Installs the process-wide manager over a fresh in-memory store and session context.
+    pub fn init_global_in_memory() -> anyhow::Result<()> {
+        let store: Arc<dyn MetaStore> = Arc::new(super::InMemoryMetaStore::new());
+        Self::init_global(store, Arc::new(SessionContext::new()))
+    }
+
+    /// Installs the process-wide manager; fails when one has already been installed.
+    pub fn init_global(
+        store: Arc<dyn MetaStore>,
+        session_ctx: Arc<SessionContext>,
+    ) -> anyhow::Result<()> {
+        if GLOBAL_CATALOG.get().is_some() {
+            bail!("CatalogManager already initialized");
+        }
+        GLOBAL_CATALOG
+            .set(Arc::new(CatalogManager::new(store, session_ctx)))
+            .map_err(|_| anyhow!("CatalogManager global install failed"))
+    }
+
+    /// Returns the global manager, or `None` when it has not been installed.
+    pub fn try_global() -> Option<Arc<CatalogManager>> {
+        GLOBAL_CATALOG.get().map(Arc::clone)
+    }
+
+    /// Returns the global manager, or an error when it has not been installed.
+    pub fn global() -> anyhow::Result<Arc<CatalogManager>> {
+        match Self::try_global() {
+            Some(mgr) => Ok(mgr),
+            None => Err(anyhow!("CatalogManager not initialized")),
+        }
+    }
+
+    /// Store key for a table: the catalog prefix followed by the lowercased name.
+    #[inline]
+    fn build_store_key(table_name: &str) -> String {
+        let folded = table_name.to_lowercase();
+        format!("{CATALOG_KEY_PREFIX}{folded}")
+    }
+
+    /// Persists `table` in the store, then inserts it into the cache under its name.
+    pub fn add_table(&self, table: StreamTable) -> DFResult<()> {
+        let encoded = self.encode_table(&table)?.encode_to_vec();
+        self.store.put(&Self::build_store_key(table.name()), encoded)?;
+
+        // The cache is touched only after the store write succeeded.
+        let cache_key = UniCase::new(table.name().to_string());
+        self.cache.write().streams.insert(cache_key, Arc::new(table));
+        Ok(())
+    }
+
+    /// Reports whether the cache holds a table registered under `name`.
+    pub fn has_stream_table(&self, name: &str) -> bool {
+        let cache_key = UniCase::new(name.to_string());
+        self.cache.read().streams.contains_key(&cache_key)
+    }
+
+    /// Deletes a table from the store and the cache. A table missing from the cache is a
+    /// plan error unless `if_exists` is set, in which case the call is a no-op.
+    pub fn drop_table(&self, table_name: &str, if_exists: bool) -> DFResult<()> {
+        let cache_key = UniCase::new(table_name.to_string());
+        if !self.cache.read().streams.contains_key(&cache_key) {
+            return if if_exists {
+                Ok(())
+            } else {
+                plan_err!("Table '{table_name}' not found")
+            };
+        }
+
+        self.store.delete(&Self::build_store_key(table_name))?;
+        self.cache.write().streams.remove(&cache_key);
+        Ok(())
+    }
+
+    /// Rebuilds the cache from every record under the catalog key prefix. The live cache
+    /// is replaced only after all records decoded, so a failure leaves it untouched.
+    pub fn restore_from_store(&self) -> DFResult<()> {
+        let streams = self
+            .store
+            .scan_prefix(CATALOG_KEY_PREFIX)?
+            .into_iter()
+            .map(|(_key, payload)| -> DFResult<_> {
+                let proto_def = pb::TableDefinition::decode(payload.as_slice()).map_err(|e| {
+                    datafusion::common::DataFusionError::Execution(format!(
+                        "Failed to decode stream catalog protobuf: {e}"
+                    ))
+                })?;
+                let table = self.decode_table(proto_def)?;
+                Ok((UniCase::new(table.name().to_string()), Arc::new(table)))
+            })
+            .collect::<DFResult<HashMap<_, _>>>()?;
+
+        *self.cache.write() = StreamTableCatalogCache { streams };
+        Ok(())
+    }
+
+    /// Copies the cached table map into a fresh planning context.
+    pub fn acquire_planning_context(&self) -> StreamPlanningContext {
+        let mut ctx = StreamPlanningContext::new();
+        let snapshot = self.cache.read().streams.clone();
+        ctx.tables.streams = snapshot;
+        ctx
+    }
+
+    /// Converts a table into its protobuf catalog record, stamped with the current UTC time.
+    fn encode_table(&self, table: &StreamTable) -> DFResult<pb::TableDefinition> {
+        use table_definition::TableType;
+        let table_type = match table {
+            StreamTable::Source {
+                schema,
+                event_time_field,
+                watermark_field,
+                ..
+            } => TableType::Source(pb::StreamSource {
+                arrow_schema_ipc: CatalogCodec::encode_schema(schema)?,
+                event_time_field: event_time_field.clone(),
+                watermark_field: watermark_field.clone(),
+            }),
+            StreamTable::Sink { schema, .. } => TableType::Sink(pb::StreamSink {
+                arrow_schema_ipc: CatalogCodec::encode_schema(schema)?,
+            }),
+            StreamTable::Memory { logical_plan, .. } => TableType::Memory(pb::StreamMemory {
+                logical_plan_bytes: match logical_plan {
+                    Some(plan) => Some(CatalogCodec::encode_logical_plan(plan)?),
+                    None => None,
+                },
+            }),
+        };
+
+        Ok(pb::TableDefinition {
+            table_name: table.name().to_string(),
+            updated_at_millis: chrono::Utc::now().timestamp_millis(),
+            table_type: Some(table_type),
+        })
+    }
+
+    /// Rebuilds a table from its catalog record; a record without `table_type` is an error.
+    fn decode_table(&self, proto_def: pb::TableDefinition) -> DFResult<StreamTable> {
+        use table_definition::TableType;
+        let name = proto_def.table_name;
+        let Some(table_type) = proto_def.table_type else {
+            return internal_err!("Corrupted catalog row: missing table_type for {}", name);
+        };
+
+        Ok(match table_type {
+            TableType::Source(src) => StreamTable::Source {
+                name,
+                schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?,
+                event_time_field: src.event_time_field,
+                watermark_field: src.watermark_field,
+            },
+            TableType::Sink(sink) => StreamTable::Sink {
+                name,
+                schema: CatalogCodec::decode_schema(&sink.arrow_schema_ipc)?,
+            },
+            TableType::Memory(mem) => StreamTable::Memory {
+                name,
+                logical_plan: mem
+                    .logical_plan_bytes
+                    .map(|bytes| CatalogCodec::decode_logical_plan(&bytes, &self.session_ctx))
+                    .transpose()?,
+            },
+        })
+    }
+}
+
+/// Best-effort reload of the global catalog cache. Does nothing when no global manager
+/// is installed; a failed restore is logged as a warning rather than returned.
+pub fn restore_global_catalog_from_store() {
+    if let Some(Err(e)) = CatalogManager::try_global().map(|mgr| mgr.restore_from_store()) {
+        warn!("Stream catalog restore_from_store skipped or failed: {e:#}");
+    }
+}
+
+/// Installs the global catalog on an in-memory store; `_config` is currently unused.
+pub fn initialize_stream_catalog(_config: &crate::config::GlobalConfig) -> anyhow::Result<()> {
+    CatalogManager::init_global_in_memory().context("Stream catalog (CatalogManager) init failed")
+}
+
+/// Planning context from the global catalog, or `StreamPlanningContext::new()` without one.
+pub fn planning_schema_provider() -> StreamPlanningContext {
+    CatalogManager::try_global()
+        .map_or_else(StreamPlanningContext::new, |mgr| mgr.acquire_planning_context())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use datafusion::arrow::datatypes::{DataType, Field, Schema};
+    use datafusion::execution::context::SessionContext;
+
+    use crate::sql::schema::StreamTable;
+    use crate::storage::stream_catalog::{InMemoryMetaStore, MetaStore};
+
+    use super::CatalogManager;
+
+    fn create_test_manager() -> CatalogManager {
+        CatalogManager::new(
+            Arc::new(InMemoryMetaStore::new()),
+            Arc::new(SessionContext::new()),
+        )
+    }
+
+    #[test]
+    fn add_table_roundtrip_snapshot() {
+        let mgr = create_test_manager();
+        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
+        // Register a source table with an event-time field and no watermark field.
+        let table = StreamTable::Source {
+            name: "t1".into(),
+            schema: Arc::clone(&schema),
+            event_time_field: Some("ts".into()),
+            watermark_field: None,
+        };
+
+        mgr.add_table(table).unwrap();
+        // Read the table back through a planning context built from the cache.
+        let ctx = mgr.acquire_planning_context();
+        let got = ctx.get_stream_table("t1").expect("table present");
+
+        assert_eq!(got.name(), "t1");
+        // The optional time fields must come back exactly as registered.
+        if let StreamTable::Source {
+            event_time_field,
+            watermark_field,
+            ..
+        } = got.as_ref()
+        {
+            assert_eq!(event_time_field.as_deref(), Some("ts"));
+            assert!(watermark_field.is_none());
+        } else {
+            panic!("expected Source");
+        }
+    }
+
+    #[test]
+    fn drop_table_if_exists() {
+        let mgr = create_test_manager();
+        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
+
+        mgr.add_table(StreamTable::Source {
+            name: "t_drop".into(),
+            schema,
+            event_time_field: None,
+            watermark_field: None,
+        })
+        .unwrap();
+        // The first drop removes the table from the cache.
+        mgr.drop_table("t_drop", false).unwrap();
+        assert!(!mgr.has_stream_table("t_drop"));
+        // Dropping an unknown name only fails when `if_exists` is false.
+        mgr.drop_table("t_drop", true).unwrap();
+        assert!(mgr.drop_table("nope", false).is_err());
+        mgr.drop_table("nope", true).unwrap();
+    }
+
+    #[test]
+    fn restore_from_store_rebuilds_cache() {
+        let store: Arc<dyn MetaStore> = Arc::new(InMemoryMetaStore::new());
+        let session = Arc::new(SessionContext::new());
+        // Write a sink table through a first manager.
+        let mgr_a = CatalogManager::new(Arc::clone(&store), Arc::clone(&session));
+        let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Utf8, true)]));
+
+        mgr_a
+            .add_table(StreamTable::Sink {
+                name: "sink1".into(),
+                schema,
+            })
+            .unwrap();
+        // A second manager over the same store starts empty and rebuilds its cache from it.
+        let mgr_b = CatalogManager::new(store, session);
+        mgr_b.restore_from_store().unwrap();
+
+        let ctx = mgr_b.acquire_planning_context();
+        assert!(ctx.get_stream_table("sink1").is_some());
+    }
+}
diff --git a/src/storage/stream_catalog/meta_store.rs b/src/storage/stream_catalog/meta_store.rs
new file mode 100644
index 00000000..6f61b3f7
--- /dev/null
+++ b/src/storage/stream_catalog/meta_store.rs
@@ -0,0 +1,70 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Pluggable metadata KV backend (memory, etcd, Redis, …).
+
+use std::collections::HashMap;
+
+use datafusion::common::Result;
+use parking_lot::RwLock;
+
+/// Synchronous metadata store for catalog records.
+pub trait MetaStore: Send + Sync {
+    fn put(&self, key: &str, value: Vec<u8>) -> Result<()>; // store `value` under `key`
+    fn get(&self, key: &str) -> Result<Option<Vec<u8>>>; // value for `key`, if one is stored
+    fn delete(&self, key: &str) -> Result<()>; // remove the entry stored under `key`
+    fn scan_prefix(&self, prefix: &str) -> Result<Vec<(String, Vec<u8>)>>; // (key, value) pairs
+}
+
+/// In-process KV store for single-node deployments and tests.
+#[derive(Default)]
+pub struct InMemoryMetaStore {
+    db: RwLock<HashMap<String, Vec<u8>>>,
+}
+
+impl InMemoryMetaStore {
+    /// Creates an empty store.
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+impl MetaStore for InMemoryMetaStore {
+    /// Inserts the value under `key`, overwriting any previous value.
+    fn put(&self, key: &str, value: Vec<u8>) -> Result<()> {
+        self.db.write().insert(key.to_owned(), value);
+        Ok(())
+    }
+
+    /// Returns a copy of the value stored under `key`, if any.
+    fn get(&self, key: &str) -> Result<Option<Vec<u8>>> {
+        let db = self.db.read();
+        Ok(db.get(key).cloned())
+    }
+
+    /// Removes `key`; deleting an absent key still succeeds.
+    fn delete(&self, key: &str) -> Result<()> {
+        self.db.write().remove(key);
+        Ok(())
+    }
+
+    /// Returns copies of all entries whose key starts with `prefix`, in map iteration order.
+    fn scan_prefix(&self, prefix: &str) -> Result<Vec<(String, Vec<u8>)>> {
+        let mut matches = Vec::new();
+        for (key, value) in self.db.read().iter() {
+            if key.starts_with(prefix) {
+                matches.push((key.clone(), value.clone()));
+            }
+        }
+        Ok(matches)
+    }
+}
diff --git a/src/storage/stream_catalog/mod.rs b/src/storage/stream_catalog/mod.rs
new file mode 100644
index 00000000..f4f84469
--- /dev/null
+++ b/src/storage/stream_catalog/mod.rs
@@ -0,0 +1,23 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Stream table catalog: protobuf persistence, MVCC-style planning snapshots for the coordinator.
+
+mod codec;
+mod manager;
+mod meta_store;
+
+pub use manager::{
+    CatalogManager, initialize_stream_catalog, planning_schema_provider,
+    restore_global_catalog_from_store,
+};
+pub use meta_store::{InMemoryMetaStore, MetaStore};
diff --git a/src/storage/task/mod.rs b/src/storage/task/mod.rs
index b4b3680f..3123415a 100644
--- a/src/storage/task/mod.rs
+++ b/src/storage/task/mod.rs
@@ -16,6 +16,7 @@
 
 pub mod factory;
 mod function_info;
+mod proto_codec;
 mod rocksdb_storage;
 pub mod storage;
 
diff --git a/src/storage/task/proto_codec.rs b/src/storage/task/proto_codec.rs
new file mode 100644
index 00000000..1e0bedb3
--- /dev/null
+++ b/src/storage/task/proto_codec.rs
@@ -0,0 +1,271 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Protobuf wire format for RocksDB task rows, with legacy bincode read support.
+
+use anyhow::{Context, Result, anyhow};
+use prost::Message;
+use protocol::storage::{
+    ComponentStateKind, ComponentStateProto, TaskMetadataProto, TaskModulePayloadProto,
+    TaskModulePython, TaskModuleWasm, task_module_payload_proto,
+};
+use serde::{Deserialize, Serialize};
+
+use crate::runtime::common::ComponentState;
+
+use super::storage::TaskModuleBytes;
+
+/// Magic prefix for protobuf-encoded task values (meta + payload). Legacy rows have no prefix.
+pub const TASK_STORAGE_PROTO_MAGIC: &[u8; 4] = b"FSP1";
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct LegacyTaskMetadata { // serde shape used to read rows that lack the magic prefix
+    task_type: String,
+    state: ComponentState,
+    created_at: u64,
+    checkpoint_id: Option<u64>,
+}
+
+fn component_state_to_proto(state: &ComponentState) -> ComponentStateProto {
+    let (kind, error_message) = match state { // only `Error` yields a non-empty message
+        ComponentState::Uninitialized => (ComponentStateKind::Uninitialized, String::new()),
+        ComponentState::Initialized => (ComponentStateKind::Initialized, String::new()),
+        ComponentState::Starting => (ComponentStateKind::Starting, String::new()),
+        ComponentState::Running => (ComponentStateKind::Running, String::new()),
+        ComponentState::Checkpointing => (ComponentStateKind::Checkpointing, String::new()),
+        ComponentState::Stopping => (ComponentStateKind::Stopping, String::new()),
+        ComponentState::Stopped => (ComponentStateKind::Stopped, String::new()),
+        ComponentState::Closing => (ComponentStateKind::Closing, String::new()),
+        ComponentState::Closed => (ComponentStateKind::Closed, String::new()),
+        ComponentState::Error { error } => (ComponentStateKind::Error, error.clone()),
+    };
+    ComponentStateProto {
+        kind: kind as i32, // the proto field stores the enum as its i32 value
+        error_message,
+    }
+}
+
+/// Maps a stored state back to the runtime enum. Unknown or unspecified kinds become
+/// `Uninitialized`; an `Error` with an empty message gets the text "unknown error".
+fn component_state_from_proto(p: &ComponentStateProto) -> ComponentState {
+    use ComponentStateKind as Kind;
+
+    match Kind::try_from(p.kind).unwrap_or(Kind::Unspecified) {
+        Kind::Unspecified | Kind::Uninitialized => ComponentState::Uninitialized,
+        Kind::Initialized => ComponentState::Initialized,
+        Kind::Starting => ComponentState::Starting,
+        Kind::Running => ComponentState::Running,
+        Kind::Checkpointing => ComponentState::Checkpointing,
+        Kind::Stopping => ComponentState::Stopping,
+        Kind::Stopped => ComponentState::Stopped,
+        Kind::Closing => ComponentState::Closing,
+        Kind::Closed => ComponentState::Closed,
+        Kind::Error if p.error_message.is_empty() => ComponentState::Error {
+            error: "unknown error".to_string(),
+        },
+        Kind::Error => ComponentState::Error {
+            error: p.error_message.clone(),
+        },
+    }
+}
+
+/// Serializes task metadata for the `task_meta` column family. The output is always the
+/// `FSP1` magic prefix followed by an encoded `TaskMetadataProto`.
+pub fn encode_task_metadata_bytes(
+    task_type: &str,
+    state: &ComponentState,
+    created_at: u64,
+    checkpoint_id: Option<u64>,
+) -> Result<Vec<u8>> {
+    let mut framed = TASK_STORAGE_PROTO_MAGIC.to_vec();
+    TaskMetadataProto {
+        task_type: task_type.to_owned(),
+        state: Some(component_state_to_proto(state)),
+        created_at,
+        checkpoint_id,
+    }
+    .encode(&mut framed)
+    .context("encode TaskMetadataProto")?;
+    Ok(framed)
+}
+
+pub struct DecodedTaskMetadata { // format-independent result of `decode_task_metadata_bytes`
+    pub task_type: String,
+    pub state: ComponentState,
+    pub created_at: u64,
+    pub checkpoint_id: Option<u64>,
+}
+
+/// Decodes a `task_meta` value.
+///
+/// Bytes that start with the magic prefix are parsed as protobuf; anything else is treated
+/// as a legacy bincode+serde row.
+pub fn decode_task_metadata_bytes(raw: &[u8]) -> Result<DecodedTaskMetadata> {
+    if let Some(body) = raw.strip_prefix(TASK_STORAGE_PROTO_MAGIC) {
+        let proto = TaskMetadataProto::decode(body).context("decode TaskMetadataProto")?;
+        return Ok(DecodedTaskMetadata {
+            task_type: proto.task_type,
+            // A record without a state falls back to `ComponentState::default()`.
+            state: proto
+                .state
+                .as_ref()
+                .map(component_state_from_proto)
+                .unwrap_or_default(),
+            created_at: proto.created_at,
+            checkpoint_id: proto.checkpoint_id,
+        });
+    }
+
+    // No magic prefix: fall back to the legacy bincode layout.
+    let (legacy, _consumed): (LegacyTaskMetadata, usize) =
+        bincode::serde::decode_from_slice(raw, bincode::config::standard())
+            .map_err(|e| anyhow!("legacy task metadata bincode decode failed: {e}"))?;
+
+    Ok(DecodedTaskMetadata {
+        task_type: legacy.task_type,
+        state: legacy.state,
+        created_at: legacy.created_at,
+        checkpoint_id: legacy.checkpoint_id,
+    })
+}
+
+/// Wraps module bytes in the protobuf payload message, cloning the underlying buffers.
+fn module_to_proto(module: &TaskModuleBytes) -> TaskModulePayloadProto {
+    use task_module_payload_proto::Payload;
+
+    let payload = Some(match module {
+        TaskModuleBytes::Wasm(wasm_bytes) => Payload::Wasm(TaskModuleWasm {
+            wasm_binary: wasm_bytes.clone(),
+        }),
+        TaskModuleBytes::Python {
+            class_name,
+            module,
+            bytes,
+        } => Payload::Python(TaskModulePython {
+            class_name: class_name.clone(),
+            module_path: module.clone(),
+            embedded_code: bytes.clone(),
+        }),
+    });
+    TaskModulePayloadProto { payload }
+}
+
+/// Serializes a module payload for the `task_payload` column family: the `FSP1` magic
+/// prefix followed by an encoded `TaskModulePayloadProto`.
+pub fn encode_task_module_bytes(module: &TaskModuleBytes) -> Result<Vec<u8>> {
+    let mut framed = TASK_STORAGE_PROTO_MAGIC.to_vec();
+    module_to_proto(module)
+        .encode(&mut framed)
+        .context("encode TaskModulePayloadProto")?;
+    Ok(framed)
+}
+
+/// Decodes a `task_payload` value: bytes that start with the magic prefix are parsed as
+/// protobuf, anything else as a legacy bincode+serde [`TaskModuleBytes`].
+pub fn decode_task_module_bytes(raw: &[u8]) -> Result<TaskModuleBytes> {
+    match raw.strip_prefix(TASK_STORAGE_PROTO_MAGIC) {
+        // Current format: the protobuf payload follows the prefix.
+        Some(body) => TaskModulePayloadProto::decode(body)
+            .context("decode TaskModulePayloadProto")?
+            .try_into_task_module(),
+        None => {
+            // No prefix: fall back to the legacy bincode layout.
+            let (legacy, _consumed): (TaskModuleBytes, usize) =
+                bincode::serde::decode_from_slice(raw, bincode::config::standard())
+                    .map_err(|e| anyhow!("legacy task module bincode decode failed: {e}"))?;
+            Ok(legacy)
+        }
+    }
+}
+
+/// Fallible conversion of a decoded payload message into [`TaskModuleBytes`].
+trait TryIntoTaskModule {
+    fn try_into_task_module(self) -> Result<TaskModuleBytes>;
+}
+
+impl TryIntoTaskModule for TaskModulePayloadProto {
+    fn try_into_task_module(self) -> Result<TaskModuleBytes> {
+        use task_module_payload_proto::Payload;
+        match self.payload {
+            Some(Payload::Wasm(wasm)) => Ok(TaskModuleBytes::Wasm(wasm.wasm_binary)),
+            Some(Payload::Python(py)) => Ok(TaskModuleBytes::Python {
+                class_name: py.class_name,
+                module: py.module_path,
+                bytes: py.embedded_code,
+            }),
+            None => Err(anyhow!("TaskModulePayloadProto missing payload")),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Protobuf-framed metadata survives an encode/decode round trip.
+    #[test]
+    fn metadata_roundtrip_proto() {
+        let encoded =
+            encode_task_metadata_bytes("wasm", &ComponentState::Running, 42, Some(7)).unwrap();
+        let decoded = decode_task_metadata_bytes(&encoded).unwrap();
+        assert_eq!(decoded.task_type, "wasm");
+        assert_eq!(decoded.state, ComponentState::Running);
+        assert_eq!(decoded.created_at, 42);
+        assert_eq!(decoded.checkpoint_id, Some(7));
+    }
+
+    /// A Wasm payload survives an encode/decode round trip.
+    #[test]
+    fn module_roundtrip_wasm_proto() {
+        let original = TaskModuleBytes::Wasm(vec![1, 2, 3]);
+        let encoded = encode_task_module_bytes(&original).unwrap();
+        let decoded = decode_task_module_bytes(&encoded).unwrap();
+        assert_eq!(decoded, original);
+    }
+
+    /// A Python payload, embedded code included, survives an encode/decode round trip.
+    #[test]
+    fn module_roundtrip_python_proto() {
+        let original = TaskModuleBytes::Python {
+            class_name: "C".into(),
+            module: "m".into(),
+            bytes: Some(vec![9]),
+        };
+        let encoded = encode_task_module_bytes(&original).unwrap();
+        let decoded = decode_task_module_bytes(&encoded).unwrap();
+        assert_eq!(decoded, original);
+    }
+
+    /// Metadata rows written as plain bincode (no magic prefix) remain readable.
+    #[test]
+    fn legacy_bincode_metadata_still_decodes() {
+        let legacy_row = LegacyTaskMetadata {
+            task_type: "legacy".into(),
+            state: ComponentState::Stopped,
+            created_at: 99,
+            checkpoint_id: None,
+        };
+        let raw = bincode::serde::encode_to_vec(&legacy_row, bincode::config::standard()).unwrap();
+        let decoded = decode_task_metadata_bytes(&raw).unwrap();
+        assert_eq!(decoded.task_type, "legacy");
+        assert_eq!(decoded.state, ComponentState::Stopped);
+        assert_eq!(decoded.created_at, 99);
+    }
+
+    /// Module rows written as plain bincode (no magic prefix) remain readable.
+    #[test]
+    fn legacy_bincode_module_still_decodes() {
+        let original = TaskModuleBytes::Wasm(vec![8, 9]);
+        let raw = bincode::serde::encode_to_vec(&original, bincode::config::standard()).unwrap();
+        assert_eq!(decode_task_module_bytes(&raw).unwrap(), original);
+    }
+}
diff --git a/src/storage/task/rocksdb_storage.rs b/src/storage/task/rocksdb_storage.rs
index 714a9143..cea0ceb9 100644
--- a/src/storage/task/rocksdb_storage.rs
+++ b/src/storage/task/rocksdb_storage.rs
@@ -14,12 +14,15 @@
 //!
 //! Uses three column families: task_meta, task_config, task_payload.
 
-use super::storage::{StoredTaskInfo, TaskModuleBytes, TaskStorage};
+use super::proto_codec::{
+    decode_task_metadata_bytes, decode_task_module_bytes, encode_task_metadata_bytes,
+    encode_task_module_bytes,
+};
+use super::storage::{StoredTaskInfo, TaskStorage};
 use crate::config::storage::RocksDBStorageConfig;
 use crate::runtime::common::ComponentState;
 use anyhow::{Context, Result, anyhow};
 use rocksdb::{ColumnFamilyDescriptor, DB, IteratorMode, Options, WriteBatch};
-use serde::{Deserialize, Serialize};
 use std::path::Path;
 use std::sync::Arc;
 
@@ -27,14 +30,6 @@ const CF_METADATA: &str = "task_meta";
 const CF_CONFIG: &str = "task_config";
 const CF_PAYLOAD: &str = "task_payload";
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-struct TaskMetadata {
-    task_type: String,
-    state: ComponentState,
-    created_at: u64,
-    checkpoint_id: Option<u64>,
-}
-
 pub struct RocksDBTaskStorage {
     db: Arc<DB>,
 }
@@ -95,27 +90,19 @@ impl TaskStorage for RocksDBTaskStorage {
             return Err(anyhow!("Task uniqueness violation: {}", task_info.name));
         }
 
-        let meta = TaskMetadata {
-            task_type: task_info.task_type.clone(),
-            state: task_info.state.clone(),
-            created_at: task_info.created_at,
-            checkpoint_id: task_info.checkpoint_id,
-        };
+        let meta_bytes = encode_task_metadata_bytes(
+            &task_info.task_type,
+            &task_info.state,
+            task_info.created_at,
+            task_info.checkpoint_id,
+        )?;
 
         let mut batch = WriteBatch::default();
-        batch.put_cf(
-            &cf_meta,
-            key,
-            bincode::serde::encode_to_vec(&meta, bincode::config::standard())?,
-        );
+        batch.put_cf(&cf_meta, key, meta_bytes);
         batch.put_cf(&cf_conf, key, &task_info.config_bytes);
 
         if let Some(ref module) = task_info.module_bytes {
-            batch.put_cf(
-                &cf_payl,
-                key,
-                bincode::serde::encode_to_vec(module, bincode::config::standard())?,
-            );
+            batch.put_cf(&cf_payl, key, encode_task_module_bytes(module)?);
         }
 
         self.db
@@ -132,14 +119,18 @@ impl TaskStorage for RocksDBTaskStorage {
             .get_cf(&cf, key)?
             .ok_or_else(|| anyhow!("Task {} not found", task_name))?;
 
-        let (mut meta, _): (TaskMetadata, _) =
-            bincode::serde::decode_from_slice(&raw, bincode::config::standard())?;
-        meta.state = new_state;
+        let mut decoded = decode_task_metadata_bytes(&raw)?;
+        decoded.state = new_state;
 
         self.db.put_cf(
             &cf,
             key,
-            bincode::serde::encode_to_vec(&meta, bincode::config::standard())?,
+            encode_task_metadata_bytes(
+                &decoded.task_type,
+                &decoded.state,
+                decoded.created_at,
+                decoded.checkpoint_id,
+            )?,
         )?;
         Ok(())
     }
@@ -153,14 +144,18 @@ impl TaskStorage for RocksDBTaskStorage {
             .get_cf(&cf, key)?
             .ok_or_else(|| anyhow!("Task {} not found", task_name))?;
 
-        let (mut meta, _): (TaskMetadata, _) =
-            bincode::serde::decode_from_slice(&raw, bincode::config::standard())?;
-        meta.checkpoint_id = checkpoint_id;
+        let mut decoded = decode_task_metadata_bytes(&raw)?;
+        decoded.checkpoint_id = checkpoint_id;
 
         self.db.put_cf(
             &cf,
             key,
-            bincode::serde::encode_to_vec(&meta, bincode::config::standard())?,
+            encode_task_metadata_bytes(
+                &decoded.task_type,
+                &decoded.state,
+                decoded.created_at,
+                decoded.checkpoint_id,
+            )?,
         )?;
         Ok(())
     }
@@ -189,20 +184,12 @@ impl TaskStorage for RocksDBTaskStorage {
             .get_cf(&self.get_cf(CF_CONFIG)?, key)?
             .ok_or_else(|| anyhow!("Config missing: {}", task_name))?;
 
-        let module_bytes = self
-            .db
-            .get_cf(&self.get_cf(CF_PAYLOAD)?, key)?
-            .and_then(|b| {
-                bincode::serde::decode_from_slice::<TaskModuleBytes, _>(
-                    &b,
-                    bincode::config::standard(),
-                )
-                .ok()
-                .map(|(v, _)| v)
-            });
-
-        let (meta, _): (TaskMetadata, _) =
-            bincode::serde::decode_from_slice(&meta_raw, bincode::config::standard())?;
+        let module_bytes = match self.db.get_cf(&self.get_cf(CF_PAYLOAD)?, key)? {
+            None => None,
+            Some(b) => Some(decode_task_module_bytes(&b)?),
+        };
+
+        let meta = decode_task_metadata_bytes(&meta_raw)?;
 
         Ok(StoredTaskInfo {
             name: task_name.to_string(),
diff --git a/src/storage/task/storage.rs b/src/storage/task/storage.rs
index 3c9e4080..156ee5d8 100644
--- a/src/storage/task/storage.rs
+++ b/src/storage/task/storage.rs
@@ -15,7 +15,7 @@ use anyhow::Result;
 use serde::{Deserialize, Serialize};
 
 #[allow(dead_code)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum TaskModuleBytes {
     Wasm(Vec<u8>),
     Python {

From cdb6ddb157682433a544041785bbffc231a82987 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 22 Mar 2026 15:25:24 +0800
Subject: [PATCH 13/44] update

---
 .../logical_planner/optimizers/chaining.rs    | 140 +++++++++++-------
 1 file changed, 84 insertions(+), 56 deletions(-)

diff --git a/src/sql/logical_planner/optimizers/chaining.rs b/src/sql/logical_planner/optimizers/chaining.rs
index 5935c985..11c072d3 100644
--- a/src/sql/logical_planner/optimizers/chaining.rs
+++ b/src/sql/logical_planner/optimizers/chaining.rs
@@ -10,93 +10,121 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::mem;
-
-use petgraph::prelude::*;
-use petgraph::visit::NodeRef;
+use petgraph::graph::{EdgeIndex, NodeIndex};
+use petgraph::visit::EdgeRef;
+use petgraph::Direction::{Incoming, Outgoing};
 
 use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer};
 
-pub struct ChainingOptimizer {}
-
-fn remove_in_place<N, E>(graph: &mut DiGraph<N, E>, node: NodeIndex) {
-    let incoming = graph.edges_directed(node, Incoming).next().unwrap();
-
-    let parent = incoming.source().id();
-    let incoming = incoming.id();
-    graph.remove_edge(incoming);
+pub type NodeId = NodeIndex;
+pub type EdgeId = EdgeIndex;
 
-    let outgoing: Vec<_> = graph
-        .edges_directed(node, Outgoing)
-        .map(|e| (e.id(), e.target().id()))
-        .collect();
+pub struct ChainingOptimizer;
 
-    for (edge, target) in outgoing {
-        let weight = graph.remove_edge(edge).unwrap();
-        graph.add_edge(parent, target, weight);
-    }
+impl ChainingOptimizer {
+    fn find_fusion_candidate(plan: &LogicalGraph) -> Option<(NodeId, NodeId, EdgeId)> {
+        let node_ids: Vec<NodeId> = plan.node_indices().collect();
 
-    graph.remove_node(node);
-}
+        for upstream_id in node_ids {
+            let upstream_node = plan.node_weight(upstream_id)?;
 
-impl Optimizer for ChainingOptimizer {
-    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool {
-        let node_indices: Vec<NodeIndex> = plan.node_indices().collect();
-
-        for &node_idx in &node_indices {
-            let cur = plan.node_weight(node_idx).unwrap();
-
-            if cur.operator_chain.is_source() {
+            if upstream_node.operator_chain.is_source() {
                 continue;
             }
 
-            let mut successors = plan.edges_directed(node_idx, Outgoing).collect::<Vec<_>>();
+            let outgoing_edges: Vec<_> = plan.edges_directed(upstream_id, Outgoing).collect();
 
-            if successors.len() != 1 {
+            if outgoing_edges.len() != 1 {
                 continue;
             }
 
-            let edge = successors.remove(0);
-            let edge_type = edge.weight().edge_type;
+            let bridging_edge = &outgoing_edges[0];
 
-            if edge_type != LogicalEdgeType::Forward {
+            if bridging_edge.weight().edge_type != LogicalEdgeType::Forward {
                 continue;
             }
 
-            let successor_idx = edge.target();
+            let downstream_id = bridging_edge.target();
+            let downstream_node = plan.node_weight(downstream_id)?;
 
-            let successor_node = plan.node_weight(successor_idx).unwrap();
+            if downstream_node.operator_chain.is_sink() {
+                continue;
+            }
 
-            if cur.parallelism != successor_node.parallelism
-                || successor_node.operator_chain.is_sink()
-            {
+            if upstream_node.parallelism != downstream_node.parallelism {
                 continue;
             }
 
-            if plan.edges_directed(successor_idx, Incoming).count() > 1 {
+            let incoming_edges: Vec<_> = plan.edges_directed(downstream_id, Incoming).collect();
+            if incoming_edges.len() != 1 {
                 continue;
             }
 
-            let mut new_cur = cur.clone();
+            return Some((upstream_id, downstream_id, bridging_edge.id()));
+        }
+
+        None
+    }
 
-            new_cur.description = format!("{} -> {}", cur.description, successor_node.description);
+    /// Fuses `downstream_id` into `upstream_id` and removes the downstream node.
+    ///
+    /// petgraph's `Graph` moves the last edge/node into the slot freed by
+    /// `remove_edge`/`remove_node`, so edge ids are looked up again after every
+    /// removal and the upstream node is updated before the downstream one is dropped.
+    fn apply_fusion(
+        plan: &mut LogicalGraph,
+        upstream_id: NodeId,
+        downstream_id: NodeId,
+        bridging_edge_id: EdgeId,
+    ) {
+        let bridging_edge = plan
+            .remove_edge(bridging_edge_id)
+            .expect("Graph Integrity Violation: Bridging edge missing");
 
-            new_cur
-                .operator_chain
-                .operators
-                .extend(successor_node.operator_chain.operators.clone());
+        // Re-home the downstream's outgoing edges one at a time: ids collected up
+        // front can be reassigned by an earlier `remove_edge`/`add_edge` pair.
+        loop {
+            let next_edge = plan
+                .edges_directed(downstream_id, Outgoing)
+                .next()
+                .map(|e| (e.id(), e.target()));
+            let Some((edge_id, target_id)) = next_edge else { break };
+            let edge_weight = plan
+                .remove_edge(edge_id)
+                .expect("Graph Integrity Violation: Outgoing edge missing");
+            plan.add_edge(upstream_id, target_id, edge_weight);
+        }
 
-            new_cur
-                .operator_chain
-                .edges
-                .push(edge.weight().schema.clone());
+        // Move the payload out now; the emptied node is removed at the very end.
+        let downstream_node = plan
+            .node_weight_mut(downstream_id)
+            .expect("Graph Integrity Violation: Downstream node missing");
+        let downstream_description = std::mem::take(&mut downstream_node.description);
+        let downstream_operators = std::mem::take(&mut downstream_node.operator_chain.operators);
 
-            mem::swap(&mut new_cur, plan.node_weight_mut(node_idx).unwrap());
+        let upstream_node = plan
+            .node_weight_mut(upstream_id)
+            .expect("Graph Integrity Violation: Upstream node missing");
 
-            remove_in_place(plan, successor_idx);
-            return true;
-        }
+        upstream_node.description =
+            format!("{} -> {}", upstream_node.description, downstream_description);
+        upstream_node.operator_chain.operators.extend(downstream_operators);
+        upstream_node.operator_chain.edges.push(bridging_edge.schema);
 
-        false
+        // Last, because this may re-index another node (possibly the upstream one).
+        plan.remove_node(downstream_id);
+    }
+}
+
+impl Optimizer for ChainingOptimizer {
+    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool {
+        if let Some((upstream_id, downstream_id, bridging_edge_id)) =
+            Self::find_fusion_candidate(plan)
+        {
+            Self::apply_fusion(plan, upstream_id, downstream_id, bridging_edge_id);
+            true
+        } else {
+            false
+        }
     }
 }

From 27bd75c2a04f6787a735f7b6c2d926412904f2d6 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 22 Mar 2026 16:49:12 +0800
Subject: [PATCH 14/44] update

---
 Cargo.lock                                    |   1 +
 Cargo.toml                                    |   1 +
 protocol/proto/storage.proto                  |   8 +-
 src/coordinator/execution/executor.rs         |   2 +-
 src/coordinator/plan/logical_plan_visitor.rs  |  78 +++----
 src/coordinator/plan/streaming_table_plan.rs  |   4 +-
 src/sql/common/fs_schema.rs                   |   3 +-
 .../logical/fs_program_convert.rs             | 201 ++++++++++++++++++
 src/sql/logical_node/logical/logical_edge.rs  |  59 ++++-
 src/sql/logical_node/logical/logical_node.rs  |  42 ++--
 .../logical_node/logical/logical_program.rs   | 133 +++++++-----
 src/sql/logical_node/logical/mod.rs           |   1 +
 .../logical_node/logical/operator_chain.rs    |  72 +++++--
 src/sql/logical_node/logical/operator_name.rs |  22 ++
 .../logical_node/logical/program_config.rs    |  25 ++-
 src/sql/logical_planner/compiled_sql.rs       |  21 --
 src/sql/logical_planner/mod.rs                |   3 -
 src/sql/mod.rs                                |   1 -
 src/sql/schema/schema_provider.rs             |  24 +--
 src/storage/stream_catalog/codec.rs           |  14 +-
 src/storage/stream_catalog/manager.rs         |  74 +++----
 21 files changed, 557 insertions(+), 232 deletions(-)
 create mode 100644 src/sql/logical_node/logical/fs_program_convert.rs
 delete mode 100644 src/sql/logical_planner/compiled_sql.rs

diff --git a/Cargo.lock b/Cargo.lock
index f39d5d3e..7cd510f3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2375,6 +2375,7 @@ dependencies = [
  "proctitle",
  "prost",
  "protocol",
+ "rand 0.8.5",
  "rdkafka",
  "rocksdb",
  "serde",
diff --git a/Cargo.toml b/Cargo.toml
index 8b38dfe4..cee98282 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -60,6 +60,7 @@ xxhash-rust = { version = "0.8", features = ["xxh3"] }
 proctitle = "0.1"
 unicase = "2.7"
 petgraph = "0.7"
+rand = { version = "0.8", features = ["small_rng"] }
 itertools = "0.14"
 strum = { version = "0.26", features = ["derive"] }
 datafusion-functions-aggregate = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto
index b11037a2..cace3107 100644
--- a/protocol/proto/storage.proto
+++ b/protocol/proto/storage.proto
@@ -10,7 +10,7 @@ syntax = "proto3";
 package function_stream.storage;
 
 // =============================================================================
-// Stream catalog (coordinator stream tables: source / sink / memory)
+// Stream catalog (coordinator stream tables: source / sink)
 // =============================================================================
 
 // Top-level persisted record for one stream table.
@@ -20,7 +20,6 @@ message TableDefinition {
   oneof table_type {
     StreamSource source = 3;
     StreamSink sink = 4;
-    StreamMemory memory = 5;
   }
 }
 
@@ -32,10 +31,7 @@ message StreamSource {
 
 message StreamSink {
   bytes arrow_schema_ipc = 1;
-}
-
-message StreamMemory {
-  optional bytes logical_plan_bytes = 1;
+  bytes logical_program_bincode = 2;
 }
 
 // =============================================================================
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index b8fbb3a5..3639ee7a 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -266,7 +266,7 @@ impl PlanVisitor for Executor {
         let execute = || -> Result<ExecuteResult, ExecuteError> {
             let sink = StreamTable::Sink {
                 name: plan.name.clone(),
-                schema: Arc::new(plan.logical_plan.schema().as_arrow().clone()),
+                program: plan.program.clone(),
             };
 
             self.catalog_manager
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index aa8364ef..14ed01b8 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -91,86 +91,78 @@ impl LogicalPlanVisitor {
             return plan_err!("Statement mismatch: Expected CREATE STREAMING TABLE AST node");
         };
 
-        let target_name = name.to_string();
-        debug!(
-            "Initiating streaming sink compilation for identifier: {}",
-            target_name
-        );
+        let sink_table_name = name.to_string();
+        debug!("Initiating streaming sink compilation for identifier: {}", sink_table_name);
 
-        let mut connector_options = ConnectorOptions::new(with_options, &None)?;
-        let adapter_type = connector_options.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| {
+        let mut sink_properties = ConnectorOptions::new(with_options, &None)?;
+        let connector_type = sink_properties.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| {
             plan_datafusion_err!(
-                "Validation Error: Streaming table '{}' requires the '{}' property",
-                target_name,
-                OPT_CONNECTOR
-            )
+            "Validation Error: Streaming table '{}' requires the '{}' property",
+            sink_table_name,
+            OPT_CONNECTOR
+        )
         })?;
 
-        let routing_exprs = Self::extract_partitioning_keys(&mut connector_options)?;
+        let partition_keys = Self::extract_partitioning_keys(&mut sink_properties)?;
 
-        let mut logical_plan = rewrite_plan(
+        let mut query_logical_plan = rewrite_plan(
             produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?,
             &self.schema_provider,
         )?;
 
-        if logical_plan
-            .schema()
-            .fields()
-            .iter()
-            .any(|f| is_json_union(f.data_type()))
-        {
-            logical_plan = serialize_outgoing_json(&self.schema_provider, Arc::new(logical_plan));
+        if query_logical_plan.schema().fields().iter().any(|f| is_json_union(f.data_type())) {
+            query_logical_plan = serialize_outgoing_json(&self.schema_provider, Arc::new(query_logical_plan));
         }
 
-        let output_descriptors = logical_plan
+        let output_schema_fields = query_logical_plan
             .schema()
             .fields()
             .iter()
             .map(|f| ColumnDescriptor::from((**f).clone()))
             .collect::<Vec<_>>();
 
-        let mut source_definition = SourceTable::from_options(
-            &target_name,
-            &adapter_type,
+        let mut sink_definition = SourceTable::from_options(
+            &sink_table_name,
+            &connector_type,
             false,
-            output_descriptors,
+            output_schema_fields,
             vec![],
             None,
-            &mut connector_options,
+            &mut sink_properties,
             None,
             &self.schema_provider,
             Some(ConnectionType::Sink),
             comment.clone().unwrap_or_default(),
         )?;
-        source_definition.partition_exprs = Arc::new(routing_exprs);
-
-        let sink_schema = logical_plan.schema().clone();
-        let egress_node = StreamEgressNode::try_new(
-            TableReference::bare(target_name.clone()),
-            Table::ConnectorTable(source_definition.clone()),
-            sink_schema,
-            logical_plan,
+        sink_definition.partition_exprs = Arc::new(partition_keys);
+
+        let output_schema = query_logical_plan.schema().clone();
+        let sink_plan_node = StreamEgressNode::try_new(
+            TableReference::bare(sink_table_name.clone()),
+            Table::ConnectorTable(sink_definition.clone()),
+            output_schema,
+            query_logical_plan,
         )?;
 
-        let mut plan_topology = rewrite_sinks(vec![maybe_add_key_extension_to_sink(
+        let mut rewritten_plans = rewrite_sinks(vec![maybe_add_key_extension_to_sink(
             LogicalPlan::Extension(Extension {
-                node: Arc::new(egress_node),
+                node: Arc::new(sink_plan_node),
             }),
         )?])?;
 
-        let final_execution_plan = plan_topology.remove(0);
+        let final_logical_plan = rewritten_plans.remove(0);
 
-        self.validate_graph_topology(&final_execution_plan)?;
+        let validated_program = self.validate_graph_topology(&final_logical_plan)?;
 
         Ok(StreamingTable {
-            name: target_name,
+            name: sink_table_name,
             comment: comment.clone(),
-            source_table: source_definition,
-            logical_plan: final_execution_plan,
+            source_table: sink_definition,
+            program: validated_program,
         })
     }
 
-    fn validate_graph_topology(&self, logical_plan: &LogicalPlan) -> Result<()> {
+    fn validate_graph_topology(&self, logical_plan: &LogicalPlan) -> Result<LogicalProgram> {
         let mut session_config = SessionConfig::new();
         let opts = session_config.options_mut();
         opts.optimizer.enable_round_robin_repartition = false;
@@ -193,7 +185,7 @@ impl LogicalPlanVisitor {
             LogicalProgram::new(graph_compiler.into_graph(), ProgramConfig::default());
         executable_program.optimize(&ChainingOptimizer {});
 
-        Ok(())
+        Ok(executable_program)
     }
 
     fn extract_partitioning_keys(
diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs
index 01b8dbb8..c7b09c26 100644
--- a/src/coordinator/plan/streaming_table_plan.rs
+++ b/src/coordinator/plan/streaming_table_plan.rs
@@ -11,8 +11,8 @@
 // limitations under the License.
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+use crate::sql::logical_node::logical::LogicalProgram;
 use crate::sql::schema::source_table::SourceTable;
-use datafusion::logical_expr::LogicalPlan;
 
 /// Plan node representing a fully resolved streaming table (DDL).
 #[derive(Debug)]
@@ -20,7 +20,7 @@ pub struct StreamingTable {
     pub name: String,
     pub comment: Option<String>,
     pub source_table: SourceTable,
-    pub logical_plan: LogicalPlan,
+    pub program: LogicalProgram,
 }
 
 impl PlanNode for StreamingTable {
diff --git a/src/sql/common/fs_schema.rs b/src/sql/common/fs_schema.rs
index e1507e3e..5233bd0c 100644
--- a/src/sql/common/fs_schema.rs
+++ b/src/sql/common/fs_schema.rs
@@ -7,6 +7,7 @@ use datafusion::arrow::array::{RecordBatch, TimestampNanosecondArray};
 use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit};
 use datafusion::arrow::error::ArrowError;
 use datafusion::common::{DataFusionError, Result as DFResult};
+use serde::{Deserialize, Serialize};
 use std::sync::Arc;
 use std::time::SystemTime;
 use arrow::compute::{filter_record_batch, lexsort_to_indices, partition, take, SortColumn};
@@ -22,7 +23,7 @@ use crate::sql::common::converter::Converter;
 
 pub type FsSchemaRef = Arc<FsSchema>;
 
-#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
 pub struct FsSchema {
     pub schema: Arc<Schema>,
     pub timestamp_index: usize,
diff --git a/src/sql/logical_node/logical/fs_program_convert.rs b/src/sql/logical_node/logical/fs_program_convert.rs
new file mode 100644
index 00000000..a8ac20b1
--- /dev/null
+++ b/src/sql/logical_node/logical/fs_program_convert.rs
@@ -0,0 +1,201 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+//
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Conversions between [`LogicalProgram`] and `protocol::grpc::api::FsProgram` / pipeline API types.
+
+use std::collections::HashMap;
+use std::str::FromStr;
+use std::sync::Arc;
+
+use datafusion::common::{DataFusionError, Result as DFResult};
+use petgraph::graph::DiGraph;
+use petgraph::prelude::EdgeRef;
+use protocol::grpc::api::{
+    ChainedOperator, EdgeType as ProtoEdgeType, FsEdge, FsNode, FsProgram, FsSchema as ProtoFsSchema,
+};
+
+use crate::sql::api::pipelines::{PipelineEdge, PipelineGraph, PipelineNode};
+use crate::sql::common::FsSchema;
+
+use super::logical_edge::logical_edge_type_from_proto_i32;
+use super::operator_chain::{ChainedLogicalOperator, OperatorChain};
+use super::operator_name::OperatorName;
+use super::{LogicalEdge, LogicalNode, LogicalProgram, ProgramConfig};
+
+/// Rebuilds a [`LogicalProgram`] from its protobuf form; fails on unknown operator names,
+/// undecodable schemas or edge types, dangling edge endpoints, and reused node indices.
+impl TryFrom<FsProgram> for LogicalProgram {
+    type Error = DataFusionError;
+
+    fn try_from(value: FsProgram) -> DFResult<Self> {
+        let mut graph = DiGraph::new();
+        let mut id_map = HashMap::with_capacity(value.nodes.len());
+
+        for node in value.nodes {
+            let operators = node
+                .operators
+                .into_iter()
+                .map(|op| {
+                    let name = &op.operator_name;
+                    let operator_name = OperatorName::from_str(name).map_err(|_| {
+                        DataFusionError::Plan(format!("Invalid operator name: {name}"))
+                    })?;
+                    Ok(ChainedLogicalOperator {
+                        operator_id: op.operator_id,
+                        operator_name,
+                        operator_config: op.operator_config,
+                    })
+                })
+                .collect::<DFResult<Vec<_>>>()?;
+
+            let edges = node
+                .edges
+                .into_iter()
+                .map(|e| Ok(Arc::new(FsSchema::try_from(e)?)))
+                .collect::<DFResult<Vec<_>>>()?;
+
+            let logical_node = LogicalNode {
+                node_id: node.node_id,
+                description: node.description,
+                operator_chain: OperatorChain { operators, edges },
+                parallelism: node.parallelism as usize,
+            };
+
+            if id_map.insert(node.node_index, graph.add_node(logical_node)).is_some() {
+                return Err(DataFusionError::Plan(format!(
+                    "Graph integrity error: Duplicate node index {}",
+                    node.node_index
+                )));
+            }
+        }
+
+        for edge in value.edges {
+            let source = *id_map.get(&edge.source).ok_or_else(|| {
+                DataFusionError::Plan("Graph integrity error: Missing source node".into())
+            })?;
+            let target = *id_map.get(&edge.target).ok_or_else(|| {
+                DataFusionError::Plan("Graph integrity error: Missing target node".into())
+            })?;
+            let schema = edge
+                .schema
+                .ok_or_else(|| DataFusionError::Plan("Graph integrity error: Missing edge schema".into()))?;
+            let edge_type = logical_edge_type_from_proto_i32(edge.edge_type)?;
+
+            graph.add_edge(
+                source,
+                target,
+                LogicalEdge {
+                    edge_type,
+                    schema: Arc::new(FsSchema::try_from(schema)?),
+                },
+            );
+        }
+
+        let program_config = value
+            .program_config
+            .map(ProgramConfig::from)
+            .unwrap_or_default();
+
+        Ok(LogicalProgram::new(graph, program_config))
+    }
+}
+
+impl From<LogicalProgram> for FsProgram {
+    fn from(value: LogicalProgram) -> Self {
+        let nodes = value
+            .graph
+            .node_indices()
+            .filter_map(|idx| value.graph.node_weight(idx).map(|node| (idx, node)))
+            .map(|(idx, node)| FsNode {
+                node_index: idx.index() as i32,
+                node_id: node.node_id,
+                parallelism: node.parallelism as u32,
+                description: node.description.clone(),
+                operators: node
+                    .operator_chain
+                    .operators
+                    .iter()
+                    .map(|op| ChainedOperator {
+                        operator_id: op.operator_id.clone(),
+                        operator_name: op.operator_name.to_string(),
+                        operator_config: op.operator_config.clone(),
+                    })
+                    .collect(),
+                edges: node
+                    .operator_chain
+                    .edges
+                    .iter()
+                    .map(|edge| ProtoFsSchema::from((**edge).clone()))
+                    .collect(),
+            })
+            .collect();
+
+        let edges = value
+            .graph
+            .edge_indices()
+            .filter_map(|eidx| {
+                let edge = value.graph.edge_weight(eidx)?;
+                let (source, target) = value.graph.edge_endpoints(eidx)?;
+                Some(FsEdge {
+                    source: source.index() as i32,
+                    target: target.index() as i32,
+                    schema: Some(ProtoFsSchema::from((*edge.schema).clone())),
+                    edge_type: ProtoEdgeType::from(edge.edge_type) as i32,
+                })
+            })
+            .collect();
+
+        FsProgram {
+            nodes,
+            edges,
+            program_config: Some(value.program_config.into()),
+        }
+    }
+}
+
+impl TryFrom<LogicalProgram> for PipelineGraph {
+    type Error = DataFusionError;
+
+    fn try_from(value: LogicalProgram) -> DFResult<Self> {
+        let nodes = value
+            .graph
+            .node_weights()
+            .map(|node| {
+                Ok(PipelineNode {
+                    node_id: node.node_id,
+                    operator: node.resolve_pipeline_operator_name()?,
+                    description: node.description.clone(),
+                    parallelism: node.parallelism as u32,
+                })
+            })
+            .collect::<DFResult<Vec<_>>>()?;
+
+        let edges = value
+            .graph
+            .edge_references()
+            .filter_map(|edge| {
+                let src = value.graph.node_weight(edge.source())?;
+                let target = value.graph.node_weight(edge.target())?;
+                Some(PipelineEdge {
+                    src_id: src.node_id,
+                    dest_id: target.node_id,
+                    key_type: "()".to_string(),
+                    value_type: "()".to_string(),
+                    edge_type: format!("{:?}", edge.weight().edge_type),
+                })
+            })
+            .collect();
+
+        Ok(PipelineGraph { nodes, edges })
+    }
+}
diff --git a/src/sql/logical_node/logical/logical_edge.rs b/src/sql/logical_node/logical/logical_edge.rs
index 2f850988..1a169c1d 100644
--- a/src/sql/logical_node/logical/logical_edge.rs
+++ b/src/sql/logical_node/logical/logical_edge.rs
@@ -13,9 +13,13 @@
 use std::fmt::{Display, Formatter};
 use std::sync::Arc;
 
+use datafusion::common::{DataFusionError, Result};
+use protocol::grpc::api::EdgeType as ProtoEdgeType;
+use serde::{Deserialize, Serialize};
+
 use crate::sql::common::FsSchema;
 
-#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
+#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
 pub enum LogicalEdgeType {
     Forward,
     Shuffle,
@@ -25,16 +29,57 @@ pub enum LogicalEdgeType {
 
 impl Display for LogicalEdgeType {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            LogicalEdgeType::Forward => write!(f, "→"),
-            LogicalEdgeType::Shuffle => write!(f, "⤨"),
-            LogicalEdgeType::LeftJoin => write!(f, "-[left]⤨"),
-            LogicalEdgeType::RightJoin => write!(f, "-[right]⤨"),
+        let symbol = match self {
+            LogicalEdgeType::Forward => "→",
+            LogicalEdgeType::Shuffle => "⤨",
+            LogicalEdgeType::LeftJoin => "-[left]⤨",
+            LogicalEdgeType::RightJoin => "-[right]⤨",
+        };
+        write!(f, "{symbol}")
+    }
+}
+
+impl From<ProtoEdgeType> for LogicalEdgeType {
+    fn from(value: ProtoEdgeType) -> Self {
+        match value {
+            ProtoEdgeType::Unused => {
+                panic!("Critical: Invalid EdgeType 'Unused' encountered")
+            }
+            ProtoEdgeType::Forward => Self::Forward,
+            ProtoEdgeType::Shuffle => Self::Shuffle,
+            ProtoEdgeType::LeftJoin => Self::LeftJoin,
+            ProtoEdgeType::RightJoin => Self::RightJoin,
         }
     }
 }
 
-#[derive(Clone, Debug, Eq, PartialEq)]
+impl From<LogicalEdgeType> for ProtoEdgeType {
+    fn from(value: LogicalEdgeType) -> Self {
+        match value {
+            LogicalEdgeType::Forward => Self::Forward,
+            LogicalEdgeType::Shuffle => Self::Shuffle,
+            LogicalEdgeType::LeftJoin => Self::LeftJoin,
+            LogicalEdgeType::RightJoin => Self::RightJoin,
+        }
+    }
+}
+
+pub(crate) fn logical_edge_type_from_proto_i32(i: i32) -> Result<LogicalEdgeType> {
+    let e = ProtoEdgeType::try_from(i).map_err(|_| {
+        DataFusionError::Plan(format!("invalid protobuf EdgeType discriminant {i}"))
+    })?;
+    match e {
+        ProtoEdgeType::Unused => Err(DataFusionError::Plan(
+            "Critical: Invalid EdgeType 'Unused' encountered".into(),
+        )),
+        ProtoEdgeType::Forward => Ok(LogicalEdgeType::Forward),
+        ProtoEdgeType::Shuffle => Ok(LogicalEdgeType::Shuffle),
+        ProtoEdgeType::LeftJoin => Ok(LogicalEdgeType::LeftJoin),
+        ProtoEdgeType::RightJoin => Ok(LogicalEdgeType::RightJoin),
+    }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
 pub struct LogicalEdge {
     pub edge_type: LogicalEdgeType,
     pub schema: Arc<FsSchema>,
diff --git a/src/sql/logical_node/logical/logical_node.rs b/src/sql/logical_node/logical/logical_node.rs
index 492eae26..26129b26 100644
--- a/src/sql/logical_node/logical/logical_node.rs
+++ b/src/sql/logical_node/logical/logical_node.rs
@@ -12,10 +12,14 @@
 
 use std::fmt::{Debug, Display, Formatter};
 
+use datafusion::common::{DataFusionError, Result};
+use itertools::Itertools;
+use serde::{Deserialize, Serialize};
+
 use super::operator_chain::{ChainedLogicalOperator, OperatorChain};
 use super::operator_name::OperatorName;
 
-#[derive(Clone)]
+#[derive(Clone, Serialize, Deserialize)]
 pub struct LogicalNode {
     pub node_id: u32,
     pub description: String,
@@ -46,6 +50,24 @@ impl LogicalNode {
             parallelism,
         }
     }
+
+    pub fn resolve_pipeline_operator_name(&self) -> Result<String> {
+        let first_op = self
+            .operator_chain
+            .operators
+            .first()
+            .ok_or_else(|| DataFusionError::Plan("Invalid LogicalNode: Operator chain is empty".into()))?;
+
+        if let Some(connector_name) = first_op.extract_connector_name() {
+            return Ok(connector_name);
+        }
+
+        if self.operator_chain.len() == 1 {
+            return Ok(first_op.operator_id.clone());
+        }
+
+        Ok("chained_op".to_string())
+    }
 }
 
 impl Display for LogicalNode {
@@ -56,16 +78,12 @@ impl Display for LogicalNode {
 
 impl Debug for LogicalNode {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}[{}]",
-            self.operator_chain
-                .operators
-                .iter()
-                .map(|op| op.operator_id.clone())
-                .collect::<Vec<_>>()
-                .join(" -> "),
-            self.parallelism
-        )
+        let chain_path = self
+            .operator_chain
+            .operators
+            .iter()
+            .map(|op| op.operator_id.as_str())
+            .join(" -> ");
+        write!(f, "{chain_path}[{}]", self.parallelism)
     }
 }
diff --git a/src/sql/logical_node/logical/logical_program.rs b/src/sql/logical_node/logical/logical_program.rs
index db6883b8..888f4292 100644
--- a/src/sql/logical_node/logical/logical_program.rs
+++ b/src/sql/logical_node/logical/logical_program.rs
@@ -1,5 +1,6 @@
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
+//
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
@@ -10,10 +11,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::hash_map::DefaultHasher;
 use std::collections::{HashMap, HashSet};
+use std::hash::Hasher;
+use std::sync::Arc;
 
+use datafusion::arrow::datatypes::Schema;
+use datafusion::common::{DataFusionError, Result as DFResult};
 use petgraph::Direction;
 use petgraph::dot::Dot;
+use prost::Message;
+use protocol::grpc::api::FsProgram;
+use rand::distributions::Alphanumeric;
+use rand::rngs::SmallRng;
+use rand::{Rng, SeedableRng};
 
 use super::logical_graph::{LogicalGraph, Optimizer};
 use super::operator_name::OperatorName;
@@ -39,8 +50,8 @@ impl LogicalProgram {
 
     pub fn update_parallelism(&mut self, overrides: &HashMap<u32, usize>) {
         for node in self.graph.node_weights_mut() {
-            if let Some(p) = overrides.get(&node.node_id) {
-                node.parallelism = *p;
+            if let Some(&p) = overrides.get(&node.node_id) {
+                node.parallelism = p;
             }
         }
     }
@@ -56,68 +67,90 @@ impl LogicalProgram {
     pub fn sources(&self) -> HashSet<u32> {
         self.graph
             .externals(Direction::Incoming)
-            .map(|t| self.graph.node_weight(t).unwrap().node_id)
+            .filter_map(|idx| self.graph.node_weight(idx))
+            .map(|node| node.node_id)
+            .collect()
+    }
+
+    pub fn get_hash(&self) -> String {
+        let mut hasher = DefaultHasher::new();
+        let program_bytes = FsProgram::from(self.clone()).encode_to_vec();
+        hasher.write(&program_bytes);
+        let rng = SmallRng::seed_from_u64(hasher.finish());
+        rng.sample_iter(&Alphanumeric)
+            .take(16)
+            .map(|c| (c as char).to_ascii_lowercase())
             .collect()
     }
 
     pub fn tasks_per_operator(&self) -> HashMap<String, usize> {
-        let mut tasks_per_operator = HashMap::new();
-        for node in self.graph.node_weights() {
-            for op in &node.operator_chain.operators {
-                tasks_per_operator.insert(op.operator_id.clone(), node.parallelism);
-            }
-        }
-        tasks_per_operator
+        self.graph
+            .node_weights()
+            .flat_map(|node| {
+                node.operator_chain
+                    .operators
+                    .iter()
+                    .map(move |op| (op.operator_id.clone(), node.parallelism))
+            })
+            .collect()
     }
 
     pub fn operator_names_by_id(&self) -> HashMap<String, String> {
-        let mut m = HashMap::new();
-        for node in self.graph.node_weights() {
-            for op in &node.operator_chain.operators {
-                m.insert(op.operator_id.clone(), op.operator_name.to_string());
-            }
-        }
-        m
+        self.graph
+            .node_weights()
+            .flat_map(|node| &node.operator_chain.operators)
+            .map(|op| {
+                let resolved_name = op
+                    .extract_connector_name()
+                    .unwrap_or_else(|| op.operator_name.to_string());
+                (op.operator_id.clone(), resolved_name)
+            })
+            .collect()
     }
 
     pub fn tasks_per_node(&self) -> HashMap<u32, usize> {
-        let mut tasks_per_node = HashMap::new();
-        for node in self.graph.node_weights() {
-            tasks_per_node.insert(node.node_id, node.parallelism);
-        }
-        tasks_per_node
+        self.graph
+            .node_weights()
+            .map(|node| (node.node_id, node.parallelism))
+            .collect()
     }
 
     pub fn features(&self) -> HashSet<String> {
-        let mut s = HashSet::new();
-        for n in self.graph.node_weights() {
-            for t in &n.operator_chain.operators {
-                let feature = match &t.operator_name {
-                    OperatorName::AsyncUdf => "async-udf".to_string(),
-                    OperatorName::ExpressionWatermark
-                    | OperatorName::ArrowValue
-                    | OperatorName::ArrowKey
-                    | OperatorName::Projection => continue,
-                    OperatorName::Join => "join-with-expiration".to_string(),
-                    OperatorName::InstantJoin => "windowed-join".to_string(),
-                    OperatorName::WindowFunction => "sql-window-function".to_string(),
-                    OperatorName::LookupJoin => "lookup-join".to_string(),
-                    OperatorName::TumblingWindowAggregate => {
-                        "sql-tumbling-window-aggregate".to_string()
-                    }
-                    OperatorName::SlidingWindowAggregate => {
-                        "sql-sliding-window-aggregate".to_string()
-                    }
-                    OperatorName::SessionWindowAggregate => {
-                        "sql-session-window-aggregate".to_string()
-                    }
-                    OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(),
-                    OperatorName::ConnectorSource => "connector-source".to_string(),
-                    OperatorName::ConnectorSink => "connector-sink".to_string(),
-                };
-                s.insert(feature);
+        self.graph
+            .node_weights()
+            .flat_map(|node| &node.operator_chain.operators)
+            .filter_map(|op| op.extract_feature())
+            .collect()
+    }
+
+    /// Arrow schema carried on edges into the connector-sink node, if present.
+    pub fn egress_arrow_schema(&self) -> Option<Arc<Schema>> {
+        for idx in self.graph.node_indices() {
+            let node = self.graph.node_weight(idx)?;
+            if node
+                .operator_chain
+                .operators
+                .iter()
+                .any(|op| op.operator_name == OperatorName::ConnectorSink)
+            {
+                let e = self
+                    .graph
+                    .edges_directed(idx, Direction::Incoming)
+                    .next()?;
+                return Some(Arc::clone(&e.weight().schema.schema));
             }
         }
-        s
+        None
+    }
+
+    pub fn encode_for_catalog(&self) -> DFResult<Vec<u8>> {
+        Ok(FsProgram::from(self.clone()).encode_to_vec())
+    }
+
+    pub fn decode_for_catalog(bytes: &[u8]) -> DFResult<Self> {
+        let proto = FsProgram::decode(bytes).map_err(|e| {
+            DataFusionError::Execution(format!("FsProgram catalog decode failed: {e}"))
+        })?;
+        LogicalProgram::try_from(proto)
     }
 }
diff --git a/src/sql/logical_node/logical/mod.rs b/src/sql/logical_node/logical/mod.rs
index 96dd2ce5..3a94d1f3 100644
--- a/src/sql/logical_node/logical/mod.rs
+++ b/src/sql/logical_node/logical/mod.rs
@@ -11,6 +11,7 @@
 // limitations under the License.
 
 mod dylib_udf_config;
+mod fs_program_convert;
 mod logical_edge;
 mod logical_graph;
 mod logical_node;
diff --git a/src/sql/logical_node/logical/operator_chain.rs b/src/sql/logical_node/logical/operator_chain.rs
index e3db96b2..e74684ba 100644
--- a/src/sql/logical_node/logical/operator_chain.rs
+++ b/src/sql/logical_node/logical/operator_chain.rs
@@ -12,19 +12,62 @@
 
 use std::sync::Arc;
 
-use itertools::Itertools;
+use itertools::{EitherOrBoth, Itertools};
+use prost::Message;
+use protocol::grpc::api::ConnectorOp;
+use serde::{Deserialize, Serialize};
 
 use super::operator_name::OperatorName;
 use crate::sql::common::FsSchema;
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct ChainedLogicalOperator {
     pub operator_id: String,
     pub operator_name: OperatorName,
     pub operator_config: Vec<u8>,
 }
 
-#[derive(Clone, Debug)]
+impl ChainedLogicalOperator {
+    pub fn extract_connector_name(&self) -> Option<String> {
+        if matches!(
+            self.operator_name,
+            OperatorName::ConnectorSource | OperatorName::ConnectorSink
+        ) {
+            ConnectorOp::decode(self.operator_config.as_slice())
+                .ok()
+                .map(|op| op.connector)
+        } else {
+            None
+        }
+    }
+
+    pub fn extract_feature(&self) -> Option<String> {
+        match self.operator_name {
+            OperatorName::AsyncUdf => Some("async-udf".to_string()),
+            OperatorName::Join => Some("join-with-expiration".to_string()),
+            OperatorName::InstantJoin => Some("windowed-join".to_string()),
+            OperatorName::WindowFunction => Some("sql-window-function".to_string()),
+            OperatorName::LookupJoin => Some("lookup-join".to_string()),
+            OperatorName::TumblingWindowAggregate => {
+                Some("sql-tumbling-window-aggregate".to_string())
+            }
+            OperatorName::SlidingWindowAggregate => {
+                Some("sql-sliding-window-aggregate".to_string())
+            }
+            OperatorName::SessionWindowAggregate => {
+                Some("sql-session-window-aggregate".to_string())
+            }
+            OperatorName::UpdatingAggregate => Some("sql-updating-aggregate".to_string()),
+            OperatorName::ConnectorSource => self
+                .extract_connector_name()
+                .map(|c| format!("{c}-source")),
+            OperatorName::ConnectorSink => self.extract_connector_name().map(|c| format!("{c}-sink")),
+            _ => None,
+        }
+    }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct OperatorChain {
     pub(crate) operators: Vec<ChainedLogicalOperator>,
     pub(crate) edges: Vec<Arc<FsSchema>>,
@@ -41,11 +84,11 @@ impl OperatorChain {
     pub fn iter(
         &self,
     ) -> impl Iterator<Item = (&ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
-        self.operators
-            .iter()
-            .zip_longest(self.edges.iter())
-            .map(|e| e.left_and_right())
-            .map(|(l, r)| (l.unwrap(), r))
+        self.operators.iter().zip_longest(&self.edges).filter_map(|e| match e {
+            EitherOrBoth::Both(op, edge) => Some((op, Some(edge))),
+            EitherOrBoth::Left(op) => Some((op, None)),
+            EitherOrBoth::Right(_) => None,
+        })
     }
 
     pub fn iter_mut(
@@ -53,13 +96,18 @@ impl OperatorChain {
     ) -> impl Iterator<Item = (&mut ChainedLogicalOperator, Option<&Arc<FsSchema>>)> {
         self.operators
             .iter_mut()
-            .zip_longest(self.edges.iter())
-            .map(|e| e.left_and_right())
-            .map(|(l, r)| (l.unwrap(), r))
+            .zip_longest(&self.edges)
+            .filter_map(|e| match e {
+                EitherOrBoth::Both(op, edge) => Some((op, Some(edge))),
+                EitherOrBoth::Left(op) => Some((op, None)),
+                EitherOrBoth::Right(_) => None,
+            })
     }
 
     pub fn first(&self) -> &ChainedLogicalOperator {
-        &self.operators[0]
+        self.operators
+            .first()
+            .expect("OperatorChain must contain at least one operator")
     }
 
     pub fn len(&self) -> usize {
diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs
index 057d8e82..22f58bbe 100644
--- a/src/sql/logical_node/logical/operator_name.rs
+++ b/src/sql/logical_node/logical/operator_name.rs
@@ -10,6 +10,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::str::FromStr;
+
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use strum::{Display, EnumString};
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
@@ -30,3 +33,22 @@ pub enum OperatorName {
     ConnectorSource,
     ConnectorSink,
 }
+
+impl Serialize for OperatorName {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_str(&self.to_string())
+    }
+}
+
+impl<'de> Deserialize<'de> for OperatorName {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let s = String::deserialize(deserializer)?;
+        Self::from_str(&s).map_err(serde::de::Error::custom)
+    }
+}
diff --git a/src/sql/logical_node/logical/program_config.rs b/src/sql/logical_node/logical/program_config.rs
index 38c76e66..931a5424 100644
--- a/src/sql/logical_node/logical/program_config.rs
+++ b/src/sql/logical_node/logical/program_config.rs
@@ -1,5 +1,6 @@
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
+//
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
@@ -10,13 +11,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::HashMap;
-
-use super::dylib_udf_config::DylibUdfConfig;
-use super::python_udf_config::PythonUdfConfig;
+use protocol::grpc::api::FsProgramConfig;
 
+/// Placeholder program-level config (UDF tables live elsewhere; wire maps stay empty).
 #[derive(Clone, Debug, Default)]
-pub struct ProgramConfig {
-    pub udf_dylibs: HashMap<String, DylibUdfConfig>,
-    pub python_udfs: HashMap<String, PythonUdfConfig>,
+pub struct ProgramConfig {}
+
+impl From<ProgramConfig> for FsProgramConfig {
+    fn from(_: ProgramConfig) -> Self {
+        Self {
+            udf_dylibs: Default::default(),
+            python_udfs: Default::default(),
+        }
+    }
+}
+
+impl From<FsProgramConfig> for ProgramConfig {
+    fn from(_: FsProgramConfig) -> Self {
+        Self::default()
+    }
 }
diff --git a/src/sql/logical_planner/compiled_sql.rs b/src/sql/logical_planner/compiled_sql.rs
deleted file mode 100644
index e0525097..00000000
--- a/src/sql/logical_planner/compiled_sql.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use crate::sql::logical_node::logical::LogicalProgram;
-
-// ── Compilation pipeline ──────────────────────────────────────────────
-
-#[derive(Clone, Debug)]
-pub struct CompiledSql {
-    pub program: LogicalProgram,
-    pub connection_ids: Vec<i64>,
-}
diff --git a/src/sql/logical_planner/mod.rs b/src/sql/logical_planner/mod.rs
index 8b7d9e76..85046c0d 100644
--- a/src/sql/logical_planner/mod.rs
+++ b/src/sql/logical_planner/mod.rs
@@ -56,12 +56,9 @@ use std::fmt::Debug;
 use tokio::sync::mpsc::UnboundedReceiver;
 use tokio_stream::wrappers::UnboundedReceiverStream;
 
-pub mod compiled_sql;
 pub(crate) mod planner;
 pub mod optimizers;
 
-pub use compiled_sql::CompiledSql;
-
 // ─────────────────── Updating Meta Helpers ───────────────────
 
 pub fn updating_meta_fields() -> Fields {
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index fc89787a..04f6c897 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -25,7 +25,6 @@ pub mod types;
 pub use schema::{StreamPlanningContext, StreamSchemaProvider};
 pub use parse::parse_sql;
 pub use analysis::rewrite_plan;
-pub use logical_planner::CompiledSql;
 
 #[cfg(test)]
 mod frontend_sql_coverage_tests;
diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs
index 5e34991a..2bb5ef87 100644
--- a/src/sql/schema/schema_provider.rs
+++ b/src/sql/schema/schema_provider.rs
@@ -19,15 +19,13 @@ use datafusion::datasource::{DefaultTableSource, TableProvider, TableType};
 use datafusion::execution::{FunctionRegistry, SessionStateDefaults};
 use datafusion::logical_expr::expr_rewriter::FunctionRewrite;
 use datafusion::logical_expr::planner::ExprPlanner;
-use datafusion::logical_expr::{
-    AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableSource, WindowUDF,
-};
+use datafusion::logical_expr::{AggregateUDF, Expr, ScalarUDF, TableSource, WindowUDF};
 use datafusion::optimizer::Analyzer;
 use datafusion::sql::planner::ContextProvider;
 use datafusion::sql::TableReference;
 use unicase::UniCase;
 
-use crate::sql::logical_node::logical::DylibUdfConfig;
+use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalProgram};
 use crate::sql::schema::table::Table as CatalogTable;
 use crate::sql::schema::utils::window_arrow_struct;
 use crate::sql::types::{PlaceholderUdf, PlanningOptions};
@@ -49,25 +47,23 @@ pub enum StreamTable {
     },
     Sink {
         name: String,
-        schema: Arc<Schema>,
-    },
-    Memory {
-        name: String,
-        logical_plan: Option<LogicalPlan>,
+        program: LogicalProgram,
     },
 }
 
 impl StreamTable {
     pub fn name(&self) -> &str {
         match self {
-            Self::Source { name, .. } | Self::Sink { name, .. } | Self::Memory { name, .. } => name,
+            Self::Source { name, .. } | Self::Sink { name, .. } => name,
         }
     }
 
     pub fn schema(&self) -> Arc<Schema> {
         match self {
-            Self::Source { schema, .. } | Self::Sink { schema, .. } => Arc::clone(schema),
-            Self::Memory { .. } => Arc::new(Schema::empty()),
+            Self::Source { schema, .. } => Arc::clone(schema),
+            Self::Sink { program, .. } => program
+                .egress_arrow_schema()
+                .unwrap_or_else(|| Arc::new(Schema::empty())),
         }
     }
 }
@@ -208,8 +204,8 @@ impl StreamPlanningContext {
         });
     }
 
-    pub fn add_sink_table(&mut self, name: String, schema: Arc<Schema>) {
-        self.register_stream_table(StreamTable::Sink { name, schema });
+    pub fn add_sink_table(&mut self, name: String, program: LogicalProgram) {
+        self.register_stream_table(StreamTable::Sink { name, program });
     }
 
     pub fn insert_table(&mut self, table: StreamTable) {
diff --git a/src/storage/stream_catalog/codec.rs b/src/storage/stream_catalog/codec.rs
index dacaebf8..34c2c4ba 100644
--- a/src/storage/stream_catalog/codec.rs
+++ b/src/storage/stream_catalog/codec.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Arrow Schema IPC and DataFusion logical plan serialization.
+//! Arrow Schema IPC and [`LogicalProgram`] bincode payloads for stream catalog rows.
 
 use std::io::Cursor;
 use std::sync::Arc;
@@ -20,8 +20,8 @@ use datafusion::arrow::ipc::reader::StreamReader;
 use datafusion::arrow::ipc::writer::StreamWriter;
 use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::common::{DataFusionError, Result};
-use datafusion::execution::context::SessionContext;
-use datafusion::logical_expr::LogicalPlan;
+
+use crate::sql::logical_node::logical::LogicalProgram;
 
 pub struct CatalogCodec;
 
@@ -47,11 +47,11 @@ impl CatalogCodec {
         Ok(reader.schema())
     }
 
-    pub fn encode_logical_plan(plan: &LogicalPlan) -> Result<Vec<u8>> {
-        datafusion_proto::bytes::logical_plan_to_bytes(plan).map(|b| b.to_vec())
+    pub fn encode_logical_program(program: &LogicalProgram) -> Result<Vec<u8>> {
+        program.encode_for_catalog()
     }
 
-    pub fn decode_logical_plan(bytes: &[u8], ctx: &SessionContext) -> Result<LogicalPlan> {
-        datafusion_proto::bytes::logical_plan_from_bytes(bytes, ctx)
+    pub fn decode_logical_program(bytes: &[u8]) -> Result<LogicalProgram> {
+        LogicalProgram::decode_for_catalog(bytes)
     }
 }
diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs
index 7e61b20e..7e75f786 100644
--- a/src/storage/stream_catalog/manager.rs
+++ b/src/storage/stream_catalog/manager.rs
@@ -14,8 +14,8 @@ use std::collections::HashMap;
 use std::sync::{Arc, OnceLock};
 
 use anyhow::{anyhow, bail, Context};
+use datafusion::arrow::datatypes::Schema;
 use datafusion::common::{internal_err, plan_err, Result as DFResult};
-use datafusion::execution::context::SessionContext;
 use parking_lot::RwLock;
 use prost::Message;
 use protocol::storage::{self as pb, table_definition};
@@ -37,36 +37,28 @@ pub struct StreamTableCatalogCache {
 pub struct CatalogManager {
     store: Arc<dyn MetaStore>,
     cache: RwLock<StreamTableCatalogCache>,
-    session_ctx: Arc<SessionContext>,
 }
 
 static GLOBAL_CATALOG: OnceLock<Arc<CatalogManager>> = OnceLock::new();
 
 impl CatalogManager {
-    pub fn new(store: Arc<dyn MetaStore>, session_ctx: Arc<SessionContext>) -> Self {
+    pub fn new(store: Arc<dyn MetaStore>) -> Self {
         Self {
             store,
             cache: RwLock::new(StreamTableCatalogCache::default()),
-            session_ctx,
         }
     }
 
     pub fn init_global_in_memory() -> anyhow::Result<()> {
-        Self::init_global(
-            Arc::new(super::InMemoryMetaStore::new()),
-            Arc::new(SessionContext::new()),
-        )
+        Self::init_global(Arc::new(super::InMemoryMetaStore::new()))
     }
 
-    pub fn init_global(
-        store: Arc<dyn MetaStore>,
-        session_ctx: Arc<SessionContext>,
-    ) -> anyhow::Result<()> {
+    pub fn init_global(store: Arc<dyn MetaStore>) -> anyhow::Result<()> {
         if GLOBAL_CATALOG.get().is_some() {
             bail!("CatalogManager already initialized");
         }
 
-        let mgr = Arc::new(CatalogManager::new(store, session_ctx));
+        let mgr = Arc::new(CatalogManager::new(store));
         GLOBAL_CATALOG
             .set(mgr)
             .map_err(|_| anyhow!("CatalogManager global install failed"))?;
@@ -164,16 +156,15 @@ impl CatalogManager {
                 event_time_field: event_time_field.clone(),
                 watermark_field: watermark_field.clone(),
             }),
-            StreamTable::Sink { schema, .. } => table_definition::TableType::Sink(pb::StreamSink {
-                arrow_schema_ipc: CatalogCodec::encode_schema(schema)?,
-            }),
-            StreamTable::Memory { logical_plan, .. } => {
-                let logical_plan_bytes = logical_plan
-                    .as_ref()
-                    .map(|plan| CatalogCodec::encode_logical_plan(plan))
-                    .transpose()?;
-
-                table_definition::TableType::Memory(pb::StreamMemory { logical_plan_bytes })
+            StreamTable::Sink { program, .. } => {
+                let logical_program_bincode = CatalogCodec::encode_logical_program(program)?;
+                let schema = program
+                    .egress_arrow_schema()
+                    .unwrap_or_else(|| Arc::new(Schema::empty()));
+                table_definition::TableType::Sink(pb::StreamSink {
+                    arrow_schema_ipc: CatalogCodec::encode_schema(&schema)?,
+                    logical_program_bincode,
+                })
             }
         };
 
@@ -199,19 +190,17 @@ impl CatalogManager {
                 event_time_field: src.event_time_field,
                 watermark_field: src.watermark_field,
             }),
-            table_definition::TableType::Sink(sink) => Ok(StreamTable::Sink {
-                name: proto_def.table_name,
-                schema: CatalogCodec::decode_schema(&sink.arrow_schema_ipc)?,
-            }),
-            table_definition::TableType::Memory(mem) => {
-                let logical_plan = mem
-                    .logical_plan_bytes
-                    .map(|bytes| CatalogCodec::decode_logical_plan(&bytes, &self.session_ctx))
-                    .transpose()?;
-
-                Ok(StreamTable::Memory {
+            table_definition::TableType::Sink(sink) => {
+                if sink.logical_program_bincode.is_empty() {
+                    return internal_err!(
+                        "Corrupted catalog row: sink '{}' missing logical_program_bincode",
+                        proto_def.table_name
+                    );
+                }
+                let program = CatalogCodec::decode_logical_program(&sink.logical_program_bincode)?;
+                Ok(StreamTable::Sink {
                     name: proto_def.table_name,
-                    logical_plan,
+                    program,
                 })
             }
         }
@@ -242,18 +231,15 @@ mod tests {
     use std::sync::Arc;
 
     use datafusion::arrow::datatypes::{DataType, Field, Schema};
-    use datafusion::execution::context::SessionContext;
 
+    use crate::sql::logical_node::logical::LogicalProgram;
     use crate::sql::schema::StreamTable;
     use crate::storage::stream_catalog::{InMemoryMetaStore, MetaStore};
 
     use super::CatalogManager;
 
     fn create_test_manager() -> CatalogManager {
-        CatalogManager::new(
-            Arc::new(InMemoryMetaStore::new()),
-            Arc::new(SessionContext::new()),
-        )
+        CatalogManager::new(Arc::new(InMemoryMetaStore::new()))
     }
 
     #[test]
@@ -312,19 +298,17 @@ mod tests {
     #[test]
     fn restore_from_store_rebuilds_cache() {
         let store: Arc<dyn MetaStore> = Arc::new(InMemoryMetaStore::new());
-        let session = Arc::new(SessionContext::new());
 
-        let mgr_a = CatalogManager::new(Arc::clone(&store), Arc::clone(&session));
-        let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Utf8, true)]));
+        let mgr_a = CatalogManager::new(Arc::clone(&store));
 
         mgr_a
             .add_table(StreamTable::Sink {
                 name: "sink1".into(),
-                schema,
+                program: LogicalProgram::default(),
             })
             .unwrap();
 
-        let mgr_b = CatalogManager::new(store, session);
+        let mgr_b = CatalogManager::new(store);
         mgr_b.restore_from_store().unwrap();
 
         let ctx = mgr_b.acquire_planning_context();

From b4149bce95356e50cf812874737bc49609718ee5 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Tue, 24 Mar 2026 00:50:44 +0800
Subject: [PATCH 15/44] update

---
 Cargo.lock                                    | 3490 +++++++++++++++--
 Cargo.toml                                    |    5 +
 src/runtime/mod.rs                            |    9 +-
 src/runtime/source/mod.rs                     |   15 -
 src/runtime/streaming/api/context.rs          |   95 +
 src/runtime/streaming/api/mod.rs              |    9 +
 src/runtime/streaming/api/operator.rs         |   90 +
 src/runtime/streaming/api/source.rs           |   43 +
 src/runtime/streaming/arrow/mod.rs            |   68 +
 src/runtime/streaming/cluster/graph.rs        |  136 +
 src/runtime/streaming/cluster/manager.rs      |  164 +
 src/runtime/streaming/cluster/master.rs       |  274 ++
 src/runtime/streaming/cluster/mod.rs          |   11 +
 src/runtime/streaming/cluster/wiring.rs       |   46 +
 src/runtime/streaming/error.rs                |   10 +
 src/runtime/streaming/execution/mod.rs        |    8 +
 src/runtime/streaming/execution/runner.rs     |  298 ++
 src/runtime/streaming/execution/source.rs     |  120 +
 .../execution/tracker/barrier_aligner.rs      |   57 +
 .../streaming/execution/tracker/mod.rs        |    7 +
 .../execution/tracker/watermark_tracker.rs    |   86 +
 src/runtime/streaming/factory/mod.rs          |    3 +
 src/runtime/streaming/factory/registry.rs     |   44 +
 src/runtime/streaming/format/mod.rs           |    0
 src/runtime/streaming/lib.rs                  |   44 +
 src/runtime/streaming/memory/mod.rs           |    5 +
 src/runtime/streaming/memory/pool.rs          |   75 +
 src/runtime/streaming/memory/ticket.rs        |   24 +
 src/runtime/streaming/mod.rs                  |   45 +
 src/runtime/streaming/network/endpoint.rs     |   59 +
 src/runtime/streaming/network/environment.rs  |   82 +
 src/runtime/streaming/network/mod.rs          |    5 +
 .../grouping/incremental_aggregate.rs         |  847 ++++
 .../streaming/operators/grouping/mod.rs       |    5 +
 .../operators/grouping/updating_cache.rs      |  498 +++
 .../operators/joins/join_instance.rs          |  351 ++
 .../operators/joins/join_with_expiration.rs   |  261 ++
 .../streaming/operators/joins/lookup_join.rs  |  363 ++
 src/runtime/streaming/operators/joins/mod.rs  |    7 +
 src/runtime/streaming/operators/mod.rs        |   75 +
 .../streaming/operators/sink/kafka/mod.rs     |  366 ++
 src/runtime/streaming/operators/sink/mod.rs   |    5 +
 .../streaming/operators/source/kafka/mod.rs   |  325 ++
 src/runtime/streaming/operators/source/mod.rs |    5 +
 .../streaming/operators/watermark/mod.rs      |    3 +
 .../watermark/watermark_generator.rs          |  244 ++
 .../streaming/operators/windows/mod.rs        |    9 +
 .../windows/session_aggregating_window.rs     |  804 ++++
 .../windows/sliding_aggregating_window.rs     |  578 +++
 .../windows/tumbling_aggregating_window.rs    |  399 ++
 .../operators/windows/window_function.rs      |  292 ++
 src/runtime/streaming/protocol/control.rs     |   74 +
 src/runtime/streaming/protocol/event.rs       |   11 +
 src/runtime/streaming/protocol/mod.rs         |   15 +
 src/runtime/streaming/protocol/stream_out.rs  |   15 +
 src/runtime/streaming/protocol/tracked.rs     |   31 +
 src/runtime/streaming/protocol/watermark.rs   |   80 +
 src/runtime/streaming/state/mod.rs            |    0
 src/runtime/streaming/state/table_manager.rs  |    0
 .../{ => wasm}/input/input_protocol.rs        |    0
 .../{ => wasm}/input/input_provider.rs        |    0
 src/runtime/{ => wasm}/input/input_runner.rs  |    0
 src/runtime/{ => wasm}/input/interface.rs     |    0
 src/runtime/{ => wasm}/input/mod.rs           |    0
 .../{ => wasm}/input/protocol/kafka/config.rs |    0
 .../input/protocol/kafka/kafka_protocol.rs    |    0
 .../{ => wasm}/input/protocol/kafka/mod.rs    |    0
 src/runtime/{ => wasm}/input/protocol/mod.rs  |    0
 src/runtime/{sink => wasm}/mod.rs             |    7 +-
 src/runtime/{ => wasm}/output/interface.rs    |    0
 src/runtime/{ => wasm}/output/mod.rs          |    0
 .../{ => wasm}/output/output_protocol.rs      |    0
 .../{ => wasm}/output/output_provider.rs      |    0
 .../{ => wasm}/output/output_runner.rs        |    0
 .../output/protocol/kafka/kafka_protocol.rs   |    0
 .../{ => wasm}/output/protocol/kafka/mod.rs   |    0
 .../output/protocol/kafka/producer_config.rs  |    0
 src/runtime/{ => wasm}/output/protocol/mod.rs |    0
 .../{ => wasm}/processor/function_error.rs    |    0
 src/runtime/{ => wasm}/processor/mod.rs       |    0
 .../{ => wasm}/processor/python/mod.rs        |    0
 .../processor/python/python_host.rs           |    0
 .../processor/python/python_service.rs        |    0
 .../processor/wasm/input_strategy.rs          |    0
 src/runtime/{ => wasm}/processor/wasm/mod.rs  |    0
 .../{ => wasm}/processor/wasm/thread_pool.rs  |    0
 .../{ => wasm}/processor/wasm/wasm_cache.rs   |    0
 .../{ => wasm}/processor/wasm/wasm_host.rs    |    0
 .../processor/wasm/wasm_processor.rs          |    0
 .../processor/wasm/wasm_processor_trait.rs    |    0
 .../{ => wasm}/processor/wasm/wasm_task.rs    |    0
 src/sql/common/errors.rs                      |   13 +
 src/sql/common/fs_schema.rs                   |    9 +
 src/sql/common/mod.rs                         |    6 +-
 94 files changed, 10697 insertions(+), 428 deletions(-)
 delete mode 100644 src/runtime/source/mod.rs
 create mode 100644 src/runtime/streaming/api/context.rs
 create mode 100644 src/runtime/streaming/api/mod.rs
 create mode 100644 src/runtime/streaming/api/operator.rs
 create mode 100644 src/runtime/streaming/api/source.rs
 create mode 100644 src/runtime/streaming/arrow/mod.rs
 create mode 100644 src/runtime/streaming/cluster/graph.rs
 create mode 100644 src/runtime/streaming/cluster/manager.rs
 create mode 100644 src/runtime/streaming/cluster/master.rs
 create mode 100644 src/runtime/streaming/cluster/mod.rs
 create mode 100644 src/runtime/streaming/cluster/wiring.rs
 create mode 100644 src/runtime/streaming/error.rs
 create mode 100644 src/runtime/streaming/execution/mod.rs
 create mode 100644 src/runtime/streaming/execution/runner.rs
 create mode 100644 src/runtime/streaming/execution/source.rs
 create mode 100644 src/runtime/streaming/execution/tracker/barrier_aligner.rs
 create mode 100644 src/runtime/streaming/execution/tracker/mod.rs
 create mode 100644 src/runtime/streaming/execution/tracker/watermark_tracker.rs
 create mode 100644 src/runtime/streaming/factory/mod.rs
 create mode 100644 src/runtime/streaming/factory/registry.rs
 create mode 100644 src/runtime/streaming/format/mod.rs
 create mode 100644 src/runtime/streaming/lib.rs
 create mode 100644 src/runtime/streaming/memory/mod.rs
 create mode 100644 src/runtime/streaming/memory/pool.rs
 create mode 100644 src/runtime/streaming/memory/ticket.rs
 create mode 100644 src/runtime/streaming/mod.rs
 create mode 100644 src/runtime/streaming/network/endpoint.rs
 create mode 100644 src/runtime/streaming/network/environment.rs
 create mode 100644 src/runtime/streaming/network/mod.rs
 create mode 100644 src/runtime/streaming/operators/grouping/incremental_aggregate.rs
 create mode 100644 src/runtime/streaming/operators/grouping/mod.rs
 create mode 100644 src/runtime/streaming/operators/grouping/updating_cache.rs
 create mode 100644 src/runtime/streaming/operators/joins/join_instance.rs
 create mode 100644 src/runtime/streaming/operators/joins/join_with_expiration.rs
 create mode 100644 src/runtime/streaming/operators/joins/lookup_join.rs
 create mode 100644 src/runtime/streaming/operators/joins/mod.rs
 create mode 100644 src/runtime/streaming/operators/mod.rs
 create mode 100644 src/runtime/streaming/operators/sink/kafka/mod.rs
 create mode 100644 src/runtime/streaming/operators/sink/mod.rs
 create mode 100644 src/runtime/streaming/operators/source/kafka/mod.rs
 create mode 100644 src/runtime/streaming/operators/source/mod.rs
 create mode 100644 src/runtime/streaming/operators/watermark/mod.rs
 create mode 100644 src/runtime/streaming/operators/watermark/watermark_generator.rs
 create mode 100644 src/runtime/streaming/operators/windows/mod.rs
 create mode 100644 src/runtime/streaming/operators/windows/session_aggregating_window.rs
 create mode 100644 src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
 create mode 100644 src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
 create mode 100644 src/runtime/streaming/operators/windows/window_function.rs
 create mode 100644 src/runtime/streaming/protocol/control.rs
 create mode 100644 src/runtime/streaming/protocol/event.rs
 create mode 100644 src/runtime/streaming/protocol/mod.rs
 create mode 100644 src/runtime/streaming/protocol/stream_out.rs
 create mode 100644 src/runtime/streaming/protocol/tracked.rs
 create mode 100644 src/runtime/streaming/protocol/watermark.rs
 create mode 100644 src/runtime/streaming/state/mod.rs
 create mode 100644 src/runtime/streaming/state/table_manager.rs
 rename src/runtime/{ => wasm}/input/input_protocol.rs (100%)
 rename src/runtime/{ => wasm}/input/input_provider.rs (100%)
 rename src/runtime/{ => wasm}/input/input_runner.rs (100%)
 rename src/runtime/{ => wasm}/input/interface.rs (100%)
 rename src/runtime/{ => wasm}/input/mod.rs (100%)
 rename src/runtime/{ => wasm}/input/protocol/kafka/config.rs (100%)
 rename src/runtime/{ => wasm}/input/protocol/kafka/kafka_protocol.rs (100%)
 rename src/runtime/{ => wasm}/input/protocol/kafka/mod.rs (100%)
 rename src/runtime/{ => wasm}/input/protocol/mod.rs (100%)
 rename src/runtime/{sink => wasm}/mod.rs (86%)
 rename src/runtime/{ => wasm}/output/interface.rs (100%)
 rename src/runtime/{ => wasm}/output/mod.rs (100%)
 rename src/runtime/{ => wasm}/output/output_protocol.rs (100%)
 rename src/runtime/{ => wasm}/output/output_provider.rs (100%)
 rename src/runtime/{ => wasm}/output/output_runner.rs (100%)
 rename src/runtime/{ => wasm}/output/protocol/kafka/kafka_protocol.rs (100%)
 rename src/runtime/{ => wasm}/output/protocol/kafka/mod.rs (100%)
 rename src/runtime/{ => wasm}/output/protocol/kafka/producer_config.rs (100%)
 rename src/runtime/{ => wasm}/output/protocol/mod.rs (100%)
 rename src/runtime/{ => wasm}/processor/function_error.rs (100%)
 rename src/runtime/{ => wasm}/processor/mod.rs (100%)
 rename src/runtime/{ => wasm}/processor/python/mod.rs (100%)
 rename src/runtime/{ => wasm}/processor/python/python_host.rs (100%)
 rename src/runtime/{ => wasm}/processor/python/python_service.rs (100%)
 rename src/runtime/{ => wasm}/processor/wasm/input_strategy.rs (100%)
 rename src/runtime/{ => wasm}/processor/wasm/mod.rs (100%)
 rename src/runtime/{ => wasm}/processor/wasm/thread_pool.rs (100%)
 rename src/runtime/{ => wasm}/processor/wasm/wasm_cache.rs (100%)
 rename src/runtime/{ => wasm}/processor/wasm/wasm_host.rs (100%)
 rename src/runtime/{ => wasm}/processor/wasm/wasm_processor.rs (100%)
 rename src/runtime/{ => wasm}/processor/wasm/wasm_processor_trait.rs (100%)
 rename src/runtime/{ => wasm}/processor/wasm/wasm_task.rs (100%)

diff --git a/Cargo.lock b/Cargo.lock
index 7cd510f3..4cc46aef 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -19,16 +19,16 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
 
 [[package]]
 name = "ahash"
-version = "0.8.12"
+version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01"
 dependencies = [
  "cfg-if",
  "const-random",
- "getrandom 0.3.4",
+ "getrandom 0.2.16",
  "once_cell",
  "version_check",
- "zerocopy",
+ "zerocopy 0.7.35",
 ]
 
 [[package]]
@@ -132,6 +132,30 @@ version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
 
+[[package]]
+name = "apache-avro"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61a81f4e6304e455a9d52cf8ab667cb2fcf792f2cee2a31c28800901a335ecd5"
+dependencies = [
+ "bigdecimal",
+ "bon",
+ "digest",
+ "log",
+ "miniz_oxide",
+ "num-bigint",
+ "quad-rand",
+ "rand 0.9.2",
+ "regex-lite",
+ "serde",
+ "serde_bytes",
+ "serde_json",
+ "strum 0.27.2",
+ "strum_macros 0.27.2",
+ "thiserror 2.0.17",
+ "uuid",
+]
+
 [[package]]
 name = "ar_archive_writer"
 version = "0.5.1"
@@ -147,6 +171,15 @@ version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
 
+[[package]]
+name = "arc-swap"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6"
+dependencies = [
+ "rustversion",
+]
+
 [[package]]
 name = "arrayref"
 version = "0.3.9"
@@ -488,6 +521,133 @@ dependencies = [
  "regex-syntax",
 ]
 
+[[package]]
+name = "arroyo-datastream"
+version = "0.16.0-dev"
+dependencies = [
+ "anyhow",
+ "arrow-schema 55.2.0",
+ "arroyo-rpc",
+ "bincode",
+ "datafusion-proto 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.14.0",
+ "petgraph 0.8.3",
+ "prost",
+ "rand 0.9.2",
+ "serde",
+ "serde_json",
+ "strum 0.27.2",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "arroyo-rpc"
+version = "0.16.0-dev"
+dependencies = [
+ "ahash",
+ "anyhow",
+ "apache-avro",
+ "arc-swap",
+ "arrow",
+ "arrow-array 55.2.0",
+ "arrow-ord",
+ "arrow-schema 55.2.0",
+ "arroyo-types",
+ "async-trait",
+ "base64",
+ "bincode",
+ "bytes",
+ "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "dirs",
+ "figment",
+ "futures",
+ "k8s-openapi",
+ "local-ip-address",
+ "log",
+ "nanoid",
+ "object_store",
+ "percent-encoding",
+ "prost",
+ "rand 0.9.2",
+ "regex",
+ "reqwest",
+ "rustls",
+ "rustls-native-certs",
+ "schemars 1.2.1",
+ "serde",
+ "serde_json",
+ "smallvec",
+ "strum 0.27.2",
+ "strum_macros 0.27.2",
+ "thiserror 2.0.17",
+ "tokio",
+ "tonic 0.13.1",
+ "tonic-build 0.13.1",
+ "tracing",
+ "url",
+ "utoipa",
+]
+
+[[package]]
+name = "arroyo-state"
+version = "0.16.0-dev"
+dependencies = [
+ "anyhow",
+ "arrow",
+ "arrow-array 55.2.0",
+ "arrow-ord",
+ "arrow-schema 55.2.0",
+ "arroyo-datastream",
+ "arroyo-rpc",
+ "arroyo-storage",
+ "arroyo-types",
+ "async-trait",
+ "bincode",
+ "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "lazy_static",
+ "object_store",
+ "once_cell",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "prometheus",
+ "prost",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "arroyo-storage"
+version = "0.16.0-dev"
+dependencies = [
+ "arroyo-rpc",
+ "arroyo-types",
+ "async-trait",
+ "aws-config",
+ "aws-credential-types",
+ "bytes",
+ "futures",
+ "object_store",
+ "rand 0.9.2",
+ "regex",
+ "thiserror 2.0.17",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "arroyo-types"
+version = "0.16.0-dev"
+dependencies = [
+ "arrow",
+ "arrow-array 55.2.0",
+ "bincode",
+ "chrono",
+ "serde",
+]
+
 [[package]]
 name = "async-compression"
 version = "0.4.19"
@@ -524,7 +684,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -535,7 +695,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -547,6 +707,15 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "atomic"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340"
+dependencies = [
+ "bytemuck",
+]
+
 [[package]]
 name = "atomic-waker"
 version = "1.1.2"
@@ -560,162 +729,588 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
 [[package]]
-name = "axum"
-version = "0.7.9"
+name = "aws-config"
+version = "1.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
+checksum = "02a18fd934af6ae7ca52410d4548b98eb895aab0f1ea417d168d85db1434a141"
 dependencies = [
- "async-trait",
- "axum-core",
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-sdk-sso",
+ "aws-sdk-ssooidc",
+ "aws-sdk-sts",
+ "aws-smithy-async",
+ "aws-smithy-http 0.62.6",
+ "aws-smithy-json",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
  "bytes",
- "futures-util",
- "http",
- "http-body",
- "http-body-util",
- "itoa",
- "matchit",
- "memchr",
- "mime",
- "percent-encoding",
- "pin-project-lite",
- "rustversion",
- "serde",
- "sync_wrapper",
- "tower 0.5.2",
- "tower-layer",
- "tower-service",
+ "fastrand",
+ "hex",
+ "http 1.4.0",
+ "ring",
+ "time",
+ "tokio",
+ "tracing",
+ "url",
+ "zeroize",
 ]
 
 [[package]]
-name = "axum-core"
-version = "0.4.5"
+name = "aws-credential-types"
+version = "1.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
+checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0"
 dependencies = [
- "async-trait",
- "bytes",
- "futures-util",
- "http",
- "http-body",
- "http-body-util",
- "mime",
- "pin-project-lite",
- "rustversion",
- "sync_wrapper",
- "tower-layer",
- "tower-service",
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "zeroize",
 ]
 
 [[package]]
-name = "backtrace"
-version = "0.3.76"
+name = "aws-lc-rs"
+version = "1.16.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
+checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc"
 dependencies = [
- "addr2line",
- "cfg-if",
- "libc",
- "miniz_oxide",
- "object",
- "rustc-demangle",
- "windows-link",
+ "aws-lc-sys",
+ "zeroize",
 ]
 
 [[package]]
-name = "backtrace-ext"
-version = "0.2.1"
+name = "aws-lc-sys"
+version = "0.39.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50"
+checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a"
 dependencies = [
- "backtrace",
+ "cc",
+ "cmake",
+ "dunce",
+ "fs_extra",
 ]
 
 [[package]]
-name = "base64"
-version = "0.22.1"
+name = "aws-runtime"
+version = "1.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75"
+dependencies = [
+ "aws-credential-types",
+ "aws-sigv4",
+ "aws-smithy-async",
+ "aws-smithy-http 0.63.5",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "bytes-utils",
+ "fastrand",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "percent-encoding",
+ "pin-project-lite",
+ "tracing",
+ "uuid",
+]
 
 [[package]]
-name = "bigdecimal"
-version = "0.4.10"
+name = "aws-sdk-sso"
+version = "1.72.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695"
+checksum = "13118ad30741222f67b1a18e5071385863914da05124652b38e172d6d3d9ce31"
 dependencies = [
- "autocfg",
- "libm",
- "num-bigint",
- "num-integer",
- "num-traits",
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http 0.62.6",
+ "aws-smithy-json",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "regex-lite",
+ "tracing",
 ]
 
 [[package]]
-name = "bincode"
-version = "2.0.1"
+name = "aws-sdk-ssooidc"
+version = "1.73.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
+checksum = "f879a8572b4683a8f84f781695bebf2f25cf11a81a2693c31fc0e0215c2c1726"
 dependencies = [
- "bincode_derive",
- "serde",
- "unty",
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http 0.62.6",
+ "aws-smithy-json",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "regex-lite",
+ "tracing",
 ]
 
 [[package]]
-name = "bincode_derive"
-version = "2.0.1"
+name = "aws-sdk-sts"
+version = "1.73.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
+checksum = "f1e9c3c24e36183e2f698235ed38dcfbbdff1d09b9232dc866c4be3011e0b47e"
 dependencies = [
- "virtue",
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http 0.62.6",
+ "aws-smithy-json",
+ "aws-smithy-query",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-smithy-xml",
+ "aws-types",
+ "fastrand",
+ "http 0.2.12",
+ "regex-lite",
+ "tracing",
 ]
 
 [[package]]
-name = "bindgen"
-version = "0.65.1"
+name = "aws-sigv4"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"
+checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9"
 dependencies = [
- "bitflags 1.3.2",
- "cexpr",
- "clang-sys",
- "lazy_static",
- "lazycell",
- "peeking_take_while",
- "prettyplease",
- "proc-macro2",
- "quote",
- "regex",
- "rustc-hash 1.1.0",
- "shlex",
- "syn",
+ "aws-credential-types",
+ "aws-smithy-http 0.63.5",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "form_urlencoded",
+ "hex",
+ "hmac",
+ "http 0.2.12",
+ "http 1.4.0",
+ "percent-encoding",
+ "sha2",
+ "time",
+ "tracing",
 ]
 
 [[package]]
-name = "bindgen"
-version = "0.72.1"
+name = "aws-smithy-async"
+version = "1.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
+checksum = "5cc50d0f63e714784b84223abd7abbc8577de8c35d699e0edd19f0a88a08ae13"
 dependencies = [
- "bitflags 2.10.0",
- "cexpr",
- "clang-sys",
- "itertools 0.13.0",
- "proc-macro2",
- "quote",
- "regex",
- "rustc-hash 2.1.1",
- "shlex",
- "syn",
+ "futures-util",
+ "pin-project-lite",
+ "tokio",
 ]
 
 [[package]]
-name = "bitflags"
-version = "1.3.2"
+name = "aws-smithy-http"
+version = "0.62.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b"
+dependencies = [
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "bytes-utils",
+ "futures-core",
+ "futures-util",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "percent-encoding",
+ "pin-project-lite",
+ "pin-utils",
+ "tracing",
+]
 
 [[package]]
-name = "bitflags"
+name = "aws-smithy-http"
+version = "0.63.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7"
+dependencies = [
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "bytes-utils",
+ "futures-core",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "percent-encoding",
+ "pin-project-lite",
+ "pin-utils",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-http-client"
+version = "1.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "h2",
+ "http 1.4.0",
+ "hyper",
+ "hyper-rustls",
+ "hyper-util",
+ "pin-project-lite",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "tokio",
+ "tokio-rustls",
+ "tower 0.5.2",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-json"
+version = "0.61.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551"
+dependencies = [
+ "aws-smithy-types",
+]
+
+[[package]]
+name = "aws-smithy-observability"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b"
+dependencies = [
+ "aws-smithy-runtime-api",
+]
+
+[[package]]
+name = "aws-smithy-query"
+version = "0.60.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0"
+dependencies = [
+ "aws-smithy-types",
+ "urlencoding",
+]
+
+[[package]]
+name = "aws-smithy-runtime"
+version = "1.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-http 0.63.5",
+ "aws-smithy-http-client",
+ "aws-smithy-observability",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "http-body 1.0.1",
+ "http-body-util",
+ "pin-project-lite",
+ "pin-utils",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-runtime-api"
+version = "1.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-types",
+ "bytes",
+ "http 0.2.12",
+ "http 1.4.0",
+ "pin-project-lite",
+ "tokio",
+ "tracing",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-smithy-types"
+version = "1.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3"
+dependencies = [
+ "base64-simd",
+ "bytes",
+ "bytes-utils",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "http-body 1.0.1",
+ "http-body-util",
+ "itoa",
+ "num-integer",
+ "pin-project-lite",
+ "pin-utils",
+ "ryu",
+ "serde",
+ "time",
+]
+
+[[package]]
+name = "aws-smithy-xml"
+version = "0.60.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa"
+dependencies = [
+ "xmlparser",
+]
+
+[[package]]
+name = "aws-types"
+version = "1.3.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96"
+dependencies = [
+ "aws-credential-types",
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "rustc_version",
+ "tracing",
+]
+
+[[package]]
+name = "axum"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
+dependencies = [
+ "async-trait",
+ "axum-core 0.4.5",
+ "bytes",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "itoa",
+ "matchit 0.7.3",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "sync_wrapper",
+ "tower 0.5.2",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "axum"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
+dependencies = [
+ "axum-core 0.5.6",
+ "bytes",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "itoa",
+ "matchit 0.8.4",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "serde_core",
+ "sync_wrapper",
+ "tower 0.5.2",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "backtrace"
+version = "0.3.76"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
+dependencies = [
+ "addr2line",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+ "windows-link",
+]
+
+[[package]]
+name = "backtrace-ext"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50"
+dependencies = [
+ "backtrace",
+]
+
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
+[[package]]
+name = "base64-simd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195"
+dependencies = [
+ "outref",
+ "vsimd",
+]
+
+[[package]]
+name = "bigdecimal"
+version = "0.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695"
+dependencies = [
+ "autocfg",
+ "libm",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "bincode"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
+dependencies = [
+ "bincode_derive",
+ "serde",
+ "unty",
+]
+
+[[package]]
+name = "bincode_derive"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
+dependencies = [
+ "virtue",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.65.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"
+dependencies = [
+ "bitflags 1.3.2",
+ "cexpr",
+ "clang-sys",
+ "lazy_static",
+ "lazycell",
+ "peeking_take_while",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash 1.1.0",
+ "shlex",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.72.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
+dependencies = [
+ "bitflags 2.10.0",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.13.0",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash 2.1.1",
+ "shlex",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bitflags"
 version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
@@ -773,6 +1368,31 @@ dependencies = [
  "generic-array",
 ]
 
+[[package]]
+name = "bon"
+version = "3.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe"
+dependencies = [
+ "bon-macros",
+ "rustversion",
+]
+
+[[package]]
+name = "bon-macros"
+version = "3.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c"
+dependencies = [
+ "darling 0.23.0",
+ "ident_case",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn 2.0.113",
+]
+
 [[package]]
 name = "brotli"
 version = "8.0.2"
@@ -803,6 +1423,18 @@ dependencies = [
  "allocator-api2",
 ]
 
+[[package]]
+name = "bytecount"
+version = "0.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
+
+[[package]]
+name = "bytemuck"
+version = "1.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
+
 [[package]]
 name = "byteorder"
 version = "1.5.0"
@@ -811,9 +1443,19 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
 [[package]]
 name = "bytes"
-version = "1.11.0"
+version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+
+[[package]]
+name = "bytes-utils"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35"
+dependencies = [
+ "bytes",
+ "either",
+]
 
 [[package]]
 name = "bzip2"
@@ -834,6 +1476,15 @@ dependencies = [
  "pkg-config",
 ]
 
+[[package]]
+name = "camino"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48"
+dependencies = [
+ "serde_core",
+]
+
 [[package]]
 name = "cap-fs-ext"
 version = "3.4.5"
@@ -912,6 +1563,28 @@ dependencies = [
  "winx",
 ]
 
+[[package]]
+name = "cargo-platform"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "cargo_metadata"
+version = "0.14.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa"
+dependencies = [
+ "camino",
+ "cargo-platform",
+ "semver",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "cc"
 version = "1.2.51"
@@ -945,6 +1618,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
 
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
 [[package]]
 name = "chrono"
 version = "0.4.42"
@@ -954,6 +1633,7 @@ dependencies = [
  "iana-time-zone",
  "js-sys",
  "num-traits",
+ "serde",
  "wasm-bindgen",
  "windows-link",
 ]
@@ -1020,7 +1700,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -1108,6 +1788,26 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
 
+[[package]]
+name = "core-foundation"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "core-foundation"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
@@ -1129,7 +1829,7 @@ dependencies = [
  "postgres-types",
  "prettyplease",
  "rusqlite",
- "syn",
+ "syn 2.0.113",
  "thiserror 1.0.69",
 ]
 
@@ -1417,6 +2117,88 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "darling"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
+dependencies = [
+ "darling_core 0.20.11",
+ "darling_macro 0.20.11",
+]
+
+[[package]]
+name = "darling"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
+dependencies = [
+ "darling_core 0.23.0",
+ "darling_macro 0.23.0",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
+dependencies = [
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
+dependencies = [
+ "darling_core 0.20.11",
+ "quote",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
+dependencies = [
+ "darling_core 0.23.0",
+ "quote",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "dashmap"
+version = "5.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
+dependencies = [
+ "cfg-if",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
 [[package]]
 name = "dashmap"
 version = "6.1.0"
@@ -1431,6 +2213,60 @@ dependencies = [
  "parking_lot_core",
 ]
 
+[[package]]
+name = "datafusion"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a11e19a7ccc5bb979c95c1dceef663eab39c9061b3bbf8d1937faf0f03bf41f"
+dependencies = [
+ "arrow",
+ "arrow-ipc 55.2.0",
+ "arrow-schema 55.2.0",
+ "async-trait",
+ "bytes",
+ "bzip2",
+ "chrono",
+ "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-catalog-listing 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource-csv 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource-json 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource-parquet 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-nested 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-table 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-window 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "flate2",
+ "futures",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parking_lot",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.9.2",
+ "regex",
+ "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tempfile",
+ "tokio",
+ "url",
+ "uuid",
+ "xz2",
+ "zstd",
+]
+
 [[package]]
 name = "datafusion"
 version = "48.0.1"
@@ -1443,29 +2279,29 @@ dependencies = [
  "bytes",
  "bzip2",
  "chrono",
- "datafusion-catalog",
- "datafusion-catalog-listing",
- "datafusion-common",
- "datafusion-common-runtime",
- "datafusion-datasource",
- "datafusion-datasource-csv",
- "datafusion-datasource-json",
- "datafusion-datasource-parquet",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-expr-common",
- "datafusion-functions",
- "datafusion-functions-aggregate",
- "datafusion-functions-nested",
- "datafusion-functions-table",
- "datafusion-functions-window",
- "datafusion-optimizer",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
- "datafusion-physical-optimizer",
- "datafusion-physical-plan",
- "datafusion-session",
- "datafusion-sql",
+ "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-catalog-listing 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource-csv 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource-json 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource-parquet 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-nested 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-table 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-window 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "flate2",
  "futures",
  "itertools 0.14.0",
@@ -1475,7 +2311,7 @@ dependencies = [
  "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.9.2",
  "regex",
- "sqlparser",
+ "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
  "tempfile",
  "tokio",
  "url",
@@ -1484,6 +2320,32 @@ dependencies = [
  "zstd",
 ]
 
+[[package]]
+name = "datafusion-catalog"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94985e67cab97b1099db2a7af11f31a45008b282aba921c1e1d35327c212ec18"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "dashmap 6.1.0",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parking_lot",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-catalog"
 version = "48.0.1"
@@ -1491,16 +2353,16 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "async-trait",
- "dashmap",
- "datafusion-common",
- "datafusion-common-runtime",
- "datafusion-datasource",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-physical-expr",
- "datafusion-physical-plan",
- "datafusion-session",
- "datafusion-sql",
+ "dashmap 6.1.0",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
  "itertools 0.14.0",
  "log",
@@ -1509,6 +2371,29 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-catalog-listing"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e002df133bdb7b0b9b429d89a69aa77b35caeadee4498b2ce1c7c23a99516988"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "log",
+ "object_store",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-catalog-listing"
 version = "48.0.1"
@@ -1516,21 +2401,45 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "async-trait",
- "datafusion-catalog",
- "datafusion-common",
- "datafusion-datasource",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
- "datafusion-physical-plan",
- "datafusion-session",
+ "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
  "log",
  "object_store",
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-common"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13242fc58fd753787b0a538e5ae77d356cb9d0656fa85a591a33c5f106267f6"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-ipc 55.2.0",
+ "base64",
+ "half",
+ "hashbrown 0.14.5",
+ "indexmap 2.12.1",
+ "libc",
+ "log",
+ "object_store",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "paste",
+ "recursive",
+ "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tokio",
+ "web-time",
+]
+
 [[package]]
 name = "datafusion-common"
 version = "48.0.1"
@@ -1549,11 +2458,22 @@ dependencies = [
  "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "paste",
  "recursive",
- "sqlparser",
+ "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
  "tokio",
  "web-time",
 ]
 
+[[package]]
+name = "datafusion-common-runtime"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2239f964e95c3a5d6b4a8cde07e646de8995c1396a7fd62c6e784f5341db499"
+dependencies = [
+ "futures",
+ "log",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-common-runtime"
 version = "48.0.1"
@@ -1564,6 +2484,42 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-datasource"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cf792579bc8bf07d1b2f68c2d5382f8a63679cce8fbebfd4ba95742b6e08864"
+dependencies = [
+ "arrow",
+ "async-compression",
+ "async-trait",
+ "bytes",
+ "bzip2",
+ "chrono",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "flate2",
+ "futures",
+ "glob",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.9.2",
+ "tempfile",
+ "tokio",
+ "tokio-util",
+ "url",
+ "xz2",
+ "zstd",
+]
+
 [[package]]
 name = "datafusion-datasource"
 version = "48.0.1"
@@ -1575,14 +2531,14 @@ dependencies = [
  "bytes",
  "bzip2",
  "chrono",
- "datafusion-common",
- "datafusion-common-runtime",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
- "datafusion-physical-plan",
- "datafusion-session",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "flate2",
  "futures",
  "glob",
@@ -1599,6 +2555,31 @@ dependencies = [
  "zstd",
 ]
 
+[[package]]
+name = "datafusion-datasource-csv"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfc114f9a1415174f3e8d2719c371fc72092ef2195a7955404cfe6b2ba29a706"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "bytes",
+ "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "object_store",
+ "regex",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-datasource-csv"
 version = "48.0.1"
@@ -1607,22 +2588,47 @@ dependencies = [
  "arrow",
  "async-trait",
  "bytes",
- "datafusion-catalog",
- "datafusion-common",
- "datafusion-common-runtime",
- "datafusion-datasource",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
- "datafusion-physical-plan",
- "datafusion-session",
+ "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
  "object_store",
  "regex",
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-datasource-json"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d88dd5e215c420a52362b9988ecd4cefd71081b730663d4f7d886f706111fc75"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "bytes",
+ "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "object_store",
+ "serde_json",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-datasource-json"
 version = "48.0.1"
@@ -1631,22 +2637,53 @@ dependencies = [
  "arrow",
  "async-trait",
  "bytes",
- "datafusion-catalog",
- "datafusion-common",
- "datafusion-common-runtime",
- "datafusion-datasource",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
- "datafusion-physical-plan",
- "datafusion-session",
+ "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
  "object_store",
  "serde_json",
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-datasource-parquet"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33692acdd1fbe75280d14f4676fe43f39e9cb36296df56575aa2cac9a819e4cf"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "bytes",
+ "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parking_lot",
+ "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.9.2",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-datasource-parquet"
 version = "48.0.1"
@@ -1655,18 +2692,18 @@ dependencies = [
  "arrow",
  "async-trait",
  "bytes",
- "datafusion-catalog",
- "datafusion-common",
- "datafusion-common-runtime",
- "datafusion-datasource",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-functions-aggregate",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
- "datafusion-physical-optimizer",
- "datafusion-physical-plan",
- "datafusion-session",
+ "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
  "itertools 0.14.0",
  "log",
@@ -1677,20 +2714,45 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-doc"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0e7b648387b0c1937b83cb328533c06c923799e73a9e3750b762667f32662c0"
+
 [[package]]
 name = "datafusion-doc"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 
+[[package]]
+name = "datafusion-execution"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9609d83d52ff8315283c6dad3b97566e877d8f366fab4c3297742f33dcd636c7"
+dependencies = [
+ "arrow",
+ "dashmap 6.1.0",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "log",
+ "object_store",
+ "parking_lot",
+ "rand 0.9.2",
+ "tempfile",
+ "url",
+]
+
 [[package]]
 name = "datafusion-execution"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "dashmap",
- "datafusion-common",
- "datafusion-expr",
+ "dashmap 6.1.0",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
  "log",
  "object_store",
@@ -1700,6 +2762,27 @@ dependencies = [
  "url",
 ]
 
+[[package]]
+name = "datafusion-expr"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e75230cd67f650ef0399eb00f54d4a073698f2c0262948298e5299fc7324da63"
+dependencies = [
+ "arrow",
+ "chrono",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "indexmap 2.12.1",
+ "paste",
+ "recursive",
+ "serde_json",
+ "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "datafusion-expr"
 version = "48.0.1"
@@ -1707,17 +2790,30 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "chrono",
- "datafusion-common",
- "datafusion-doc",
- "datafusion-expr-common",
- "datafusion-functions-aggregate-common",
- "datafusion-functions-window-common",
- "datafusion-physical-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "indexmap 2.12.1",
  "paste",
  "recursive",
  "serde_json",
- "sqlparser",
+ "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
+]
+
+[[package]]
+name = "datafusion-expr-common"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70fafb3a045ed6c49cfca0cd090f62cf871ca6326cc3355cb0aaf1260fa760b6"
+dependencies = [
+ "arrow",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "indexmap 2.12.1",
+ "itertools 0.14.0",
+ "paste",
 ]
 
 [[package]]
@@ -1726,12 +2822,41 @@ version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "datafusion-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "indexmap 2.12.1",
  "itertools 0.14.0",
  "paste",
 ]
 
+[[package]]
+name = "datafusion-functions"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdf9a9cf655265861a20453b1e58357147eab59bdc90ce7f2f68f1f35104d3bb"
+dependencies = [
+ "arrow",
+ "arrow-buffer 55.2.0",
+ "base64",
+ "blake2",
+ "blake3",
+ "chrono",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "hex",
+ "itertools 0.14.0",
+ "log",
+ "md-5",
+ "rand 0.9.2",
+ "regex",
+ "sha2",
+ "unicode-segmentation",
+ "uuid",
+]
+
 [[package]]
 name = "datafusion-functions"
 version = "48.0.1"
@@ -1743,12 +2868,12 @@ dependencies = [
  "blake2",
  "blake3",
  "chrono",
- "datafusion-common",
- "datafusion-doc",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-expr-common",
- "datafusion-macros",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "hex",
  "itertools 0.14.0",
  "log",
@@ -1760,6 +2885,27 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "datafusion-functions-aggregate"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f07e49733d847be0a05235e17b884d326a2fd402c97a89fe8bcf0bfba310005"
+dependencies = [
+ "ahash",
+ "arrow",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "half",
+ "log",
+ "paste",
+]
+
 [[package]]
 name = "datafusion-functions-aggregate"
 version = "48.0.1"
@@ -1767,19 +2913,32 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "ahash",
  "arrow",
- "datafusion-common",
- "datafusion-doc",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-functions-aggregate-common",
- "datafusion-macros",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "half",
  "log",
  "paste",
 ]
 
+[[package]]
+name = "datafusion-functions-aggregate-common"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4512607e10d72b0b0a1dc08f42cb5bd5284cb8348b7fea49dc83409493e32b1b"
+dependencies = [
+ "ahash",
+ "arrow",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "datafusion-functions-aggregate-common"
 version = "48.0.1"
@@ -1787,9 +2946,30 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "ahash",
  "arrow",
- "datafusion-common",
- "datafusion-expr-common",
- "datafusion-physical-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+]
+
+[[package]]
+name = "datafusion-functions-nested"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ab331806e34f5545e5f03396e4d5068077395b1665795d8f88c14ec4f1e0b7a"
+dependencies = [
+ "arrow",
+ "arrow-ord",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.14.0",
+ "log",
+ "paste",
 ]
 
 [[package]]
@@ -1799,19 +2979,35 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "arrow-ord",
- "datafusion-common",
- "datafusion-doc",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-functions",
- "datafusion-functions-aggregate",
- "datafusion-macros",
- "datafusion-physical-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "itertools 0.14.0",
  "log",
  "paste",
 ]
 
+[[package]]
+name = "datafusion-functions-table"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4ac2c0be983a06950ef077e34e0174aa0cb9e346f3aeae459823158037ade37"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "parking_lot",
+ "paste",
+]
+
 [[package]]
 name = "datafusion-functions-table"
 version = "48.0.1"
@@ -1819,38 +3015,77 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "async-trait",
- "datafusion-catalog",
- "datafusion-common",
- "datafusion-expr",
- "datafusion-physical-plan",
+ "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "parking_lot",
  "paste",
 ]
 
+[[package]]
+name = "datafusion-functions-window"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36f3d92731de384c90906941d36dcadf6a86d4128409a9c5cd916662baed5f53"
+dependencies = [
+ "arrow",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log",
+ "paste",
+]
+
 [[package]]
 name = "datafusion-functions-window"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "datafusion-common",
- "datafusion-doc",
- "datafusion-expr",
- "datafusion-functions-window-common",
- "datafusion-macros",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "log",
  "paste",
 ]
 
+[[package]]
+name = "datafusion-functions-window-common"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c679f8bf0971704ec8fd4249fcbb2eb49d6a12cc3e7a840ac047b4928d3541b5"
+dependencies = [
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "datafusion-functions-window-common"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "datafusion-common",
- "datafusion-physical-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+]
+
+[[package]]
+name = "datafusion-macros"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2821de7cb0362d12e75a5196b636a59ea3584ec1e1cc7dc6f5e34b9e8389d251"
+dependencies = [
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "quote",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -1858,9 +3093,28 @@ name = "datafusion-macros"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "datafusion-expr",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "quote",
- "syn",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "datafusion-optimizer"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1594c7a97219ede334f25347ad8d57056621e7f4f35a0693c8da876e10dd6a53"
+dependencies = [
+ "arrow",
+ "chrono",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "indexmap 2.12.1",
+ "itertools 0.14.0",
+ "log",
+ "recursive",
+ "regex",
+ "regex-syntax",
 ]
 
 [[package]]
@@ -1869,16 +3123,38 @@ version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "chrono",
- "datafusion-common",
- "datafusion-expr",
- "datafusion-physical-expr",
+ "chrono",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "indexmap 2.12.1",
+ "itertools 0.14.0",
+ "log",
+ "recursive",
+ "regex",
+ "regex-syntax",
+]
+
+[[package]]
+name = "datafusion-physical-expr"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc6da0f2412088d23f6b01929dedd687b5aee63b19b674eb73d00c3eb3c883b7"
+dependencies = [
+ "ahash",
+ "arrow",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "half",
+ "hashbrown 0.14.5",
  "indexmap 2.12.1",
  "itertools 0.14.0",
  "log",
- "recursive",
- "regex",
- "regex-syntax",
+ "paste",
+ "petgraph 0.8.3",
 ]
 
 [[package]]
@@ -1888,11 +3164,11 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "ahash",
  "arrow",
- "datafusion-common",
- "datafusion-expr",
- "datafusion-expr-common",
- "datafusion-functions-aggregate-common",
- "datafusion-physical-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "half",
  "hashbrown 0.14.5",
  "indexmap 2.12.1",
@@ -1902,6 +3178,20 @@ dependencies = [
  "petgraph 0.8.3",
 ]
 
+[[package]]
+name = "datafusion-physical-expr-common"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcb0dbd9213078a593c3fe28783beaa625a4e6c6a6c797856ee2ba234311fb96"
+dependencies = [
+ "ahash",
+ "arrow",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "hashbrown 0.14.5",
+ "itertools 0.14.0",
+]
+
 [[package]]
 name = "datafusion-physical-expr-common"
 version = "48.0.1"
@@ -1909,30 +3199,79 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "ahash",
  "arrow",
- "datafusion-common",
- "datafusion-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "hashbrown 0.14.5",
  "itertools 0.14.0",
 ]
 
+[[package]]
+name = "datafusion-physical-optimizer"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d140854b2db3ef8ac611caad12bfb2e1e1de827077429322a6188f18fc0026a"
+dependencies = [
+ "arrow",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.14.0",
+ "log",
+ "recursive",
+]
+
 [[package]]
 name = "datafusion-physical-optimizer"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "datafusion-common",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-expr-common",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
- "datafusion-physical-plan",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "itertools 0.14.0",
  "log",
  "recursive",
 ]
 
+[[package]]
+name = "datafusion-physical-plan"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b46cbdf21a01206be76d467f325273b22c559c744a012ead5018dfe79597de08"
+dependencies = [
+ "ahash",
+ "arrow",
+ "arrow-ord",
+ "arrow-schema 55.2.0",
+ "async-trait",
+ "chrono",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "half",
+ "hashbrown 0.14.5",
+ "indexmap 2.12.1",
+ "itertools 0.14.0",
+ "log",
+ "parking_lot",
+ "pin-project-lite",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-physical-plan"
 version = "48.0.1"
@@ -1944,13 +3283,13 @@ dependencies = [
  "arrow-schema 55.2.0",
  "async-trait",
  "chrono",
- "datafusion-common",
- "datafusion-common-runtime",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-functions-window-common",
- "datafusion-physical-expr",
- "datafusion-physical-expr-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
  "half",
  "hashbrown 0.14.5",
@@ -1962,6 +3301,22 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-proto"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3fc7a2744332c2ef8804274c21f9fa664b4ca5889169250a6fd6b649ee5d16c"
+dependencies = [
+ "arrow",
+ "chrono",
+ "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-proto-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "object_store",
+ "prost",
+]
+
 [[package]]
 name = "datafusion-proto"
 version = "48.0.1"
@@ -1969,24 +3324,59 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "chrono",
- "datafusion",
- "datafusion-common",
- "datafusion-expr",
- "datafusion-proto-common",
+ "datafusion 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-proto-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "object_store",
  "prost",
 ]
 
+[[package]]
+name = "datafusion-proto-common"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "800add86852f12e3d249867425de2224c1e9fb7adc2930460548868781fbeded"
+dependencies = [
+ "arrow",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "prost",
+]
+
 [[package]]
 name = "datafusion-proto-common"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "datafusion-common",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "prost",
 ]
 
+[[package]]
+name = "datafusion-session"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a72733766ddb5b41534910926e8da5836622316f6283307fd9fb7e19811a59c"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "dashmap 6.1.0",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "futures",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "parking_lot",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-session"
 version = "48.0.1"
@@ -1994,14 +3384,14 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "async-trait",
- "dashmap",
- "datafusion-common",
- "datafusion-common-runtime",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-physical-expr",
- "datafusion-physical-plan",
- "datafusion-sql",
+ "dashmap 6.1.0",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
  "itertools 0.14.0",
  "log",
@@ -2010,6 +3400,23 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-sql"
+version = "48.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5162338cdec9cc7ea13a0e6015c361acad5ec1d88d83f7c86301f789473971f"
+dependencies = [
+ "arrow",
+ "bigdecimal",
+ "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "indexmap 2.12.1",
+ "log",
+ "recursive",
+ "regex",
+ "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "datafusion-sql"
 version = "48.0.1"
@@ -2017,13 +3424,13 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "bigdecimal",
- "datafusion-common",
- "datafusion-expr",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "indexmap 2.12.1",
  "log",
  "recursive",
  "regex",
- "sqlparser",
+ "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
 ]
 
 [[package]]
@@ -2079,6 +3486,37 @@ dependencies = [
  "powerfmt",
 ]
 
+[[package]]
+name = "derive_builder"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
+dependencies = [
+ "derive_builder_macro",
+]
+
+[[package]]
+name = "derive_builder_core"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
+dependencies = [
+ "darling 0.20.11",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "derive_builder_macro"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
+dependencies = [
+ "derive_builder_core",
+ "syn 2.0.113",
+]
+
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -2100,6 +3538,27 @@ dependencies = [
  "dirs-sys-next",
 ]
 
+[[package]]
+name = "dirs"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
+dependencies = [
+ "dirs-sys",
+]
+
+[[package]]
+name = "dirs-sys"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
+dependencies = [
+ "libc",
+ "option-ext",
+ "redox_users 0.5.2",
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "dirs-sys-next"
 version = "0.1.2"
@@ -2107,7 +3566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
 dependencies = [
  "libc",
- "redox_users",
+ "redox_users 0.4.6",
  "winapi",
 ]
 
@@ -2119,7 +3578,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -2143,6 +3602,12 @@ dependencies = [
  "shared_child",
 ]
 
+[[package]]
+name = "dunce"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
+
 [[package]]
 name = "dyn-clone"
 version = "1.0.20"
@@ -2211,6 +3676,15 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "error-chain"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc"
+dependencies = [
+ "version_check",
+]
+
 [[package]]
 name = "error-code"
 version = "3.3.2"
@@ -2252,6 +3726,22 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
+[[package]]
+name = "figment"
+version = "0.10.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
+dependencies = [
+ "atomic",
+ "pear",
+ "serde",
+ "serde_json",
+ "serde_yaml",
+ "toml 0.8.23",
+ "uncased",
+ "version_check",
+]
+
 [[package]]
 name = "find-msvc-tools"
 version = "0.1.6"
@@ -2313,6 +3803,21 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
+[[package]]
+name = "foreign-types"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
+dependencies = [
+ "foreign-types-shared",
+]
+
+[[package]]
+name = "foreign-types-shared"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
+
 [[package]]
 name = "form_urlencoded"
 version = "1.2.2"
@@ -2333,6 +3838,12 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
+[[package]]
+name = "fs_extra"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
+
 [[package]]
 name = "function-stream"
 version = "0.6.0"
@@ -2343,6 +3854,7 @@ dependencies = [
  "arrow-ipc 55.2.0",
  "arrow-json 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson)",
  "arrow-schema 55.2.0",
+ "arroyo-state",
  "async-trait",
  "base64",
  "bincode",
@@ -2351,21 +3863,24 @@ dependencies = [
  "cornucopia",
  "cornucopia_async",
  "crossbeam-channel",
- "datafusion",
- "datafusion-common",
- "datafusion-execution",
- "datafusion-expr",
- "datafusion-functions",
- "datafusion-functions-aggregate",
- "datafusion-functions-window",
- "datafusion-physical-expr",
- "datafusion-physical-plan",
- "datafusion-proto",
+ "datafusion 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-functions-window 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-proto 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
  "futures",
+ "governor",
+ "hex",
  "itertools 0.14.0",
  "jiter",
  "log",
  "lru",
+ "mini-moka",
  "num_cpus",
  "parking_lot",
  "parquet 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fparquet)",
@@ -2382,12 +3897,13 @@ dependencies = [
  "serde_json",
  "serde_json_path",
  "serde_yaml",
- "sqlparser",
- "strum",
+ "sha2",
+ "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
+ "strum 0.26.3",
  "thiserror 2.0.17",
  "tokio",
  "tokio-stream",
- "tonic",
+ "tonic 0.12.3",
  "tracing",
  "tracing-appender",
  "tracing-subscriber",
@@ -2413,7 +3929,7 @@ dependencies = [
  "rustyline",
  "thiserror 2.0.17",
  "tokio",
- "tonic",
+ "tonic 0.12.3",
 ]
 
 [[package]]
@@ -2478,7 +3994,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -2493,6 +4009,12 @@ version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
 
+[[package]]
+name = "futures-timer"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
+
 [[package]]
 name = "futures-util"
 version = "0.3.31"
@@ -2555,9 +4077,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
 dependencies = [
  "cfg-if",
+ "js-sys",
  "libc",
  "r-efi",
  "wasip2",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "getset"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912"
+dependencies = [
+ "proc-macro-error2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -2577,6 +4113,29 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
 
+[[package]]
+name = "governor"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be93b4ec2e4710b04d9264c0c7350cdd62a8c20e5e4ac732552ebb8f0debe8eb"
+dependencies = [
+ "cfg-if",
+ "dashmap 6.1.0",
+ "futures-sink",
+ "futures-timer",
+ "futures-util",
+ "getrandom 0.3.4",
+ "no-std-compat",
+ "nonzero_ext",
+ "parking_lot",
+ "portable-atomic",
+ "quanta",
+ "rand 0.9.2",
+ "smallvec",
+ "spinning_top",
+ "web-time",
+]
+
 [[package]]
 name = "h2"
 version = "0.4.12"
@@ -2588,7 +4147,7 @@ dependencies = [
  "fnv",
  "futures-core",
  "futures-sink",
- "http",
+ "http 1.4.0",
  "indexmap 2.12.1",
  "slab",
  "tokio",
@@ -2605,7 +4164,7 @@ dependencies = [
  "cfg-if",
  "crunchy",
  "num-traits",
- "zerocopy",
+ "zerocopy 0.8.31",
 ]
 
 [[package]]
@@ -2702,6 +4261,17 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "http"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
+dependencies = [
+ "bytes",
+ "fnv",
+ "itoa",
+]
+
 [[package]]
 name = "http"
 version = "1.4.0"
@@ -2712,6 +4282,17 @@ dependencies = [
  "itoa",
 ]
 
+[[package]]
+name = "http-body"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
+dependencies = [
+ "bytes",
+ "http 0.2.12",
+ "pin-project-lite",
+]
+
 [[package]]
 name = "http-body"
 version = "1.0.1"
@@ -2719,7 +4300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
 dependencies = [
  "bytes",
- "http",
+ "http 1.4.0",
 ]
 
 [[package]]
@@ -2730,8 +4311,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
 dependencies = [
  "bytes",
  "futures-core",
- "http",
- "http-body",
+ "http 1.4.0",
+ "http-body 1.0.1",
  "pin-project-lite",
 ]
 
@@ -2764,8 +4345,8 @@ dependencies = [
  "futures-channel",
  "futures-core",
  "h2",
- "http",
- "http-body",
+ "http 1.4.0",
+ "http-body 1.0.1",
  "httparse",
  "httpdate",
  "itoa",
@@ -2776,6 +4357,23 @@ dependencies = [
  "want",
 ]
 
+[[package]]
+name = "hyper-rustls"
+version = "0.27.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
+dependencies = [
+ "http 1.4.0",
+ "hyper",
+ "hyper-util",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "tokio",
+ "tokio-rustls",
+ "tower-service",
+]
+
 [[package]]
 name = "hyper-timeout"
 version = "0.5.2"
@@ -2789,25 +4387,46 @@ dependencies = [
  "tower-service",
 ]
 
+[[package]]
+name = "hyper-tls"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
+dependencies = [
+ "bytes",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+ "tower-service",
+]
+
 [[package]]
 name = "hyper-util"
 version = "0.1.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f"
 dependencies = [
+ "base64",
  "bytes",
  "futures-channel",
  "futures-core",
  "futures-util",
- "http",
- "http-body",
+ "http 1.4.0",
+ "http-body 1.0.1",
  "hyper",
+ "ipnet",
  "libc",
+ "percent-encoding",
  "pin-project-lite",
  "socket2 0.6.1",
+ "system-configuration",
  "tokio",
  "tower-service",
  "tracing",
+ "windows-registry",
 ]
 
 [[package]]
@@ -2921,6 +4540,12 @@ version = "2.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
 
+[[package]]
+name = "ident_case"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
+
 [[package]]
 name = "idna"
 version = "1.1.0"
@@ -2978,6 +4603,12 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "inlinable_string"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
+
 [[package]]
 name = "integer-encoding"
 version = "3.0.4"
@@ -3015,6 +4646,16 @@ version = "2.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
 
+[[package]]
+name = "iri-string"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
 [[package]]
 name = "is-terminal"
 version = "0.4.17"
@@ -3115,6 +4756,19 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "k8s-openapi"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c75b990324f09bef15e791606b7b7a296d02fc88a344f6eba9390970a870ad5"
+dependencies = [
+ "base64",
+ "chrono",
+ "serde",
+ "serde-value",
+ "serde_json",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.5.0"
@@ -3355,6 +5009,17 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
 
+[[package]]
+name = "local-ip-address"
+version = "0.6.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79ef8c257c92ade496781a32a581d43e3d512cf8ce714ecf04ea80f93ed0ff4a"
+dependencies = [
+ "libc",
+ "neli",
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "lock_api"
 version = "0.4.14"
@@ -3379,6 +5044,12 @@ dependencies = [
  "hashbrown 0.15.5",
 ]
 
+[[package]]
+name = "lru-slab"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
+
 [[package]]
 name = "lz4-sys"
 version = "1.11.1+lz4-1.10.0"
@@ -3433,6 +5104,12 @@ version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
 
+[[package]]
+name = "matchit"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
+
 [[package]]
 name = "maybe-owned"
 version = "0.3.4"
@@ -3493,7 +5170,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -3502,6 +5179,21 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
+[[package]]
+name = "mini-moka"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c325dfab65f261f386debee8b0969da215b3fa0037e74c8a1234db7ba986d803"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-utils",
+ "dashmap 5.5.3",
+ "skeptic",
+ "smallvec",
+ "tagptr",
+ "triomphe",
+]
+
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@@ -3535,6 +5227,61 @@ version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
 
+[[package]]
+name = "nanoid"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ffa00dec017b5b1a8b7cf5e2c008bfda1aa7e0697ac1508b491fdf2622fb4d8"
+dependencies = [
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "native-tls"
+version = "0.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2"
+dependencies = [
+ "libc",
+ "log",
+ "openssl",
+ "openssl-probe",
+ "openssl-sys",
+ "schannel",
+ "security-framework",
+ "security-framework-sys",
+ "tempfile",
+]
+
+[[package]]
+name = "neli"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87"
+dependencies = [
+ "bitflags 2.10.0",
+ "byteorder",
+ "derive_builder",
+ "getset",
+ "libc",
+ "log",
+ "neli-proc-macros",
+ "parking_lot",
+]
+
+[[package]]
+name = "neli-proc-macros"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05d8d08c6e98f20a62417478ebf7be8e1425ec9acecc6f63e22da633f6b71609"
+dependencies = [
+ "either",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "syn 2.0.113",
+]
+
 [[package]]
 name = "nibble_vec"
 version = "0.1.0"
@@ -3552,10 +5299,16 @@ checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
 dependencies = [
  "bitflags 2.10.0",
  "cfg-if",
- "cfg_aliases",
+ "cfg_aliases 0.1.1",
  "libc",
 ]
 
+[[package]]
+name = "no-std-compat"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
+
 [[package]]
 name = "nom"
 version = "7.1.3"
@@ -3566,6 +5319,12 @@ dependencies = [
  "minimal-lexical",
 ]
 
+[[package]]
+name = "nonzero_ext"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
+
 [[package]]
 name = "nu-ansi-term"
 version = "0.50.3"
@@ -3597,6 +5356,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
 dependencies = [
  "num-integer",
  "num-traits",
+ "serde",
 ]
 
 [[package]]
@@ -3684,7 +5444,7 @@ dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -3706,14 +5466,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00"
 dependencies = [
  "async-trait",
+ "base64",
  "bytes",
  "chrono",
+ "form_urlencoded",
  "futures",
- "http",
+ "http 1.4.0",
+ "http-body-util",
+ "httparse",
  "humantime",
+ "hyper",
  "itertools 0.14.0",
+ "md-5",
  "parking_lot",
  "percent-encoding",
+ "quick-xml",
+ "rand 0.9.2",
+ "reqwest",
+ "ring",
+ "rustls-pemfile",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
  "thiserror 2.0.17",
  "tokio",
  "tracing",
@@ -3735,6 +5509,38 @@ version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
 
+[[package]]
+name = "openssl"
+version = "0.10.75"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328"
+dependencies = [
+ "bitflags 2.10.0",
+ "cfg-if",
+ "foreign-types",
+ "libc",
+ "once_cell",
+ "openssl-macros",
+ "openssl-sys",
+]
+
+[[package]]
+name = "openssl-macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "openssl-probe"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
+
 [[package]]
 name = "openssl-sys"
 version = "0.9.111"
@@ -3747,6 +5553,12 @@ dependencies = [
  "vcpkg",
 ]
 
+[[package]]
+name = "option-ext"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
+
 [[package]]
 name = "ordered-float"
 version = "2.10.1"
@@ -3766,6 +5578,12 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "outref"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
+
 [[package]]
 name = "owo-colors"
 version = "3.5.0"
@@ -3869,6 +5687,29 @@ version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
+[[package]]
+name = "pear"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
+dependencies = [
+ "inlinable_string",
+ "pear_codegen",
+ "yansi",
+]
+
+[[package]]
+name = "pear_codegen"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
+dependencies = [
+ "proc-macro2",
+ "proc-macro2-diagnostics",
+ "quote",
+ "syn 2.0.113",
+]
+
 [[package]]
 name = "peeking_take_while"
 version = "0.1.2"
@@ -3911,7 +5752,7 @@ dependencies = [
  "pest_meta",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -3954,6 +5795,7 @@ dependencies = [
  "hashbrown 0.15.5",
  "indexmap 2.12.1",
  "serde",
+ "serde_derive",
 ]
 
 [[package]]
@@ -4010,7 +5852,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -4031,6 +5873,12 @@ version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
 
+[[package]]
+name = "portable-atomic"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
+
 [[package]]
 name = "postcard"
 version = "1.1.3"
@@ -4107,7 +5955,7 @@ version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
 dependencies = [
- "zerocopy",
+ "zerocopy 0.8.31",
 ]
 
 [[package]]
@@ -4117,7 +5965,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
 dependencies = [
  "proc-macro2",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -4126,7 +5974,53 @@ version = "3.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983"
 dependencies = [
- "toml_edit",
+ "toml_edit 0.23.10+spec-1.0.0",
+]
+
+[[package]]
+name = "proc-macro-error"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
+dependencies = [
+ "proc-macro-error-attr",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr2"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "proc-macro-error2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802"
+dependencies = [
+ "proc-macro-error-attr2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -4138,6 +6032,19 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "proc-macro2-diagnostics"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
+ "version_check",
+ "yansi",
+]
+
 [[package]]
 name = "proctitle"
 version = "0.1.1"
@@ -4149,6 +6056,21 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "prometheus"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
+dependencies = [
+ "cfg-if",
+ "fnv",
+ "lazy_static",
+ "memchr",
+ "parking_lot",
+ "protobuf",
+ "thiserror 2.0.17",
+]
+
 [[package]]
 name = "prost"
 version = "0.13.5"
@@ -4175,7 +6097,7 @@ dependencies = [
  "prost",
  "prost-types",
  "regex",
- "syn",
+ "syn 2.0.113",
  "tempfile",
 ]
 
@@ -4189,61 +6111,178 @@ dependencies = [
  "itertools 0.14.0",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
 name = "prost-types"
 version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
+checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
+dependencies = [
+ "prost",
+]
+
+[[package]]
+name = "protobuf"
+version = "3.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4"
+dependencies = [
+ "once_cell",
+ "protobuf-support",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "protobuf-support"
+version = "3.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6"
+dependencies = [
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "protocol"
+version = "0.1.0"
+dependencies = [
+ "env_logger",
+ "log",
+ "prost",
+ "serde",
+ "tonic 0.12.3",
+ "tonic-build 0.12.3",
+]
+
+[[package]]
+name = "psm"
+version = "0.1.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8"
+dependencies = [
+ "ar_archive_writer",
+ "cc",
+]
+
+[[package]]
+name = "pulldown-cmark"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b"
+dependencies = [
+ "bitflags 2.10.0",
+ "memchr",
+ "unicase",
+]
+
+[[package]]
+name = "pulley-interpreter"
+version = "41.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01051a5b172e07f9197b85060e6583b942aec679dac08416647bf7e7dc916b65"
+dependencies = [
+ "cranelift-bitset",
+ "log",
+ "pulley-macros",
+ "wasmtime-internal-math",
+]
+
+[[package]]
+name = "pulley-macros"
+version = "41.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cf194f5b1a415ef3a44ee35056f4009092cc4038a9f7e3c7c1e392f48ee7dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "quad-rand"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40"
+
+[[package]]
+name = "quanta"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
 dependencies = [
- "prost",
+ "crossbeam-utils",
+ "libc",
+ "once_cell",
+ "raw-cpuid",
+ "wasi 0.11.1+wasi-snapshot-preview1",
+ "web-sys",
+ "winapi",
 ]
 
 [[package]]
-name = "protocol"
-version = "0.1.0"
+name = "quick-xml"
+version = "0.38.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
 dependencies = [
- "env_logger",
- "log",
- "prost",
+ "memchr",
  "serde",
- "tonic",
- "tonic-build",
 ]
 
 [[package]]
-name = "psm"
-version = "0.1.30"
+name = "quinn"
+version = "0.11.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8"
+checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
 dependencies = [
- "ar_archive_writer",
- "cc",
+ "bytes",
+ "cfg_aliases 0.2.1",
+ "pin-project-lite",
+ "quinn-proto",
+ "quinn-udp",
+ "rustc-hash 2.1.1",
+ "rustls",
+ "socket2 0.6.1",
+ "thiserror 2.0.17",
+ "tokio",
+ "tracing",
+ "web-time",
 ]
 
 [[package]]
-name = "pulley-interpreter"
-version = "41.0.3"
+name = "quinn-proto"
+version = "0.11.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01051a5b172e07f9197b85060e6583b942aec679dac08416647bf7e7dc916b65"
+checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
 dependencies = [
- "cranelift-bitset",
- "log",
- "pulley-macros",
- "wasmtime-internal-math",
+ "bytes",
+ "getrandom 0.3.4",
+ "lru-slab",
+ "rand 0.9.2",
+ "ring",
+ "rustc-hash 2.1.1",
+ "rustls",
+ "rustls-pki-types",
+ "slab",
+ "thiserror 2.0.17",
+ "tinyvec",
+ "tracing",
+ "web-time",
 ]
 
 [[package]]
-name = "pulley-macros"
-version = "41.0.3"
+name = "quinn-udp"
+version = "0.5.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2cf194f5b1a415ef3a44ee35056f4009092cc4038a9f7e3c7c1e392f48ee7dbb"
+checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
 dependencies = [
- "proc-macro2",
- "quote",
- "syn",
+ "cfg_aliases 0.2.1",
+ "libc",
+ "once_cell",
+ "socket2 0.6.1",
+ "tracing",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
@@ -4345,6 +6384,15 @@ dependencies = [
  "rand_core 0.6.4",
 ]
 
+[[package]]
+name = "raw-cpuid"
+version = "11.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
+dependencies = [
+ "bitflags 2.10.0",
+]
+
 [[package]]
 name = "rayon"
 version = "1.11.0"
@@ -4415,7 +6463,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
 dependencies = [
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -4438,6 +6486,37 @@ dependencies = [
  "thiserror 1.0.69",
 ]
 
+[[package]]
+name = "redox_users"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
+dependencies = [
+ "getrandom 0.2.16",
+ "libredox",
+ "thiserror 2.0.17",
+]
+
+[[package]]
+name = "ref-cast"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d"
+dependencies = [
+ "ref-cast-impl",
+]
+
+[[package]]
+name = "ref-cast-impl"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
+]
+
 [[package]]
 name = "regalloc2"
 version = "0.13.5"
@@ -4476,6 +6555,12 @@ dependencies = [
  "regex-syntax",
 ]
 
+[[package]]
+name = "regex-lite"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973"
+
 [[package]]
 name = "regex-syntax"
 version = "0.8.8"
@@ -4492,6 +6577,67 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "reqwest"
+version = "0.12.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
+dependencies = [
+ "base64",
+ "bytes",
+ "encoding_rs",
+ "futures-core",
+ "futures-util",
+ "h2",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "hyper",
+ "hyper-rustls",
+ "hyper-tls",
+ "hyper-util",
+ "js-sys",
+ "log",
+ "mime",
+ "native-tls",
+ "percent-encoding",
+ "pin-project-lite",
+ "quinn",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tokio-native-tls",
+ "tokio-rustls",
+ "tokio-util",
+ "tower 0.5.2",
+ "tower-http",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "wasm-streams",
+ "web-sys",
+]
+
+[[package]]
+name = "ring"
+version = "0.17.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "getrandom 0.2.16",
+ "libc",
+ "untrusted",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "rocksdb"
 version = "0.21.0"
@@ -4580,6 +6726,65 @@ dependencies = [
  "rustix 1.1.3",
 ]
 
+[[package]]
+name = "rustls"
+version = "0.23.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
+dependencies = [
+ "aws-lc-rs",
+ "log",
+ "once_cell",
+ "ring",
+ "rustls-pki-types",
+ "rustls-webpki",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-native-certs"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
+dependencies = [
+ "openssl-probe",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework",
+]
+
+[[package]]
+name = "rustls-pemfile"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
+dependencies = [
+ "rustls-pki-types",
+]
+
+[[package]]
+name = "rustls-pki-types"
+version = "1.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
+dependencies = [
+ "web-time",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-webpki"
+version = "0.103.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
+dependencies = [
+ "aws-lc-rs",
+ "ring",
+ "rustls-pki-types",
+ "untrusted",
+]
+
 [[package]]
 name = "rustversion"
 version = "1.0.22"
@@ -4635,6 +6840,15 @@ dependencies = [
  "pkg-config",
 ]
 
+[[package]]
+name = "schannel"
+version = "0.1.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "schemars"
 version = "0.8.22"
@@ -4642,7 +6856,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
 dependencies = [
  "dyn-clone",
- "schemars_derive",
+ "schemars_derive 0.8.22",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "schemars"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc"
+dependencies = [
+ "dyn-clone",
+ "ref-cast",
+ "schemars_derive 1.2.1",
  "serde",
  "serde_json",
 ]
@@ -4656,7 +6883,19 @@ dependencies = [
  "proc-macro2",
  "quote",
  "serde_derive_internals",
- "syn",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "schemars_derive"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "serde_derive_internals",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -4665,6 +6904,29 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
+[[package]]
+name = "security-framework"
+version = "3.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38"
+dependencies = [
+ "bitflags 2.10.0",
+ "core-foundation 0.10.1",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework-sys"
+version = "2.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
 [[package]]
 name = "semver"
 version = "1.0.27"
@@ -4691,6 +6953,26 @@ dependencies = [
  "serde_derive",
 ]
 
+[[package]]
+name = "serde-value"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c"
+dependencies = [
+ "ordered-float",
+ "serde",
+]
+
+[[package]]
+name = "serde_bytes"
+version = "0.11.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8"
+dependencies = [
+ "serde",
+ "serde_core",
+]
+
 [[package]]
 name = "serde_core"
 version = "1.0.228"
@@ -4708,7 +6990,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -4719,7 +7001,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -4782,7 +7064,16 @@ checksum = "aafbefbe175fa9bf03ca83ef89beecff7d2a95aaacd5732325b90ac8c3bd7b90"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "serde_spanned"
+version = "0.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
+dependencies = [
+ "serde",
 ]
 
 [[package]]
@@ -4803,7 +7094,19 @@ dependencies = [
  "proc-macro2",
  "quote",
  "serde",
- "syn",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "serde_urlencoded"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
+dependencies = [
+ "form_urlencoded",
+ "itoa",
+ "ryu",
+ "serde",
 ]
 
 [[package]]
@@ -4915,6 +7218,21 @@ dependencies = [
  "typenum",
 ]
 
+[[package]]
+name = "skeptic"
+version = "0.13.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8"
+dependencies = [
+ "bytecount",
+ "cargo_metadata",
+ "error-chain",
+ "glob",
+ "pulldown-cmark",
+ "tempfile",
+ "walkdir",
+]
+
 [[package]]
 name = "slab"
 version = "0.4.11"
@@ -4946,20 +7264,40 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
 name = "socket2"
 version = "0.5.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
+checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "socket2"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881"
+dependencies = [
+ "libc",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "spinning_top"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300"
 dependencies = [
- "libc",
- "windows-sys 0.52.0",
+ "lock_api",
 ]
 
 [[package]]
-name = "socket2"
-version = "0.6.1"
+name = "sqlparser"
+version = "0.55.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881"
+checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11"
 dependencies = [
- "libc",
- "windows-sys 0.60.2",
+ "log",
+ "recursive",
+ "sqlparser_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -4969,7 +7307,18 @@ source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunc
 dependencies = [
  "log",
  "recursive",
- "sqlparser_derive",
+ "sqlparser_derive 0.3.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
+]
+
+[[package]]
+name = "sqlparser_derive"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -4979,7 +7328,7 @@ source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunc
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -5030,7 +7379,16 @@ version = "0.26.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
 dependencies = [
- "strum_macros",
+ "strum_macros 0.26.4",
+]
+
+[[package]]
+name = "strum"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
+dependencies = [
+ "strum_macros 0.27.2",
 ]
 
 [[package]]
@@ -5043,7 +7401,19 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "strum_macros"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
+dependencies = [
+ "heck 0.5.0",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -5080,6 +7450,16 @@ dependencies = [
  "is-terminal",
 ]
 
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "unicode-ident",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.113"
@@ -5096,6 +7476,9 @@ name = "sync_wrapper"
 version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
+dependencies = [
+ "futures-core",
+]
 
 [[package]]
 name = "synstructure"
@@ -5105,7 +7488,28 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "system-configuration"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
+dependencies = [
+ "bitflags 2.10.0",
+ "core-foundation 0.9.4",
+ "system-configuration-sys",
+]
+
+[[package]]
+name = "system-configuration-sys"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
 ]
 
 [[package]]
@@ -5124,6 +7528,12 @@ dependencies = [
  "winx",
 ]
 
+[[package]]
+name = "tagptr"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
+
 [[package]]
 name = "tap"
 version = "1.0.1"
@@ -5205,7 +7615,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -5216,7 +7626,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -5318,6 +7728,7 @@ dependencies = [
  "signal-hook-registry",
  "socket2 0.6.1",
  "tokio-macros",
+ "tracing",
  "windows-sys 0.61.2",
 ]
 
@@ -5329,7 +7740,17 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "tokio-native-tls"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
+dependencies = [
+ "native-tls",
+ "tokio",
 ]
 
 [[package]]
@@ -5358,6 +7779,16 @@ dependencies = [
  "whoami",
 ]
 
+[[package]]
+name = "tokio-rustls"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
+dependencies = [
+ "rustls",
+ "tokio",
+]
+
 [[package]]
 name = "tokio-stream"
 version = "0.1.18"
@@ -5382,6 +7813,18 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "toml"
+version = "0.8.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
+dependencies = [
+ "serde",
+ "serde_spanned 0.6.9",
+ "toml_datetime 0.6.11",
+ "toml_edit 0.22.27",
+]
+
 [[package]]
 name = "toml"
 version = "0.9.11+spec-1.1.0"
@@ -5390,13 +7833,22 @@ checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46"
 dependencies = [
  "indexmap 2.12.1",
  "serde_core",
- "serde_spanned",
- "toml_datetime",
+ "serde_spanned 1.0.4",
+ "toml_datetime 0.7.5+spec-1.1.0",
  "toml_parser",
  "toml_writer",
  "winnow",
 ]
 
+[[package]]
+name = "toml_datetime"
+version = "0.6.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "toml_datetime"
 version = "0.7.5+spec-1.1.0"
@@ -5406,6 +7858,20 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "toml_edit"
+version = "0.22.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
+dependencies = [
+ "indexmap 2.12.1",
+ "serde",
+ "serde_spanned 0.6.9",
+ "toml_datetime 0.6.11",
+ "toml_write",
+ "winnow",
+]
+
 [[package]]
 name = "toml_edit"
 version = "0.23.10+spec-1.0.0"
@@ -5413,7 +7879,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269"
 dependencies = [
  "indexmap 2.12.1",
- "toml_datetime",
+ "toml_datetime 0.7.5+spec-1.1.0",
  "toml_parser",
  "winnow",
 ]
@@ -5427,6 +7893,12 @@ dependencies = [
  "winnow",
 ]
 
+[[package]]
+name = "toml_write"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
+
 [[package]]
 name = "toml_writer"
 version = "1.0.6+spec-1.1.0"
@@ -5441,12 +7913,12 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
 dependencies = [
  "async-stream",
  "async-trait",
- "axum",
+ "axum 0.7.9",
  "base64",
  "bytes",
  "h2",
- "http",
- "http-body",
+ "http 1.4.0",
+ "http-body 1.0.1",
  "http-body-util",
  "hyper",
  "hyper-timeout",
@@ -5463,6 +7935,39 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "tonic"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9"
+dependencies = [
+ "async-trait",
+ "axum 0.8.8",
+ "base64",
+ "bytes",
+ "h2",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "hyper",
+ "hyper-timeout",
+ "hyper-util",
+ "percent-encoding",
+ "pin-project",
+ "prost",
+ "rustls-native-certs",
+ "socket2 0.5.10",
+ "tokio",
+ "tokio-rustls",
+ "tokio-stream",
+ "tower 0.5.2",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+ "webpki-roots 0.26.11",
+ "zstd",
+]
+
 [[package]]
 name = "tonic-build"
 version = "0.12.3"
@@ -5474,7 +7979,21 @@ dependencies = [
  "prost-build",
  "prost-types",
  "quote",
- "syn",
+ "syn 2.0.113",
+]
+
+[[package]]
+name = "tonic-build"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847"
+dependencies = [
+ "prettyplease",
+ "proc-macro2",
+ "prost-build",
+ "prost-types",
+ "quote",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -5505,8 +8024,31 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
 dependencies = [
  "futures-core",
  "futures-util",
+ "indexmap 2.12.1",
  "pin-project-lite",
+ "slab",
  "sync_wrapper",
+ "tokio",
+ "tokio-util",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
+dependencies = [
+ "bitflags 2.10.0",
+ "bytes",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "iri-string",
+ "pin-project-lite",
+ "tower 0.5.2",
  "tower-layer",
  "tower-service",
 ]
@@ -5554,7 +8096,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -5610,6 +8152,12 @@ dependencies = [
  "tracing-serde",
 ]
 
+[[package]]
+name = "triomphe"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd69c5aa8f924c7519d6372789a74eac5b94fb0f8fcf0d4a97eb0bfc3e785f39"
+
 [[package]]
 name = "try-lock"
 version = "0.2.5"
@@ -5647,9 +8195,9 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regress",
- "schemars",
+ "schemars 0.8.22",
  "serde_json",
- "syn",
+ "syn 2.0.113",
  "thiserror 1.0.69",
  "unicode-ident",
 ]
@@ -5661,11 +8209,11 @@ source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc0
 dependencies = [
  "proc-macro2",
  "quote",
- "schemars",
+ "schemars 0.8.22",
  "serde",
  "serde_json",
  "serde_tokenstream",
- "syn",
+ "syn 2.0.113",
  "typify-impl",
 ]
 
@@ -5675,6 +8223,15 @@ version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
 
+[[package]]
+name = "uncased"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
+dependencies = [
+ "version_check",
+]
+
 [[package]]
 name = "unicase"
 version = "2.9.0"
@@ -5750,6 +8307,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47"
 
+[[package]]
+name = "untrusted"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
+
 [[package]]
 name = "unty"
 version = "0.0.4"
@@ -5768,6 +8331,12 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "urlencoding"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
+
 [[package]]
 name = "utf8_iter"
 version = "1.0.4"
@@ -5780,6 +8349,30 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
+[[package]]
+name = "utoipa"
+version = "4.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23"
+dependencies = [
+ "indexmap 2.12.1",
+ "serde",
+ "serde_json",
+ "utoipa-gen",
+]
+
+[[package]]
+name = "utoipa-gen"
+version = "4.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20c24e8ab68ff9ee746aad22d39b5535601e6416d1b0feeabf78be986a5c4392"
+dependencies = [
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
+]
+
 [[package]]
 name = "uuid"
 version = "1.19.0"
@@ -5788,6 +8381,7 @@ checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a"
 dependencies = [
  "getrandom 0.3.4",
  "js-sys",
+ "serde_core",
  "wasm-bindgen",
 ]
 
@@ -5815,6 +8409,12 @@ version = "0.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
 
+[[package]]
+name = "vsimd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -5912,7 +8512,7 @@ dependencies = [
  "bumpalo",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
  "wasm-bindgen-shared",
 ]
 
@@ -5956,6 +8556,19 @@ dependencies = [
  "wasmparser",
 ]
 
+[[package]]
+name = "wasm-streams"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
 [[package]]
 name = "wasmparser"
 version = "0.243.0"
@@ -6078,7 +8691,7 @@ dependencies = [
  "serde",
  "serde_derive",
  "sha2",
- "toml",
+ "toml 0.9.11+spec-1.1.0",
  "wasmtime-environ",
  "windows-sys 0.61.2",
  "zstd",
@@ -6093,7 +8706,7 @@ dependencies = [
  "anyhow",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
  "wasmtime-internal-component-util",
  "wasmtime-internal-wit-bindgen",
  "wit-parser",
@@ -6207,7 +8820,7 @@ checksum = "63ba3124cc2cbcd362672f9f077303ccc4cd61daa908f73447b7fdaece75ff9f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -6335,6 +8948,24 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "webpki-roots"
+version = "0.26.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
+dependencies = [
+ "webpki-roots 1.0.6",
+]
+
+[[package]]
+name = "webpki-roots"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed"
+dependencies = [
+ "rustls-pki-types",
+]
+
 [[package]]
 name = "whoami"
 version = "2.1.0"
@@ -6370,7 +9001,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
  "witx",
 ]
 
@@ -6382,7 +9013,7 @@ checksum = "0e976fe0cecd60041f66b15ad45ebc997952af13da9bf9d90261c7b025057edc"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
  "wiggle-generate",
 ]
 
@@ -6458,7 +9089,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -6469,7 +9100,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -6478,6 +9109,17 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 
+[[package]]
+name = "windows-registry"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720"
+dependencies = [
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
 [[package]]
 name = "windows-result"
 version = "0.4.1"
@@ -6731,6 +9373,12 @@ dependencies = [
  "tap",
 ]
 
+[[package]]
+name = "xmlparser"
+version = "0.13.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
+
 [[package]]
 name = "xxhash-rust"
 version = "0.8.15"
@@ -6746,6 +9394,12 @@ dependencies = [
  "lzma-sys",
 ]
 
+[[package]]
+name = "yansi"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
+
 [[package]]
 name = "yoke"
 version = "0.8.1"
@@ -6765,17 +9419,37 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
  "synstructure",
 ]
 
+[[package]]
+name = "zerocopy"
+version = "0.7.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
+dependencies = [
+ "zerocopy-derive 0.7.35",
+]
+
 [[package]]
 name = "zerocopy"
 version = "0.8.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
 dependencies = [
- "zerocopy-derive",
+ "zerocopy-derive 0.8.31",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.7.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -6786,7 +9460,7 @@ checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
@@ -6806,10 +9480,16 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
  "synstructure",
 ]
 
+[[package]]
+name = "zeroize"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
+
 [[package]]
 name = "zerotrie"
 version = "0.2.3"
@@ -6840,7 +9520,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.113",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index cee98282..2c62a473 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -84,6 +84,11 @@ cornucopia_async = { git = "https://github.com/ArroyoSystems/cornucopia", branch
 cornucopia = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" }
 jiter = {git = "https://github.com/ArroyoSystems/jiter", branch = "disable_python" }
 
+arroyo-state = { path = "../arroyo/crates/arroyo-state" }
+governor = "0.8.0"
+mini-moka = "0.10"
+sha2 = "0.10"
+hex = "0.4"
 
 [features]
 default = ["incremental-cache", "python"]
diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs
index f69ad017..814358ad 100644
--- a/src/runtime/mod.rs
+++ b/src/runtime/mod.rs
@@ -14,10 +14,13 @@
 
 pub mod buffer_and_event;
 pub mod common;
-pub mod input;
-pub mod output;
-pub mod processor;
 pub mod sink;
 pub mod source;
+pub mod streaming;
 pub mod task;
 pub mod taskexecutor;
+pub mod wasm;
+
+pub use wasm::input;
+pub use wasm::output;
+pub use wasm::processor;
diff --git a/src/runtime/source/mod.rs b/src/runtime/source/mod.rs
deleted file mode 100644
index 8a05bf30..00000000
--- a/src/runtime/source/mod.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Source module
-
-// TODO: Add source implementation here
diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs
new file mode 100644
index 00000000..e81bd03a
--- /dev/null
+++ b/src/runtime/streaming/api/context.rs
@@ -0,0 +1,95 @@
+use crate::runtime::streaming::memory::MemoryPool;
+use crate::runtime::streaming::protocol::event::StreamEvent;
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+use crate::runtime::streaming::network::endpoint::PhysicalSender;
+use arrow_array::RecordBatch;
+use arroyo_state::tables::table_manager::TableManager;
+use std::sync::Arc;
+use tokio::sync::Mutex;
+use tracing::error;
+
+pub struct TaskContext { // Runtime context passed as `ctx` to operator and source callbacks.
+    pub job_id: String,
+    pub vertex_id: u32,
+    pub subtask_idx: u32,
+    pub parallelism: u32,
+    pub outboxes: Vec<PhysicalSender>, // Senders targeted by `collect`, `collect_keyed` and `broadcast`.
+    memory_pool: Arc<MemoryPool>, // Asked for a memory ticket before each data batch is sent.
+    table_manager: Option<Arc<Mutex<TableManager>>>, // When `None`, `table_manager()` panics.
+    pub last_present_watermark: Option<std::time::SystemTime>, // Initialised to `None` by `new`.
+}
+
+impl TaskContext {
+    pub fn new( // Plain constructor; `last_present_watermark` starts as `None`.
+        job_id: String,
+        vertex_id: u32,
+        subtask_idx: u32,
+        parallelism: u32,
+        outboxes: Vec<PhysicalSender>,
+        memory_pool: Arc<MemoryPool>,
+        table_manager: Option<Arc<Mutex<TableManager>>>,
+    ) -> Self {
+        Self {
+            job_id,
+            vertex_id,
+            subtask_idx,
+            parallelism,
+            outboxes,
+            memory_pool,
+            table_manager,
+            last_present_watermark: None,
+        }
+    }
+
+    pub async fn table_manager(&self) -> tokio::sync::MutexGuard<'_, TableManager> { // Panics if no table manager was supplied.
+        self.table_manager
+            .as_ref()
+            .expect("State backend not initialized")
+            .lock()
+            .await
+    }
+
+    /// Memory-pool-governed data send: requests a memory ticket for the batch's exact byte size, then broadcasts to every downstream outbox.
+    pub async fn collect(&self, batch: RecordBatch) -> anyhow::Result<()> {
+        if self.outboxes.is_empty() {
+            return Ok(());
+        }
+
+        let bytes_required = batch.get_array_memory_size();
+        let ticket = self.memory_pool.request_memory(bytes_required).await;
+        let tracked_event = TrackedEvent::new(StreamEvent::Data(batch), Some(ticket));
+
+        for outbox in &self.outboxes {
+            outbox.send(tracked_event.clone()).await?;
+        }
+        Ok(())
+    }
+
+    /// Routes the batch to a single partition chosen by key hash (Shuffle / GroupBy).
+    pub async fn collect_keyed(
+        &self,
+        key_hash: u64,
+        batch: RecordBatch,
+    ) -> anyhow::Result<()> {
+        if self.outboxes.is_empty() {
+            return Ok(());
+        }
+
+        let bytes_required = batch.get_array_memory_size();
+        let ticket = self.memory_pool.request_memory(bytes_required).await;
+        let tracked_event = TrackedEvent::new(StreamEvent::Data(batch), Some(ticket));
+        // Reduce in u64 before narrowing: `key_hash as usize` would drop the high bits on 32-bit targets.
+        let target_idx = (key_hash % self.outboxes.len() as u64) as usize;
+        self.outboxes[target_idx].send(tracked_event).await?;
+        Ok(())
+    }
+
+    /// Broadcasts a control signal (no memory ticket is requested, so it is not held up when the pool is congested).
+    pub async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> {
+        let tracked_event = TrackedEvent::control(event);
+        for outbox in &self.outboxes {
+            outbox.send(tracked_event.clone()).await?;
+        }
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs
new file mode 100644
index 00000000..e6bf674d
--- /dev/null
+++ b/src/runtime/streaming/api/mod.rs
@@ -0,0 +1,9 @@
+//! API layer: the traits that operator and source implementations must follow, plus the runtime context.
+
+pub mod context;
+pub mod operator;
+pub mod source;
+
+pub use context::TaskContext;
+pub use operator::{ConstructedOperator, MessageOperator};
+pub use source::{SourceEvent, SourceOffset, SourceOperator};
diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs
new file mode 100644
index 00000000..3974307b
--- /dev/null
+++ b/src/runtime/streaming/api/operator.rs
@@ -0,0 +1,90 @@
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::source::SourceOperator;
+use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
+use crate::runtime::streaming::protocol::stream_out::StreamOutput;
+use arrow_array::RecordBatch;
+use async_trait::async_trait;
+use std::time::Duration;
+use crate::sql::common::{CheckpointBarrier, Watermark};
+
+/// Concrete operator instance produced by factory reflection.
+pub enum ConstructedOperator {
+    Source(Box<dyn SourceOperator>),
+    Operator(Box<dyn MessageOperator>),
+}
+
+/// Multi-upstream, passively driven message operator.
+#[async_trait]
+pub trait MessageOperator: Send + 'static {
+    fn name(&self) -> &str;
+
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<()> {
+        Ok(())
+    }
+
+    /// `input_idx`: which input edge in a multi-input topology (matches the inbox index in `SubtaskRunner`; always 0 for a single input).
+    async fn process_data(
+        &mut self,
+        input_idx: usize,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> anyhow::Result<Vec<StreamOutput>>;
+
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        ctx: &mut TaskContext,
+    ) -> anyhow::Result<Vec<StreamOutput>>;
+
+    async fn snapshot_state(
+        &mut self,
+        barrier: CheckpointBarrier,
+        ctx: &mut TaskContext,
+    ) -> anyhow::Result<()>;
+
+    /// Called by `SubtaskRunner` on [`ControlCommand::Commit`] after the global checkpoint is confirmed (e.g. Kafka EOS two-phase commit).
+    async fn commit_checkpoint(
+        &mut self,
+        _epoch: u32,
+        _ctx: &mut TaskContext,
+    ) -> anyhow::Result<()> {
+        Ok(())
+    }
+
+    /// Periodic timer (e.g. idle detection); `None` means no tick is registered.
+    fn tick_interval(&self) -> Option<Duration> {
+        None
+    }
+
+    /// Companion to [`Self::tick_interval`]; called by `SubtaskRunner` at a fixed interval.
+    async fn process_tick(
+        &mut self,
+        _tick_index: u64,
+        _ctx: &mut TaskContext,
+    ) -> anyhow::Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+
+    /// Returning `true` means the run loop should end immediately (e.g. `StopMode::Immediate`).
+    async fn handle_control(
+        &mut self,
+        command: ControlCommand,
+        _ctx: &mut TaskContext,
+    ) -> anyhow::Result<bool> {
+        match command {
+            ControlCommand::Stop { mode } => {
+                if mode == StopMode::Immediate {
+                    return Ok(true);
+                }
+                Ok(false)
+            }
+            ControlCommand::DropState | ControlCommand::Commit { .. } => Ok(false),
+            ControlCommand::Start | ControlCommand::UpdateConfig { .. } => Ok(false),
+            ControlCommand::TriggerCheckpoint { .. } => Ok(false),
+        }
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+}
diff --git a/src/runtime/streaming/api/source.rs b/src/runtime/streaming/api/source.rs
new file mode 100644
index 00000000..8ddeb3cf
--- /dev/null
+++ b/src/runtime/streaming/api/source.rs
@@ -0,0 +1,43 @@
+//! Source operators: [`crate::runtime::streaming::execution::SourceRunner`] drives `fetch_next`; implementations must not block the control plane with an internal infinite loop.
+
+use crate::runtime::streaming::api::context::TaskContext;
+use arrow_array::RecordBatch;
+use async_trait::async_trait;
+use crate::sql::common::{CheckpointBarrier, Watermark};
+
+/// Starting consumption strategy for external sources such as Kafka when **no offset has been stored** (semantics aligned with `arroyo-connectors`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum SourceOffset {
+    Earliest,
+    Latest,
+    #[default]
+    Group,
+}
+
+#[derive(Debug)]
+pub enum SourceEvent { // Value produced by one `SourceOperator::fetch_next` call.
+    Data(RecordBatch),
+    Watermark(Watermark),
+    Idle,
+}
+
+#[async_trait]
+pub trait SourceOperator: Send + 'static { // Source-side operator interface.
+    fn name(&self) -> &str;
+
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<()> { // Default: no-op.
+        Ok(())
+    }
+
+    async fn fetch_next(&mut self, ctx: &mut TaskContext) -> anyhow::Result<SourceEvent>;
+
+    async fn snapshot_state(
+        &mut self,
+        barrier: CheckpointBarrier,
+        ctx: &mut TaskContext,
+    ) -> anyhow::Result<()>;
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<()> { // Default: no-op.
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/arrow/mod.rs b/src/runtime/streaming/arrow/mod.rs
new file mode 100644
index 00000000..fdfa87f7
--- /dev/null
+++ b/src/runtime/streaming/arrow/mod.rs
@@ -0,0 +1,68 @@
+//! Arrow / DataFusion helpers: aggregate expression decoding and the like.
+//!
+//! `UpdatingCache` lives in [`crate::runtime::streaming::operators::updating_cache`].
+
+use arrow::datatypes::SchemaRef;
+use datafusion::common::internal_err;
+use datafusion::common::Result as DFResult;
+use datafusion::execution::FunctionRegistry;
+use datafusion::physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr};
+use datafusion::physical_expr::{LexOrdering, PhysicalExpr};
+use datafusion_proto::physical_plan::from_proto::{parse_physical_expr, parse_physical_sort_expr};
+use datafusion_proto::physical_plan::{DefaultPhysicalExtensionCodec, PhysicalExtensionCodec};
+use datafusion_proto::protobuf::physical_aggregate_expr_node::AggregateFunction;
+use datafusion_proto::protobuf::physical_expr_node::ExprType;
+use datafusion_proto::protobuf::{PhysicalExprNode, proto_error};
+use std::sync::Arc;
+
+/// Decodes a UDAF aggregate expression from a `PhysicalExprNode` (kept consistent with the worker's `arrow/mod`).
+pub fn decode_aggregate(
+    schema: &SchemaRef,
+    name: &str,
+    expr: &PhysicalExprNode,
+    registry: &dyn FunctionRegistry,
+) -> DFResult<Arc<AggregateFunctionExpr>> {
+    let codec = &DefaultPhysicalExtensionCodec {};
+    let expr_type = expr
+        .expr_type
+        .as_ref()
+        .ok_or_else(|| proto_error("Unexpected empty aggregate physical expression"))?;
+
+    match expr_type {
+        ExprType::AggregateExpr(agg_node) => {
+            let input_phy_expr: Vec<Arc<dyn PhysicalExpr>> = agg_node
+                .expr
+                .iter()
+                .map(|e| parse_physical_expr(e, registry, schema, codec))
+                .collect::<DFResult<Vec<_>>>()?;
+            let ordering_req: LexOrdering = agg_node
+                .ordering_req
+                .iter()
+                .map(|e| parse_physical_sort_expr(e, registry, schema, codec))
+                .collect::<DFResult<LexOrdering>>()?;
+            agg_node
+                .aggregate_function
+                .as_ref()
+                .map(|func| match func {
+                    AggregateFunction::UserDefinedAggrFunction(udaf_name) => {
+                        let agg_udf = match &agg_node.fun_definition {
+                            Some(buf) => codec.try_decode_udaf(udaf_name, buf)?, // Serialized definition present: decode it via the codec.
+                            None => registry.udaf(udaf_name)?, // Otherwise look the UDAF up by name in the registry.
+                        };
+
+                        AggregateExprBuilder::new(agg_udf, input_phy_expr)
+                            .schema(Arc::clone(schema))
+                            .alias(name)
+                            .with_ignore_nulls(agg_node.ignore_nulls)
+                            .with_distinct(agg_node.distinct)
+                            .order_by(ordering_req)
+                            .build()
+                            .map(Arc::new)
+                    }
+                })
+                .transpose()?
+                .ok_or_else(|| proto_error("Invalid AggregateExpr, missing aggregate_function"))
+        }
+        _ => internal_err!("Invalid aggregate expression for AggregateExec"),
+    }
+}
diff --git a/src/runtime/streaming/cluster/graph.rs b/src/runtime/streaming/cluster/graph.rs
new file mode 100644
index 00000000..1ee8f8f7
--- /dev/null
+++ b/src/runtime/streaming/cluster/graph.rs
@@ -0,0 +1,175 @@
+//! Physical execution-graph model: typed identifiers, per-subtask deployment
+//! descriptors, physical edges, and a structural sanity check.
+
+use std::fmt;
+use std::sync::Arc;
+
+use crate::sql::common::FsSchema;
+// ============ Strong-type IDs ============
+
+/// Identifier of a job, wrapping its string form.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct JobId(pub String);
+
+/// Identifier of a vertex of the execution graph.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct VertexId(pub u32);
+
+/// Index of one subtask of a vertex.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct SubtaskIndex(pub u32);
+
+/// Identifier of an operator instance, wrapping its string form.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct OperatorUid(pub String);
+
+impl fmt::Display for JobId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl fmt::Display for VertexId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl fmt::Display for SubtaskIndex {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl fmt::Display for OperatorUid {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+// ============ Resource Profile ============
+
+/// Resource figures attached to a task descriptor.
+#[derive(Debug, Clone)]
+pub struct ResourceProfile {
+    pub managed_memory_bytes: u64,
+    pub cpu_cores: f64,
+    pub network_memory_bytes: u64,
+}
+
+impl Default for ResourceProfile {
+    /// Baseline: 64 MiB managed memory, one CPU core, 32 MiB network memory.
+    fn default() -> Self {
+        Self {
+            managed_memory_bytes: 64 * 1024 * 1024,
+            cpu_cores: 1.0,
+            network_memory_bytes: 32 * 1024 * 1024,
+        }
+    }
+}
+
+// ============ Partitioning Strategy ============
+
+/// How records on a physical edge are routed to downstream subtasks.
+#[derive(Debug, Clone)]
+pub enum PartitioningStrategy {
+    /// One-to-one forwarding between equally indexed subtasks.
+    Forward,
+    /// Hash partitioning on the given key indices.
+    HashByKeys(Vec<usize>),
+    /// Keyless redistribution across the downstream subtasks.
+    Rebalance,
+}
+
+// ============ Exchange Mode ============
+
+/// Transport used by a physical edge.
+#[derive(Debug, Clone)]
+pub enum ExchangeMode {
+    /// Local, thread-to-thread exchange.
+    LocalThread,
+    /// Exchange over the network with the peer at `target_addr`.
+    RemoteNetwork { target_addr: String },
+}
+
+// ============ Deployment Descriptors ============
+
+/// Everything needed to deploy one subtask of one vertex.
+#[derive(Debug, Clone)]
+pub struct TaskDeploymentDescriptor {
+    pub job_id: JobId,
+    pub vertex_id: VertexId,
+    pub subtask_idx: SubtaskIndex,
+    /// Number of subtasks of this vertex.
+    pub parallelism: u32,
+    pub operator_name: String,
+    pub operator_uid: OperatorUid,
+    pub is_source: bool,
+    /// Serialized operator configuration handed to the operator factory.
+    pub operator_config_payload: Vec<u8>,
+    pub resources: ResourceProfile,
+    /// Schemas of the incoming edges.
+    pub in_schemas: Vec<Arc<FsSchema>>,
+    /// Schema of the outgoing edges, if any.
+    pub out_schema: Option<Arc<FsSchema>>,
+    /// Number of incoming physical channels.
+    pub input_gates_count: usize,
+    /// Number of outgoing physical channels.
+    pub output_gates_count: usize,
+}
+
+/// One physical channel from a source subtask to a destination subtask.
+#[derive(Debug, Clone)]
+pub struct PhysicalEdgeDescriptor {
+    pub src_vertex: VertexId,
+    pub src_subtask: SubtaskIndex,
+    pub dst_vertex: VertexId,
+    pub dst_subtask: SubtaskIndex,
+    pub partitioning: PartitioningStrategy,
+    pub exchange_mode: ExchangeMode,
+}
+
+// ============ Execution Graph ============
+
+/// Fully expanded physical plan of a job: its tasks and the edges between them.
+#[derive(Debug, Clone)]
+pub struct ExecutionGraph {
+    pub job_id: JobId,
+    pub tasks: Vec<TaskDeploymentDescriptor>,
+    pub edges: Vec<PhysicalEdgeDescriptor>,
+}
+
+impl ExecutionGraph {
+    /// Performs structural sanity checks on the graph.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message when the graph has no tasks, when it spans more than
+    /// one vertex yet has no edges, or when a `(vertex, subtask)` pair occurs
+    /// more than once.
+    pub fn validate(&self) -> Result<(), String> {
+        use std::collections::HashSet;
+
+        if self.tasks.is_empty() {
+            return Err("Execution graph has no tasks".into());
+        }
+
+        // Edges connect vertices, so a lone vertex with parallelism > 1 has
+        // several tasks and legitimately no edges: count vertices, not tasks.
+        let vertices: HashSet<VertexId> = self.tasks.iter().map(|t| t.vertex_id).collect();
+        if self.edges.is_empty() && vertices.len() > 1 {
+            return Err("Multi-vertex graph has no edges".into());
+        }
+
+        let mut seen = HashSet::new();
+        for tdd in &self.tasks {
+            if !seen.insert((tdd.vertex_id, tdd.subtask_idx)) {
+                return Err(format!(
+                    "Duplicate subtask: vertex={}, subtask={}",
+                    tdd.vertex_id, tdd.subtask_idx
+                ));
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/cluster/manager.rs b/src/runtime/streaming/cluster/manager.rs
new file mode 100644
index 00000000..ce8ec881
--- /dev/null
+++ b/src/runtime/streaming/cluster/manager.rs
@@ -0,0 +1,181 @@
+//! Worker-side task manager: deploys an `ExecutionGraph` and supervises its tasks.
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::ConstructedOperator;
+use crate::runtime::streaming::cluster::graph::ExecutionGraph;
+use crate::runtime::streaming::execution::runner::SubtaskRunner;
+use crate::runtime::streaming::execution::source::SourceRunner;
+use crate::runtime::streaming::factory::OperatorFactory;
+use crate::runtime::streaming::memory::MemoryPool;
+use crate::runtime::streaming::network::NetworkEnvironment;
+use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
+use arroyo_state::tables::table_manager::TableManager;
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::sync::mpsc::{channel, Sender};
+use tokio::task::JoinSet;
+use tracing::{error, info, instrument, warn};
+
+/// Runs the subtasks deployed on one worker and keeps their control channels.
+pub struct TaskManager {
+    pub worker_id: String,
+    memory_pool: Arc<MemoryPool>,
+    table_manager: Arc<tokio::sync::Mutex<TableManager>>,
+    operator_factory: Arc<OperatorFactory>,
+    /// Join handles of every spawned subtask.
+    task_supervisors: JoinSet<()>,
+    /// Control senders keyed by `(vertex id, subtask index)`.
+    pub controllers: HashMap<(u32, u32), Sender<ControlCommand>>,
+}
+
+impl TaskManager {
+    /// Creates a manager with no tasks; the memory pool is built from `max_memory_bytes`.
+    pub fn new(
+        worker_id: String,
+        max_memory_bytes: usize,
+        table_manager: Arc<tokio::sync::Mutex<TableManager>>,
+        operator_factory: Arc<OperatorFactory>,
+    ) -> Self {
+        Self {
+            worker_id,
+            memory_pool: MemoryPool::new(max_memory_bytes),
+            table_manager,
+            operator_factory,
+            task_supervisors: JoinSet::new(),
+            controllers: HashMap::new(),
+        }
+    }
+
+    /// Validates `graph`, wires its channels, and spawns one Tokio task per subtask.
+    ///
+    /// Every operator is constructed before any control channel is registered
+    /// or any task is spawned, so a factory failure leaves the manager
+    /// unchanged instead of half-deployed.
+    ///
+    /// # Errors
+    ///
+    /// Fails when graph validation fails or when the operator factory cannot
+    /// create a task's operator.
+    #[instrument(skip(self, graph), fields(job_id = %graph.job_id))]
+    pub async fn deploy_and_start(&mut self, graph: ExecutionGraph) -> anyhow::Result<()> {
+        info!("TaskManager [{}] starting deployment...", self.worker_id);
+
+        graph
+            .validate()
+            .map_err(|e| anyhow::anyhow!("Graph validation failed: {}", e))?;
+
+        // 1. Network wiring.
+        let local_queue_size = 1024;
+        let mut network_env = NetworkEnvironment::build_from_graph(&graph, local_queue_size);
+
+        // 2. Operator instantiation. This is the only step that can return an
+        //    error, so it runs to completion before anything is registered.
+        let mut staged = Vec::with_capacity(graph.tasks.len());
+        for tdd in graph.tasks {
+            let constructed_op = self
+                .operator_factory
+                .create_operator(&tdd.operator_name, &tdd.operator_config_payload)?;
+            staged.push((tdd, constructed_op));
+        }
+
+        // 3. Control channels, task contexts, and launch into the supervision set.
+        for (tdd, constructed_op) in staged {
+            let v_id = tdd.vertex_id;
+            let s_idx = tdd.subtask_idx;
+            let key = (v_id.0, s_idx.0);
+
+            let (ctrl_tx, ctrl_rx) = channel(32);
+            self.controllers.insert(key, ctrl_tx);
+
+            let inboxes = network_env.take_inboxes(v_id, s_idx);
+            let outboxes = network_env.take_outboxes(v_id, s_idx);
+
+            let ctx = TaskContext::new(
+                tdd.job_id.0.clone(),
+                v_id.0,
+                s_idx.0,
+                tdd.parallelism,
+                outboxes,
+                self.memory_pool.clone(),
+                Some(self.table_manager.clone()),
+            );
+
+            let worker_id = self.worker_id.clone();
+            match constructed_op {
+                ConstructedOperator::Source(source_op) => {
+                    let runner = SourceRunner::new(source_op, ctx, ctrl_rx);
+                    self.task_supervisors.spawn(async move {
+                        if let Err(e) = runner.run().await {
+                            error!(
+                                worker = %worker_id,
+                                vertex = key.0,
+                                subtask = key.1,
+                                "SourceTask CRASHED: {:?}", e
+                            );
+                            // Panic so the supervisor sees the failure as a panicked join.
+                            panic!("SourceTask failed");
+                        }
+                    });
+                }
+                ConstructedOperator::Operator(msg_op) => {
+                    let runner = SubtaskRunner::new(msg_op, ctx, inboxes, ctrl_rx);
+                    self.task_supervisors.spawn(async move {
+                        if let Err(e) = runner.run().await {
+                            error!(
+                                worker = %worker_id,
+                                vertex = key.0,
+                                subtask = key.1,
+                                "StreamTask CRASHED: {:?}", e
+                            );
+                            panic!("StreamTask failed");
+                        }
+                    });
+                }
+            }
+        }
+
+        info!(
+            "TaskManager [{}] deployment complete. All tasks ignited.",
+            self.worker_id
+        );
+        Ok(())
+    }
+
+    /// Waits on every subtask; the first panic aborts the rest and ends supervision.
+    pub async fn wait_and_supervise(mut self) {
+        while let Some(result) = self.task_supervisors.join_next().await {
+            match result {
+                Ok(()) => {
+                    info!("A subtask finished successfully.");
+                }
+                Err(join_error) => {
+                    if join_error.is_panic() {
+                        error!(
+                            "FATAL: A subtask panicked! Initiating emergency shutdown \
+                             of the entire TaskManager to prevent data corruption."
+                        );
+                        // Abort the remaining tasks and wait until they have
+                        // stopped, so the completion log below is accurate.
+                        self.task_supervisors.shutdown().await;
+                        break;
+                    } else if join_error.is_cancelled() {
+                        warn!("A subtask was cancelled.");
+                    }
+                }
+            }
+        }
+        info!("TaskManager shutdown process complete.");
+    }
+
+    /// Sends `Stop { mode }` to every registered task, logging (not failing) on send errors.
+    pub async fn stop_all(&self, mode: StopMode) {
+        for (key, tx) in &self.controllers {
+            if let Err(e) = tx
+                .send(ControlCommand::Stop { mode: mode.clone() })
+                .await
+            {
+                warn!("Failed to send stop command to task {:?}: {}", key, e);
+            }
+        }
+    }
+}
diff --git a/src/runtime/streaming/cluster/master.rs b/src/runtime/streaming/cluster/master.rs
new file mode 100644
index 00000000..5817643d
--- /dev/null
+++ b/src/runtime/streaming/cluster/master.rs
@@ -0,0 +1,309 @@
+//! Job compiler: expands a logical graph into a physical `ExecutionGraph`.
+
+use std::collections::HashMap;
+use anyhow::Result;
+
+use crate::runtime::streaming::cluster::graph::{
+    ExchangeMode, ExecutionGraph, JobId, OperatorUid, PartitioningStrategy,
+    PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId,
+};
+
+// NOTE(review): `LogicalEdgeType` and `LogicalGraph` are imported twice in this
+// file (from `arroyo_datastream` here and from `crate::sql` below). rustc rejects
+// duplicate explicit imports (E0252); confirm the intended source and drop the other.
+use arroyo_datastream::logical::{LogicalEdgeType, LogicalGraph, OperatorChain};
+use petgraph::Direction;
+use sha2::{Digest, Sha256};
+use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph};
+
+/// Errors produced while compiling a logical graph.
+#[derive(thiserror::Error, Debug)]
+pub enum CompileError {
+    /// A forward edge joins two vertices with different parallelism.
+    #[error("Topology Error: Forward edge between Vertex {src} (p={src_p}) and {dst} (p={dst_p}) requires identical parallelism.")]
+    ParallelismMismatch {
+        src: u32,
+        src_p: usize,
+        dst: u32,
+        dst_p: usize,
+    },
+
+    /// An operator chain could not be serialized.
+    #[error("Serialization Error: Failed to serialize operator chain for Vertex {vertex_id}. Error: {source}")]
+    SerializationFailed {
+        vertex_id: u32,
+        source: anyhow::Error,
+    },
+
+    /// The graph (or one of its nodes) failed a structural check.
+    #[error("Validation Error: {0}")]
+    ValidationError(String),
+}
+
+/// Stateless namespace for the logical-to-physical compilation routines.
+pub struct JobCompiler;
+
+impl JobCompiler {
+    /// Expands `logical` into a physical [`ExecutionGraph`] for job `job_id`.
+    ///
+    /// Four phases: count the gates of every subtask, emit one task descriptor
+    /// per subtask, expand logical edges into physical edges, then validate.
+    ///
+    /// # Errors
+    ///
+    /// - [`CompileError::ParallelismMismatch`] when a forward edge joins
+    ///   vertices of different parallelism.
+    /// - [`CompileError::SerializationFailed`] when an operator chain cannot be
+    ///   encoded.
+    /// - [`CompileError::ValidationError`] when a node has an empty operator
+    ///   chain or the resulting graph fails [`ExecutionGraph::validate`].
+    pub fn compile(
+        job_id: String,
+        logical: &LogicalGraph,
+    ) -> Result<ExecutionGraph, CompileError> {
+        let mut tasks = Vec::new();
+        let mut edges = Vec::new();
+        let job_id_typed = JobId(job_id.clone());
+
+        // ====================================================================
+        // Phase 1: pre-compute the gate counts of every subtask
+        // ====================================================================
+        let mut in_degrees: HashMap<(u32, u32), usize> = HashMap::new();
+        let mut out_degrees: HashMap<(u32, u32), usize> = HashMap::new();
+
+        for edge_idx in logical.edge_indices() {
+            let edge = logical.edge_weight(edge_idx).unwrap();
+            let (src_idx, dst_idx) = logical.edge_endpoints(edge_idx).unwrap();
+            let src_node = logical.node_weight(src_idx).unwrap();
+            let dst_node = logical.node_weight(dst_idx).unwrap();
+
+            match edge.edge_type {
+                LogicalEdgeType::Forward => {
+                    if src_node.parallelism != dst_node.parallelism {
+                        return Err(CompileError::ParallelismMismatch {
+                            src: src_node.node_id,
+                            src_p: src_node.parallelism,
+                            dst: dst_node.node_id,
+                            dst_p: dst_node.parallelism,
+                        });
+                    }
+                    for i in 0..src_node.parallelism as u32 {
+                        *out_degrees.entry((src_node.node_id, i)).or_insert(0) += 1;
+                        *in_degrees.entry((dst_node.node_id, i)).or_insert(0) += 1;
+                    }
+                }
+                LogicalEdgeType::Shuffle
+                | LogicalEdgeType::LeftJoin
+                | LogicalEdgeType::RightJoin => {
+                    for s in 0..src_node.parallelism as u32 {
+                        *out_degrees.entry((src_node.node_id, s)).or_insert(0) +=
+                            dst_node.parallelism;
+                    }
+                    for d in 0..dst_node.parallelism as u32 {
+                        *in_degrees.entry((dst_node.node_id, d)).or_insert(0) +=
+                            src_node.parallelism;
+                    }
+                }
+            }
+        }
+
+        // ====================================================================
+        // Phase 2: node expansion and operator fusion
+        // ====================================================================
+        for idx in logical.node_indices() {
+            let node = logical.node_weight(idx).unwrap();
+            let parallelism = node.parallelism as u32;
+
+            let in_schemas: Vec<_> = logical
+                .edges_directed(idx, Direction::Incoming)
+                .map(|e| e.weight().schema.clone())
+                .collect();
+            let out_schema = logical
+                .edges_directed(idx, Direction::Outgoing)
+                .map(|e| e.weight().schema.clone())
+                .next();
+
+            let is_source = node.operator_chain.is_source();
+            // An empty chain is reported as a compile error rather than a panic.
+            let (head_op, _) = node.operator_chain.iter().next().ok_or_else(|| {
+                CompileError::ValidationError(format!(
+                    "Vertex {} has an empty operator chain",
+                    node.node_id
+                ))
+            })?;
+
+            let chain_payload =
+                Self::serialize_operator_chain(&node.operator_chain).map_err(|e| {
+                    CompileError::SerializationFailed {
+                        vertex_id: node.node_id,
+                        source: e,
+                    }
+                })?;
+
+            let base_uid = Self::generate_deterministic_uid(
+                &job_id,
+                node.node_id,
+                &node.operator_chain,
+            );
+
+            let resource_profile =
+                Self::calculate_resource_profile(&node.operator_chain, parallelism);
+
+            for subtask_idx in 0..parallelism {
+                let s_idx = SubtaskIndex(subtask_idx);
+                let v_id = VertexId(node.node_id);
+
+                let input_gates_count = *in_degrees
+                    .get(&(node.node_id, subtask_idx))
+                    .unwrap_or(&0);
+                let output_gates_count = *out_degrees
+                    .get(&(node.node_id, subtask_idx))
+                    .unwrap_or(&0);
+
+                tasks.push(TaskDeploymentDescriptor {
+                    job_id: job_id_typed.clone(),
+                    vertex_id: v_id,
+                    subtask_idx: s_idx,
+                    parallelism,
+                    operator_name: head_op.operator_name.to_string(),
+                    operator_uid: OperatorUid(format!("{}-{}", base_uid, subtask_idx)),
+                    is_source,
+                    operator_config_payload: chain_payload.clone(),
+                    resources: resource_profile.clone(),
+                    in_schemas: in_schemas.clone(),
+                    out_schema: out_schema.clone(),
+                    input_gates_count,
+                    output_gates_count,
+                });
+            }
+        }
+
+        // ====================================================================
+        // Phase 3: physical edge expansion and partitioning inference
+        // ====================================================================
+        for edge_idx in logical.edge_indices() {
+            let edge = logical.edge_weight(edge_idx).unwrap();
+            let (src_graph_idx, dst_graph_idx) = logical.edge_endpoints(edge_idx).unwrap();
+            let src_node = logical.node_weight(src_graph_idx).unwrap();
+            let dst_node = logical.node_weight(dst_graph_idx).unwrap();
+
+            let partitioning = match edge.edge_type {
+                LogicalEdgeType::Forward => PartitioningStrategy::Forward,
+                LogicalEdgeType::Shuffle
+                | LogicalEdgeType::LeftJoin
+                | LogicalEdgeType::RightJoin => {
+                    // Hash on the schema's keys when it has any; otherwise rebalance.
+                    match edge.schema.key_indices.as_ref() {
+                        Some(keys) if !keys.is_empty() => {
+                            PartitioningStrategy::HashByKeys(keys.clone())
+                        }
+                        _ => PartitioningStrategy::Rebalance,
+                    }
+                }
+            };
+
+            let default_exchange = ExchangeMode::LocalThread;
+
+            match edge.edge_type {
+                LogicalEdgeType::Forward => {
+                    for i in 0..src_node.parallelism as u32 {
+                        edges.push(PhysicalEdgeDescriptor {
+                            src_vertex: VertexId(src_node.node_id),
+                            src_subtask: SubtaskIndex(i),
+                            dst_vertex: VertexId(dst_node.node_id),
+                            dst_subtask: SubtaskIndex(i),
+                            partitioning: partitioning.clone(),
+                            exchange_mode: default_exchange.clone(),
+                        });
+                    }
+                }
+                _ => {
+                    for src_idx in 0..src_node.parallelism as u32 {
+                        for dst_idx in 0..dst_node.parallelism as u32 {
+                            edges.push(PhysicalEdgeDescriptor {
+                                src_vertex: VertexId(src_node.node_id),
+                                src_subtask: SubtaskIndex(src_idx),
+                                dst_vertex: VertexId(dst_node.node_id),
+                                dst_subtask: SubtaskIndex(dst_idx),
+                                partitioning: partitioning.clone(),
+                                exchange_mode: default_exchange.clone(),
+                            });
+                        }
+                    }
+                }
+            }
+        }
+
+        let exec_graph = ExecutionGraph {
+            job_id: job_id_typed,
+            tasks,
+            edges,
+        };
+
+        // ====================================================================
+        // Phase 4: defensive self-check of the execution graph
+        // ====================================================================
+        exec_graph
+            .validate()
+            .map_err(CompileError::ValidationError)?;
+
+        Ok(exec_graph)
+    }
+
+    /// Derives a deterministic operator UID: the hex of the first 8 bytes of a
+    /// SHA-256 over the job id, the node id, and each chained operator's name
+    /// and config.
+    ///
+    /// NOTE(review): because `job_id` and `node_id` are hashed, the UID is stable
+    /// only while both stay the same, not across arbitrary topology changes.
+    fn generate_deterministic_uid(
+        job_id: &str,
+        node_id: u32,
+        chain: &OperatorChain,
+    ) -> String {
+        let mut hasher = Sha256::new();
+        hasher.update(job_id.as_bytes());
+        hasher.update(&node_id.to_le_bytes());
+
+        for (op, _) in chain.iter() {
+            hasher.update(op.operator_name.to_string().as_bytes());
+            hasher.update(&op.operator_config);
+        }
+
+        let result = hasher.finalize();
+        hex::encode(&result[..8])
+    }
+
+    /// Serializes a whole operator chain with bincode's standard configuration.
+    fn serialize_operator_chain(chain: &OperatorChain) -> Result<Vec<u8>> {
+        bincode::serde::encode_to_vec(chain, bincode::config::standard())
+            .map_err(|e| anyhow::anyhow!("bincode encode failed: {}", e))
+    }
+
+    /// Estimates per-subtask resources from the operator names in `chain`.
+    ///
+    /// Starts from [`ResourceProfile::default`]; each operator whose name contains
+    /// "Window", "Join" or "Aggregate" adds half a core plus an equal share of
+    /// 512 MiB managed memory, and each "Source"/"Sink" adds an equal share of
+    /// 128 MiB network memory. A `parallelism` of zero is treated as one.
+    fn calculate_resource_profile(
+        chain: &OperatorChain,
+        parallelism: u32,
+    ) -> ResourceProfile {
+        // Clamp the divisor: dividing by a zero parallelism would panic.
+        let share = u64::from(parallelism.max(1));
+        let mut profile = ResourceProfile::default();
+
+        for (op, _) in chain.iter() {
+            let name = op.operator_name.to_string();
+            if name.contains("Window") || name.contains("Join") || name.contains("Aggregate") {
+                profile.managed_memory_bytes += 512 * 1024 * 1024 / share;
+                profile.cpu_cores += 0.5;
+            }
+            if name.contains("Source") || name.contains("Sink") {
+                profile.network_memory_bytes += 128 * 1024 * 1024 / share;
+            }
+        }
+        profile
+    }
+}
diff --git a/src/runtime/streaming/cluster/mod.rs b/src/runtime/streaming/cluster/mod.rs
new file mode 100644
index 00000000..f337078c
--- /dev/null
+++ b/src/runtime/streaming/cluster/mod.rs
@@ -0,0 +1,11 @@
+pub mod graph;
+pub mod manager;
+pub mod master;
+mod wiring;
+
+pub use graph::{
+    ExchangeMode, ExecutionGraph, JobId, OperatorUid, PartitioningStrategy,
+    PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId,
+};
+pub use manager::TaskManager;
+pub use master::{CompileError, JobCompiler};
diff --git a/src/runtime/streaming/cluster/wiring.rs b/src/runtime/streaming/cluster/wiring.rs
new file mode 100644
index 00000000..eb3b4162
--- /dev/null
+++ b/src/runtime/streaming/cluster/wiring.rs
@@ -0,0 +1,55 @@
+//! Physical topology construction: channels and one-to-one subtask edges.
+//!
+//! Compiling an `arroyo_datastream::LogicalGraph` all the way into task pipelines
+//! belongs to the upper worker/planner layer; this module only provides a
+//! **graph-agnostic** channel factory and edge expansion for the adapter layer.
+
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+use std::collections::HashMap;
+use tokio::sync::mpsc::{self, Receiver, Sender};
+
+/// Key of a subtask; presumably `(node id, subtask index)` — confirm with callers.
+pub type SubtaskKey = (String, u32);
+
+/// Senders collected per subtask key.
+pub type SubtaskOutChannels = HashMap<SubtaskKey, Vec<Sender<TrackedEvent>>>;
+/// Receivers collected per subtask key.
+pub type SubtaskInChannels = HashMap<SubtaskKey, Vec<Receiver<TrackedEvent>>>;
+
+/// Creates a bounded Tokio mpsc channel of `TrackedEvent`s with the given capacity.
+pub fn stream_channel(capacity: usize) -> (Sender<TrackedEvent>, Receiver<TrackedEvent>) {
+    let (sender, receiver) = mpsc::channel(capacity);
+    (sender, receiver)
+}
+
+/// A node identifier together with its parallelism.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub struct NodeSpec {
+    pub id: String,
+    pub parallelism: u32,
+}
+
+/// A directed link between two subtask keys.
+#[derive(Debug, Clone)]
+pub struct PhysicalEdge {
+    pub from: (String, u32),
+    pub to: (String, u32),
+}
+
+/// Builds one dedicated channel per `PhysicalEdge`, attaching its sender to the
+/// `from` subtask and its receiver to the `to` subtask.
+pub fn build_one_to_one_channels(
+    edges: &[PhysicalEdge],
+    capacity: usize,
+) -> (SubtaskOutChannels, SubtaskInChannels) {
+    let mut outgoing = SubtaskOutChannels::new();
+    let mut incoming = SubtaskInChannels::new();
+
+    edges.iter().for_each(|edge| {
+        let (sender, receiver) = mpsc::channel(capacity);
+        outgoing.entry(edge.from.clone()).or_default().push(sender);
+        incoming.entry(edge.to.clone()).or_default().push(receiver);
+    });
+
+    (outgoing, incoming)
+}
diff --git a/src/runtime/streaming/error.rs b/src/runtime/streaming/error.rs
new file mode 100644
index 00000000..f00bd9c4
--- /dev/null
+++ b/src/runtime/streaming/error.rs
@@ -0,0 +1,10 @@
+use thiserror::Error;
+
+/// Errors raised while a subtask or source task is running.
+#[derive(Debug, Error)]
+pub enum RunError {
+    #[error("operator error: {0:#}")]
+    Operator(#[from] anyhow::Error),
+    #[error("downstream send: {0}")]
+    DownstreamSend(String),
+}
diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs
new file mode 100644
index 00000000..34002193
--- /dev/null
+++ b/src/runtime/streaming/execution/mod.rs
@@ -0,0 +1,8 @@
+//! Execution layer: Tokio actor run containers.
+
+pub mod runner;
+pub mod source;
+pub mod tracker;
+
+pub use runner::SubtaskRunner;
+pub use source::{SourceRunner, SOURCE_IDLE_SLEEP};
diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs
new file mode 100644
index 00000000..f1733b29
--- /dev/null
+++ b/src/runtime/streaming/execution/runner.rs
@@ -0,0 +1,298 @@
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::error::RunError;
+use crate::runtime::streaming::protocol::control::ControlCommand;
+use crate::runtime::streaming::protocol::event::StreamEvent;
+use crate::runtime::streaming::protocol::stream_out::StreamOutput;
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+use crate::runtime::streaming::protocol::Watermark;
+use super::tracker::barrier_aligner::{AlignmentStatus, BarrierAligner};
+use super::tracker::watermark_tracker::WatermarkTracker;
+use crate::runtime::streaming::network::endpoint::BoxedEventStream;
+use arroyo_types::CheckpointBarrier;
+use std::collections::VecDeque;
+use std::pin::Pin;
+use tokio::sync::mpsc::Receiver;
+use tokio_stream::{StreamExt, StreamMap};
+use tracing::{debug, error, info, warn};
+use crate::sql::common::{CheckpointBarrier, Watermark};
+
+pub struct SubtaskRunner {
+    operator: Box<dyn MessageOperator>,
+    ctx: TaskContext,
+    inboxes: Vec<BoxedEventStream>,
+    control_rx: Receiver<ControlCommand>,
+}
+
+impl SubtaskRunner {
+    pub fn new(
+        operator: Box<dyn MessageOperator>,
+        ctx: TaskContext,
+        inboxes: Vec<BoxedEventStream>,
+        control_rx: Receiver<ControlCommand>,
+    ) -> Self {
+        Self { operator, ctx, inboxes, control_rx }
+    }
+
+    pub async fn run(mut self) -> Result<(), RunError> {
+        let input_count = self.inboxes.len();
+        info!(
+            job_id = %self.ctx.job_id,
+            vertex = self.ctx.vertex_id,
+            subtask = self.ctx.subtask_idx,
+            inputs = input_count,
+            operator = %self.operator.name(),
+            "subtask starting"
+        );
+
+        self.operator.on_start(&mut self.ctx).await?;
+
+        if input_count == 0 {
+            return self.run_source_loop().await;
+        }
+
+        let mut stream_map: StreamMap<usize, Pin<Box<dyn tokio_stream::Stream<Item = TrackedEvent> + Send>>> = StreamMap::new();
+        for (i, inbox) in self.inboxes.into_iter().enumerate() {
+            stream_map.insert(i, inbox);
+        }
+
+        let mut wm_tracker = WatermarkTracker::new(input_count);
+        let mut barrier_aligner = BarrierAligner::new(input_count);
+        let mut eof_count = 0usize;
+        let mut closed_on_full_eof = false;
+
+        let tick_interval = self.operator.tick_interval();
+        let mut tick_sleep: Option<Pin<Box<tokio::time::Sleep>>> =
+            tick_interval.map(|d| Box::pin(tokio::time::sleep(d)));
+        let mut tick_index: u64 = 0;
+
+        'run: loop {
+            tokio::select! {
+                biased;
+
+                cmd_opt = self.control_rx.recv() => {
+                    match cmd_opt {
+                        None => {
+                            debug!(
+                                vertex = self.ctx.vertex_id,
+                                subtask = self.ctx.subtask_idx,
+                                "control channel closed"
+                            );
+                            break 'run;
+                        }
+                        Some(cmd) => {
+                            info!(
+                                vertex = self.ctx.vertex_id,
+                                subtask = self.ctx.subtask_idx,
+                                ?cmd,
+                                "control command"
+                            );
+                            if Self::handle_control_command(&mut self.operator, &mut self.ctx, cmd)
+                                .await?
+                            {
+                                break 'run;
+                            }
+                        }
+                    }
+                }
+
+                next_item = stream_map.next() => {
+                    let Some((input_idx, event)) = next_item else {
+                        break 'run;
+                    };
+
+                    if barrier_aligner.is_blocked(input_idx)
+                        && !matches!(event.event, StreamEvent::Barrier(_))
+                    {
+                        barrier_aligner.buffer_event(input_idx, event);
+                    } else {
+                        let mut work = VecDeque::new();
+                        work.push_back((input_idx, event));
+                        let mut exit_run = false;
+                        let mut dispatch = EventDispatchState {
+                            operator: &mut self.operator,
+                            ctx: &mut self.ctx,
+                            work: &mut work,
+                            wm_tracker: &mut wm_tracker,
+                            barrier_aligner: &mut barrier_aligner,
+                            eof_count: &mut eof_count,
+                            closed_on_full_eof: &mut closed_on_full_eof,
+                            input_count,
+                        };
+                        while let Some((idx, ev)) = dispatch.work.pop_front() {
+                            if Self::dispatch_stream_event(&mut dispatch, idx, ev).await? {
+                                exit_run = true;
+                                break;
+                            }
+                        }
+                        if exit_run {
+                            break 'run;
+                        }
+                    }
+                }
+
+                _ = async {
+                    match tick_sleep.as_mut() {
+                        Some(s) => s.as_mut().await,
+                        None => std::future::pending().await,
+                    }
+                }, if tick_interval.is_some() => {
+                    let outs = self
+                        .operator
+                        .process_tick(tick_index, &mut self.ctx)
+                        .await?;
+                    tick_index = tick_index.wrapping_add(1);
+                    Self::dispatch_stream_outputs(&mut self.ctx, outs).await?;
+                    if let (Some(d), Some(s)) = (tick_interval, tick_sleep.as_mut()) {
+                        s.as_mut()
+                            .reset(tokio::time::Instant::now() + d);
+                    }
+                }
+            }
+        }
+
+        if !closed_on_full_eof {
+            let close_outs = self.operator.on_close(&mut self.ctx).await?;
+            Self::dispatch_stream_outputs(&mut self.ctx, close_outs).await?;
+        }
+
+        info!(
+            vertex = self.ctx.vertex_id,
+            subtask = self.ctx.subtask_idx,
+            "subtask shutdown"
+        );
+        Ok(())
+    }
+
+    /// Control-driven loop: handles control commands until one of them asks
+    /// to stop or the control channel closes, then closes the operator and
+    /// announces end-of-stream downstream.
+    ///
+    /// # Errors
+    /// Propagates any `RunError` raised while handling a command, closing the
+    /// operator, or emitting the final outputs.
+    async fn run_source_loop(mut self) -> Result<(), RunError> {
+        loop {
+            // `None` means every control sender has been dropped.
+            let Some(cmd) = self.control_rx.recv().await else {
+                break;
+            };
+            let should_stop =
+                Self::handle_control_command(&mut self.operator, &mut self.ctx, cmd).await?;
+            if should_stop {
+                break;
+            }
+        }
+
+        // Flush whatever the operator still wants to emit on shutdown.
+        let final_outputs = self.operator.on_close(&mut self.ctx).await?;
+        Self::dispatch_stream_outputs(&mut self.ctx, final_outputs).await?;
+
+        // Only announce end-of-stream when there is at least one outbox.
+        let has_downstream = !self.ctx.outboxes.is_empty();
+        if has_downstream {
+            self.ctx.broadcast(StreamEvent::EndOfStream).await?;
+        }
+
+        info!(
+            vertex = self.ctx.vertex_id,
+            subtask = self.ctx.subtask_idx,
+            "Source subtask finished"
+        );
+        Ok(())
+    }
+
+    /// Applies one control command to the operator.
+    ///
+    /// `TriggerCheckpoint` snapshots the operator and then broadcasts the
+    /// barrier; `Commit` invokes `commit_checkpoint`. In both cases a failure
+    /// of the operator call is only logged. Every command (including those
+    /// two) is afterwards forwarded to `handle_control`.
+    ///
+    /// Returns `Ok(true)` when `handle_control` reports that the task should
+    /// stop. A `handle_control` error is logged and treated as "keep running".
+    ///
+    /// # Errors
+    /// Only a failed barrier broadcast is propagated.
+    async fn handle_control_command(
+        operator: &mut Box<dyn MessageOperator>,
+        ctx: &mut TaskContext,
+        cmd: ControlCommand,
+    ) -> Result<bool, RunError> {
+        match &cmd {
+            ControlCommand::TriggerCheckpoint { barrier } => {
+                let barrier: CheckpointBarrier = barrier.clone().into();
+                // Best effort: the barrier is still forwarded when the snapshot fails.
+                if let Err(e) = operator.snapshot_state(barrier, ctx).await {
+                    error!("Source snapshot failed: {}", e);
+                }
+                ctx.broadcast(StreamEvent::Barrier(barrier)).await?;
+            }
+            ControlCommand::Commit { epoch } => {
+                if let Err(e) = operator.commit_checkpoint(*epoch, ctx).await {
+                    error!("commit_checkpoint failed: {}", e);
+                }
+            }
+            _ => {}
+        }
+
+        let should_stop = operator
+            .handle_control(cmd, ctx)
+            .await
+            .unwrap_or_else(|e| {
+                warn!("handle_control error: {}", e);
+                false
+            });
+        Ok(should_stop)
+    }
+
+    /// Routes each operator output to the matching `TaskContext` sink, in the
+    /// order the operator produced them.
+    ///
+    /// # Errors
+    /// Stops at, and returns, the first send failure.
+    async fn dispatch_stream_outputs(
+        ctx: &mut TaskContext,
+        outputs: Vec<StreamOutput>,
+    ) -> Result<(), RunError> {
+        for output in outputs {
+            match output {
+                StreamOutput::Watermark(wm) => {
+                    ctx.broadcast(StreamEvent::Watermark(wm)).await?;
+                }
+                StreamOutput::Keyed(hash, batch) => ctx.collect_keyed(hash, batch).await?,
+                StreamOutput::Forward(batch) => ctx.collect(batch).await?,
+                // NOTE(review): `Broadcast` goes through `collect`, exactly like
+                // `Forward` — confirm `collect` really fans out to every outbox.
+                StreamOutput::Broadcast(batch) => ctx.collect(batch).await?,
+            }
+        }
+        Ok(())
+    }
+
+    /// Handles one event that arrived on input `input_idx`.
+    ///
+    /// Returns `Ok(true)` once `EndOfStream` has been seen on every input
+    /// (the operator has then already been closed and `EndOfStream` forwarded),
+    /// and `Ok(false)` in every other case.
+    ///
+    /// # Errors
+    /// Propagates operator and send failures, except a failed
+    /// `snapshot_state`, which is only logged.
+    async fn dispatch_stream_event(
+        st: &mut EventDispatchState<'_>,
+        input_idx: usize,
+        tracked: TrackedEvent,
+    ) -> Result<bool, RunError> {
+        let event = tracked.event;
+        match event {
+            StreamEvent::Data(batch) => {
+                let outputs = st
+                    .operator
+                    .process_data(input_idx, batch, st.ctx)
+                    .await?;
+                Self::dispatch_stream_outputs(st.ctx, outputs).await?;
+            }
+            StreamEvent::Watermark(wm) => {
+                // The tracker yields a value only when the combined watermark changes;
+                // otherwise this event is absorbed silently.
+                if let Some(aligned_wm) = st.wm_tracker.update(input_idx, wm) {
+                    if let Watermark::EventTime(t) = aligned_wm {
+                        st.ctx.last_present_watermark = Some(t);
+                    }
+                    let outputs = st
+                        .operator
+                        .process_watermark(aligned_wm.clone(), st.ctx)
+                        .await?;
+                    // Operator outputs go out first, then the watermark itself.
+                    Self::dispatch_stream_outputs(st.ctx, outputs).await?;
+                    st.ctx
+                        .broadcast(StreamEvent::Watermark(aligned_wm))
+                        .await?;
+                }
+            }
+            StreamEvent::Barrier(barrier) => {
+                match st.barrier_aligner.mark(input_idx, &barrier) {
+                    // Still waiting for this barrier on other inputs.
+                    AlignmentStatus::Pending => {}
+                    AlignmentStatus::Complete(buffered) => {
+                        // A failed snapshot is logged; the barrier is forwarded regardless.
+                        if let Err(e) = st.operator.snapshot_state(barrier, st.ctx).await {
+                            error!("Operator snapshot failed: {}", e);
+                        }
+                        st.ctx.broadcast(StreamEvent::Barrier(barrier)).await?;
+                        // Events parked during alignment are re-queued at the back of the work queue.
+                        for pair in buffered {
+                            st.work.push_back(pair);
+                        }
+                    }
+                }
+            }
+            StreamEvent::EndOfStream => {
+                *st.eof_count += 1;
+                if *st.eof_count == st.input_count {
+                    let close_outs = st.operator.on_close(st.ctx).await?;
+                    Self::dispatch_stream_outputs(st.ctx, close_outs).await?;
+                    // Tells the caller that `on_close` has already run.
+                    *st.closed_on_full_eof = true;
+                    st.ctx.broadcast(StreamEvent::EndOfStream).await?;
+                    return Ok(true);
+                }
+            }
+        }
+        Ok(false)
+    }
+}
+
+/// Mutable borrows of the run-loop state that `dispatch_stream_event` needs,
+/// bundled so they can be passed as a single argument.
+struct EventDispatchState<'a> {
+    /// Operator that processes data, watermarks, snapshots and close.
+    operator: &'a mut Box<dyn MessageOperator>,
+    /// Task context used to emit outputs and broadcast events.
+    ctx: &'a mut TaskContext,
+    /// Pending `(input index, event)` pairs; events released by barrier alignment are appended here.
+    work: &'a mut VecDeque<(usize, TrackedEvent)>,
+    /// Combines the per-input watermarks into one aligned watermark.
+    wm_tracker: &'a mut WatermarkTracker,
+    /// Tracks which inputs have delivered the current checkpoint barrier.
+    barrier_aligner: &'a mut BarrierAligner,
+    /// Number of `EndOfStream` events seen so far.
+    eof_count: &'a mut usize,
+    /// Set to `true` once the operator was closed because every input reached end-of-stream.
+    closed_on_full_eof: &'a mut bool,
+    /// `eof_count` value at which the subtask is considered finished.
+    input_count: usize,
+}
diff --git a/src/runtime/streaming/execution/source.rs b/src/runtime/streaming/execution/source.rs
new file mode 100644
index 00000000..9fe1983e
--- /dev/null
+++ b/src/runtime/streaming/execution/source.rs
@@ -0,0 +1,120 @@
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::source::{SourceEvent, SourceOperator};
+use crate::runtime::streaming::error::RunError;
+use crate::runtime::streaming::protocol::control::ControlCommand;
+use crate::runtime::streaming::protocol::event::StreamEvent;
+use std::time::Duration;
+use tokio::sync::mpsc::Receiver;
+use tokio::time::sleep;
+use tracing::{debug, info, warn};
+use crate::sql::common::CheckpointBarrier;
+
+/// Pause inserted after the operator reports `SourceEvent::Idle` before
+/// `fetch_next` is polled again.
+pub const SOURCE_IDLE_SLEEP: Duration = Duration::from_millis(50);
+
+/// Drives one source subtask: polls the operator for events and reacts to
+/// control commands.
+pub struct SourceRunner {
+    /// Source operator being driven.
+    operator: Box<dyn SourceOperator>,
+    /// Task context used to emit batches and broadcast events.
+    ctx: TaskContext,
+    /// Incoming control commands.
+    control_rx: Receiver<ControlCommand>,
+}
+
+impl SourceRunner {
+    /// Bundles the operator, its context and the control channel into a runner.
+    pub fn new(
+        operator: Box<dyn SourceOperator>,
+        ctx: TaskContext,
+        control_rx: Receiver<ControlCommand>,
+    ) -> Self {
+        Self {
+            operator,
+            ctx,
+            control_rx,
+        }
+    }
+
+    /// Runs the source until a `Stop` command arrives or the control channel
+    /// closes, then calls `on_close` on the operator.
+    ///
+    /// Control commands take priority over fetching (`biased` select).
+    ///
+    /// NOTE(review): no `EndOfStream` is broadcast on shutdown here — confirm
+    /// downstream tasks do not rely on it.
+    ///
+    /// # Errors
+    /// Returns the first failure from `on_start`, `snapshot_state`,
+    /// `fetch_next`, `on_close`, or any send to the outputs.
+    pub async fn run(mut self) -> Result<(), RunError> {
+        info!(
+            job_id = %self.ctx.job_id,
+            vertex = self.ctx.vertex_id,
+            subtask = self.ctx.subtask_idx,
+            operator = %self.operator.name(),
+            "source subtask starting"
+        );
+
+        self.operator.on_start(&mut self.ctx).await?;
+
+        let mut is_running = true;
+        // True while the runner is backing off after an `Idle` report.
+        let mut idle_pending = false;
+
+        while is_running {
+            tokio::select! {
+                biased;
+                cmd_opt = self.control_rx.recv() => {
+                    match cmd_opt {
+                        None => {
+                            debug!(
+                                vertex = self.ctx.vertex_id,
+                                subtask = self.ctx.subtask_idx,
+                                "source control channel closed"
+                            );
+                            is_running = false;
+                        }
+                        Some(ControlCommand::Stop { .. }) => {
+                            is_running = false;
+                        }
+                        Some(ControlCommand::TriggerCheckpoint { barrier }) => {
+                            // Snapshot first, then inject the barrier into the stream.
+                            let barrier: CheckpointBarrier = barrier.into();
+                            self.operator
+                                .snapshot_state(barrier, &mut self.ctx)
+                                .await?;
+                            self.ctx
+                                .broadcast(StreamEvent::Barrier(barrier))
+                                .await?;
+                        }
+                        Some(
+                            cmd @ (ControlCommand::Start
+                            | ControlCommand::DropState
+                            | ControlCommand::Commit { .. }
+                            | ControlCommand::UpdateConfig { .. }),
+                        ) => {
+                            debug!(?cmd, "source: ignored control command");
+                        }
+                    }
+                }
+                _ = sleep(SOURCE_IDLE_SLEEP), if is_running && idle_pending => {
+                    // Back-off elapsed: allow fetching again.
+                    idle_pending = false;
+                }
+                fetch_res = self.operator.fetch_next(&mut self.ctx), if is_running && !idle_pending => {
+                    let event = match fetch_res {
+                        Ok(event) => event,
+                        Err(e) => {
+                            warn!(
+                                vertex = self.ctx.vertex_id,
+                                error = %e,
+                                "fetch_next error"
+                            );
+                            return Err(RunError::Operator(e));
+                        }
+                    };
+                    match event {
+                        SourceEvent::Data(batch) => {
+                            self.ctx.collect(batch).await?;
+                        }
+                        SourceEvent::Watermark(wm) => {
+                            self.ctx.broadcast(StreamEvent::Watermark(wm)).await?;
+                        }
+                        SourceEvent::Idle => {
+                            idle_pending = true;
+                        }
+                    }
+                }
+            }
+        }
+
+        self.operator.on_close(&mut self.ctx).await?;
+
+        info!(
+            vertex = self.ctx.vertex_id,
+            subtask = self.ctx.subtask_idx,
+            "source subtask shutdown"
+        );
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/runtime/streaming/execution/tracker/barrier_aligner.rs
new file mode 100644
index 00000000..e284922b
--- /dev/null
+++ b/src/runtime/streaming/execution/tracker/barrier_aligner.rs
@@ -0,0 +1,57 @@
+//! Chandy–Lamport style barrier alignment.
+
+use std::collections::HashSet;
+use crate::runtime::streaming::protocol::TrackedEvent;
+use crate::sql::common::CheckpointBarrier;
+
+/// Outcome of registering a checkpoint barrier with [`BarrierAligner::mark`].
+#[derive(Debug)]
+pub enum AlignmentStatus {
+    /// Not every input has delivered the barrier yet.
+    Pending,
+    /// Every input has delivered the barrier; carries the `(input index, event)`
+    /// pairs that were buffered while alignment was in progress.
+    Complete(Vec<(usize, TrackedEvent)>),
+}
+
+/// Aligns checkpoint barriers across the inputs of one subtask.
+///
+/// While an alignment is in progress, callers are expected to park events
+/// from already-aligned inputs via [`BarrierAligner::buffer_event`]; they are
+/// handed back when the alignment completes.
+#[derive(Debug)]
+pub struct BarrierAligner {
+    /// Number of inputs that must deliver the barrier.
+    input_count: usize,
+    /// Epoch of the alignment in progress, `None` when idle.
+    current_epoch: Option<u32>,
+    /// Inputs that have already delivered the barrier of `current_epoch`.
+    reached_inputs: HashSet<usize>,
+    /// Events parked until the alignment completes.
+    buffered_events: Vec<(usize, TrackedEvent)>,
+}
+
+impl BarrierAligner {
+    /// Creates an idle aligner for `input_count` inputs.
+    pub fn new(input_count: usize) -> Self {
+        Self {
+            input_count,
+            current_epoch: None,
+            reached_inputs: HashSet::new(),
+            buffered_events: Vec::new(),
+        }
+    }
+
+    /// Returns `true` when an alignment is in progress and `input_idx` has
+    /// already delivered its barrier.
+    pub fn is_blocked(&self, input_idx: usize) -> bool {
+        self.current_epoch.is_some() && self.reached_inputs.contains(&input_idx)
+    }
+
+    /// Parks `event` from `input_idx` until the current alignment completes.
+    pub fn buffer_event(&mut self, input_idx: usize, event: TrackedEvent) {
+        self.buffered_events.push((input_idx, event));
+    }
+
+    /// Records that `input_idx` delivered `barrier`.
+    ///
+    /// Returns [`AlignmentStatus::Complete`] with all parked events once every
+    /// input has delivered a barrier of the same epoch, and resets the aligner
+    /// to idle; otherwise returns [`AlignmentStatus::Pending`].
+    ///
+    /// A barrier whose epoch differs from the one in progress restarts the
+    /// alignment for the new epoch.
+    pub fn mark(&mut self, input_idx: usize, barrier: &CheckpointBarrier) -> AlignmentStatus {
+        if self.current_epoch != Some(barrier.epoch) {
+            self.current_epoch = Some(barrier.epoch);
+            self.reached_inputs.clear();
+            // Parked events are deliberately kept: clearing them here would
+            // silently drop data that was already taken off the inputs. They
+            // are released with the next completed alignment instead.
+            // NOTE(review): events carried over from a superseded epoch are
+            // released later than their original position; surfacing them
+            // immediately would need a new `AlignmentStatus` variant.
+        }
+
+        self.reached_inputs.insert(input_idx);
+
+        if self.reached_inputs.len() == self.input_count {
+            let released = std::mem::take(&mut self.buffered_events);
+            self.current_epoch = None;
+            self.reached_inputs.clear();
+            AlignmentStatus::Complete(released)
+        } else {
+            AlignmentStatus::Pending
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/runtime/streaming/execution/tracker/mod.rs
new file mode 100644
index 00000000..bfa24e8b
--- /dev/null
+++ b/src/runtime/streaming/execution/tracker/mod.rs
@@ -0,0 +1,7 @@
+//! Coordination layer: barrier alignment and multi-input watermark tracking.
+
+pub mod barrier_aligner;
+pub mod watermark_tracker;
+
+pub use barrier_aligner::{AlignmentStatus, BarrierAligner};
+pub use watermark_tracker::WatermarkTracker;
diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/runtime/streaming/execution/tracker/watermark_tracker.rs
new file mode 100644
index 00000000..be7043b9
--- /dev/null
+++ b/src/runtime/streaming/execution/tracker/watermark_tracker.rs
@@ -0,0 +1,86 @@
+use crate::runtime::streaming::protocol::watermark::{merge_watermarks, watermark_strictly_advances, Watermark};
+use crate::sql::common::Watermark;
+
+/// Combines the watermarks of several inputs into a single aligned watermark.
+#[derive(Debug)]
+pub struct WatermarkTracker {
+    /// Latest watermark received per input; `None` until that input reports one.
+    watermarks: Vec<Option<Watermark>>,
+    /// Last aligned watermark returned by [`WatermarkTracker::update`].
+    current_min_watermark: Option<Watermark>,
+}
+
+impl WatermarkTracker {
+    /// Creates a tracker for `input_count` inputs with no watermark seen yet.
+    pub fn new(input_count: usize) -> Self {
+        Self {
+            watermarks: vec![None; input_count],
+            current_min_watermark: None,
+        }
+    }
+
+    /// Records `wm` as the latest watermark of `input_idx` and returns the new
+    /// aligned watermark if it strictly advanced.
+    ///
+    /// Returns `None` while any input has not reported a watermark, when
+    /// `merge_watermarks` yields nothing, or when the merged value does not
+    /// strictly advance past the previously returned one.
+    ///
+    /// # Panics
+    /// Panics if `input_idx` is not a valid input index.
+    pub fn update(&mut self, input_idx: usize, wm: Watermark) -> Option<Watermark> {
+        self.watermarks[input_idx] = Some(wm);
+
+        let all_inputs_seen = self.watermarks.iter().all(Option::is_some);
+        if !all_inputs_seen {
+            return None;
+        }
+
+        let candidate = merge_watermarks(&self.watermarks)?;
+        if watermark_strictly_advances(candidate, self.current_min_watermark) {
+            self.current_min_watermark = Some(candidate);
+            Some(candidate)
+        } else {
+            None
+        }
+    }
+}
+
+/// Unit tests for `WatermarkTracker::update`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::{Duration, SystemTime};
+
+    // Nothing is emitted until every input has reported; then the smaller value wins.
+    #[test]
+    fn no_emit_until_all_inputs_seen() {
+        let mut t = WatermarkTracker::new(2);
+        let w = Watermark::EventTime(SystemTime::UNIX_EPOCH + Duration::from_secs(3));
+        assert!(t.update(0, w).is_none());
+        let w2 = Watermark::EventTime(SystemTime::UNIX_EPOCH + Duration::from_secs(1));
+        assert_eq!(t.update(1, w2), Some(w2));
+    }
+
+    // Re-sending the same watermark does not emit a second time.
+    #[test]
+    fn dedup_same_aligned() {
+        let mut t = WatermarkTracker::new(1);
+        let w = Watermark::EventTime(SystemTime::UNIX_EPOCH + Duration::from_secs(1));
+        assert_eq!(t.update(0, w), Some(w));
+        assert!(t.update(0, w).is_none());
+    }
+
+    // The aligned watermark is emitted only when it strictly increases.
+    #[test]
+    fn advances_only_when_min_strictly_increases() {
+        let mut t = WatermarkTracker::new(2);
+        let t1 = SystemTime::UNIX_EPOCH + Duration::from_secs(1);
+        let t5 = SystemTime::UNIX_EPOCH + Duration::from_secs(5);
+        assert!(t.update(0, Watermark::EventTime(t5)).is_none());
+        assert_eq!(t.update(1, Watermark::EventTime(t1)), Some(Watermark::EventTime(t1)));
+        let t3 = SystemTime::UNIX_EPOCH + Duration::from_secs(3);
+        assert_eq!(
+            t.update(1, Watermark::EventTime(t3)),
+            Some(Watermark::EventTime(t3))
+        );
+        assert!(t.update(1, Watermark::EventTime(t3)).is_none());
+    }
+
+    // An input moving backwards must not produce a lower aligned watermark.
+    #[test]
+    fn backward_aligned_min_is_ignored() {
+        let mut t = WatermarkTracker::new(2);
+        let t5 = SystemTime::UNIX_EPOCH + Duration::from_secs(5);
+        let t10 = SystemTime::UNIX_EPOCH + Duration::from_secs(10);
+        assert!(t.update(0, Watermark::EventTime(t10)).is_none());
+        assert_eq!(
+            t.update(1, Watermark::EventTime(t5)),
+            Some(Watermark::EventTime(t5))
+        );
+        let t2 = SystemTime::UNIX_EPOCH + Duration::from_secs(2);
+        assert!(t.update(0, Watermark::EventTime(t2)).is_none());
+    }
+}
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
new file mode 100644
index 00000000..4cd52bf3
--- /dev/null
+++ b/src/runtime/streaming/factory/mod.rs
@@ -0,0 +1,3 @@
+pub mod registry;
+
+pub use registry:: OperatorFactory;
diff --git a/src/runtime/streaming/factory/registry.rs b/src/runtime/streaming/factory/registry.rs
new file mode 100644
index 00000000..5b53b920
--- /dev/null
+++ b/src/runtime/streaming/factory/registry.rs
@@ -0,0 +1,44 @@
+use anyhow::{anyhow, Result};
+use crate::runtime::streaming::api::operator::ConstructedOperator;
+use std::collections::HashMap;
+
+
+/// Operator registry and factory: maps an operator name to the constructor
+/// that builds it.
+///
+/// NOTE(review): `OperatorConstructor` and the constructor types registered
+/// in `new` are not imported in this file — confirm where they are declared.
+pub struct OperatorFactory {
+    /// Registered constructors, keyed by operator name.
+    constructors: HashMap<String, Box<dyn OperatorConstructor>>,
+}
+
+impl OperatorFactory {
+    /// Creates a factory with the built-in operator constructors registered.
+    pub fn new() -> Self {
+        // `register` takes `&mut self`, so the binding has to be mutable.
+        let mut factory = Self {
+            constructors: HashMap::new(),
+        };
+
+        // TODO: register further concrete operator constructors here.
+        factory.register("TumblingWindowAggregate", Box::new(TumblingWindowAggregateConstructor));
+        factory.register("ExpressionWatermark", Box::new(WatermarkGeneratorConstructor));
+        factory.register("KafkaSource", Box::new(KafkaSourceConstructor));
+
+        factory
+    }
+
+    /// Registers `constructor` under `name`, replacing any previous entry
+    /// with the same name.
+    pub fn register(&mut self, name: &str, constructor: Box<dyn OperatorConstructor>) {
+        self.constructors.insert(name.to_string(), constructor);
+    }
+
+    /// Looks up the constructor registered under `name` and instantiates the
+    /// operator from the serialized configuration in `payload`.
+    ///
+    /// # Errors
+    /// Returns an error when no constructor is registered under `name`, or
+    /// when the constructor rejects `payload`.
+    pub fn create_operator(&self, name: &str, payload: &[u8]) -> Result<ConstructedOperator> {
+        let ctor = self
+            .constructors
+            .get(name)
+            .ok_or_else(|| {
+                anyhow!(
+                    "FATAL: Operator '{}' not found in Factory Registry. \
+                     Ensure the worker is compiled with the correct plugins.",
+                    name
+                )
+            })?;
+
+        ctor.with_config(payload)
+    }
+}
diff --git a/src/runtime/streaming/format/mod.rs b/src/runtime/streaming/format/mod.rs
new file mode 100644
index 00000000..e69de29b
diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs
new file mode 100644
index 00000000..67cd8f70
--- /dev/null
+++ b/src/runtime/streaming/lib.rs
@@ -0,0 +1,44 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+//
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`).
+
+pub mod api;
+pub mod arrow;
+pub mod cluster;
+pub mod error;
+pub mod execution;
+pub mod factory;
+pub mod memory;
+pub mod network;
+pub mod operators;
+pub mod protocol;
+pub mod state;
+
+pub use api::{
+    ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
+};
+pub use cluster::{
+    CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy,
+    PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, TaskManager,
+    VertexId,
+};
+pub use error::RunError;
+pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
+pub use factory::{OperatorConstructor, OperatorFactory};
+pub use memory::{MemoryPool, MemoryTicket};
+pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
+pub use protocol::{
+    CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput, Watermark,
+    control_channel, merge_watermarks, watermark_strictly_advances,
+};
diff --git a/src/runtime/streaming/memory/mod.rs b/src/runtime/streaming/memory/mod.rs
new file mode 100644
index 00000000..93101fa2
--- /dev/null
+++ b/src/runtime/streaming/memory/mod.rs
@@ -0,0 +1,5 @@
+pub mod pool;
+pub mod ticket;
+
+pub use pool::MemoryPool;
+pub use ticket::MemoryTicket;
diff --git a/src/runtime/streaming/memory/pool.rs b/src/runtime/streaming/memory/pool.rs
new file mode 100644
index 00000000..98ba4cf3
--- /dev/null
+++ b/src/runtime/streaming/memory/pool.rs
@@ -0,0 +1,75 @@
+use parking_lot::Mutex;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+use tokio::sync::Notify;
+use tracing::{debug, warn};
+
+use super::ticket::MemoryTicket;
+
+/// Global memory pool that hands out byte budgets as [`MemoryTicket`]s and
+/// makes requesters wait when the budget is exhausted.
+#[derive(Debug)]
+pub struct MemoryPool {
+    /// Total budget of the pool in bytes.
+    max_bytes: usize,
+    /// Bytes currently handed out (metrics only; includes oversized grants).
+    used_bytes: AtomicUsize,
+    /// Bytes still available to regular (non-oversized) requests.
+    available_bytes: Mutex<usize>,
+    /// Wakes waiting requesters whenever memory is released.
+    notify: Notify,
+}
+
+impl MemoryPool {
+    /// Creates a pool with a budget of `max_bytes`.
+    pub fn new(max_bytes: usize) -> Arc<Self> {
+        Arc::new(Self {
+            max_bytes,
+            used_bytes: AtomicUsize::new(0),
+            available_bytes: Mutex::new(max_bytes),
+            notify: Notify::new(),
+        })
+    }
+
+    /// Returns `(used_bytes, max_bytes)`.
+    pub fn usage_metrics(&self) -> (usize, usize) {
+        (self.used_bytes.load(Ordering::Relaxed), self.max_bytes)
+    }
+
+    /// Reserves `bytes` from the pool, waiting until enough is available.
+    ///
+    /// A zero-byte request returns immediately. A request larger than the
+    /// whole pool can never be satisfied by waiting, so it is granted at once
+    /// (with a warning) without touching the available budget.
+    pub async fn request_memory(self: &Arc<Self>, bytes: usize) -> MemoryTicket {
+        if bytes == 0 {
+            return MemoryTicket::new(0, self.clone());
+        }
+
+        if bytes > self.max_bytes {
+            warn!(
+                "Requested memory ({} B) exceeds total pool size ({} B)! \
+                Permitting to avoid pipeline deadlock, but OOM risk is critical.",
+                bytes, self.max_bytes
+            );
+            self.used_bytes.fetch_add(bytes, Ordering::Relaxed);
+            return MemoryTicket::new(bytes, self.clone());
+        }
+
+        loop {
+            // Create the `Notified` future BEFORE checking the budget: it
+            // receives `notify_waiters()` wakeups from the moment it exists,
+            // so a release that happens between the check below and the
+            // `await` is not lost.
+            let notified = self.notify.notified();
+
+            {
+                let mut available = self.available_bytes.lock();
+                if *available >= bytes {
+                    *available -= bytes;
+                    self.used_bytes.fetch_add(bytes, Ordering::Relaxed);
+                    return MemoryTicket::new(bytes, self.clone());
+                }
+            }
+
+            debug!("Backpressure engaged: waiting for {} bytes to be freed...", bytes);
+            notified.await;
+        }
+    }
+
+    /// Returns `bytes` to the pool and wakes all waiting requesters.
+    pub(crate) fn release(&self, bytes: usize) {
+        if bytes == 0 {
+            return;
+        }
+
+        // Oversized grants (`bytes > max_bytes`) never debited
+        // `available_bytes`, so crediting them back would inflate the pool
+        // beyond `max_bytes`. Regular grants are always `<= max_bytes`.
+        if bytes <= self.max_bytes {
+            let mut available = self.available_bytes.lock();
+            *available += bytes;
+        }
+
+        self.used_bytes.fetch_sub(bytes, Ordering::Relaxed);
+        self.notify.notify_waiters();
+    }
+}
diff --git a/src/runtime/streaming/memory/ticket.rs b/src/runtime/streaming/memory/ticket.rs
new file mode 100644
index 00000000..ca1759b9
--- /dev/null
+++ b/src/runtime/streaming/memory/ticket.rs
@@ -0,0 +1,24 @@
+use std::sync::Arc;
+
+use super::pool::MemoryPool;
+
+/// Memory ticket: a guard for a byte reservation taken from a [`MemoryPool`].
+/// Deliberately not `Clone`: its lifetime corresponds to exactly one byte deduction.
+/// When one reservation is shared across several outputs, wrap it in `Arc<MemoryTicket>`.
+#[derive(Debug)]
+pub struct MemoryTicket {
+    /// Number of bytes this ticket reserved.
+    bytes: usize,
+    /// Pool the bytes are returned to on drop.
+    pool: Arc<MemoryPool>,
+}
+
+impl MemoryTicket {
+    /// Wraps an already-made reservation of `bytes` from `pool`.
+    pub(crate) fn new(bytes: usize, pool: Arc<MemoryPool>) -> Self {
+        Self { bytes, pool }
+    }
+}
+
+impl Drop for MemoryTicket {
+    /// Hands the reserved bytes back to the pool.
+    fn drop(&mut self) {
+        self.pool.release(self.bytes);
+    }
+}
diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs
new file mode 100644
index 00000000..0edc0d2e
--- /dev/null
+++ b/src/runtime/streaming/mod.rs
@@ -0,0 +1,45 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+//
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`).
+
+pub mod api;
+pub mod arrow;
+pub mod cluster;
+pub mod error;
+pub mod execution;
+pub mod factory;
+pub mod memory;
+pub mod network;
+pub mod operators;
+pub mod protocol;
+pub mod state;
+mod format;
+
+pub use api::{
+    ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
+};
+pub use cluster::{
+    CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy,
+    PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, TaskManager,
+    VertexId,
+};
+pub use error::RunError;
+pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
+pub use factory:: OperatorFactory;
+pub use memory::{MemoryPool, MemoryTicket};
+pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
+pub use protocol::{
+    CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput,
+    control_channel, merge_watermarks, watermark_strictly_advances,
+};
diff --git a/src/runtime/streaming/network/endpoint.rs b/src/runtime/streaming/network/endpoint.rs
new file mode 100644
index 00000000..3fc1fc57
--- /dev/null
+++ b/src/runtime/streaming/network/endpoint.rs
@@ -0,0 +1,59 @@
+use crate::runtime::streaming::protocol::event::StreamEvent;
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+use anyhow::{anyhow, Result};
+use std::pin::Pin;
+use tokio::sync::mpsc;
+use tokio_stream::Stream;
+use tracing::debug;
+
+// ========================================================================
+// 1. Network stub: placeholder for a future gRPC/TCP transport
+// ========================================================================
+
+/// Placeholder sender for an edge whose consumer runs on another machine.
+#[derive(Clone)]
+pub struct RemoteSenderStub {
+    /// Address of the remote consumer.
+    pub target_addr: String,
+}
+
+impl RemoteSenderStub {
+    /// Sends `_event` to the remote consumer.
+    ///
+    /// # Errors
+    /// Always fails for now: remote transport is not implemented. An error is
+    /// returned instead of panicking so that a graph containing a remote edge
+    /// fails the sending task cleanly.
+    pub async fn send_over_network(&self, _event: &StreamEvent) -> Result<()> {
+        Err(anyhow!("Remote network transport is not yet implemented"))
+    }
+}
+
+// ========================================================================
+// 2. Physical sender endpoint
+// ========================================================================
+
+/// Unified physical sender endpoint.
+/// Callers just invoke `send`; the variant decides whether the event stays
+/// in-process or goes over the network.
+#[derive(Clone)]
+pub enum PhysicalSender {
+    /// In-process channel: the whole `TrackedEvent` is moved to the receiver.
+    Local(mpsc::Sender<TrackedEvent>),
+    /// Remote transport: only the inner event is handed to the stub; the rest
+    /// of the `TrackedEvent` is dropped once `send` returns.
+    Remote(RemoteSenderStub),
+}
+
+impl PhysicalSender {
+    /// Delivers `tracked_event` through this endpoint.
+    ///
+    /// # Errors
+    /// Fails when the local channel is closed or the remote stub reports an error.
+    pub async fn send(&self, tracked_event: TrackedEvent) -> Result<()> {
+        match self {
+            PhysicalSender::Local(tx) => tx
+                .send(tracked_event)
+                .await
+                .map_err(|_| anyhow!("Local channel closed! Downstream task may have crashed.")),
+            PhysicalSender::Remote(stub) => {
+                stub.send_over_network(&tracked_event.event).await?;
+                debug!("Sent event over network, local memory ticket will be released.");
+                Ok(())
+            }
+        }
+    }
+}
+
+// ========================================================================
+// 3. Physical receiver endpoint
+// ========================================================================
+
+/// Type-erased, pinned, `Send` stream of incoming `TrackedEvent`s for one input.
+pub type BoxedEventStream = Pin<Box<dyn Stream<Item = TrackedEvent> + Send>>;
diff --git a/src/runtime/streaming/network/environment.rs b/src/runtime/streaming/network/environment.rs
new file mode 100644
index 00000000..789af2a8
--- /dev/null
+++ b/src/runtime/streaming/network/environment.rs
@@ -0,0 +1,82 @@
+use crate::runtime::streaming::cluster::graph::{
+    ExchangeMode, ExecutionGraph, SubtaskIndex, VertexId,
+};
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+use super::endpoint::{BoxedEventStream, PhysicalSender, RemoteSenderStub};
+use std::collections::HashMap;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tracing::info;
+
+/// Registry of the physical endpoints of every subtask, keyed by
+/// `(vertex, subtask)`.
+pub struct NetworkEnvironment {
+    /// Senders owned by each producing subtask, in edge order.
+    pub outboxes: HashMap<(VertexId, SubtaskIndex), Vec<PhysicalSender>>,
+    /// Receiver streams owned by each consuming subtask, in edge order.
+    pub inboxes: HashMap<(VertexId, SubtaskIndex), Vec<BoxedEventStream>>,
+}
+
+impl NetworkEnvironment {
+    /// Creates an environment with no endpoints.
+    pub fn new() -> Self {
+        Self {
+            outboxes: HashMap::new(),
+            inboxes: HashMap::new(),
+        }
+    }
+
+    /// Wires one endpoint pair per edge of `graph`.
+    ///
+    /// A `LocalThread` edge gets a bounded channel of `local_queue_size`
+    /// slots, with the sender registered for the source subtask and the
+    /// receiver for the destination subtask. A `RemoteNetwork` edge only gets
+    /// a sender stub on the source side; no inbox is created for it.
+    pub fn build_from_graph(graph: &ExecutionGraph, local_queue_size: usize) -> Self {
+        let mut outboxes: HashMap<(VertexId, SubtaskIndex), Vec<PhysicalSender>> = HashMap::new();
+        let mut inboxes: HashMap<(VertexId, SubtaskIndex), Vec<BoxedEventStream>> = HashMap::new();
+
+        for edge in &graph.edges {
+            let producer = (edge.src_vertex, edge.src_subtask);
+
+            let sender = match &edge.exchange_mode {
+                ExchangeMode::LocalThread => {
+                    let consumer = (edge.dst_vertex, edge.dst_subtask);
+                    let (tx, rx) = mpsc::channel::<TrackedEvent>(local_queue_size);
+                    let stream: BoxedEventStream = Box::pin(ReceiverStream::new(rx));
+                    inboxes.entry(consumer).or_default().push(stream);
+                    PhysicalSender::Local(tx)
+                }
+                ExchangeMode::RemoteNetwork { target_addr } => {
+                    PhysicalSender::Remote(RemoteSenderStub {
+                        target_addr: target_addr.clone(),
+                    })
+                }
+            };
+
+            outboxes.entry(producer).or_default().push(sender);
+        }
+
+        info!(
+            "Network Environment built. Wired {} connections.",
+            graph.edges.len()
+        );
+
+        Self { outboxes, inboxes }
+    }
+
+    /// Removes and returns the senders of the given subtask; empty when none
+    /// are registered (or they were already taken).
+    pub fn take_outboxes(
+        &mut self,
+        vertex_id: VertexId,
+        subtask_idx: SubtaskIndex,
+    ) -> Vec<PhysicalSender> {
+        let key = (vertex_id, subtask_idx);
+        self.outboxes.remove(&key).unwrap_or_default()
+    }
+
+    /// Removes and returns the receiver streams of the given subtask; empty
+    /// when none are registered (or they were already taken).
+    pub fn take_inboxes(
+        &mut self,
+        vertex_id: VertexId,
+        subtask_idx: SubtaskIndex,
+    ) -> Vec<BoxedEventStream> {
+        let key = (vertex_id, subtask_idx);
+        self.inboxes.remove(&key).unwrap_or_default()
+    }
+}
diff --git a/src/runtime/streaming/network/mod.rs b/src/runtime/streaming/network/mod.rs
new file mode 100644
index 00000000..259e0f12
--- /dev/null
+++ b/src/runtime/streaming/network/mod.rs
@@ -0,0 +1,5 @@
+pub mod endpoint;
+pub mod environment;
+
+pub use endpoint::{BoxedEventStream, PhysicalSender, RemoteSenderStub};
+pub use environment::NetworkEnvironment;
diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
new file mode 100644
index 00000000..c76111c5
--- /dev/null
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -0,0 +1,847 @@
+use anyhow::{anyhow, bail, Result};
+use arrow::compute::max_array;
+use arrow::row::{RowConverter, SortField};
+use arrow_array::builder::{
+    BinaryBuilder, TimestampNanosecondBuilder, UInt32Builder, UInt64Builder,
+};
+use arrow_array::cast::AsArray;
+use arrow_array::types::UInt64Type;
+use arrow_array::{
+    Array, ArrayRef, BinaryArray, BooleanArray, RecordBatch, StructArray, UInt32Array, UInt64Array,
+};
+use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit};
+use datafusion::common::{Result as DFResult, ScalarValue};
+use datafusion::physical_expr::aggregate::AggregateFunctionExpr;
+use datafusion::physical_plan::{Accumulator, PhysicalExpr};
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
+use datafusion_proto::protobuf::PhysicalExprNode;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use datafusion_proto::protobuf::physical_plan_node::PhysicalPlanType;
+use futures::StreamExt;
+use itertools::Itertools;
+use prost::Message;
+use std::collections::HashSet;
+use std::sync::LazyLock;
+use std::time::{Duration, Instant, SystemTime};
+use std::{collections::HashMap, mem, sync::Arc};
+use tracing::{debug, warn};
+use tracing_subscriber::Registry;
+use protocol::grpc::api::UpdatingAggregateOperator;
+// =========================================================================
+// Core protocol types of the new Actor framework (replacing the legacy ArrowOperator and Collector)
+// =========================================================================
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::arrow::decode_aggregate;
+use crate::runtime::streaming::operators::{Key, UpdatingCache};
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{to_nanos, CheckpointBarrier, FsSchema, Watermark, TIMESTAMP_FIELD, UPDATING_META_FIELD};
+use crate::sql::logical_planner::updating_meta_fields;
+
+#[derive(Debug, Copy, Clone)] // Per-row bookkeeping stored in `IncrementalState::Batch::data`.
+struct BatchData {
+    count: u64, // starts at 1; raised by `inc`, lowered by `dec`
+    generation: u64, // starts at the value given to `new`; bumped by every `inc` and `dec`
+}
+
+impl BatchData {
+    /// Starts tracking a row seen for the first time: count 1 at `generation`.
+    fn new(generation: u64) -> Self { BatchData { count: 1, generation } }
+
+    /// Records one more occurrence and advances the generation.
+    fn inc(&mut self) {
+        self.count += 1;
+        self.generation += 1;
+    }
+    /// Records one retraction (the count stops at zero) and advances the generation.
+    fn dec(&mut self) {
+        self.count = self.count.saturating_sub(1);
+        self.generation += 1;
+    }
+}
+
+#[derive(Debug)] // State kept for one aggregate of one group key.
+enum IncrementalState {
+    Sliding { // updated and retracted in place through `accumulator`
+        expr: Arc<AggregateFunctionExpr>,
+        accumulator: Box<dyn Accumulator>,
+    },
+    Batch { // keeps counted argument rows; `evaluate` feeds the live ones to a fresh accumulator
+        expr: Arc<AggregateFunctionExpr>,
+        data: HashMap<Key, BatchData>, // row-encoded argument values -> count / generation
+        row_converter: Arc<RowConverter>,
+        changed_values: HashSet<Key>, // rows touched by update/retract; read by `checkpoint_batch`
+    },
+}
+
+impl IncrementalState {
+    /// Feeds `batch` (one array per aggregate argument) into this state. A batch state counts
+    /// each row-encoded input row, starting new rows at `new_generation`, and marks it changed.
+    fn update_batch(&mut self, new_generation: u64, batch: &[ArrayRef]) -> DFResult<()> {
+        match self {
+            IncrementalState::Sliding { accumulator, .. } => accumulator.update_batch(batch),
+            IncrementalState::Batch { data, row_converter, changed_values, .. } => {
+                for row in row_converter.convert_columns(batch)?.iter() {
+                    let bytes: &[u8] = row.as_ref();
+                    // Reuse the stored key allocation when the row is already tracked.
+                    let key = match data.get_key_value(bytes) {
+                        Some((tracked, _)) => tracked.clone(),
+                        None => Key(Arc::new(bytes.to_vec())),
+                    };
+                    data.entry(key.clone()).and_modify(BatchData::inc).or_insert_with(|| BatchData::new(new_generation));
+                    changed_values.insert(key);
+                }
+                Ok(())
+            }
+        }
+    }
+
+    /// Removes the rows of `batch` from this state.
+    ///
+    /// A batch state lowers the count of a tracked row and marks it changed; retracting a row
+    /// that is unknown or already at count 0 only logs at debug level.
+    fn retract_batch(&mut self, batch: &[ArrayRef]) -> DFResult<()> {
+        match self {
+            IncrementalState::Sliding { accumulator, .. } => accumulator.retract_batch(batch),
+            IncrementalState::Batch { data, row_converter, changed_values, .. } => {
+                for row in row_converter.convert_columns(batch)?.iter() {
+                    let bytes: &[u8] = row.as_ref();
+                    let key = match data.get_key_value(bytes) {
+                        Some((tracked, seen)) if seen.count > 0 => tracked.clone(),
+                        Some(_) => { debug!("tried to retract value for key with count 0; implies append lost"); continue; }
+                        None => { debug!("tried to retract value for missing key: implies append lost"); continue; }
+                    };
+                    if let Some(seen) = data.get_mut(bytes) { seen.dec(); }
+                    changed_values.insert(key);
+                }
+                Ok(())
+            }
+        }
+    }
+
+    /// Produces the current aggregate value.
+    /// A batch state recomputes it from scratch: every tracked row whose count is positive is
+    /// decoded and fed once into a newly created accumulator.
+    fn evaluate(&mut self) -> DFResult<ScalarValue> {
+        match self {
+            IncrementalState::Sliding { accumulator, .. } => accumulator.evaluate(),
+            IncrementalState::Batch { expr, data, row_converter, .. } => {
+                let parser = row_converter.parser();
+                let live = data.iter().filter_map(|(row, seen)| (seen.count > 0).then(|| parser.parse(&row.0)));
+                let input = row_converter.convert_rows(live)?;
+                let mut fresh = expr.create_accumulator()?;
+                fresh.update_batch(&input)?;
+                fresh.evaluate_mut()
+            }
+        }
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)] // Selects which `IncrementalState` variant backs an aggregate.
+enum AccumulatorType {
+    Sliding, // state fields come from `sliding_state_fields`
+    Batch, // contributes no state fields
+}
+
+impl AccumulatorType {
+    /// State fields this kind of accumulator stores for `agg`; batch accumulators have none.
+    fn state_fields(&self, agg: &AggregateFunctionExpr) -> DFResult<Vec<FieldRef>> {
+        if let AccumulatorType::Batch = self { return Ok(Vec::new()); }
+        let fields = agg.sliding_state_fields()?;
+        Ok(fields)
+    }
+}
+
+#[derive(Debug)] // Static description of one aggregate expression handled by the operator.
+struct Aggregator {
+    func: Arc<AggregateFunctionExpr>,
+    accumulator_type: AccumulatorType,
+    row_converter: Arc<RowConverter>, // row encoding of the aggregate's argument columns
+    state_cols: Vec<usize>, // indices of this aggregate's state fields in the state schema
+}
+
+// =========================================================================
+// Core operator struct
+// =========================================================================
+
+pub struct IncrementalAggregatingFunc { // operator state for incremental (updating) aggregation
+    flush_interval: Duration,
+    metadata_expr: Arc<dyn PhysicalExpr>, // evaluated on the output batch to build the updating-meta column
+    aggregates: Vec<Aggregator>,
+    accumulators: UpdatingCache<Vec<IncrementalState>>, // per group key: one state per aggregate
+    updated_keys: HashMap<Key, Option<Vec<ScalarValue>>>, // keys touched since the last changelog -> values before the change (`None` for new keys)
+    
+    // [New] The operator owns its input metadata instead of having it passed in dynamically.
+    input_schema: Arc<FsSchema>,
+    has_routing_keys: bool,
+
+    sliding_state_schema: Arc<FsSchema>,
+    batch_state_schema: Arc<FsSchema>,
+    schema_without_metadata: Arc<Schema>,
+    /// Schema of the changelog batches sent downstream (matches the planner's `final_schema`).
+    final_output_schema: Arc<Schema>,
+    ttl: Duration,
+    key_converter: RowConverter,
+    new_generation: u64,
+}
+
+/// Empty key used for global aggregation (single partition, no routing key).
+static GLOBAL_KEY: LazyLock<Arc<Vec<u8>>> = LazyLock::new(|| Arc::new(Vec::new()));
+
+impl IncrementalAggregatingFunc {
+    fn update_batch(&mut self, key: &[u8], batch: &[Vec<ArrayRef>], idx: Option<usize>) -> DFResult<()> { // appends inputs to every aggregate state of `key`
+        self.accumulators
+            .modify_and_update(key, Instant::now(), |values| {
+                for (inputs, accs) in batch.iter().zip(values.iter_mut()) { // `batch[n]` holds the argument arrays of aggregate `n`
+                    let values = if let Some(idx) = idx {
+                        &inputs.iter().map(|c| c.slice(idx, 1)).collect() // only row `idx`
+                    } else {
+                        inputs // the whole batch
+                    };
+                    accs.update_batch(self.new_generation, values)?;
+                }
+                Ok(())
+            })
+            .expect("tried to update for non-existent key") // callers insert the key before updating
+    }
+
+    fn retract_batch(&mut self, key: &[u8], batch: &[Vec<ArrayRef>], idx: Option<usize>) -> DFResult<()> { // retracts inputs from every aggregate state of `key`
+        self.accumulators
+            .modify(key, |values| {
+                for (inputs, accs) in batch.iter().zip(values.iter_mut()) { // `batch[n]` holds the argument arrays of aggregate `n`
+                    let values = if let Some(idx) = idx {
+                        &inputs.iter().map(|c| c.slice(idx, 1)).collect() // only row `idx`
+                    } else {
+                        inputs // the whole batch
+                    };
+                    accs.retract_batch(values)?;
+                }
+                Ok::<(), datafusion::common::DataFusionError>(())
+            })
+            .expect("tried to retract state for non-existent key")?; // callers insert the key before retracting
+        Ok(())
+    }
+
+    /// Evaluates every aggregate of `key` in order; panics if `key` is not cached.
+    fn evaluate(&mut self, key: &[u8]) -> DFResult<Vec<ScalarValue>> {
+        let mut results = Vec::new();
+        for state in self.accumulators.get_mut(key).expect("tried to evaluate non-existent key").iter_mut() {
+            results.push(state.evaluate()?);
+        }
+        Ok(results)
+    }
+
+    /// Returns the `is_retract` flags stored in the batch's updating-meta column, or `None`
+    /// when the batch has no such column.
+    ///
+    /// Panics if the column is present but is not a struct with a boolean `is_retract` child.
+    fn get_retracts(batch: &RecordBatch) -> Option<&BooleanArray> {
+        let meta_col = batch.column_by_name(UPDATING_META_FIELD)?;
+        let meta_struct: &StructArray = meta_col
+            .as_any()
+            .downcast_ref()
+            .expect("_updating_meta must be StructArray");
+        let flags = meta_struct
+            .column_by_name("is_retract")
+            .expect("meta struct must have is_retract");
+        let flags: &BooleanArray = flags.as_any().downcast_ref().expect("is_retract must be BooleanArray");
+        Some(flags)
+    }
+
+    /// Creates one empty state per aggregate, in order; panics if a sliding accumulator cannot be built.
+    fn make_accumulators(&self) -> Vec<IncrementalState> {
+        let mut states = Vec::with_capacity(self.aggregates.len());
+        for agg in &self.aggregates {
+            let expr = agg.func.clone();
+            let state = match agg.accumulator_type {
+                AccumulatorType::Sliding => {
+                    IncrementalState::Sliding { expr, accumulator: agg.func.create_sliding_accumulator().unwrap() }
+                }
+                AccumulatorType::Batch => {
+                    IncrementalState::Batch { expr, data: Default::default(), row_converter: agg.row_converter.clone(), changed_values: Default::default() }
+                }
+            };
+            states.push(state);
+        }
+        states
+    }
+
+    /// Evaluates the argument expressions of every aggregate against `batch`.
+    ///
+    /// Outer index: aggregate; inner index: argument. Panics if an expression fails to evaluate.
+    fn compute_inputs(&self, batch: &RecordBatch) -> Vec<Vec<ArrayRef>> {
+        let num_rows = batch.num_rows();
+        let mut per_aggregate = Vec::with_capacity(self.aggregates.len());
+        for agg in &self.aggregates {
+            let args = agg.func.expressions();
+            per_aggregate.push(args.iter().map(|ex| ex.evaluate(batch).unwrap().into_array(num_rows).unwrap()).collect::<Vec<_>>());
+        }
+        per_aggregate
+    }
+
+    /// Applies `batch` to the single global group, which is stored under the empty key.
+    ///
+    /// The group's accumulators are created on first use. The first time the group is touched
+    /// since the last changelog, its current values are remembered in `updated_keys` (`None`
+    /// for a brand-new group) so they can be retracted later. Rows flagged `is_retract` are
+    /// retracted and all other rows appended; a batch without updating metadata is appended
+    /// as a whole.
+    ///
+    /// # Errors
+    /// Returns any accumulator error on every path, including the metadata-free one; nothing
+    /// in this method unwraps an accumulator result.
+    fn global_aggregate(&mut self, batch: &RecordBatch) -> Result<()> {
+        let key = GLOBAL_KEY.as_ref().as_slice();
+        let retracts = Self::get_retracts(batch);
+        let aggregate_input_cols = self.compute_inputs(batch);
+
+        // Lazily create the accumulators of the global group.
+        let first = !self.accumulators.contains_key(key);
+        if first {
+            self.accumulators.insert(
+                GLOBAL_KEY.clone(),
+                Instant::now(),
+                self.new_generation,
+                self.make_accumulators(),
+            );
+        }
+
+        if !self.updated_keys.contains_key(key) {
+            // A brand-new group has nothing to retract; otherwise snapshot the current values
+            // before this batch modifies them.
+            let previous = if first {
+                None
+            } else {
+                Some(self.evaluate(key)?)
+            };
+            self.updated_keys.insert(Key(GLOBAL_KEY.clone()), previous);
+        }
+
+        let Some(retracts) = retracts else {
+            // No updating metadata: every row of the batch is an append. Propagate failures
+            // with `?` like the per-row paths below.
+            self.update_batch(key, &aggregate_input_cols, None)?;
+            return Ok(());
+        };
+
+        for (i, is_retract) in retracts.iter().enumerate() {
+            // A null flag is treated as "not a retraction".
+            if is_retract.unwrap_or_default() {
+                self.retract_batch(key, &aggregate_input_cols, Some(i))?;
+            } else {
+                self.update_batch(key, &aggregate_input_cols, Some(i))?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Applies `batch` to per-key groups; keys are the row-encoded key columns of the input schema.
+    ///
+    /// Values of already-cached keys are snapshotted into `updated_keys` before their first
+    /// change (`None` for new keys). Key-encoding errors are returned instead of panicking.
+    fn keyed_aggregate(&mut self, batch: &RecordBatch) -> Result<()> {
+        let retracts = Self::get_retracts(batch);
+        let sort_columns: Vec<_> =
+            self.input_schema.sort_columns(batch, false).into_iter().map(|e| e.values).collect();
+        let keys = self.key_converter.convert_columns(&sort_columns)?;
+
+        for k in &keys {
+            if !self.updated_keys.contains_key(k.as_ref()) {
+                if let Some((key, accs)) = self.accumulators.get_mut_key_value(k.as_ref()) {
+                    self.updated_keys.insert(key, Some(accs.iter_mut().map(|s| s.evaluate()).collect::<DFResult<_>>()?));
+                } else {
+                    self.updated_keys.insert(Key(Arc::new(k.as_ref().to_vec())), None);
+                }
+            }
+        }
+
+        let aggregate_input_cols = self.compute_inputs(batch);
+        for (i, key) in keys.iter().enumerate() {
+            // Lazily create the accumulators of a key seen for the first time.
+            if !self.accumulators.contains_key(key.as_ref()) {
+                self.accumulators.insert(Arc::new(key.as_ref().to_vec()), Instant::now(), 0, self.make_accumulators());
+            }
+
+            // A null flag counts as "append", matching `global_aggregate`.
+            let retract = retracts.is_some_and(|r| r.is_valid(i) && r.value(i));
+            if retract {
+                self.retract_batch(key.as_ref(), &aggregate_input_cols, Some(i))?;
+            } else {
+                self.update_batch(key.as_ref(), &aggregate_input_cols, Some(i))?;
+            }
+        }
+        Ok(())
+    }
+
+    // =========================================================================
+    // State read/write logic (Checkpointing & Restore)
+    // =========================================================================
+
+    /// Builds the columns persisted for the sliding state of all keys in `updated_keys`: key
+    /// columns, then accumulator state columns, then a generation column (`None` if no key changed).
+    fn checkpoint_sliding(&mut self) -> DFResult<Option<Vec<ArrayRef>>> {
+        if self.updated_keys.is_empty() { return Ok(None); }
+        let mut states = vec![vec![]; self.sliding_state_schema.schema.fields.len()]; // one value list per state-schema column
+        let parser = self.key_converter.parser();
+        let mut generation_builder = UInt64Builder::with_capacity(self.updated_keys.len());
+
+        let mut cols = self.key_converter.convert_rows(self.updated_keys.keys().map(|k| {
+            let (accumulators, generation) = self.accumulators.get_mut_generation(k.0.as_ref()).unwrap(); // panics if an updated key is no longer cached
+            generation_builder.append_value(generation);
+            // NOTE(review): the fallback below calls `state()` again and unwraps it, so it still panics if `state()` keeps failing — confirm the intent.
+            for (state, agg) in accumulators.iter_mut().zip(self.aggregates.iter()) {
+                let IncrementalState::Sliding { expr, accumulator } = state else { continue; };
+                let state = accumulator.state().unwrap_or_else(|_| {
+                    let state = accumulator.state().unwrap();
+                    *accumulator = expr.create_sliding_accumulator().unwrap();
+                    let states: Vec<_> = state.iter().map(|s| s.to_array()).try_collect().unwrap();
+                    accumulator.merge_batch(&states).unwrap();
+                    state
+                });
+                for (idx, v) in agg.state_cols.iter().zip(state.into_iter()) {
+                    states[*idx].push(v);
+                }
+            }
+            parser.parse(k.0.as_ref())
+        }))?;
+
+        cols.extend(states.into_iter().skip(cols.len()).map(|c| ScalarValue::iter_to_array(c).unwrap())); // skip the slots that belong to the key columns
+
+        let generations = generation_builder.finish();
+        self.new_generation = self.new_generation.max(max_array::<UInt64Type, _>(&generations).unwrap()); // non-empty: one generation per updated key
+        cols.push(Arc::new(generations));
+
+        Ok(Some(cols))
+    }
+
+    /// Builds the columns persisted for batch-style aggregates (key columns, aggregate index, argument
+    /// row, count, write time, generation); `None` when nothing needs to be written.
+    fn checkpoint_batch(&mut self) -> DFResult<Option<Vec<ArrayRef>>> {
+        if self.aggregates.iter().all(|agg| agg.accumulator_type == AccumulatorType::Sliding) { return Ok(None); }
+        if self.updated_keys.is_empty() { return Ok(None); }
+
+        let size = self.updated_keys.len();
+        let mut rows = Vec::with_capacity(size);
+        let mut accumulator_builder = UInt32Builder::with_capacity(size);
+        let mut args_row_builder = BinaryBuilder::with_capacity(size, size * 4);
+        let mut count_builder = UInt64Builder::with_capacity(size);
+        let mut timestamp_builder = TimestampNanosecondBuilder::with_capacity(size);
+        let mut generation_builder = UInt64Builder::with_capacity(size);
+        let now = to_nanos(SystemTime::now()) as i64;
+        let parser = self.key_converter.parser();
+
+        // One output row per (group key, aggregate index, changed argument row).
+        for k in self.updated_keys.keys() {
+            let row = parser.parse(&k.0);
+            for (i, state) in self.accumulators.get_mut(k.0.as_ref()).unwrap().iter_mut().enumerate() {
+                let IncrementalState::Batch { data, changed_values, .. } = state else { continue; };
+                for vk in changed_values.iter() {
+                    if let Some(count) = data.get(vk) {
+                        accumulator_builder.append_value(i as u32);
+                        args_row_builder.append_value(&*vk.0);
+                        count_builder.append_value(count.count);
+                        generation_builder.append_value(count.generation);
+                        timestamp_builder.append_value(now);
+                        rows.push(row.to_owned())
+                    }
+                }
+                data.retain(|_, v| v.count > 0);
+            }
+        }
+        // No value changed (e.g. only retractions of unknown rows): skip the write, `max_array` has no maximum.
+        if rows.is_empty() { return Ok(None); }
+
+        let mut cols = self.key_converter.convert_rows(rows.into_iter())?;
+        cols.push(Arc::new(accumulator_builder.finish()));
+        cols.push(Arc::new(args_row_builder.finish()));
+        cols.push(Arc::new(count_builder.finish()));
+        cols.push(Arc::new(timestamp_builder.finish()));
+        let generations = generation_builder.finish();
+        self.new_generation = self.new_generation.max(max_array::<UInt64Type, _>(&generations).unwrap_or_default());
+        cols.push(Arc::new(generations));
+        Ok(Some(cols))
+    }
+
+    /// Rebuilds the states of `key` from row `i` of the persisted state columns and caches them.
+    fn restore_sliding(&mut self, key: &[u8], now: Instant, i: usize, aggregate_states: &Vec<Vec<ArrayRef>>, generation: u64) -> Result<()> {
+        let mut restored = self.make_accumulators();
+        for (state, state_cols) in restored.iter_mut().zip(aggregate_states.iter()) {
+            let IncrementalState::Sliding { accumulator, .. } = state else { continue };
+            accumulator.merge_batch(&state_cols.iter().map(|c| c.slice(i, 1)).collect::<Vec<_>>())?;
+        }
+        self.accumulators.insert(Arc::new(key.to_vec()), now, generation, restored);
+        Ok(())
+    }
+
+    /// Restores operator state from the persisted tables "a" (sliding) and "b" (batch).
+    ///
+    /// Table "a" recreates one accumulator set per key. Rows of table "b" are then merged into
+    /// those sets, keeping the entry with the highest generation per argument row. Finally,
+    /// keys whose last aggregate evaluates to NULL are dropped and zero-count entries pruned.
+    async fn initialize(&mut self, ctx: &mut TaskContext) -> Result<()> {
+        let mut tm = ctx.table_manager_guard().await?;
+
+        let table = tm
+            .get_uncached_key_value_view("a")
+            .await
+            .map_err(|e| anyhow!("state table a: {e}"))?;
+        let mut stream = Box::pin(table.get_all());
+        let key_converter = RowConverter::new(self.sliding_state_schema.sort_fields(false))?;
+
+        while let Some(batch) = stream.next().await {
+            let batch = batch?;
+            if batch.num_rows() == 0 { continue; }
+
+            let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect();
+            let aggregate_states = self.aggregates.iter().map(|agg| {
+                agg.state_cols.iter().map(|idx| batch.column(*idx).clone()).collect_vec()
+            }).collect_vec();
+            let generations = batch.columns().last().unwrap().as_primitive::<UInt64Type>();
+            let now = Instant::now();
+
+            if key_cols.is_empty() {
+                self.restore_sliding(GLOBAL_KEY.as_ref().as_slice(), now, 0, &aggregate_states, generations.value(0))?;
+            } else {
+                let key_rows = key_converter.convert_columns(&key_cols)?;
+                for (i, row) in key_rows.iter().enumerate() {
+                    if generations.is_null(i) {
+                        bail!("generation is null at row {i}");
+                    }
+                    let generation = generations.value(i);
+                    self.restore_sliding(
+                        row.as_ref(),
+                        now,
+                        i,
+                        &aggregate_states,
+                        generation,
+                    )?;
+                }
+            }
+        }
+        drop(stream);
+
+        // Restore the batch accumulators.
+        if self.aggregates.iter().any(|agg| agg.accumulator_type == AccumulatorType::Batch) {
+            let table = tm
+                .get_uncached_key_value_view("b")
+                .await
+                .map_err(|e| anyhow!("state table b: {e}"))?;
+            let mut stream = Box::pin(table.get_all());
+            while let Some(batch) = stream.next().await {
+                let batch = batch?;
+                if batch.num_rows() == 0 { continue; }
+
+                let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect();
+                let count_column = batch.column(self.batch_state_schema.schema.index_of("count").unwrap()).as_any().downcast_ref::<UInt64Array>().unwrap();
+                let accumulator_column = batch.column(self.batch_state_schema.schema.index_of("accumulator").unwrap()).as_any().downcast_ref::<UInt32Array>().unwrap();
+                let args_row_column = batch.column(self.batch_state_schema.schema.index_of("args_row").unwrap()).as_any().downcast_ref::<BinaryArray>().unwrap();
+                let generations = batch.columns().last().unwrap().as_primitive::<UInt64Type>();
+
+                // Global aggregation has no key columns: every row of the batch belongs to the
+                // empty key, so the key list must be as long as the batch.
+                let key_rows = if key_cols.is_empty() {
+                    vec![GLOBAL_KEY.as_ref().clone(); batch.num_rows()]
+                } else {
+                    self.key_converter
+                        .convert_columns(&key_cols)?
+                        .iter()
+                        .map(|k| k.as_ref().to_vec())
+                        .collect()
+                };
+
+                for (i, row) in key_rows.iter().enumerate() {
+                    let Some(accumulators) = self.accumulators.get_mut(row.as_ref()) else { continue; };
+                    let count = count_column.value(i);
+                    let accumulator_idx = accumulator_column.value(i) as usize;
+                    let args_row = args_row_column.value(i);
+                    let generation = generations.value(i);
+
+                    // `get_mut` also rejects an out-of-range accumulator index instead of panicking.
+                    let Some(IncrementalState::Batch { data, .. }) = accumulators.get_mut(accumulator_idx) else { bail!("expected batch accumulator"); };
+                    if let Some(existing) = data.get_mut(args_row) {
+                        if existing.generation < generation { existing.count = count; existing.generation = generation; }
+                    } else {
+                        data.insert(Key(Arc::new(args_row.to_vec())), BatchData { count, generation });
+                    }
+                }
+            }
+        }
+
+        let mut deleted_keys = vec![];
+        for (k, v) in self.accumulators.iter_mut() {
+            let is_deleted = v.last_mut().unwrap().evaluate()?.is_null();
+            if is_deleted { deleted_keys.push(k.clone()); } else {
+                for is in v {
+                    if let IncrementalState::Batch { data, .. } = is { data.retain(|_, v| v.count > 0); }
+                }
+            }
+        }
+        for k in deleted_keys { self.accumulators.remove(&k.0); }
+        Ok(())
+    }
+
+    /// Core logic: collects every change accumulated in memory since the previous call and
+    /// turns it into a changelog batch of retractions and appends (`None` if nothing changed).
+    /// Arrow and DataFusion failures while assembling the batch are returned as errors.
+    fn generate_changelog(&mut self) -> Result<Option<RecordBatch>> {
+        let mut output_keys = Vec::with_capacity(self.updated_keys.len() * 2);
+        let mut output_values = vec![Vec::with_capacity(self.updated_keys.len() * 2); self.aggregates.len()];
+        let mut is_retracts = Vec::with_capacity(self.updated_keys.len() * 2);
+
+        // Take the pending changes.
+        let (updated_keys, updated_values): (Vec<_>, Vec<_>) = mem::take(&mut self.updated_keys).into_iter().unzip();
+        let mut deleted_keys = vec![];
+
+        for (k, retract) in updated_keys.iter().zip(updated_values.into_iter()) {
+            let append = self.evaluate(&k.0)?;
+
+            if let Some(v) = retract {
+                // Skip the key when nothing changed (the last aggregate is not compared).
+                if v.iter().zip(append.iter()).take(v.len() - 1).all(|(a, b)| a == b) { continue; }
+                is_retracts.push(true);
+                output_keys.push(k.clone());
+                for (out, val) in output_values.iter_mut().zip(v) { out.push(val); }
+            }
+
+            if !append.last().unwrap().is_null() {
+                is_retracts.push(false);
+                output_keys.push(k.clone());
+                for (out, val) in output_values.iter_mut().zip(append) { out.push(val); }
+            } else {
+                deleted_keys.push(k);
+            }
+        }
+
+        for k in deleted_keys { self.accumulators.remove(&k.0); }
+
+        // Retract keys whose TTL has expired.
+        let mut ttld_keys = vec![];
+        for (k, mut v) in self.accumulators.time_out(Instant::now()) {
+            is_retracts.push(true);
+            ttld_keys.push(k);
+            for (out, val) in output_values.iter_mut().zip(v.iter_mut().map(|s| s.evaluate())) { out.push(val?); }
+        }
+
+        if output_keys.is_empty() && ttld_keys.is_empty() { return Ok(None); }
+
+        let row_parser = self.key_converter.parser();
+        let mut result_cols = self.key_converter.convert_rows(
+            output_keys.iter().map(|k| row_parser.parse(k.0.as_slice()))
+            .chain(ttld_keys.iter().map(|k| row_parser.parse(k.as_slice())))
+        )?;
+
+        for acc in output_values.into_iter() { result_cols.push(ScalarValue::iter_to_array(acc)?); }
+
+        let record_batch = RecordBatch::try_new(self.schema_without_metadata.clone(), result_cols)?;
+        let metadata = self.metadata_expr.evaluate(&record_batch)?.into_array(record_batch.num_rows())?;
+        let metadata = set_retract_metadata(metadata, Arc::new(BooleanArray::from(is_retracts)));
+        let mut final_batch = record_batch.columns().to_vec();
+        final_batch.push(metadata);
+
+        // The batch must match the schema that is finally sent downstream.
+        Ok(Some(RecordBatch::try_new(
+            self.final_output_schema.clone(),
+            final_batch,
+        )?))
+    }
+}
+
+/// Rebuilds the updating-meta struct with `is_retract` as first column and the original second column.
+fn set_retract_metadata(metadata: ArrayRef, is_retract: Arc<BooleanArray>) -> ArrayRef {
+    let columns: Vec<ArrayRef> = vec![is_retract, metadata.as_struct().column(1).clone()];
+    Arc::new(StructArray::new(updating_meta_fields(), columns, None))
+}
+
+// =========================================================================
+// Implementation of the new Actor MessageOperator interface
+// =========================================================================
+
+#[async_trait::async_trait]
+impl MessageOperator for IncrementalAggregatingFunc {
+    fn name(&self) -> &str {
+        "UpdatingAggregatingFunc"
+    }
+
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
+        // Restore persisted state before any data is processed.
+        self.initialize(ctx).await
+    }
+
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        // Incoming data only updates the in-memory state; nothing is emitted yet.
+        if self.has_routing_keys {
+            self.keyed_aggregate(&batch)?;
+        } else {
+            self.global_aggregate(&batch)?;
+        }
+        Ok(vec![])
+    }
+
+    async fn process_watermark(
+        &mut self,
+        _watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        // Flush when the watermark advances (whether to flush here can be decided per use case).
+        if let Some(changelog_batch) = self.generate_changelog()? {
+            // `Forward` sends the batch straight to the downstream operator.
+            Ok(vec![StreamOutput::Forward(changelog_batch)])
+        } else {
+            Ok(vec![])
+        }
+    }
+
+    /// Persists the state of the keys changed since the last changelog to tables "a" and "b".
+    /// Pending changes stay in `updated_keys`, so the next watermark still emits them.
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        ctx: &mut TaskContext,
+    ) -> Result<()> {
+        let mut tm = ctx.table_manager_guard().await?;
+
+        if let Some(sliding) = self.checkpoint_sliding()? {
+            let table = tm
+                .get_uncached_key_value_view("a")
+                .await
+                .map_err(|e| anyhow!("state table a: {e}"))?;
+            table
+                .insert_batch(sliding)
+                .await
+                .map_err(|e| anyhow!("insert_batch a: {e}"))?;
+        }
+
+        if let Some(batch) = self.checkpoint_batch()? {
+            let table = tm
+                .get_uncached_key_value_view("b")
+                .await
+                .map_err(|e| anyhow!("state table b: {e}"))?;
+            table
+                .insert_batch(batch)
+                .await
+                .map_err(|e| anyhow!("insert_batch b: {e}"))?;
+        }
+
+        // `updated_keys` is intentionally left untouched: it also holds the retraction values
+        // `generate_changelog` needs, and clearing it here would silently drop that output.
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+}
+
+// =========================================================================
+// Operator constructor; keeps the external API compatible
+// =========================================================================
+
+/// Builds [`IncrementalAggregatingFunc`] operators from their protobuf configuration.
+pub struct IncrementalAggregatingConstructor;
+
+impl IncrementalAggregatingConstructor {
+    /// Decodes `config` into a ready-to-run operator.
+    ///
+    /// # Errors
+    /// Fails when a required proto field is missing or when any schema, expression or
+    /// aggregate cannot be decoded; per-aggregate failures are reported, not skipped.
+    pub fn with_config(&self, config: UpdatingAggregateOperator, registry: Arc<Registry>) -> anyhow::Result<IncrementalAggregatingFunc> {
+        let ttl = Duration::from_micros(if config.ttl_micros == 0 {
+            warn!("ttl was not set for updating aggregate");
+            24 * 60 * 60 * 1000 * 1000
+        } else {
+            config.ttl_micros
+        });
+
+        let input_schema: FsSchema = config.input_schema.ok_or_else(|| anyhow!("missing input_schema"))?.try_into()?;
+        let final_schema: FsSchema = config.final_schema.ok_or_else(|| anyhow!("missing final_schema"))?.try_into()?;
+        let mut schema_without_metadata = SchemaBuilder::from((*final_schema.schema).clone());
+        schema_without_metadata.remove(final_schema.schema.index_of(UPDATING_META_FIELD)?);
+
+        let metadata_expr = parse_physical_expr(
+            &PhysicalExprNode::decode(&mut config.metadata_expr.as_slice())?,
+            registry.as_ref(),
+            &input_schema.schema,
+            &DefaultPhysicalExtensionCodec {},
+        )?;
+
+        let aggregate_exec = PhysicalPlanNode::decode(&mut config.aggregate_exec.as_ref())?;
+        let Some(PhysicalPlanType::Aggregate(aggregate_exec)) = aggregate_exec.physical_plan_type else { bail!("invalid proto"); };
+
+        let mut sliding_state_fields = input_schema.routing_keys()
+            .map(|v| v.iter().map(|idx| input_schema.schema.field(*idx).clone()).collect_vec())
+            .unwrap_or_default();
+
+        let has_routing_keys = input_schema.routing_keys().is_some();
+        let mut batch_state_fields = sliding_state_fields.clone();
+        let key_fields = (0..sliding_state_fields.len()).collect_vec();
+
+        let aggregates: Vec<_> = aggregate_exec.aggr_expr.iter().zip(aggregate_exec.aggr_expr_name.iter())
+            .map(|(expr, name)| Ok(decode_aggregate(&input_schema.schema, name, expr, registry.as_ref())?))
+            .map_ok(|agg| {
+                let retract = match agg.create_sliding_accumulator() { Ok(s) => s.supports_retract_batch(), _ => false };
+                (agg, if retract { AccumulatorType::Sliding } else { AccumulatorType::Batch })
+            })
+            .map_ok(|(agg, t)| {
+                let row_converter = Arc::new(RowConverter::new(
+                    agg.expressions().iter().map(|ex| Ok(SortField::new(ex.data_type(&input_schema.schema)?))).collect::<DFResult<_>>()?
+                )?);
+                let fields = t.state_fields(&agg)?;
+                let field_names = fields.iter().map(|f| f.name().to_string()).collect_vec();
+                sliding_state_fields.extend(fields.into_iter().map(|f| (*f).clone()));
+                Ok::<_, anyhow::Error>((agg, t, row_converter, field_names))
+            })
+            // Merge the nested result so an inner error fails the build; `flatten_ok` would drop it.
+            .map(|res| res.and_then(|inner| inner))
+            .collect::<Result<_>>()?;
+
+        let state_schema = Schema::new(sliding_state_fields);
+        let aggregates = aggregates.into_iter().map(|(agg, t, row_converter, field_names)| Aggregator {
+            func: agg, accumulator_type: t, row_converter,
+            state_cols: field_names.iter().map(|f| state_schema.index_of(f).unwrap()).collect(),
+        }).collect();
+
+        // The last state field is renamed to the timestamp field name.
+        let mut state_fields = state_schema.fields().to_vec();
+        let timestamp_field = state_fields.pop().ok_or_else(|| anyhow!("aggregate state schema has no fields"))?;
+        state_fields.push(Arc::new((*timestamp_field).clone().with_name(TIMESTAMP_FIELD)));
+        let sliding_state_schema = Arc::new(FsSchema::from_schema_keys(Arc::new(Schema::new(state_fields)), key_fields.clone())?);
+
+        batch_state_fields.push(Field::new("accumulator", DataType::UInt32, false));
+        batch_state_fields.push(Field::new("args_row", DataType::Binary, false));
+        batch_state_fields.push(Field::new("count", DataType::UInt64, false));
+        batch_state_fields.push(Field::new(TIMESTAMP_FIELD, DataType::Timestamp(TimeUnit::Nanosecond, None), false));
+        let timestamp_index = batch_state_fields.len() - 1;
+        let mut storage_key_fields = key_fields.clone();
+        storage_key_fields.push(storage_key_fields.len());
+        storage_key_fields.push(storage_key_fields.len());
+
+        let batch_state_schema = Arc::new(FsSchema::new(
+            Arc::new(Schema::new(batch_state_fields)),
+            timestamp_index,
+            Some(storage_key_fields),
+            Some(key_fields),
+        ));
+
+        Ok(IncrementalAggregatingFunc {
+            flush_interval: Duration::from_micros(config.flush_interval_micros),
+            metadata_expr,
+            ttl,
+            aggregates,
+            accumulators: UpdatingCache::with_time_to_idle(ttl),
+            schema_without_metadata: Arc::new(schema_without_metadata.finish()),
+            final_output_schema: final_schema.schema.clone(),
+            updated_keys: Default::default(),
+            input_schema: Arc::new(input_schema.clone()),
+            has_routing_keys,
+            key_converter: RowConverter::new(input_schema.sort_fields(false))?,
+            sliding_state_schema,
+            batch_state_schema,
+            new_generation: 0,
+        })
+    }
+}
\ No newline at end of file
diff --git a/src/runtime/streaming/operators/grouping/mod.rs b/src/runtime/streaming/operators/grouping/mod.rs
new file mode 100644
index 00000000..fb2ae7b1
--- /dev/null
+++ b/src/runtime/streaming/operators/grouping/mod.rs
@@ -0,0 +1,5 @@
+pub mod incremental_aggregate;
+pub mod updating_cache;
+
+pub use incremental_aggregate::{IncrementalAggregatingConstructor, IncrementalAggregatingFunc};
+pub use updating_cache::{Key, UpdatingCache};
diff --git a/src/runtime/streaming/operators/grouping/updating_cache.rs b/src/runtime/streaming/operators/grouping/updating_cache.rs
new file mode 100644
index 00000000..b6fbcc99
--- /dev/null
+++ b/src/runtime/streaming/operators/grouping/updating_cache.rs
@@ -0,0 +1,498 @@
+//! Per-key incremental state cache: LRU order plus idle TTL; used by [`super::incremental_aggregate`] and others.
+
+use std::borrow::Borrow;
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+#[derive(Hash, Eq, PartialEq, Clone, Debug)]
+pub struct Key(pub Arc<Vec<u8>>);
+// `Borrow<[u8]>` lets a `HashMap<Key, _>` be looked up with a plain byte slice.
+impl Borrow<[u8]> for Key {
+    fn borrow(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+struct Node<T> {
+    key: Key, // same key that `map` uses to point at this slot
+    data: Option<T>, // `None` after the value was taken by `remove` / `pop_front`
+    generation: u64, // compared by `insert`; bumped by `modify` / `modify_and_update`
+    updated: Instant, // last-touch time that `TTLIter` compares against the TTL
+    prev: Option<usize>, // slot index of the neighbour closer to `head`
+    next: Option<usize>, // slot index of the neighbour closer to `tail`
+}
+
+/// Cache stored in a slot array (`nodes`) threaded by a doubly linked LRU list; supports generation-guarded inserts and TTL eviction.
+pub struct UpdatingCache<T: Send + Sync> {
+    map: HashMap<Key, usize>, // key -> index into `nodes`
+    nodes: Vec<Node<T>>, // slot arena; freed slots stay in place and are reused
+    free_list: Vec<usize>, // indices of freed slots
+    head: Option<usize>, // front of the list: first entry `time_out` considers
+    tail: Option<usize>, // back of the list: most recently inserted/updated entry
+    ttl: Duration, // idle time after which an entry is yielded by `time_out`
+}
+
+/// Draining iterator returned by `UpdatingCache::time_out`: pops entries from the front of the list while they are expired.
+struct TTLIter<'a, T: Send + Sync> {
+    now: Instant,
+    cache: &'a mut UpdatingCache<T>,
+}
+
+impl<T: Send + Sync> Iterator for TTLIter<'_, T> {
+    type Item = (Arc<Vec<u8>>, T);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let oldest = &self.cache.nodes[self.cache.head?];
+        let idle_for = self.now.saturating_duration_since(oldest.updated);
+        // Stop at the first entry still inside its TTL; nothing behind it is examined.
+        if idle_for < self.cache.ttl {
+            return None;
+        }
+        let (key, value) = self.cache.pop_front()?;
+        Some((key.0, value))
+    }
+}
+
+impl<T: Send + Sync> UpdatingCache<T> {
+    /// Creates an empty cache; entries are yielded by `time_out` once idle for `ttl`.
+    pub fn with_time_to_idle(ttl: Duration) -> Self {
+        Self {
+            map: HashMap::new(),
+            nodes: Vec::new(),
+            free_list: Vec::new(),
+            head: None,
+            tail: None,
+            ttl,
+        }
+    }
+    /// Inserts `value`; for an existing key, replaces it only when `generation` is strictly newer.
+    pub fn insert(&mut self, key: Arc<Vec<u8>>, now: Instant, generation: u64, value: T) {
+        let key_obj = Key(key);
+        // Existing key: stale or equal generations are ignored, newer ones refresh the entry.
+        if let Some(&idx) = self.map.get(&key_obj) {
+            if self.nodes[idx].generation >= generation {
+                return;
+            }
+            self.nodes[idx].data = Some(value);
+            self.nodes[idx].generation = generation;
+            self.nodes[idx].updated = now;
+            self.move_to_tail(idx);
+            return;
+        }
+        // New key: take a slot (recycled if one is free) and append it at the tail.
+        let idx = self.allocate_node(key_obj.clone(), value, generation, now);
+        self.map.insert(key_obj, idx);
+        self.push_back(idx);
+    }
+    /// Returns an iterator that removes and yields, from the head, entries idle for at least the TTL as of `now`.
+    pub fn time_out(&mut self, now: Instant) -> impl Iterator<Item = (Arc<Vec<u8>>, T)> + '_ {
+        TTLIter { now, cache: self }
+    }
+    /// Iterates over every live entry in slot order (not list order); freed slots are skipped.
+    pub fn iter_mut(&mut self) -> impl Iterator<Item = (&Key, &mut T)> {
+        self.nodes.iter_mut().filter_map(|n| {
+            if let Some(data) = &mut n.data {
+                Some((&n.key, data))
+            } else {
+                None
+            }
+        })
+    }
+    /// Applies `f` to the value under `key`; on success bumps the generation, sets `updated = now` and moves it to the tail. `None` if absent.
+    pub fn modify_and_update<E, F: Fn(&mut T) -> Result<(), E>>(
+        &mut self,
+        key: &[u8],
+        now: Instant,
+        f: F,
+    ) -> Option<Result<(), E>> {
+        let &idx = self.map.get(key)?;
+        let node = &mut self.nodes[idx];
+        // Slots referenced by `map` always hold a value, so the `unwrap` cannot fail.
+        if let Err(e) = f(node.data.as_mut().unwrap()) {
+            return Some(Err(e));
+        }
+        // Only a successful update bumps the generation and refreshes the list position.
+        node.generation += 1;
+        node.updated = now;
+        self.move_to_tail(idx);
+
+        Some(Ok(()))
+    }
+    /// Applies `f` to the value under `key` without touching its timestamp or list position. `None` if absent.
+    pub fn modify<E, F: Fn(&mut T) -> Result<(), E>>(
+        &mut self,
+        key: &[u8],
+        f: F,
+    ) -> Option<Result<(), E>> {
+        let &idx = self.map.get(key)?;
+        let node = &mut self.nodes[idx];
+        // The generation is bumped before `f` runs, i.e. even when `f` returns an error.
+        node.generation += 1;
+
+        if let Err(e) = f(node.data.as_mut().unwrap()) {
+            return Some(Err(e));
+        }
+
+        Some(Ok(()))
+    }
+    /// Reports whether `k` is currently cached.
+    pub fn contains_key(&self, k: &[u8]) -> bool {
+        self.map.contains_key(k)
+    }
+    /// Mutable access to the value under `key`; does not change timestamp, generation or order.
+    pub fn get_mut(&mut self, key: &[u8]) -> Option<&mut T> {
+        let &idx = self.map.get(key)?;
+        self.nodes[idx].data.as_mut()
+    }
+    /// Like `get_mut`, but also returns the entry's current generation.
+    pub fn get_mut_generation(&mut self, key: &[u8]) -> Option<(&mut T, u64)> {
+        let &idx = self.map.get(key)?;
+        let node = &mut self.nodes[idx];
+        Some((node.data.as_mut().unwrap(), node.generation))
+    }
+    /// Like `get_mut`, but also returns a clone of the stored `Key`.
+    pub fn get_mut_key_value(&mut self, key: &[u8]) -> Option<(Key, &mut T)> {
+        let &idx = self.map.get(key)?;
+        let node = &mut self.nodes[idx];
+        Some((node.key.clone(), node.data.as_mut().unwrap()))
+    }
+    /// Removes `key`, unlinks its slot from the list, recycles the slot and returns the value.
+    pub fn remove(&mut self, key: &[u8]) -> Option<T> {
+        // `HashMap::remove` hands back the slot index, so a single lookup is enough.
+        let idx = self.map.remove(key)?;
+        self.remove_node(idx);
+        let data = self.nodes[idx].data.take().unwrap();
+        self.free_list.push(idx);
+
+        Some(data)
+    }
+    /// Detaches the head entry, drops it from `map`, recycles its slot and returns key and value.
+    fn pop_front(&mut self) -> Option<(Key, T)> {
+        let head_idx = self.head?;
+        self.remove_node(head_idx);
+
+        let node = &mut self.nodes[head_idx];
+        self.map.remove(&node.key);
+
+        let key = node.key.clone();
+        let data = node.data.take().unwrap();
+        self.free_list.push(head_idx);
+
+        Some((key, data))
+    }
+    /// Stores a new, unlinked node and returns its slot index.
+    fn allocate_node(&mut self, key: Key, data: T, generation: u64, updated: Instant) -> usize {
+        let new_node = Node {
+            key,
+            data: Some(data),
+            generation,
+            updated,
+            prev: None,
+            next: None,
+        };
+        // Prefer a recycled slot; grow the arena only when none is free.
+        if let Some(idx) = self.free_list.pop() {
+            self.nodes[idx] = new_node;
+            idx
+        } else {
+            let idx = self.nodes.len();
+            self.nodes.push(new_node);
+            idx
+        }
+    }
+    /// Links slot `index` in as the new tail (it becomes the head too when the list is empty).
+    fn push_back(&mut self, index: usize) {
+        self.nodes[index].prev = self.tail;
+        self.nodes[index].next = None;
+
+        if let Some(tail_idx) = self.tail {
+            self.nodes[tail_idx].next = Some(index);
+        } else {
+            self.head = Some(index);
+        }
+        self.tail = Some(index);
+    }
+    /// Unlinks slot `index` from the list, patching its neighbours and `head` / `tail`.
+    fn remove_node(&mut self, index: usize) {
+        let prev = self.nodes[index].prev;
+        let next = self.nodes[index].next;
+        // No predecessor means the node was the head; no successor means it was the tail.
+        if let Some(p) = prev {
+            self.nodes[p].next = next;
+        } else {
+            self.head = next;
+        }
+
+        if let Some(n) = next {
+            self.nodes[n].prev = prev;
+        } else {
+            self.tail = prev;
+        }
+
+        self.nodes[index].prev = None;
+        self.nodes[index].next = None;
+    }
+    /// Moves slot `index` to the tail of the list; a no-op when it already is the tail.
+    fn move_to_tail(&mut self, index: usize) {
+        if self.tail == Some(index) {
+            return;
+        }
+        self.remove_node(index);
+        self.push_back(index);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_insert_and_modify() {
+        let mut cache = UpdatingCache::with_time_to_idle(Duration::from_secs(60));
+
+        let key = Arc::new(vec![1, 2, 3]);
+        let now = Instant::now();
+        cache.insert(key.clone(), now, 1, 42);
+
+        assert!(
+            cache
+                .modify(key.as_ref(), |x| {
+                    *x = 43;
+                    Ok::<(), ()>(())
+                })
+                .unwrap()
+                .is_ok()
+        );
+
+        assert_eq!(*cache.get_mut(key.as_ref()).unwrap(), 43);
+    }
+
+    #[test]
+    fn test_timeout() {
+        let mut cache = UpdatingCache::with_time_to_idle(Duration::from_millis(10));
+
+        let key1 = Arc::new(vec![1]);
+        let key2 = Arc::new(vec![2]);
+
+        let start = Instant::now();
+        cache.insert(key1.clone(), start, 1, "value1");
+        cache.insert(key2.clone(), start + Duration::from_millis(5), 2, "value2");
+        // At +11ms key1 has idled 11ms (>= the 10ms TTL) while key2 has idled only 6ms.
+        let check_time = start + Duration::from_millis(11);
+        let timed_out: Vec<_> = cache.time_out(check_time).collect();
+        assert_eq!(timed_out.len(), 1);
+        assert_eq!(&*timed_out[0].0, &*key1);
+
+        assert!(cache.contains_key(key2.as_ref()));
+        assert!(!cache.contains_key(key1.as_ref()));
+    }
+
+    #[test]
+    fn test_update_keeps_alive() {
+        let mut cache = UpdatingCache::with_time_to_idle(Duration::from_millis(10));
+
+        let key = Arc::new(vec![1]);
+        let start = Instant::now();
+        cache.insert(key.clone(), start, 1, "value");
+
+        let update_time = start + Duration::from_millis(5);
+        cache
+            .modify_and_update(key.as_ref(), update_time, |_| Ok::<(), ()>(()))
+            .unwrap()
+            .unwrap();
+        // Refreshed at +5ms, so at +11ms the entry has idled only 6ms.
+        let check_time = start + Duration::from_millis(11);
+        let timed_out: Vec<_> = cache.time_out(check_time).collect();
+        assert!(timed_out.is_empty());
+        assert!(cache.contains_key(key.as_ref()));
+    }
+
+    #[test]
+    fn test_lru_eviction_order_matches_insertion() {
+        let mut cache = UpdatingCache::with_time_to_idle(Duration::from_secs(60));
+        let key1 = Arc::new(vec![1]);
+        let key2 = Arc::new(vec![2]);
+        let key3 = Arc::new(vec![3]);
+        let now = Instant::now();
+        cache.insert(key1.clone(), now, 1, 1);
+        cache.insert(key2.clone(), now, 2, 2);
+        cache.insert(key3.clone(), now, 3, 3);
+
+        let evicted: Vec<_> = cache.time_out(now + Duration::from_secs(61)).collect();
+        assert_eq!(evicted.len(), 3);
+        assert_eq!(evicted[0].0.as_ref(), &*key1);
+        assert_eq!(evicted[1].0.as_ref(), &*key2);
+        assert_eq!(evicted[2].0.as_ref(), &*key3);
+    }
+
+    #[test]
+    fn test_remove_middle_key() {
+        let mut cache = UpdatingCache::with_time_to_idle(Duration::from_secs(60));
+        let key1 = Arc::new(vec![1]);
+        let key2 = Arc::new(vec![2]);
+        let key3 = Arc::new(vec![3]);
+        let now = Instant::now();
+        cache.insert(key1.clone(), now, 1, 1);
+        cache.insert(key2.clone(), now, 2, 2);
+        cache.insert(key3.clone(), now, 3, 3);
+
+        assert_eq!(cache.remove(&[2]).unwrap(), 2);
+        assert!(cache.contains_key(&[1]));
+        assert!(!cache.contains_key(&[2]));
+        assert!(cache.contains_key(&[3]));
+
+        let evicted: Vec<_> = cache.time_out(now + Duration::from_secs(61)).collect();
+        assert_eq!(evicted.len(), 2);
+        assert_eq!(evicted[0].0.as_ref(), &*key1);
+        assert_eq!(evicted[1].0.as_ref(), &*key3);
+    }
+
+    #[test]
+    fn reorder_with_update() {
+        let mut cache = UpdatingCache::<i32>::with_time_to_idle(Duration::from_secs(10));
+        let key1 = Arc::new(vec![1]);
+        let key2 = Arc::new(vec![2]);
+        let now = Instant::now();
+
+        cache.insert(key1.clone(), now, 1, 100);
+        cache.insert(key2.clone(), now, 2, 200);
+
+        cache
+            .modify_and_update(&[1], now + Duration::from_secs(1), |v| {
+                *v += 1;
+                Ok::<(), ()>(())
+            })
+            .unwrap()
+            .unwrap();
+        // NOTE(review): this test makes no assertions; it only checks that the calls do not panic.
+        let _ = cache.modify_and_update(&[1], now + Duration::from_secs(2), |v| {
+            *v += 1;
+            Ok::<(), ()>(())
+        });
+    }
+
+    #[test]
+    fn test_ttl_eviction() {
+        let ttl = Duration::from_millis(100);
+        let mut cache = UpdatingCache::with_time_to_idle(ttl);
+        let now = Instant::now();
+        let key1 = Arc::new(vec![1]);
+        let key2 = Arc::new(vec![2]);
+        cache.insert(key1.clone(), now, 1, 10);
+        cache.insert(key2.clone(), now, 2, 20);
+
+        cache
+            .modify_and_update(&[2], now + Duration::from_millis(50), |v| {
+                *v += 1;
+                Ok::<(), ()>(())
+            })
+            .unwrap()
+            .unwrap();
+        // At +150ms both entries are at or past the 100ms TTL; key2 was refreshed later, so it comes second.
+        let now2 = now + Duration::from_millis(150);
+        let evicted: Vec<_> = cache.time_out(now2).collect();
+        assert_eq!(evicted.len(), 2);
+        assert_eq!(evicted[0].0.as_ref(), &[1]);
+        assert_eq!(evicted[1].0.as_ref(), &[2]);
+    }
+
+    #[test]
+    fn test_remove_key() {
+        let ttl = Duration::from_millis(100);
+        let mut cache = UpdatingCache::with_time_to_idle(ttl);
+        let now = Instant::now();
+        let key = Arc::new(vec![1]);
+        cache.insert(key.clone(), now, 1, 42);
+        let value = cache.remove(&[1]).unwrap();
+        assert_eq!(value, 42);
+        assert!(!cache.contains_key(&[1]));
+        let evicted: Vec<_> = cache.time_out(now + Duration::from_millis(200)).collect();
+        assert!(evicted.is_empty());
+    }
+
+    #[test]
+    fn test_update_order() {
+        let ttl = Duration::from_secs(1);
+        let mut cache = UpdatingCache::with_time_to_idle(ttl);
+        let base = Instant::now();
+        let key_a = Arc::new(vec![b'A']);
+        let key_b = Arc::new(vec![b'B']);
+        let key_c = Arc::new(vec![b'C']);
+        cache.insert(key_a.clone(), base, 1, 1);
+        cache.insert(key_b.clone(), base, 2, 2);
+        cache.insert(key_c.clone(), base, 3, 3);
+
+        let t_update = base + Duration::from_millis(500);
+        cache
+            .modify_and_update(b"B", t_update, |v| {
+                *v += 10;
+                Ok::<(), ()>(())
+            })
+            .unwrap()
+            .unwrap();
+        // The update moved B to the tail, so it is evicted after A and C.
+        let t_eviction = base + Duration::from_secs(2);
+        let evicted: Vec<_> = cache.time_out(t_eviction).collect();
+        assert_eq!(evicted.len(), 3);
+        assert_eq!(evicted[0].0.as_ref(), b"A");
+        assert_eq!(evicted[1].0.as_ref(), b"C");
+        assert_eq!(evicted[2].0.as_ref(), b"B");
+    }
+
+    #[test]
+    fn test_get_mut_key_value() {
+        let ttl = Duration::from_secs(1);
+        let mut cache = UpdatingCache::with_time_to_idle(ttl);
+        let base = Instant::now();
+        let key = Arc::new(vec![1, 2, 3]);
+        cache.insert(key.clone(), base, 1, 42);
+        if let Some((k, v)) = cache.get_mut_key_value(&[1, 2, 3]) {
+            *v += 1;
+            assert_eq!(*v, 43);
+            assert_eq!(k.0.as_ref(), &[1, 2, 3]);
+        } else {
+            panic!("Key not found");
+        }
+    }
+
+    #[test]
+    fn test_modify_error() {
+        let ttl = Duration::from_secs(1);
+        let mut cache = UpdatingCache::with_time_to_idle(ttl);
+        let base = Instant::now();
+        let key = Arc::new(vec![1]);
+        cache.insert(key.clone(), base, 1, 42);
+        let res = cache.modify(&[1], |_v| Err("error"));
+        assert!(res.unwrap().is_err());
+    }
+
+    #[test]
+    fn test_drop_cleanup() {
+        let ttl = Duration::from_secs(1);
+        {
+            let mut cache = UpdatingCache::with_time_to_idle(ttl);
+            let base = Instant::now();
+            for i in 0..10 {
+                cache.insert(Arc::new(vec![i as u8]), base, i as u64, i);
+            }
+        }
+    }
+
+    #[test]
+    fn test_generational_replacement() {
+        let ttl = Duration::from_secs(1);
+        let mut cache = UpdatingCache::with_time_to_idle(ttl);
+        let base = Instant::now();
+        let key = Arc::new(vec![1]);
+
+        cache.insert(key.clone(), base, 1, "first");
+        assert_eq!(cache.get_mut(&[1]), Some(&mut "first"));
+
+        cache.insert(key.clone(), base, 2, "second");
+        assert_eq!(cache.get_mut(&[1]), Some(&mut "second"));
+        // An insert carrying an older generation must not overwrite the newer value.
+        cache.insert(key.clone(), base, 1, "third");
+        assert_eq!(cache.get_mut(&[1]), Some(&mut "second"));
+    }
+}
diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs
new file mode 100644
index 00000000..dbde4d8e
--- /dev/null
+++ b/src/runtime/streaming/operators/joins/join_instance.rs
@@ -0,0 +1,351 @@
+//! Instant JOIN: two channels feed a DataFusion physical plan; when the watermark advances, instances are closed and their results drained.
+
+use anyhow::{anyhow, Result};
+use arrow::compute::{max, min, partition, sort_to_indices, take};
+use arrow_array::{RecordBatch, TimestampNanosecondArray};
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::execution::SendableRecordBatchStream;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_proto::physical_plan::AsExecutionPlan;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use futures::StreamExt;
+use prost::Message;
+use std::collections::BTreeMap;
+use std::sync::{Arc, RwLock};
+use std::time::SystemTime;
+use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
+use tracing::warn;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use async_trait::async_trait;
+use tracing_subscriber::Registry;
+use protocol::grpc::api::JoinOperator;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
+use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum JoinSide {
+    Left,
+    Right,
+}
+// `name()` doubles as the state-table name passed to `get_expiring_time_key_table`.
+impl JoinSide {
+    fn name(&self) -> &'static str {
+        match self {
+            JoinSide::Left => "left",
+            JoinSide::Right => "right",
+        }
+    }
+}
+
+/// One running instant-JOIN execution: holds the input channels; when the window closes, the channels are closed and the `SendableRecordBatchStream` is drained.
+struct JoinInstance {
+    left_tx: UnboundedSender<RecordBatch>,
+    right_tx: UnboundedSender<RecordBatch>,
+    result_stream: SendableRecordBatchStream,
+}
+
+impl JoinInstance {
+    fn feed_data(&self, batch: RecordBatch, side: JoinSide) -> Result<()> {
+        match side { // forward `batch` on the sender that belongs to `side`
+            JoinSide::Left => self
+                .left_tx
+                .send(batch)
+                .map_err(|e| anyhow!("Left send err: {}", e)),
+            JoinSide::Right => self
+                .right_tx
+                .send(batch)
+                .map_err(|e| anyhow!("Right send err: {}", e)),
+        }
+    }
+
+    /// Closes the input streams so the execution plan can finish, then pulls all JOIN results.
+    async fn close_and_drain(self) -> Result<Vec<RecordBatch>> {
+        drop(self.left_tx);
+        drop(self.right_tx);
+        // Dropping the senders closes the input channels (assuming no other sender clones exist).
+        let mut outputs = Vec::new();
+        let mut stream = self.result_stream;
+        // Drain the stream; the first failed batch aborts the loop and is returned as the error.
+        while let Some(result_batch) = stream.next().await {
+            outputs.push(result_batch?);
+        }
+
+        Ok(outputs)
+    }
+}
+
+pub struct InstantJoinOperator {
+    left_input_schema: FsSchemaRef,
+    right_input_schema: FsSchemaRef,
+    active_joins: BTreeMap<SystemTime, JoinInstance>, // one open instance per distinct event timestamp
+    left_receiver_hook: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>, // shared with the plan codec
+    right_receiver_hook: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>, // shared with the plan codec
+    join_exec_plan: Arc<dyn ExecutionPlan>, // reset and executed again for every new instance
+}
+
+impl InstantJoinOperator {
+    /// Returns the input schema of the given side.
+    fn input_schema(&self, side: JoinSide) -> FsSchemaRef {
+        match side {
+            JoinSide::Left => self.left_input_schema.clone(),
+            JoinSide::Right => self.right_input_schema.clone(),
+        }
+    }
+    /// Returns the join instance keyed by `time`, starting a fresh plan execution if none exists yet.
+    fn get_or_create_join_instance(&mut self, time: SystemTime) -> Result<&mut JoinInstance> {
+        use std::collections::btree_map::Entry;
+
+        if let Entry::Vacant(e) = self.active_joins.entry(time) {
+            let (left_tx, left_rx) = unbounded_channel();
+            let (right_tx, right_rx) = unbounded_channel();
+            // Park the new receivers in the shared hooks before the plan is reset and executed.
+            *self.left_receiver_hook.write().unwrap() = Some(left_rx);
+            *self.right_receiver_hook.write().unwrap() = Some(right_rx);
+
+            self.join_exec_plan.reset().map_err(|e| anyhow!("{e}"))?;
+            let result_stream = self
+                .join_exec_plan
+                .execute(0, SessionContext::new().task_ctx())
+                .map_err(|e| anyhow!("{e}"))?;
+
+            e.insert(JoinInstance {
+                left_tx,
+                right_tx,
+                result_stream,
+            });
+        }
+
+        self.active_joins
+            .get_mut(&time)
+            .ok_or_else(|| anyhow!("join instance missing after insert"))
+    }
+    /// Stores `batch` in the state table for `side`, then routes its rows to one join instance per distinct timestamp.
+    async fn process_side_internal(
+        &mut self,
+        side: JoinSide,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<()> {
+        if batch.num_rows() == 0 {
+            return Ok(());
+        }
+
+        let time_column = batch
+            .column(self.input_schema(side).timestamp_index)
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| anyhow!("Missing timestamp column"))?;
+
+        let min_timestamp = min(time_column).ok_or_else(|| anyhow!("empty timestamp column"))?;
+        let max_timestamp = max(time_column).ok_or_else(|| anyhow!("empty timestamp column"))?;
+        // The whole batch is dropped as soon as its earliest row is behind the watermark.
+        if let Some(watermark) = ctx.last_present_watermark() {
+            if watermark > from_nanos(min_timestamp as u128) {
+                warn!("Dropped late batch from {:?} before watermark", side);
+                return Ok(());
+            }
+        }
+
+        let wm = ctx.last_present_watermark();
+        {
+            let mut tm = ctx.table_manager_guard().await?;
+            let table = tm
+                .get_expiring_time_key_table(side.name(), wm)
+                .await
+                .map_err(|e| anyhow!("{e:?}"))?;
+            table.insert(from_nanos(max_timestamp as u128), batch.clone());
+        }
+
+        let unkeyed_batch = self.input_schema(side).unkeyed_batch(&batch)?;
+        // Fast path: every row shares one timestamp, so no sorting is needed.
+        if max_timestamp == min_timestamp {
+            let time_key = from_nanos(max_timestamp as u128);
+            let join_instance = self.get_or_create_join_instance(time_key)?;
+            join_instance.feed_data(unkeyed_batch, side)?;
+            return Ok(());
+        }
+        // Otherwise sort by timestamp and split the batch into runs of equal timestamps.
+        let indices = sort_to_indices(time_column, None, None)?;
+        // Arrow kernel failures are returned to the caller instead of panicking the task.
+        let columns = unkeyed_batch
+            .columns()
+            .iter()
+            .map(|c| take(c, &indices, None))
+            .collect::<Result<Vec<_>, _>>()?;
+        let sorted_batch = RecordBatch::try_new(unkeyed_batch.schema(), columns)?;
+        let sorted_timestamps = take(time_column, &indices, None)?;
+        let typed_timestamps = sorted_timestamps
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| anyhow!("sorted timestamps downcast failed"))?;
+        let ranges = partition(std::slice::from_ref(&sorted_timestamps))?.ranges();
+
+        for range in ranges {
+            let sub_batch = sorted_batch.slice(range.start, range.end - range.start);
+            let time_key = from_nanos(typed_timestamps.value(range.start) as u128);
+            let join_instance = self.get_or_create_join_instance(time_key)?;
+            join_instance.feed_data(sub_batch, side)?;
+        }
+
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl MessageOperator for InstantJoinOperator {
+    fn name(&self) -> &str {
+        "InstantJoin"
+    }
+    /// Replays the batches held in the "left" and "right" state tables through `process_side_internal`.
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
+        let watermark = ctx.last_present_watermark();
+        // NOTE(review): `process_side_internal` inserts each replayed batch into the table again — confirm this cannot duplicate state.
+        let left_batches: Vec<_> = {
+            let mut tm = ctx.table_manager_guard().await?;
+            let left_table = tm
+                .get_expiring_time_key_table("left", watermark)
+                .await
+                .map_err(|e| anyhow!("{e:?}"))?;
+            left_table
+                .all_batches_for_watermark(watermark)
+                .flat_map(|(_time, batches)| batches.iter().cloned())
+                .collect()
+        };
+        for batch in left_batches {
+            self.process_side_internal(JoinSide::Left, batch, ctx).await?;
+        }
+
+        let right_batches: Vec<_> = {
+            let mut tm = ctx.table_manager_guard().await?;
+            let right_table = tm
+                .get_expiring_time_key_table("right", watermark)
+                .await
+                .map_err(|e| anyhow!("{e:?}"))?;
+            right_table
+                .all_batches_for_watermark(watermark)
+                .flat_map(|(_time, batches)| batches.iter().cloned())
+                .collect()
+        };
+        for batch in right_batches {
+            self.process_side_internal(JoinSide::Right, batch, ctx).await?;
+        }
+
+        Ok(())
+    }
+    /// Buffers `batch` for the left (input 0) or right (any other input) side; this call itself returns no output.
+    async fn process_data(
+        &mut self,
+        input_idx: usize,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let side = if input_idx == 0 {
+            JoinSide::Left
+        } else {
+            JoinSide::Right
+        };
+        self.process_side_internal(side, batch, ctx).await?;
+        Ok(vec![])
+    }
+    /// On an event-time watermark, closes every join instance keyed strictly before it and forwards the drained batches.
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let Watermark::EventTime(current_time) = watermark else {
+            return Ok(vec![]);
+        };
+        let mut emit_outputs = Vec::new();
+        // `BTreeMap` keys iterate in ascending order, so the scan can stop at the first non-expired key.
+        let mut expired_times = Vec::new();
+        for key in self.active_joins.keys() {
+            if *key < current_time {
+                expired_times.push(*key);
+            } else {
+                break;
+            }
+        }
+
+        for time_key in expired_times {
+            if let Some(join_instance) = self.active_joins.remove(&time_key) {
+                let joined_batches = join_instance.close_and_drain().await?;
+                for batch in joined_batches {
+                    emit_outputs.push(StreamOutput::Forward(batch));
+                }
+            }
+        }
+
+        Ok(emit_outputs)
+    }
+    /// Flushes the "left" and "right" state tables, passing the last present watermark.
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        ctx: &mut TaskContext,
+    ) -> Result<()> {
+        let watermark = ctx.last_present_watermark();
+        let mut tm = ctx.table_manager_guard().await?;
+        tm.get_expiring_time_key_table("left", watermark)
+            .await
+            .map_err(|e| anyhow!("{e:?}"))?
+            .flush(watermark)
+            .await
+            .map_err(|e| anyhow!("{e:?}"))?;
+        tm.get_expiring_time_key_table("right", watermark)
+            .await
+            .map_err(|e| anyhow!("{e:?}"))?
+            .flush(watermark)
+            .await
+            .map_err(|e| anyhow!("{e:?}"))?;
+        Ok(())
+    }
+}
+
+/// Configuration entry point similar to `OperatorConstructor`; returns an [`InstantJoinOperator`] (which implements
+/// [`MessageOperator`]) rather than a `ConstructedOperator` (which only wraps an `ArrowOperator`).
+pub struct InstantJoinConstructor;
+
+impl InstantJoinConstructor {
+    /// Decodes the join plan and both input schemas from `config` and builds the operator; errors if a schema is absent.
+    pub fn with_config(
+        &self,
+        config: JoinOperator,
+        registry: Arc<Registry>,
+    ) -> anyhow::Result<InstantJoinOperator> {
+        let join_physical_plan_node = PhysicalPlanNode::decode(&mut config.join_plan.as_slice())?;
+        // A missing schema is reported as an error instead of panicking on `unwrap()`.
+        let left = config.left_schema.ok_or_else(|| anyhow!("missing left_schema"))?;
+        let left_input_schema: Arc<FsSchema> = Arc::new(left.try_into()?);
+        let right = config.right_schema.ok_or_else(|| anyhow!("missing right_schema"))?;
+        let right_input_schema: Arc<FsSchema> = Arc::new(right.try_into()?);
+        let left_receiver_hook = Arc::new(RwLock::new(None));
+        let right_receiver_hook = Arc::new(RwLock::new(None));
+
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::LockedJoinStream {
+                left: left_receiver_hook.clone(),
+                right: right_receiver_hook.clone(),
+            },
+        };
+        // NOTE(review): this file imports `Registry` from `tracing_subscriber`; confirm that is the type the plan decoder expects here.
+        let join_exec_plan = join_physical_plan_node.try_into_physical_plan(
+            registry.as_ref(),
+            &RuntimeEnvBuilder::new().build()?,
+            &codec,
+        )?;
+
+        Ok(InstantJoinOperator {
+            left_input_schema,
+            right_input_schema,
+            active_joins: BTreeMap::new(),
+            left_receiver_hook,
+            right_receiver_hook,
+            join_exec_plan,
+        })
+    }
+}
diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs
new file mode 100644
index 00000000..d115ac10
--- /dev/null
+++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs
@@ -0,0 +1,261 @@
+//! Key-time join with TTL: per-side state tables, with each pair computed by a DataFusion physical plan.
+
+use anyhow::{anyhow, Result};
+use arrow::compute::concat_batches;
+use arrow_array::RecordBatch;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode};
+use futures::StreamExt;
+use prost::Message;
+use std::sync::{Arc, RwLock};
+use std::time::Duration;
+use tracing::warn;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use async_trait::async_trait;
+use tracing_subscriber::Registry;
+use protocol::grpc::api::JoinOperator;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark};
+use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum JoinSide {
+    Left, // `process_data` maps input index 0 to this side
+    Right, // `process_data` maps every other input index to this side
+}
+
+impl JoinSide {
+    fn table_name(&self) -> &'static str { // state-table name used for this side in `process_side`
+        match self {
+            JoinSide::Left => "left",
+            JoinSide::Right => "right",
+        }
+    }
+}
+
+pub struct JoinWithExpirationOperator {
+    /// Mirrors the config/table registration; the actual TTL comes from the state-table config.
+    #[allow(dead_code)]
+    left_expiration: Duration,
+    #[allow(dead_code)]
+    right_expiration: Duration,
+    left_input_schema: FsSchema, // `process_side` calls `unkeyed_batch` on it for left input
+    right_input_schema: FsSchema, // `process_side` calls `unkeyed_batch` on it for right input
+    left_schema: FsSchema, // `schema_without_keys()` of the left input; used to concat stored batches
+    right_schema: FsSchema, // `schema_without_keys()` of the right input; used to concat stored batches
+    left_passer: Arc<RwLock<Option<RecordBatch>>>, // slot `compute_pair` fills with the left batch
+    right_passer: Arc<RwLock<Option<RecordBatch>>>, // slot `compute_pair` fills with the right batch
+    join_exec_plan: Arc<dyn ExecutionPlan>, // decoded from `config.join_plan`; executed per pair
+}
+
+impl JoinWithExpirationOperator {
+    /// Runs the DataFusion physical plan and returns the join batches (no Collector involved).
+    async fn compute_pair(
+        &mut self,
+        left: RecordBatch,
+        right: RecordBatch,
+    ) -> Result<Vec<RecordBatch>> {
+        if left.num_rows() == 0 || right.num_rows() == 0 {
+            return Ok(vec![]);
+        }
+        // Place both inputs in the shared slots before the plan is executed.
+        {
+            self.left_passer.write().unwrap().replace(left);
+            self.right_passer.write().unwrap().replace(right);
+        }
+        // Reset the plan, then execute partition 0 with a fresh session task context.
+        self.join_exec_plan
+            .reset()
+            .map_err(|e| anyhow!("join plan reset: {e}"))?;
+        let mut result_stream = self
+            .join_exec_plan
+            .execute(0, SessionContext::new().task_ctx())
+            .map_err(|e| anyhow!("join execute: {e}"))?;
+        // Drain the stream; the first batch error aborts the whole call.
+        let mut outputs = Vec::new();
+        while let Some(batch) = result_stream.next().await {
+            outputs.push(batch.map_err(|e| anyhow!("{e}"))?);
+        }
+
+        Ok(outputs)
+    }
+    /// Inserts `batch` into its side's table and joins it with matches from the opposite table.
+    async fn process_side(
+        &mut self,
+        side: JoinSide,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let watermark = ctx.last_present_watermark();
+        let target_name = side.table_name();
+        let opposite_name = match side {
+            JoinSide::Left => JoinSide::Right.table_name(),
+            JoinSide::Right => JoinSide::Left.table_name(),
+        };
+
+        let mut tm = ctx.table_manager_guard().await?;
+        // Store the incoming batch in this side's key-time table.
+        let inserted_rows = {
+            let target_table = tm
+                .get_key_time_table(target_name, watermark)
+                .await
+                .map_err(|e| anyhow!("{e:?}"))?;
+            target_table
+                .insert(batch.clone())
+                .await
+                .map_err(|e| anyhow!("{e:?}"))?
+        };
+
+        let opposite_table = tm
+            .get_key_time_table(opposite_name, watermark)
+            .await
+            .map_err(|e| anyhow!("{e:?}"))?;
+        // Query the opposite table once per value returned by `insert`, keeping every hit.
+        let mut opposite_batches = Vec::new();
+        for row in inserted_rows {
+            if let Some(matched_batch) = opposite_table
+                .get_batch(row.as_ref())
+                .map_err(|e| anyhow!("{e:?}"))?
+            {
+                opposite_batches.push(matched_batch.clone());
+            }
+        }
+        // Release the table-manager guard before the join plan runs.
+        drop(tm);
+
+        if opposite_batches.is_empty() {
+            return Ok(vec![]);
+        }
+
+        let opposite_schema = match side {
+            JoinSide::Left => &self.right_schema.schema,
+            JoinSide::Right => &self.left_schema.schema,
+        };
+        let combined_opposite_batch = concat_batches(opposite_schema, opposite_batches.iter())?;
+        // NOTE(review): `unkeyed_batch` presumably drops the key columns from `batch` — confirm.
+        let unkeyed_target_batch = match side {
+            JoinSide::Left => self.left_input_schema.unkeyed_batch(&batch)?,
+            JoinSide::Right => self.right_input_schema.unkeyed_batch(&batch)?,
+        };
+        // The plan always receives (left, right), whichever side the new batch arrived on.
+        let (left_input, right_input) = match side {
+            JoinSide::Left => (unkeyed_target_batch, combined_opposite_batch),
+            JoinSide::Right => (combined_opposite_batch, unkeyed_target_batch),
+        };
+
+        let result_batches = self.compute_pair(left_input, right_input).await?;
+
+        Ok(result_batches
+            .into_iter()
+            .map(StreamOutput::Forward)
+            .collect())
+    }
+}
+
+#[async_trait]
+impl MessageOperator for JoinWithExpirationOperator {
+    fn name(&self) -> &str {
+        "JoinWithExpiration"
+    }
+    /// No start-up work is performed.
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
+        Ok(())
+    }
+    /// Treats input index 0 as the left side and every other index as the right side.
+    async fn process_data(
+        &mut self,
+        input_idx: usize,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let side = if input_idx == 0 {
+            JoinSide::Left
+        } else {
+            JoinSide::Right
+        };
+        self.process_side(side, batch, ctx).await
+    }
+    /// Watermarks produce no output here.
+    async fn process_watermark(
+        &mut self,
+        _watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+    /// Checkpoint hook; performs no work (see the note in the body).
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        _ctx: &mut TaskContext,
+    ) -> Result<()> {
+        // `KeyTimeView` has no `flush`; `insert` already routes writes through `state_tx` into the
+        // backend flush pipeline, matching the worker-side `JoinWithExpiration` (no `handle_checkpoint`).
+        Ok(())
+    }
+    /// Emits nothing on close.
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+}
+
+/// Builds a [`JoinWithExpirationOperator`] (a [`MessageOperator`]) from its configuration.
+/// Note: `ConstructedOperator` only wraps `ArrowOperator`, so that type is not returned here.
+pub struct JoinWithExpirationConstructor;
+
+impl JoinWithExpirationConstructor {
+    pub fn with_config(
+        &self,
+        config: JoinOperator,
+        registry: Arc<Registry>,
+    ) -> anyhow::Result<JoinWithExpirationOperator> {
+        let left_passer = Arc::new(RwLock::new(None));
+        let right_passer = Arc::new(RwLock::new(None));
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::LockedJoinPair {
+                left: left_passer.clone(),
+                right: right_passer.clone(),
+            },
+        };
+
+        let join_physical_plan_node = PhysicalPlanNode::decode(&mut config.join_plan.as_slice())?;
+        let join_exec_plan = join_physical_plan_node.try_into_physical_plan(
+            registry.as_ref(), // NOTE(review): `Registry` is imported from `tracing_subscriber` — confirm
+            &RuntimeEnvBuilder::new().build()?,
+            &codec,
+        )?;
+
+        // A missing schema or TTL is a malformed config: return an error instead of panicking.
+        let left = config.left_schema.ok_or_else(|| anyhow!("join config missing left_schema"))?;
+        let left_input_schema: FsSchema = left.try_into()?;
+        let right = config.right_schema.ok_or_else(|| anyhow!("join config missing right_schema"))?;
+        let right_input_schema: FsSchema = right.try_into()?;
+        let left_schema = left_input_schema.schema_without_keys()?;
+        let right_schema = right_input_schema.schema_without_keys()?;
+        let ttl_micros = config
+            .ttl_micros
+            .ok_or_else(|| anyhow!("ttl must be set for non-instant join"))?;
+        let mut ttl = Duration::from_micros(ttl_micros);
+
+        if ttl == Duration::ZERO {
+            warn!("TTL was not set for join with expiration, defaulting to 24 hours.");
+            ttl = Duration::from_secs(24 * 60 * 60);
+        }
+
+        Ok(JoinWithExpirationOperator {
+            left_expiration: ttl,
+            right_expiration: ttl,
+            left_input_schema,
+            right_input_schema,
+            left_schema,
+            right_schema,
+            left_passer,
+            right_passer,
+            join_exec_plan,
+        })
+    }
+}
diff --git a/src/runtime/streaming/operators/joins/lookup_join.rs b/src/runtime/streaming/operators/joins/lookup_join.rs
new file mode 100644
index 00000000..b302d198
--- /dev/null
+++ b/src/runtime/streaming/operators/joins/lookup_join.rs
@@ -0,0 +1,363 @@
+//! Dimension-table lookup join (enrichment): mirrors the worker's `arrow/lookup_join` logic and implements [`MessageOperator`].
+
+use anyhow::{anyhow, Result};
+use arrow::compute::filter_record_batch;
+use arrow::row::{OwnedRow, RowConverter, SortField};
+use arrow_array::cast::AsArray;
+use arrow_array::types::UInt64Type;
+use arrow_array::{Array, BooleanArray, RecordBatch};
+use arrow_schema::{DataType, Field, FieldRef, Schema};
+use async_trait::async_trait;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
+use datafusion_proto::protobuf::PhysicalExprNode;
+use mini_moka::sync::Cache;
+use prost::Message;
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::Duration;
+use protocol::grpc::api::JoinType;
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::protocol::stream_output::StreamOutput;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{CheckpointBarrier, FsSchema, MetadataField, OperatorConfig, Watermark, LOOKUP_KEY_INDEX_FIELD};
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum LookupJoinType {
+    Left, // no null filter is built; every input row is emitted
+    Inner, // rows whose non-metadata lookup columns are all null are filtered out
+}
+
+/// Lookup-join operator: enriches rows from an external system, with an optional cache.
+pub struct LookupJoinOperator {
+    name: String,
+    connector: Box<dyn LookupConnector + Send>, // queried with the uncached key columns
+    key_exprs: Vec<Arc<dyn PhysicalExpr>>, // evaluated against each input batch to produce keys
+    cache: Option<Cache<OwnedRow, OwnedRow>>, // key row -> result row; `None` disables caching
+    key_row_converter: RowConverter,
+    result_row_converter: RowConverter,
+    join_type: LookupJoinType,
+    lookup_schema: Arc<Schema>,
+    metadata_fields: Vec<MetadataField>, // columns ignored by the inner-join null check
+    input_schema: Arc<FsSchema>,
+    /// Mirrors worker-side `ctx.out_schema`: input minus keys, then lookup columns, then timestamp.
+    output_schema: Arc<Schema>,
+}
+
+fn build_lookup_output_schema(
+    input: &FsSchema,
+    lookup_columns: &[FieldRef],
+) -> anyhow::Result<Arc<Schema>> {
+    // Output layout: input columns minus routing keys and timestamp, lookup columns, timestamp.
+    let routing = input.routing_keys().cloned().unwrap_or_default();
+    let ts_idx = input.timestamp_index;
+    let fields = input.schema.fields();
+    let passthrough = fields
+        .iter()
+        .enumerate()
+        .filter(|(idx, _)| !routing.contains(idx) && *idx != ts_idx)
+        .map(|(_, field)| field.clone());
+    let tail = lookup_columns.iter().cloned().chain(std::iter::once(fields[ts_idx].clone()));
+    let merged: Vec<FieldRef> = passthrough.chain(tail).collect();
+    Ok(Arc::new(Schema::new(merged)))
+}
+
+impl LookupJoinOperator {
+    /// Enriches `batch` with lookup columns: resolves each row's key through the cache or the
+    /// connector; for inner joins, drops rows whose non-metadata lookup columns are all null.
+    async fn process_lookup_batch(&mut self, batch: RecordBatch) -> Result<Vec<StreamOutput>> {
+        let num_rows = batch.num_rows();
+        if num_rows == 0 {
+            return Ok(vec![]);
+        }
+
+        let key_arrays: Vec<_> = self
+            .key_exprs
+            .iter()
+            .map(|expr| {
+                expr.evaluate(&batch)
+                    .map_err(|e| anyhow!("key expr evaluate: {e}"))?
+                    .into_array(num_rows)
+                    .map_err(|e| anyhow!("key expr into_array: {e}"))
+            })
+            .collect::<Result<_>>()?;
+
+        let rows = self
+            .key_row_converter
+            .convert_columns(&key_arrays)
+            .map_err(|e| anyhow!("key_row_converter: {e}"))?;
+        // Group input row indices by distinct key.
+        let mut key_map: HashMap<OwnedRow, Vec<usize>> = HashMap::new();
+        for (i, row) in rows.iter().enumerate() {
+            key_map.entry(row.owned()).or_default().push(i);
+        }
+        // Only keys absent from the cache (or all keys when caching is off) go to the connector.
+        let uncached_keys: Vec<&OwnedRow> = if let Some(cache) = &mut self.cache {
+            key_map
+                .keys()
+                .filter(|k| !cache.contains_key(*k))
+                .collect()
+        } else {
+            key_map.keys().collect()
+        };
+
+        // Store `OwnedRow` values by key bytes to avoid borrowing `convert_columns`' temporary buffer.
+        let mut results: HashMap<Vec<u8>, OwnedRow> = HashMap::new();
+
+        if !uncached_keys.is_empty() {
+            let cols = self
+                .key_row_converter
+                .convert_rows(uncached_keys.iter().map(|r| r.row()))
+                .map_err(|e| anyhow!("convert_rows for lookup: {e}"))?;
+
+            if let Some(result_batch) = self.connector.lookup(&cols).await {
+                let mut result_batch = result_batch.map_err(|e| anyhow!("connector lookup: {e}"))?;
+
+                let key_idx_col = result_batch
+                    .schema()
+                    .index_of(LOOKUP_KEY_INDEX_FIELD)
+                    .map_err(|e| anyhow!("{e}"))?;
+                let keys = result_batch.remove_column(key_idx_col);
+                let keys = keys.as_primitive::<UInt64Type>();
+
+                let result_rows = self
+                    .result_row_converter
+                    .convert_columns(result_batch.columns())
+                    .map_err(|e| anyhow!("result_row_converter: {e}"))?;
+
+                for (i, v) in result_rows.iter().enumerate() {
+                    if keys.is_null(i) {
+                        return Err(anyhow!("lookup key index is null at row {i}"));
+                    }
+                    let req_idx = keys.value(i) as usize;
+                    if req_idx >= uncached_keys.len() {
+                        return Err(anyhow!(
+                            "lookup key index {req_idx} out of range ({} keys)",
+                            uncached_keys.len()
+                        ));
+                    }
+                    let key_bytes = uncached_keys[req_idx].as_ref().to_vec();
+                    let owned = v.owned();
+                    results.insert(key_bytes.clone(), owned.clone());
+                    if let Some(cache) = &mut self.cache {
+                        cache.insert(uncached_keys[req_idx].clone(), owned);
+                    }
+                }
+            }
+        }
+
+        let mut output_rows = self
+            .result_row_converter
+            .empty_rows(batch.num_rows(), batch.num_rows().saturating_mul(10));
+
+        for row in rows.iter() {
+            let cached = self.cache.as_mut().and_then(|c| c.get(&row.owned()));
+            let row_owned = match cached {
+                Some(hit) => hit,
+                // Surface a missing connector row as an error instead of panicking.
+                None => results.get(row.as_ref()).cloned().ok_or_else(|| {
+                    anyhow!("missing lookup result for key (cache miss without connector row)")
+                })?,
+            };
+            output_rows.push(row_owned.row());
+        }
+
+        let right_side = self
+            .result_row_converter
+            .convert_rows(output_rows.iter())
+            .map_err(|e| anyhow!("convert_rows output: {e}"))?;
+        // Inner join only: a row survives if any non-metadata lookup column is non-null for it.
+        let nonnull = (self.join_type == LookupJoinType::Inner).then(|| {
+            let mut nonnull = vec![false; batch.num_rows()];
+            for (_, a) in self
+                .lookup_schema
+                .fields()
+                .iter()
+                .zip(right_side.iter())
+                .filter(|(f, _)| {
+                    !self
+                        .metadata_fields
+                        .iter()
+                        .any(|m| &m.field_name == f.name())
+                })
+            {
+                if let Some(nulls) = a.logical_nulls() {
+                    for (valid, b) in nulls.iter().zip(nonnull.iter_mut()) {
+                        *b |= valid;
+                    }
+                } else {
+                    nonnull.fill(true);
+                    break;
+                }
+            }
+            BooleanArray::from(nonnull)
+        });
+        // Output columns: input minus routing keys and timestamp, lookup columns, then timestamp.
+        let key_indices = self
+            .input_schema
+            .routing_keys()
+            .cloned()
+            .unwrap_or_default();
+        let non_keys: Vec<_> = (0..batch.num_columns())
+            .filter(|i| !key_indices.contains(i) && *i != self.input_schema.timestamp_index)
+            .collect();
+
+        let mut result_cols = batch
+            .project(&non_keys)
+            .map_err(|e| anyhow!("project non_keys: {e}"))?
+            .columns()
+            .to_vec();
+        result_cols.extend(right_side);
+        result_cols.push(batch.column(self.input_schema.timestamp_index).clone());
+
+        let mut out_batch = RecordBatch::try_new(self.output_schema.clone(), result_cols)
+            .map_err(|e| anyhow!("try_new output batch: {e}"))?;
+
+        if let Some(mask) = nonnull {
+            out_batch = filter_record_batch(&out_batch, &mask).map_err(|e| anyhow!("{e}"))?;
+        }
+
+        if out_batch.num_rows() == 0 {
+            return Ok(vec![]);
+        }
+
+        Ok(vec![StreamOutput::Forward(out_batch)])
+    }
+}
+
+#[async_trait]
+impl MessageOperator for LookupJoinOperator {
+    fn name(&self) -> &str {
+        &self.name
+    }
+    /// No start-up work is performed.
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
+        Ok(())
+    }
+    /// Delegates to `process_lookup_batch`; the input index and task context are unused.
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        self.process_lookup_batch(batch).await
+    }
+    /// Watermarks produce no output here.
+    async fn process_watermark(
+        &mut self,
+        _watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+    /// Checkpoint hook; nothing is persisted by this operator.
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        _ctx: &mut TaskContext,
+    ) -> Result<()> {
+        Ok(())
+    }
+    /// Emits nothing on close.
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+}
+
+/// Builds a [`LookupJoinOperator`] from configuration (not a `ConstructedOperator` / `ArrowOperator`).
+pub struct LookupJoinConstructor;
+
+impl LookupJoinConstructor {
+    pub fn with_config(
+        &self,
+        config: LookupJoinOperator,
+        registry: Arc<Registry>,
+    ) -> anyhow::Result<LookupJoinOperator> {
+        // NOTE(review): `config` has the operator's own type; a config message was likely meant.
+        let join_type = match config.join_type() {
+            JoinType::Inner => LookupJoinType::Inner,
+            JoinType::Left => LookupJoinType::Left,
+            jt => return Err(anyhow!("invalid lookup join type {:?}", jt)),
+        };
+        let input = config.input_schema.ok_or_else(|| anyhow!("lookup join: no input_schema"))?;
+        let input_schema: FsSchema = input.try_into()?;
+        let lookup = config.lookup_schema.ok_or_else(|| anyhow!("lookup join: no lookup_schema"))?;
+        let lookup_schema: FsSchema = lookup.try_into()?;
+
+        let exprs = config
+            .key_exprs
+            .iter()
+            .map(|e| {
+                let expr = PhysicalExprNode::decode(&mut e.left_expr.as_slice())?;
+                Ok(parse_physical_expr(
+                    &expr,
+                    registry.as_ref(),
+                    &input_schema.schema,
+                    &DefaultPhysicalExtensionCodec {},
+                )?)
+            })
+            .collect::<anyhow::Result<Vec<_>>>()?;
+
+        let op = config.connector.ok_or_else(|| anyhow!("lookup join: no connector"))?;
+        let operator_config: OperatorConfig = serde_json::from_str(&op.config)?;
+
+        let result_row_converter = RowConverter::new(
+            lookup_schema
+                .schema_without_timestamp()
+                .fields
+                .iter()
+                .map(|f| SortField::new(f.data_type().clone()))
+                .collect(),
+        )?;
+
+        let lookup_schema_arc = Arc::new(
+            lookup_schema
+                .with_additional_fields(
+                    [Field::new(LOOKUP_KEY_INDEX_FIELD, DataType::UInt64, false)].into_iter(),
+                )?
+                .schema_without_timestamp(),
+        );
+        let output_schema = build_lookup_output_schema(&input_schema, lookup_schema_arc.fields())?;
+
+        let connector = connectors()
+            .get(op.connector.as_str())
+            .ok_or_else(|| anyhow!("No connector with name '{}'", op.connector))?
+            .make_lookup(operator_config.clone(), lookup_schema_arc.clone())?;
+        let name = format!("LookupJoin({})", connector.name());
+
+        let max_capacity_bytes = config.max_capacity_bytes.unwrap_or(8 * 1024 * 1024);
+        let cache = (max_capacity_bytes > 0).then(|| {
+            let mut c = Cache::builder()
+                .weigher(|k: &OwnedRow, v: &OwnedRow| (k.as_ref().len() + v.as_ref().len()) as u32)
+                .max_capacity(max_capacity_bytes);
+            if let Some(ttl) = config.ttl_micros {
+                c = c.time_to_live(Duration::from_micros(ttl));
+            }
+            c.build()
+        });
+
+        let key_row_converter = RowConverter::new(
+            exprs
+                .iter()
+                .map(|e| Ok(SortField::new(e.data_type(&input_schema.schema)?)))
+                .collect::<anyhow::Result<_>>()?,
+        )?;
+
+        Ok(LookupJoinOperator {
+            name,
+            connector,
+            key_exprs: exprs,
+            cache,
+            key_row_converter,
+            result_row_converter,
+            join_type,
+            lookup_schema: lookup_schema_arc,
+            metadata_fields: operator_config.metadata_fields,
+            input_schema: Arc::new(input_schema),
+            output_schema,
+        })
+    }
+}
diff --git a/src/runtime/streaming/operators/joins/mod.rs b/src/runtime/streaming/operators/joins/mod.rs
new file mode 100644
index 00000000..d53e4b91
--- /dev/null
+++ b/src/runtime/streaming/operators/joins/mod.rs
@@ -0,0 +1,7 @@
+pub mod join_instance;
+pub mod join_with_expiration;
+pub mod lookup_join;
+
+pub use join_instance::{InstantJoinConstructor, InstantJoinOperator};
+pub use join_with_expiration::{JoinWithExpirationConstructor, JoinWithExpirationOperator};
+pub use lookup_join::{LookupJoinConstructor, LookupJoinOperator, LookupJoinType};
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
new file mode 100644
index 00000000..fe2a7d9e
--- /dev/null
+++ b/src/runtime/streaming/operators/mod.rs
@@ -0,0 +1,75 @@
+//! Built-in operators.
+
+pub mod grouping;
+pub mod joins;
+pub mod sink;
+pub mod source;
+pub mod watermark;
+pub mod windows;
+
+pub use grouping::{
+    IncrementalAggregatingConstructor, IncrementalAggregatingFunc, Key, UpdatingCache,
+};
+pub use joins::{
+    InstantJoinConstructor, InstantJoinOperator, JoinWithExpirationConstructor,
+    JoinWithExpirationOperator, LookupJoinConstructor, LookupJoinOperator, LookupJoinType,
+};
+pub use sink::{ConsistencyMode, KafkaSinkOperator};
+pub use source::{BatchDeserializer, KafkaSourceOperator, KafkaState};
+pub use watermark::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState};
+pub use windows::{
+    SessionAggregatingWindowConstructor, SessionWindowOperator,
+    SlidingAggregatingWindowConstructor, SlidingWindowOperator,
+    TumblingAggregateWindowConstructor, TumblingWindowOperator, WindowFunctionConstructor,
+    WindowFunctionOperator,
+};
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use arrow_array::RecordBatch;
+use async_trait::async_trait;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{CheckpointBarrier, Watermark};
+
+/// Operator that forwards every input batch unchanged.
+pub struct PassthroughOperator {
+    name: String, // value returned by `MessageOperator::name`
+}
+
+impl PassthroughOperator {
+    pub fn new(name: impl Into<String>) -> Self {
+        Self { name: name.into() }
+    }
+}
+
+#[async_trait]
+impl MessageOperator for PassthroughOperator {
+    fn name(&self) -> &str {
+        &self.name
+    }
+    /// Wraps `batch` in `StreamOutput::Forward` without modification; the input index is ignored.
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> anyhow::Result<Vec<StreamOutput>> {
+        Ok(vec![StreamOutput::Forward(batch)])
+    }
+    /// Emits nothing in response to a watermark.
+    async fn process_watermark(
+        &mut self,
+        _watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> anyhow::Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+    /// Checkpoint hook; this operator keeps no state to snapshot.
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        _ctx: &mut TaskContext,
+    ) -> anyhow::Result<()> {
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs
new file mode 100644
index 00000000..9161ac7b
--- /dev/null
+++ b/src/runtime/streaming/operators/sink/kafka/mod.rs
@@ -0,0 +1,366 @@
+//! Kafka sink: implements [`crate::runtime::streaming::api::operator::MessageOperator`], supporting at-least-once and exactly-once (transactions + two-phase commit) delivery.
+
+use anyhow::{anyhow, bail, Result};
+use arrow_array::cast::AsArray;
+use arrow_array::Array;
+use arrow_array::RecordBatch;
+use arrow_schema::{DataType, TimeUnit};
+use async_trait::async_trait;
+use rdkafka::error::{KafkaError, RDKafkaErrorCode};
+use rdkafka::producer::{DeliveryFuture, FutureProducer, FutureRecord, Producer};
+use rdkafka::util::Timeout;
+use rdkafka::ClientConfig;
+use std::collections::HashMap;
+use std::time::Duration;
+use tokio::time::sleep;
+use tracing::{info, warn};
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark};
+// ============================================================================
+// 1. Domain model: consistency levels and the transaction state machine
+// ============================================================================
+
+#[derive(Debug, Clone)]
+pub enum ConsistencyMode {
+    AtLeastOnce, // `on_start` creates a single non-transactional producer
+    ExactlyOnce, // `on_start` creates a transactional producer from the restored "tx_idx" state
+}
+
+struct TransactionalState {
+    next_transaction_index: usize, // `on_start` sets this to the index it used plus one
+    active_producer: FutureProducer, // returned by `current_producer` in exactly-once mode
+    producer_awaiting_commit: Option<FutureProducer>, // initialised to `None` by `on_start`
+}
+
+// ============================================================================
+// 2. Core operator shell
+// ============================================================================
+
+pub struct KafkaSinkOperator {
+    pub topic: String,
+    pub bootstrap_servers: String,
+    pub consistency_mode: ConsistencyMode,
+    pub client_config: HashMap<String, String>, // extra settings applied after `bootstrap.servers`
+    // Schema-derived column indices; both stay `None` until `resolve_schema_indices` runs.
+    pub input_schema: FsSchema,
+    pub timestamp_col_idx: Option<usize>,
+    pub key_col_idx: Option<usize>,
+    // Produces the message payloads for each batch in `process_data`.
+    pub serializer: ArrowSerializer,
+    // `on_start` populates exactly one of these, depending on `consistency_mode`.
+    at_least_once_producer: Option<FutureProducer>,
+    transactional_state: Option<TransactionalState>,
+    // Delivery futures awaited and drained by `flush_to_broker`.
+    write_futures: Vec<DeliveryFuture>,
+}
+
+impl KafkaSinkOperator {
+    pub fn new(
+        topic: String,
+        bootstrap_servers: String,
+        consistency_mode: ConsistencyMode,
+        client_config: HashMap<String, String>,
+        input_schema: FsSchema,
+        serializer: ArrowSerializer,
+    ) -> Self { // producers and column indices start empty; `on_start` fills them in
+        Self {
+            topic,
+            bootstrap_servers,
+            consistency_mode,
+            client_config,
+            input_schema,
+            timestamp_col_idx: None,
+            key_col_idx: None,
+            serializer,
+            at_least_once_producer: None,
+            transactional_state: None,
+            write_futures: Vec::new(),
+        }
+    }
+    /// Caches the timestamp column index and, when present, the first routing-key column index.
+    fn resolve_schema_indices(&mut self) {
+        self.timestamp_col_idx = Some(self.input_schema.timestamp_index);
+        // Only the first routing key is used; `key_col_idx` is left untouched when there is none.
+        if let Some(routing_keys) = self.input_schema.routing_keys() {
+            if !routing_keys.is_empty() {
+                self.key_col_idx = Some(routing_keys[0]);
+            }
+        }
+    }
+    /// Builds a producer; with `tx_index` it is transactional and a transaction is already begun.
+    fn create_producer(&self, ctx: &TaskContext, tx_index: Option<usize>) -> Result<FutureProducer> {
+        let mut config = ClientConfig::new();
+        config.set("bootstrap.servers", &self.bootstrap_servers);
+        // User-supplied settings are applied after `bootstrap.servers`.
+        for (k, v) in &self.client_config {
+            config.set(k, v);
+        }
+        // The transactional id combines job id, topic, subtask index and transaction index.
+        if let Some(idx) = tx_index {
+            config.set("enable.idempotence", "true");
+            let transactional_id = format!(
+                "arroyo-tx-{}-{}-{}-{}",
+                ctx.job_id, self.topic, ctx.subtask_idx, idx
+            );
+            config.set("transactional.id", &transactional_id);
+
+            let producer: FutureProducer = config.create()?;
+            producer
+                .init_transactions(Timeout::After(Duration::from_secs(30)))
+                .map_err(|e| anyhow!("Failed to init Kafka transactions: {}", e))?;
+            producer
+                .begin_transaction()
+                .map_err(|e| anyhow!("Failed to begin Kafka transaction: {}", e))?;
+
+            Ok(producer)
+        } else {
+            Ok(config.create()?)
+        }
+    }
+    /// Polls the producer once, then awaits every pending delivery; the first failure is returned.
+    async fn flush_to_broker(&mut self) -> Result<()> {
+        let producer = self.current_producer();
+
+        producer.poll(Timeout::After(Duration::ZERO));
+
+        for future in self.write_futures.drain(..) {
+            match future.await {
+                Ok(Ok(_)) => continue,
+                Ok(Err((e, _))) => bail!("Kafka producer delivery failed: {}", e),
+                Err(_) => bail!("Kafka delivery future canceled"),
+            }
+        }
+        Ok(())
+    }
+    /// Producer for the active mode; panics if the matching field has not been populated yet.
+    fn current_producer(&self) -> &FutureProducer {
+        match &self.consistency_mode {
+            ConsistencyMode::AtLeastOnce => self.at_least_once_producer.as_ref().unwrap(),
+            ConsistencyMode::ExactlyOnce => &self.transactional_state.as_ref().unwrap().active_producer,
+        }
+    }
+}
+
+fn event_timestamp_ms(batch: &RecordBatch, row: usize, col: usize) -> Option<i64> {
+    let arr = batch.column(col); // result is epoch milliseconds; nulls and other types give `None`
+    match arr.data_type() {
+        DataType::Timestamp(TimeUnit::Second, _) => { // seconds -> ms; overflow yields `None`
+            let a = arr.as_primitive::<arrow_array::types::TimestampSecondType>();
+            (!a.is_null(row)).then(|| a.value(row)).and_then(|s| s.checked_mul(1000))
+        }
+        DataType::Timestamp(TimeUnit::Millisecond, _) => {
+            let a = arr.as_primitive::<arrow_array::types::TimestampMillisecondType>();
+            (!a.is_null(row)).then(|| a.value(row))
+        }
+        DataType::Timestamp(TimeUnit::Microsecond, _) => {
+            let a = arr.as_primitive::<arrow_array::types::TimestampMicrosecondType>();
+            (!a.is_null(row)).then(|| a.value(row) / 1000)
+        }
+        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
+            let a = arr.as_primitive::<arrow_array::types::TimestampNanosecondType>();
+            (!a.is_null(row)).then(|| a.value(row) / 1_000_000)
+        }
+        _ => None,
+    }
+}
+
+/// Returns the UTF-8 bytes of the string at (`row`, `col`), used as the record key.
+///
+/// Yields `None` when the value is null or the column is neither `Utf8` nor `LargeUtf8`.
+fn row_key_bytes(batch: &RecordBatch, row: usize, col: usize) -> Option<Vec<u8>> {
+    let column = batch.column(col);
+    match column.data_type() {
+        DataType::Utf8 => {
+            let strings = column.as_string::<i32>();
+            if strings.is_null(row) {
+                return None;
+            }
+            Some(strings.value(row).as_bytes().to_vec())
+        }
+        DataType::LargeUtf8 => {
+            let strings = column.as_string::<i64>();
+            if strings.is_null(row) {
+                return None;
+            }
+            Some(strings.value(row).as_bytes().to_vec())
+        }
+        _ => None, // any other column type produces no key
+    }
+}
+
+// ============================================================================
+// 3. MessageOperator protocol implementation
+// ============================================================================
+
+#[async_trait]
+impl MessageOperator for KafkaSinkOperator { // Lifecycle, data and checkpoint hooks of the Kafka sink.
+    fn name(&self) -> &str { // Fixed operator name.
+        "KafkaSink"
+    }
+
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { // Creates the producer for the configured consistency mode.
+        self.resolve_schema_indices(); // Presumably fills `timestamp_col_idx` / `key_col_idx`; defined outside this view.
+
+        match self.consistency_mode {
+            ConsistencyMode::AtLeastOnce => {
+                self.at_least_once_producer = Some(self.create_producer(ctx, None)?); // No transaction index is passed.
+            }
+            ConsistencyMode::ExactlyOnce => {
+                let mut next_idx = { // Index stored under "tx_idx" for this subtask, or 0 if there is none.
+                    let mut tm = ctx.table_manager_guard().await?;
+                    let index_table = tm
+                        .get_global_keyed_state::<u32, usize>("tx_idx")
+                        .await
+                        .map_err(|e| anyhow!(e))?;
+                    index_table.get(&ctx.subtask_idx).copied().unwrap_or(0)
+                };
+
+                let active_producer = self.create_producer(ctx, Some(next_idx))?;
+                next_idx += 1; // The producer created at the next snapshot uses the following index.
+
+                self.transactional_state = Some(TransactionalState {
+                    next_transaction_index: next_idx,
+                    active_producer,
+                    producer_awaiting_commit: None,
+                });
+            }
+        }
+        Ok(())
+    }
+
+    async fn process_data( // Enqueues one Kafka record per serialized payload; emits nothing downstream.
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let payload_iter = self.serializer.serialize(&batch);
+        let producer = self.current_producer().clone();
+
+        for (i, payload) in payload_iter.enumerate() { // `i` is used as the row index into `batch`.
+            let ts_millis = self // Optional record timestamp taken from the timestamp column.
+                .timestamp_col_idx
+                .and_then(|idx| event_timestamp_ms(&batch, i, idx));
+            let key_bytes = self // Optional record key taken from the key column.
+                .key_col_idx
+                .and_then(|idx| row_key_bytes(&batch, i, idx));
+
+            let mut record = FutureRecord::<Vec<u8>, Vec<u8>>::to(&self.topic).payload(&payload);
+            if let Some(ts) = ts_millis {
+                record = record.timestamp(ts);
+            }
+            if let Some(ref k) = key_bytes {
+                record = record.key(k);
+            }
+
+            loop { // Retry while the producer queue is full; any other error aborts.
+                match producer.send_result(record) {
+                    Ok(delivery_future) => {
+                        self.write_futures.push(delivery_future); // Kept in `write_futures`; presumably awaited by `flush_to_broker`.
+                        break;
+                    }
+                    Err((KafkaError::MessageProduction(RDKafkaErrorCode::QueueFull), returned_record)) => {
+                        record = returned_record; // Reuse the record handed back by the failed send.
+                        sleep(Duration::from_millis(10)).await; // Back off 10 ms before retrying.
+                    }
+                    Err((e, _)) => bail!("Fatal Kafka send error: {}", e),
+                }
+            }
+        }
+
+        Ok(vec![])
+    }
+
+    async fn process_watermark( // Watermarks are ignored by the sink.
+        &mut self,
+        _watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+
+    async fn snapshot_state( // Flushes; in exactly-once mode also rotates the producer and persists its index.
+        &mut self,
+        _barrier: CheckpointBarrier,
+        ctx: &mut TaskContext,
+    ) -> Result<()> {
+        self.flush_to_broker().await?;
+
+        if matches!(self.consistency_mode, ConsistencyMode::ExactlyOnce) {
+            let next_tx = self
+                .transactional_state
+                .as_ref()
+                .map(|s| s.next_transaction_index)
+                .unwrap(); // Panics if `transactional_state` is `None`; `on_start` sets it in exactly-once mode.
+            let new_producer = self.create_producer(ctx, Some(next_tx))?;
+
+            let state = self.transactional_state.as_mut().unwrap();
+            let old_producer = std::mem::replace(&mut state.active_producer, new_producer);
+            state.producer_awaiting_commit = Some(old_producer); // Committed later by `commit_checkpoint`.
+
+            {
+                let mut tm = ctx.table_manager_guard().await?;
+                let index_table = tm
+                    .get_global_keyed_state::<u32, usize>("tx_idx")
+                    .await
+                    .map_err(|e| anyhow!(e))?;
+                index_table
+                    .insert(ctx.subtask_idx, state.next_transaction_index) // The index the new active producer was created with.
+                    .await;
+            }
+
+            state.next_transaction_index += 1;
+        }
+
+        Ok(())
+    }
+
+    async fn commit_checkpoint(&mut self, epoch: u32, _ctx: &mut TaskContext) -> Result<()> { // Commits the producer stashed by `snapshot_state`.
+        if matches!(self.consistency_mode, ConsistencyMode::AtLeastOnce) {
+            return Ok(()); // Nothing to commit without transactions.
+        }
+
+        let state = self.transactional_state.as_mut().unwrap(); // Panics if `on_start` did not set the state.
+        let Some(committing_producer) = state.producer_awaiting_commit.take() else {
+            warn!(
+                "Received Commit for epoch {}, but no stashed producer exists. Possibly a recovery duplicate.",
+                epoch
+            );
+            return Ok(());
+        };
+
+        let mut retries = 0;
+        loop { // Up to 5 attempts, 2 s apart.
+            match committing_producer.commit_transaction(Timeout::After(Duration::from_secs(10))) { // NOTE(review): not awaited, so presumably blocks up to 10 s inside this async fn — confirm.
+                Ok(_) => {
+                    info!("Successfully committed Kafka transaction for epoch {}", epoch);
+                    break;
+                }
+                Err(e) => {
+                    retries += 1;
+                    if retries >= 5 {
+                        bail!(
+                            "Failed to commit Kafka transaction after 5 retries. Fatal error: {}",
+                            e
+                        );
+                    }
+                    warn!(
+                        "Failed to commit Kafka transaction (Attempt {}/5): {}. Retrying...",
+                        retries, e
+                    );
+                    sleep(Duration::from_secs(2)).await;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> { // Flushes pending writes before shutdown.
+        self.flush_to_broker().await?;
+        info!("Kafka sink shut down gracefully.");
+        Ok(vec![])
+    }
+}
diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/runtime/streaming/operators/sink/mod.rs
new file mode 100644
index 00000000..3b88f563
--- /dev/null
+++ b/src/runtime/streaming/operators/sink/mod.rs
@@ -0,0 +1,5 @@
+//! Sink implementations that connect to external systems (Kafka, etc.).
+
+pub mod kafka;
+
+pub use kafka::{ConsistencyMode, KafkaSinkOperator};
diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs
new file mode 100644
index 00000000..d0c67972
--- /dev/null
+++ b/src/runtime/streaming/operators/source/kafka/mod.rs
@@ -0,0 +1,325 @@
+//! Kafka source operator: implements [`crate::runtime::streaming::api::source::SourceOperator`]; [`crate::runtime::streaming::execution::SourceRunner`] polls its `fetch_next`.
+
+use anyhow::{anyhow, Context as _, Result};
+use async_trait::async_trait;
+use bincode::{Decode, Encode};
+use governor::{DefaultDirectRateLimiter, Quota, RateLimiter as GovernorRateLimiter};
+use rdkafka::consumer::{CommitMode, Consumer, StreamConsumer};
+use rdkafka::{ClientConfig, Message as KMessage, Offset, TopicPartitionList};
+use std::collections::HashMap;
+use std::num::NonZeroU32;
+use std::time::Duration;
+use tracing::{debug, error, info, warn};
+use arrow_array::RecordBatch;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::source::{SourceEvent, SourceOffset, SourceOperator};
+use crate::sql::common::{CheckpointBarrier, MetadataField};
+// ============================================================================
+// 1. Domain model: Kafka state and configuration
+// ============================================================================
+
+#[derive(Copy, Clone, Debug, Encode, Decode, PartialEq, PartialOrd)]
+pub struct KafkaState { // Checkpointed read position of one Kafka partition.
+    partition: i32, // Kafka partition id; also the key used in the "k" state table.
+    offset: i64, // Offset to resume from: `snapshot_state` stores the last consumed offset + 1.
+}
+
+/// Mirrors the deserializer buffer of the original Arroyo implementation
+/// (production deserializers usually buffer and emit one [`RecordBatch`] after N records or a timeout).
+pub trait BatchDeserializer: Send + 'static {
+    fn deserialize_slice( // Called by `fetch_next` once per Kafka message that carries a payload.
+        &mut self,
+        payload: &[u8], // Raw message payload.
+        timestamp: u64, // Message timestamp in milliseconds; `fetch_next` clamps negative values to 0.
+        metadata: Option<HashMap<&str, FieldValueType<'_>>>, // Keyed by field name. NOTE(review): `FieldValueType` is not in this file's visible `use` list — confirm it is in scope.
+    ) -> Result<()>;
+
+    fn should_flush(&self) -> bool; // Checked by `fetch_next` before it calls `flush_buffer`.
+
+    fn flush_buffer(&mut self) -> Result<Option<RecordBatch>>; // A returned batch is emitted as `SourceEvent::Data`.
+}
+
+impl SourceOffset {
+    fn rdkafka_offset(self) -> Offset { // Maps the configured start position onto an rdkafka `Offset`.
+        match self {
+            Self::Earliest => Offset::Beginning, // Start of the partition.
+            Self::Latest => Offset::End, // End of the partition.
+            Self::Group => Offset::Stored, // Offset taken from the offset store.
+        }
+    }
+}
+
+// ============================================================================
+// 2. Core operator shell
+// ============================================================================
+
+pub struct KafkaSourceOperator { // Kafka source: configuration plus runtime state created in `on_start`.
+    pub topic: String, // Topic to consume; also returned by `name()`.
+    pub bootstrap_servers: String, // Value used for `bootstrap.servers`.
+    pub group_id: Option<String>, // Explicit `group.id`; takes precedence over the prefix.
+    pub group_id_prefix: Option<String>, // Prefix of the generated group id when `group_id` is `None`.
+    pub offset_mode: SourceOffset, // Start position for partitions when no checkpointed state exists.
+
+    pub client_configs: HashMap<String, String>, // Extra client settings, applied before the fixed ones.
+    pub messages_per_second: NonZeroU32, // Per-second quota of the rate limiter.
+    pub metadata_fields: Vec<MetadataField>, // Message attributes handed to the deserializer as metadata.
+
+    consumer: Option<StreamConsumer>, // Created in `on_start`, dropped in `on_close`.
+    rate_limiter: Option<DefaultDirectRateLimiter>, // Created in `on_start`.
+    deserializer: Box<dyn BatchDeserializer>, // Buffers decoded messages into record batches.
+
+    current_offsets: HashMap<i32, i64>, // Last consumed offset per partition.
+    is_empty_assignment: bool, // True when no partition maps to this subtask; `fetch_next` then returns `Idle`.
+}
+
+impl KafkaSourceOperator {
+    pub fn new( // Stores the configuration; consumer and rate limiter stay unset until `on_start`.
+        topic: String,
+        bootstrap_servers: String,
+        group_id: Option<String>,
+        group_id_prefix: Option<String>,
+        offset_mode: SourceOffset,
+        client_configs: HashMap<String, String>,
+        messages_per_second: NonZeroU32,
+        metadata_fields: Vec<MetadataField>,
+        deserializer: Box<dyn BatchDeserializer>,
+    ) -> Self {
+        Self {
+            topic,
+            bootstrap_servers,
+            group_id,
+            group_id_prefix,
+            offset_mode,
+            client_configs,
+            messages_per_second,
+            metadata_fields,
+            consumer: None,
+            rate_limiter: None,
+            deserializer,
+            current_offsets: HashMap::new(),
+            is_empty_assignment: false,
+        }
+    }
+
+    async fn init_and_assign_consumer(&mut self, ctx: &mut TaskContext) -> Result<()> { // Builds the consumer, restores offsets, assigns this subtask's partitions.
+        info!("Creating kafka consumer for {}", self.bootstrap_servers);
+        let mut client_config = ClientConfig::new();
+
+        let group_id = match (&self.group_id, &self.group_id_prefix) { // Explicit id wins; otherwise derive one from job id and subtask index.
+            (Some(gid), _) => gid.clone(),
+            (None, Some(prefix)) => {
+                format!("{}-arroyo-{}-{}", prefix, ctx.job_id, ctx.subtask_idx)
+            }
+            (None, None) => format!("arroyo-{}-{}-consumer", ctx.job_id, ctx.subtask_idx),
+        };
+
+        for (key, value) in &self.client_configs { // User settings are applied first; the fixed settings below are set afterwards.
+            client_config.set(key, value);
+        }
+
+        let consumer: StreamConsumer = client_config
+            .set("bootstrap.servers", &self.bootstrap_servers)
+            .set("enable.partition.eof", "false")
+            .set("enable.auto.commit", "false") // Offsets are committed explicitly in `snapshot_state`.
+            .set("group.id", &group_id)
+            .create()?;
+
+        let (has_state, state_map) = { // Load every checkpointed partition offset from the "k" state table.
+            let mut tm = ctx.table_manager_guard().await?;
+            let global_state = tm
+                .get_global_keyed_state::<i32, KafkaState>("k")
+                .await
+                .map_err(|e| anyhow!(e))?;
+            let restored_states: Vec<_> = global_state.get_all().values().copied().collect();
+            let has_state = !restored_states.is_empty();
+            let state_map: HashMap<i32, KafkaState> =
+                restored_states.into_iter().map(|s| (s.partition, s)).collect();
+            (has_state, state_map)
+        };
+
+        let metadata = consumer
+            .fetch_metadata(Some(&self.topic), Duration::from_secs(30)) // NOTE(review): not awaited, so presumably blocks up to 30 s inside this async fn — confirm.
+            .context("Failed to fetch Kafka metadata")?;
+
+        let topic_meta = metadata
+            .topics()
+            .iter()
+            .find(|t| t.name() == self.topic)
+            .ok_or_else(|| anyhow!("topic {} not in metadata", self.topic))?;
+
+        let partitions = topic_meta.partitions();
+        let mut our_partitions = HashMap::new();
+        let pmax = ctx.parallelism.max(1) as i32; // Guards against a parallelism of 0.
+
+        for p in partitions {
+            if p.id().rem_euclid(pmax) == ctx.subtask_idx as i32 { // Partition id modulo parallelism selects the owning subtask.
+                let offset = state_map
+                    .get(&p.id())
+                    .map(|s| Offset::Offset(s.offset)) // Checkpointed offset, if this partition has one.
+                    .unwrap_or_else(|| {
+                        if has_state {
+                            Offset::Beginning // State exists but not for this partition: read it from the start.
+                        } else {
+                            self.offset_mode.rdkafka_offset() // No restored state at all: use the configured mode.
+                        }
+                    });
+                our_partitions.insert((self.topic.clone(), p.id()), offset);
+            }
+        }
+
+        if our_partitions.is_empty() {
+            warn!(
+                "[Task {}] Subscribed to no partitions. Entering idle mode.",
+                ctx.subtask_idx
+            );
+            self.is_empty_assignment = true; // Makes `fetch_next` return `Idle` immediately.
+        } else {
+            let topic_partitions = TopicPartitionList::from_topic_map(&our_partitions)?;
+            consumer.assign(&topic_partitions)?;
+        }
+
+        self.consumer = Some(consumer);
+        Ok(())
+    }
+}
+
+// ============================================================================
+// 3. SourceOperator protocol implementation
+// ============================================================================
+
+#[async_trait]
+impl SourceOperator for KafkaSourceOperator { // Polling, checkpoint and shutdown hooks of the Kafka source.
+    fn name(&self) -> &str { // The operator is named after its topic.
+        &self.topic
+    }
+
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { // Creates the consumer and the rate limiter.
+        self.init_and_assign_consumer(ctx).await?;
+        self.rate_limiter = Some(GovernorRateLimiter::direct(Quota::per_second(
+            self.messages_per_second,
+        )));
+        Ok(())
+    }
+
+    async fn fetch_next(&mut self, _ctx: &mut TaskContext) -> Result<SourceEvent> { // Returns `Data` when the deserializer flushes a batch, otherwise `Idle`.
+        if self.is_empty_assignment {
+            return Ok(SourceEvent::Idle); // No partitions are assigned to this subtask.
+        }
+
+        let consumer = self
+            .consumer
+            .as_ref()
+            .ok_or_else(|| anyhow!("Kafka consumer not initialized"))?;
+        let rate_limiter = self
+            .rate_limiter
+            .as_ref()
+            .ok_or_else(|| anyhow!("rate limiter not initialized"))?;
+
+        let recv_result = tokio::time::timeout(Duration::from_millis(50), consumer.recv()).await; // Wait at most 50 ms for a message.
+
+        match recv_result {
+            Ok(Ok(msg)) => {
+                if let Some(payload) = msg.payload() { // Messages without a payload are skipped entirely.
+                    let timestamp = msg.timestamp().to_millis().unwrap_or(0); // 0 when the message has no timestamp.
+                    let topic = msg.topic();
+
+                    let connector_metadata = if !self.metadata_fields.is_empty() {
+                        let mut meta = HashMap::new();
+                        for f in &self.metadata_fields {
+                            meta.insert(
+                                f.field_name.as_str(),
+                                match f.key.as_str() {
+                                    "key" => FieldValueType::Bytes(msg.key()),
+                                    "offset_id" => FieldValueType::Int64(Some(msg.offset())),
+                                    "partition" => FieldValueType::Int32(Some(msg.partition())),
+                                    "topic" => FieldValueType::String(Some(topic)),
+                                    "timestamp" => FieldValueType::Int64(Some(timestamp)),
+                                    _ => continue, // Unknown metadata keys are ignored.
+                                },
+                            );
+                        }
+                        Some(meta)
+                    } else {
+                        None
+                    };
+
+                    self.deserializer.deserialize_slice(
+                        payload,
+                        timestamp.max(0) as u64, // Negative timestamps are clamped to 0.
+                        connector_metadata,
+                    )?;
+
+                    self.current_offsets.insert(msg.partition(), msg.offset()); // Remember the last consumed offset.
+
+                    rate_limiter.until_ready().await; // Throttle to `messages_per_second`.
+
+                    if self.deserializer.should_flush() {
+                        if let Some(batch) = self.deserializer.flush_buffer()? {
+                            return Ok(SourceEvent::Data(batch));
+                        }
+                    }
+                }
+                Ok(SourceEvent::Idle)
+            }
+            Ok(Err(e)) => {
+                error!("Kafka recv error: {}", e);
+                Err(anyhow!("Kafka error: {}", e))
+            }
+            Err(_) => { // Timed out waiting for a message: give the deserializer a chance to flush.
+                if self.deserializer.should_flush() {
+                    if let Some(batch) = self.deserializer.flush_buffer()? {
+                        return Ok(SourceEvent::Data(batch));
+                    }
+                }
+                Ok(SourceEvent::Idle)
+            }
+        }
+    }
+
+    async fn snapshot_state( // Persists resume offsets to state table "k" and commits them to the broker.
+        &mut self,
+        _barrier: CheckpointBarrier,
+        ctx: &mut TaskContext,
+    ) -> Result<()> {
+        debug!("Source [{}] executing checkpoint", ctx.subtask_idx);
+
+        let mut tm = ctx.table_manager_guard().await?;
+        let global_state = tm
+            .get_global_keyed_state::<i32, KafkaState>("k")
+            .await
+            .map_err(|e| anyhow!(e))?;
+
+        let mut topic_partitions = TopicPartitionList::new();
+
+        for (&partition, &offset) in &self.current_offsets { // `offset` is the last consumed offset.
+            global_state
+                .insert(
+                    partition,
+                    KafkaState {
+                        partition,
+                        offset: offset + 1, // Resume position: the message after the last consumed one.
+                    },
+                )
+                .await;
+
+            topic_partitions // Kafka expects the committed offset to be the next one to read, hence + 1.
+                .add_partition_offset(&self.topic, partition, Offset::Offset(offset + 1))
+                .map_err(|e| anyhow!("add_partition_offset: {e}"))?;
+        }
+
+        if let Some(consumer) = &self.consumer {
+            if let Err(e) = consumer.commit(&topic_partitions, CommitMode::Async) { // Best effort: a failed commit is only logged.
+                warn!("Failed to commit async offset to Kafka Broker: {:?}", e);
+            }
+        }
+
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<()> { // Drops the consumer.
+        info!("Kafka source shutting down gracefully");
+        self.consumer.take();
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs
new file mode 100644
index 00000000..ef4e3cb6
--- /dev/null
+++ b/src/runtime/streaming/operators/source/mod.rs
@@ -0,0 +1,5 @@
+//! Source implementations that connect to external systems (Kafka, etc.).
+
+pub mod kafka;
+
+pub use kafka::{BatchDeserializer, KafkaSourceOperator, KafkaState};
diff --git a/src/runtime/streaming/operators/watermark/mod.rs b/src/runtime/streaming/operators/watermark/mod.rs
new file mode 100644
index 00000000..becc0b8f
--- /dev/null
+++ b/src/runtime/streaming/operators/watermark/mod.rs
@@ -0,0 +1,3 @@
+pub mod watermark_generator;
+
+pub use watermark_generator::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState};
diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs
new file mode 100644
index 00000000..fa97b3d9
--- /dev/null
+++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs
@@ -0,0 +1,244 @@
+//! Expression-based watermark generator: aligned with the worker's `arrow/watermark_generator`; broadcasts downstream via [`StreamOutput::Watermark`].
+
+use anyhow::{anyhow, Result};
+use arrow::compute::kernels::aggregate;
+use arrow_array::cast::AsArray;
+use arrow_array::types::TimestampNanosecondType;
+use arrow_array::{RecordBatch, TimestampNanosecondArray};
+use bincode::{Decode, Encode};
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
+use datafusion_proto::protobuf::PhysicalExprNode;
+use prost::Message;
+use std::sync::Arc;
+use std::time::{Duration, SystemTime};
+use tracing::{debug, info};
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use async_trait::async_trait;
+use tracing_subscriber::Registry;
+use protocol::grpc::api::ExpressionWatermarkConfig;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{from_nanos, to_millis, CheckpointBarrier, FsSchema, Watermark};
+
+/// State that must be persisted to checkpoints (semantically matches the worker's `WatermarkGeneratorState`).
+#[derive(Debug, Copy, Clone, Encode, Decode, PartialEq, Eq)]
+pub struct WatermarkGeneratorState {
+    pub last_watermark_emitted_at: SystemTime, // Max event time of the batch that last triggered an emission.
+    pub max_watermark: SystemTime, // Largest watermark computed so far.
+}
+
+impl Default for WatermarkGeneratorState {
+    /// Initial state before any batch has been seen: both timestamps sit at
+    /// the Unix epoch.
+    fn default() -> Self {
+        let epoch = SystemTime::UNIX_EPOCH;
+        Self { last_watermark_emitted_at: epoch, max_watermark: epoch }
+    }
+}
+
+pub struct WatermarkGeneratorOperator { // Forwards batches and emits watermarks derived from an expression.
+    interval: Duration, // Event-time gap that must be exceeded between two emitted watermarks.
+    idle_time: Option<Duration>, // Wall-clock silence after which `Watermark::Idle` is emitted; `None` disables it.
+    expression: Arc<dyn PhysicalExpr>, // Evaluated per batch; must yield a `TimestampNanosecondArray`.
+    timestamp_index: usize, // Index of the column read by `extract_max_timestamp`.
+    state: WatermarkGeneratorState, // Checkpointed in `snapshot_state`, restored in `on_start`.
+    last_event_wall: SystemTime, // Wall-clock time of the latest batch, or of start-up.
+    is_idle: bool, // Set when `Watermark::Idle` was emitted; cleared on the next watermark emission.
+}
+
+impl WatermarkGeneratorOperator {
+    /// Builds an operator with default watermark state that starts out non-idle.
+    pub fn new(
+        interval: Duration,
+        idle_time: Option<Duration>,
+        expression: Arc<dyn PhysicalExpr>,
+        timestamp_index: usize,
+    ) -> Self {
+        Self {
+            state: WatermarkGeneratorState::default(),
+            last_event_wall: SystemTime::now(),
+            is_idle: false,
+            interval,
+            idle_time,
+            expression,
+            timestamp_index,
+        }
+    }
+
+    /// Largest value in the timestamp column, or `None` when `aggregate::max` finds none.
+    fn extract_max_timestamp(&self, batch: &RecordBatch) -> Option<SystemTime> {
+        let timestamps = batch
+            .column(self.timestamp_index)
+            .as_primitive::<TimestampNanosecondType>();
+        aggregate::max(timestamps).map(|nanos| from_nanos(nanos as u128))
+    }
+
+    /// Evaluates the watermark expression on `batch` and returns its minimum value.
+    ///
+    /// Fails if the result is not a `TimestampNanosecondArray` or has no minimum.
+    fn evaluate_watermark(&self, batch: &RecordBatch) -> Result<SystemTime> {
+        let evaluated = self.expression.evaluate(batch)?;
+        let values = evaluated.into_array(batch.num_rows())?;
+        let nanos_array = values
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| anyhow!("watermark expression must return TimestampNanosecondArray"))?;
+        let lowest = aggregate::min(nanos_array)
+            .ok_or_else(|| anyhow!("failed to extract min watermark from batch"))?;
+        Ok(from_nanos(lowest as u128))
+    }
+}
+
+#[async_trait]
+impl MessageOperator for WatermarkGeneratorOperator { // Operator hooks of the expression watermark generator.
+    fn name(&self) -> &str { // Fixed operator name.
+        "ExpressionWatermarkGenerator"
+    }
+
+    fn tick_interval(&self) -> Option<Duration> { // Requests a tick every second.
+        Some(Duration::from_secs(1))
+    }
+
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { // Resets the wall clock and restores this subtask's state, if any.
+        self.last_event_wall = SystemTime::now();
+
+        let mut tm = ctx.table_manager_guard().await?;
+        let gs = tm
+            .get_global_keyed_state::<u32, WatermarkGeneratorState>("s")
+            .await
+            .map_err(|e| anyhow!("global keyed state s: {e}"))?;
+
+        if let Some(recovered) = gs.get(&ctx.subtask_idx) { // State is keyed by subtask index.
+            self.state = *recovered;
+        }
+
+        Ok(())
+    }
+
+    async fn process_data( // Forwards the batch and, when due, appends an event-time watermark.
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        self.last_event_wall = SystemTime::now(); // Any batch counts as activity for idle detection.
+
+        let mut outputs = vec![StreamOutput::Forward(batch.clone())]; // The batch itself is always forwarded.
+
+        let Some(max_batch_ts) = self.extract_max_timestamp(&batch) else {
+            return Ok(outputs); // No max timestamp available: forward only, state untouched.
+        };
+
+        let new_watermark = self.evaluate_watermark(&batch)?;
+        self.state.max_watermark = self.state.max_watermark.max(new_watermark); // The watermark never moves backwards.
+
+        let time_since_last_emit = max_batch_ts // Event-time distance to the last emission; zero if the batch is older.
+            .duration_since(self.state.last_watermark_emitted_at)
+            .unwrap_or(Duration::ZERO);
+
+        if self.is_idle || time_since_last_emit > self.interval { // Emit when leaving idle or once `interval` is exceeded.
+            debug!(
+                "[{}] emitting expression watermark {}",
+                ctx.subtask_idx,
+                to_millis(self.state.max_watermark)
+            );
+
+            outputs.push(StreamOutput::Watermark(Watermark::EventTime(
+                self.state.max_watermark,
+            )));
+
+            self.state.last_watermark_emitted_at = max_batch_ts;
+            self.is_idle = false;
+        }
+
+        Ok(outputs)
+    }
+
+    async fn process_watermark( // Incoming watermarks are dropped.
+        &mut self,
+        _watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+
+    async fn process_tick( // Emits `Watermark::Idle` once after `idle_time` of wall-clock silence.
+        &mut self,
+        _tick_index: u64,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        if let Some(idle_timeout) = self.idle_time { // Idle detection is off when `idle_time` is `None`.
+            let elapsed = self
+                .last_event_wall
+                .elapsed()
+                .unwrap_or(Duration::ZERO); // Treat a clock error as no time elapsed.
+            if !self.is_idle && elapsed > idle_timeout {
+                info!(
+                    "task [{}] entering Idle after {:?}",
+                    ctx.subtask_idx, idle_timeout
+                );
+                self.is_idle = true; // Prevents repeated idle notifications until data arrives again.
+                return Ok(vec![StreamOutput::Watermark(Watermark::Idle)]);
+            }
+        }
+        Ok(vec![])
+    }
+
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { // Stores the state under this subtask's index in table "s".
+        let mut tm = ctx.table_manager_guard().await?;
+        tm.get_global_keyed_state::<u32, WatermarkGeneratorState>("s")
+            .await
+            .map_err(|e| anyhow!("global keyed state s: {e}"))?
+            .insert(ctx.subtask_idx, self.state)
+            .await;
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> { // Emits one final watermark built from `u64::MAX` nanoseconds.
+        Ok(vec![StreamOutput::Watermark(Watermark::EventTime(from_nanos(
+            u64::MAX as u128,
+        )))])
+    }
+}
+
+/// Factory that builds a [`WatermarkGeneratorOperator`] from an [`ExpressionWatermarkConfig`].
+pub struct WatermarkGeneratorConstructor;
+
+impl WatermarkGeneratorConstructor {
+    /// Decodes the input schema and the encoded watermark expression from `config` and
+    /// assembles the operator. Fails when the schema is missing or cannot be converted, or
+    /// when the expression cannot be decoded or parsed.
+    // NOTE(review): `Registry` resolves to `tracing_subscriber::Registry` in this file's
+    // imports — confirm that is the registry type `parse_physical_expr` expects.
+    pub fn with_config(
+        &self,
+        config: ExpressionWatermarkConfig,
+        registry: Arc<Registry>,
+    ) -> anyhow::Result<WatermarkGeneratorOperator> {
+        // 1. Input schema (required).
+        let proto_schema = config
+            .input_schema
+            .ok_or_else(|| anyhow!("missing input schema"))?;
+        let schema: FsSchema = proto_schema
+            .try_into()
+            .map_err(|e| anyhow!("input schema: {e}"))?;
+
+        // 2. Watermark expression, decoded and then bound to the input schema.
+        let mut encoded = config.expression.as_slice();
+        let node = PhysicalExprNode::decode(&mut encoded)
+            .map_err(|e| anyhow!("decode expression: {e}"))?;
+        let codec = DefaultPhysicalExtensionCodec {};
+        let expression = parse_physical_expr(&node, registry.as_ref(), &schema.schema, &codec)
+            .map_err(|e| anyhow!("parse physical expr: {e}"))?;
+
+        Ok(WatermarkGeneratorOperator::new(
+            Duration::from_micros(config.period_micros),
+            config.idle_time_micros.map(Duration::from_micros),
+            expression,
+            schema.timestamp_index,
+        ))
+    }
+}
diff --git a/src/runtime/streaming/operators/windows/mod.rs b/src/runtime/streaming/operators/windows/mod.rs
new file mode 100644
index 00000000..ba594016
--- /dev/null
+++ b/src/runtime/streaming/operators/windows/mod.rs
@@ -0,0 +1,9 @@
+pub mod session_aggregating_window;
+pub mod sliding_aggregating_window;
+pub mod tumbling_aggregating_window;
+pub mod window_function;
+
+pub use session_aggregating_window::{SessionAggregatingWindowConstructor, SessionWindowOperator};
+pub use sliding_aggregating_window::{SlidingAggregatingWindowConstructor, SlidingWindowOperator};
+pub use tumbling_aggregating_window::{TumblingAggregateWindowConstructor, TumblingWindowOperator};
+pub use window_function::{WindowFunctionConstructor, WindowFunctionOperator};
diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
new file mode 100644
index 00000000..ebe75c4c
--- /dev/null
+++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
@@ -0,0 +1,804 @@
+//! Session-window aggregation, aligned with the worker's `arrow/session_aggregating_window`; implements [`MessageOperator`].
+
+use anyhow::{anyhow, bail, Context, Result};
+use arrow::compute::{
+    concat_batches, filter_record_batch, kernels::cmp::gt_eq, lexsort_to_indices, max, partition, take,
+};
+use arrow::row::{RowConverter, SortField};
+use arrow_array::types::TimestampNanosecondType;
+use arrow_array::{
+    Array, BooleanArray, PrimitiveArray, RecordBatch, StructArray, TimestampNanosecondArray,
+};
+use arrow_schema::{DataType, Field, FieldRef, Schema};
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::execution::SendableRecordBatchStream;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_proto::physical_plan::AsExecutionPlan;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use futures::StreamExt;
+use prost::Message;
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::sync::{Arc, RwLock};
+use std::time::{Duration, SystemTime};
+use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
+use tracing::warn;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use async_trait::async_trait;
+use tracing_subscriber::Registry;
+use protocol::grpc::api::SessionWindowAggregateOperator;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
+use crate::sql::common::converter::Converter;
+use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::schema::utils::window_arrow_struct;
+// ============================================================================
+// Domain model
+// ============================================================================
+
+struct SessionWindowConfig { // Settings shared by every per-key session state of one operator.
+    gap: Duration, // Inactivity gap; a closed session's window end is `data_end + gap`.
+    input_schema_ref: FsSchemaRef, // Input schema; supplies the timestamp index and routing keys.
+    window_field: FieldRef, // Struct-typed output field that carries the window start/end.
+    window_index: usize, // Column position at which the window struct is inserted in the output.
+    final_physical_exec: Arc<dyn ExecutionPlan>, // Aggregation plan, reset and executed once per session.
+    receiver_hook: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>, // Slot a new session's receiver is stored in before the plan is executed.
+    output_schema: Arc<Schema>, // Schema of the batches built by `format_to_arrow`.
+}
+
+struct ActiveSession { // A session that is currently accumulating rows for one key.
+    data_start: SystemTime, // Earliest timestamp folded into the session so far.
+    data_end: SystemTime, // Latest timestamp folded into the session so far (gap not included).
+    sender: Option<UnboundedSender<RecordBatch>>, // Input channel of the plan; taken when the session closes.
+    result_stream: SendableRecordBatchStream, // Output stream of the executing aggregation plan.
+}
+
+impl ActiveSession {
+    /// Starts a fresh session for one key.
+    ///
+    /// Resets the shared aggregation plan, begins executing its partition 0, and records
+    /// `initial_timestamp` as both ends of the session's data range.
+    async fn new(
+        aggregation_plan: Arc<dyn ExecutionPlan>,
+        initial_timestamp: SystemTime,
+        sender: UnboundedSender<RecordBatch>,
+    ) -> Result<Self> {
+        aggregation_plan.reset()?;
+        let result_stream = aggregation_plan.execute(0, SessionContext::new().task_ctx())?;
+        let (data_start, data_end) = (initial_timestamp, initial_timestamp);
+        Ok(Self { data_start, data_end, sender: Some(sender), result_stream })
+    }
+
+    /// Offers a timestamp-sorted `batch` to this session.
+    ///
+    /// Rows within `gap` of the session's running end are sent to the aggregation plan and
+    /// extend `data_end`. Returns `Ok(None)` when the whole batch was absorbed, otherwise
+    /// `Ok(Some((start, rest)))` holding the unconsumed tail and its first timestamp.
+    ///
+    /// # Errors
+    /// Fails when column `ts_idx` is not a nanosecond timestamp array or the channel is closed.
+    fn ingest_batch(
+        &mut self,
+        batch: RecordBatch,
+        gap: Duration,
+        ts_idx: usize,
+    ) -> Result<Option<(SystemTime, RecordBatch)>> {
+        // An empty batch has no first/last row to inspect, so there is nothing to absorb.
+        if batch.num_rows() == 0 {
+            return Ok(None);
+        }
+        let ts_col = batch
+            .column(ts_idx)
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| anyhow!("expected timestamp column"))?;
+        let start_ts = ts_col.value(0);
+        let end_ts = ts_col.value(batch.num_rows() - 1);
+        let current_end_with_gap = to_nanos(self.data_end + gap) as i64;
+
+        // The batch starts past the gap: it belongs to a later session, hand it back whole.
+        if current_end_with_gap < start_ts {
+            return Ok(Some((from_nanos(start_ts as u128), batch)));
+        }
+        self.data_start = self.data_start.min(from_nanos(start_ts as u128));
+
+        // The batch ends inside the gap: absorb every row.
+        if end_ts < current_end_with_gap {
+            self.data_end = self.data_end.max(from_nanos(end_ts as u128));
+            self.send_to_plan(batch)?;
+            return Ok(None);
+        }
+
+        // Row 0 is always absorbed (which guarantees progress), so it must extend the session
+        // end too; otherwise rows within `gap` of row 0 would be split into a new session.
+        self.data_end = self.data_end.max(from_nanos(start_ts as u128));
+        let mut split_idx = 1;
+        while split_idx < batch.num_rows() {
+            let val = ts_col.value(split_idx);
+            if val >= to_nanos(self.data_end + gap) as i64 {
+                break;
+            }
+            self.data_end = self.data_end.max(from_nanos(val as u128));
+            split_idx += 1;
+        }
+        if split_idx == batch.num_rows() {
+            self.send_to_plan(batch)?;
+            return Ok(None);
+        }
+        self.send_to_plan(batch.slice(0, split_idx))?;
+        let remaining_batch = batch.slice(split_idx, batch.num_rows() - split_idx);
+        Ok(Some((from_nanos(ts_col.value(split_idx) as u128), remaining_batch)))
+    }
+
+    /// Sends `batch` to the aggregation plan through the session's channel.
+    fn send_to_plan(&self, batch: RecordBatch) -> Result<()> {
+        let sender = self.sender.as_ref().ok_or_else(|| anyhow!("session sender already closed"))?;
+        sender.send(batch).map_err(|e| anyhow!("session channel send: {e}"))
+    }
+
+    /// Closes the session and collects its aggregate.
+    ///
+    /// The sender is dropped first (presumably signalling end-of-input to the plan), then the
+    /// result stream is drained; anything other than one single-row batch is an error.
+    async fn close_and_drain(mut self, gap: Duration) -> Result<SessionWindowResult> {
+        drop(self.sender.take());
+        let mut outputs = Vec::new();
+        while let Some(item) = self.result_stream.next().await {
+            outputs.push(item?);
+        }
+        let mut outputs = outputs.into_iter();
+        let batch = match (outputs.next(), outputs.next()) {
+            (Some(only), None) if only.num_rows() == 1 => only,
+            _ => bail!("active session must yield exactly one aggregate row"),
+        };
+        let (window_start, window_end) = (self.data_start, self.data_end + gap);
+        Ok(SessionWindowResult { window_start, window_end, batch })
+    }
+}
+
+struct SessionWindowResult { // Aggregate output of one closed session.
+    window_start: SystemTime, // The session's `data_start` at close time.
+    window_end: SystemTime, // The session's `data_end` plus the gap.
+    batch: RecordBatch, // The single aggregate row produced by the plan.
+}
+
+struct KeySessionState { // Session bookkeeping for a single routing key.
+    config: Arc<SessionWindowConfig>, // Operator-wide configuration shared by all keys.
+    active_session: Option<ActiveSession>, // The session currently open for this key, if any.
+    buffered_batches: BTreeMap<SystemTime, Vec<RecordBatch>>, // Pending batches, keyed by their first timestamp.
+}
+
+impl KeySessionState {
+    /// Creates the state for a key that has no open session and no buffered batches yet;
+    /// `config` is the operator-wide configuration shared by all keys.
+    fn new(config: Arc<SessionWindowConfig>) -> Self {
+        let active_session = None;
+        let buffered_batches = BTreeMap::new();
+        Self { config, active_session, buffered_batches }
+    }
+
+    fn is_empty(&self) -> bool { // True when nothing is open and nothing is buffered.
+        self.buffered_batches.is_empty() && self.active_session.is_none()
+    }
+
+    /// Start of the open session if there is one, otherwise the earliest buffered key.
+    fn earliest_data_time(&self) -> Option<SystemTime> {
+        let buffered = || self.buffered_batches.first_key_value().map(|(first, _)| *first);
+        let open = self.active_session.as_ref().map(|session| session.data_start);
+        open.or_else(buffered)
+    }
+
+    /// Watermark time at which this key next needs attention.
+    ///
+    /// That is the open session's `data_end + gap`; with no open session it is the earliest
+    /// buffered key minus the gap, or `None` when nothing is buffered either.
+    fn next_watermark_action_time(&self) -> Option<SystemTime> {
+        let gap = self.config.gap;
+        match &self.active_session {
+            Some(open) => Some(open.data_end + gap),
+            None => self.buffered_batches.keys().next().map(|first| *first - gap),
+        }
+    }
+
+    /// Applies `watermark` to this key and returns every session it closes.
+    ///
+    /// An open session is closed once its `data_end + gap` is strictly before the watermark.
+    /// With no open session, a new one is started from the earliest buffered batch, unless
+    /// that batch starts more than `gap` after the watermark, and the buffer is drained into
+    /// it. This repeats until neither step applies.
+    async fn advance_by_watermark(&mut self, watermark: SystemTime) -> Result<Vec<SessionWindowResult>> {
+        let gap = self.config.gap;
+        let mut closed = Vec::new();
+
+        loop {
+            match self.active_session.take() {
+                Some(open) if open.data_end + gap < watermark => {
+                    closed.push(open.close_and_drain(gap).await?);
+                }
+                Some(open) => {
+                    // Not yet past the gap: put the session back and stop.
+                    self.active_session = Some(open);
+                    break;
+                }
+                None => {
+                    let Some(initial_ts) = self.buffered_batches.keys().next().copied() else {
+                        break;
+                    };
+                    if watermark + gap < initial_ts {
+                        break;
+                    }
+
+                    // Store the receiver in the shared hook before the plan is executed.
+                    let (tx, rx) = unbounded_channel();
+                    *self.config.receiver_hook.write().unwrap() = Some(rx);
+
+                    let plan = self.config.final_physical_exec.clone();
+                    self.active_session = Some(ActiveSession::new(plan, initial_ts, tx).await?);
+                    self.drain_buffer_to_active_session()?;
+                }
+            }
+        }
+
+        Ok(closed)
+    }
+
+    /// Feeds buffered batches into the open session.
+    ///
+    /// Batches are taken in key order while the earliest key is not past the session's
+    /// `data_end + gap`. Whatever tail the session hands back is buffered again under the
+    /// tail's own first timestamp.
+    ///
+    /// # Errors
+    /// Fails when no session is open or when ingesting a batch fails.
+    fn drain_buffer_to_active_session(&mut self) -> Result<()> {
+        let gap = self.config.gap;
+        let ts_idx = self.config.input_schema_ref.timestamp_index;
+        let Some(session) = self.active_session.as_mut() else {
+            bail!("drain_buffer_to_active_session without active session");
+        };
+
+        while let Some(earliest) = self.buffered_batches.first_entry() {
+            if session.data_end + gap < *earliest.key() {
+                break;
+            }
+            for batch in earliest.remove() {
+                if let Some((rest_start, rest)) = session.ingest_batch(batch, gap, ts_idx)? {
+                    self.buffered_batches.entry(rest_start).or_default().push(rest);
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Buffers `batch` under `start_time`, feeds the open session (if any), and, when a
+    /// watermark is known, advances the key by it.
+    ///
+    /// Advancing here is not expected to close any session; if it does, that is reported
+    /// as an invariant violation.
+    async fn add_data(
+        &mut self,
+        start_time: SystemTime,
+        batch: RecordBatch,
+        watermark: Option<SystemTime>,
+    ) -> Result<()> {
+        self.buffered_batches.entry(start_time).or_default().push(batch);
+        if self.active_session.is_some() {
+            self.drain_buffer_to_active_session()?;
+        }
+        let Some(wm) = watermark else {
+            return Ok(());
+        };
+        if self.advance_by_watermark(wm).await?.is_empty() {
+            return Ok(());
+        }
+        bail!("unexpected flush during data ingestion; session watermark invariant violated")
+    }
+}
+
+/// Returns the timestamp of row 0 of `batch`, i.e. its earliest time when the batch is sorted.
+///
+/// Panics if the schema's timestamp column is not a nanosecond timestamp array.
+fn start_time_for_sorted_batch(batch: &RecordBatch, schema: &FsSchema) -> SystemTime {
+    let column = batch.column(schema.timestamp_index).as_any();
+    let timestamps = column.downcast_ref::<TimestampNanosecondArray>().expect("timestamp column");
+    from_nanos(timestamps.value(0) as u128)
+}
+
+/// Lays out the output schema: the leading key fields with `window_field` inserted at
+/// `window_index`, followed by the aggregate fields and finally the input timestamp field.
+fn build_session_output_schema(
+    input: &FsSchema,
+    window_field: FieldRef,
+    window_index: usize,
+    agg_schema: &Schema,
+) -> Result<Arc<Schema>> {
+    let key_count = input.routing_keys().map_or(0, |keys| keys.len());
+    let mut fields: Vec<FieldRef> = input.schema.fields()[..key_count].to_vec();
+    fields.insert(window_index, window_field);
+    fields.extend(agg_schema.fields().iter().cloned());
+    fields.push(input.schema.fields()[input.timestamp_index].clone());
+    Ok(Arc::new(Schema::new(fields)))
+}
+
+// ============================================================================
+// Operator
+// ============================================================================
+
+pub struct SessionWindowOperator { // Session-window aggregation operator; state is tracked per encoded key.
+    config: Arc<SessionWindowConfig>, // Configuration shared with every per-key state.
+    row_converter: Converter, // Converts key columns to row bytes and back.
+
+    session_states: HashMap<Vec<u8>, KeySessionState>, // Per-key state, indexed by the encoded key.
+    pq_watermark_actions: BTreeMap<SystemTime, HashSet<Vec<u8>>>, // Keys filed under their next watermark action time.
+    pq_start_times: BTreeMap<SystemTime, HashSet<Vec<u8>>>, // Keys filed under their earliest data time.
+}
+
+impl SessionWindowOperator {
+    /// Keeps only the rows whose timestamp is at or after `watermark`.
+    ///
+    /// Without a watermark the batch is returned unchanged.
+    fn filter_batch_by_time(&self, batch: RecordBatch, watermark: Option<SystemTime>) -> Result<RecordBatch> {
+        let wm = match watermark {
+            Some(wm) => wm,
+            None => return Ok(batch),
+        };
+        let column = batch.column(self.config.input_schema_ref.timestamp_index).as_any();
+        let timestamps = column
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| anyhow!("expected timestamp column"))?;
+        let cutoff = TimestampNanosecondArray::new_scalar(to_nanos(wm) as i64);
+        let keep = gt_eq(timestamps, &cutoff)?;
+        Ok(filter_record_batch(&batch, &keep)?)
+    }
+
+    /// Reorders `batch` by the schema's sort columns, propagating (rather than unwrapping)
+    /// any error raised while sorting or while gathering a column with `take`.
+    fn sort_batch(&self, batch: &RecordBatch) -> Result<RecordBatch> {
+        let sort_columns = self.config.input_schema_ref.sort_columns(batch, true);
+        let sort_indices = lexsort_to_indices(&sort_columns, None)?;
+        let columns = batch
+            .columns()
+            .iter()
+            .map(|c| take(c, &sort_indices, None))
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(RecordBatch::try_new(batch.schema(), columns)?)
+    }
+
+    /// Splits a sorted batch by routing key and feeds each slice to that key's state.
+    ///
+    /// Once a slice has been added, the key is re-filed in both priority maps under its
+    /// current next-watermark-action time and earliest-data time. Buckets emptied by such a
+    /// move are removed, so the maps do not accumulate dead timestamps.
+    ///
+    /// # Errors
+    /// Propagates partitioning, key-encoding and session errors, and fails if a key has no
+    /// action time or start time after data was added to it.
+    async fn ingest_sorted_batch(
+        &mut self,
+        sorted_batch: RecordBatch,
+        watermark: Option<SystemTime>,
+    ) -> Result<()> {
+        let key_count = self.config.input_schema_ref.routing_keys().map_or(0, |k| k.len());
+
+        let partition_ranges = if !self.config.input_schema_ref.has_routing_keys() {
+            vec![0..sorted_batch.num_rows()]
+        } else {
+            let key_cols = sorted_batch
+                .columns()
+                .iter()
+                .take(key_count)
+                .cloned()
+                .collect::<Vec<_>>();
+            partition(key_cols.as_slice())?.ranges()
+        };
+
+        for range in partition_ranges {
+            let key_batch = sorted_batch.slice(range.start, range.end - range.start);
+
+            // Encode the key columns of the first row; every row in the slice shares them.
+            let row_key = if key_count == 0 {
+                Vec::new()
+            } else {
+                self.row_converter
+                    .convert_columns(&key_batch.slice(0, 1).columns()[0..key_count])
+                    .context("row key convert")?
+                    .as_ref()
+                    .to_vec()
+            };
+
+            let state = self
+                .session_states
+                .entry(row_key.clone())
+                .or_insert_with(|| KeySessionState::new(self.config.clone()));
+
+            let initial_action = state.next_watermark_action_time();
+            let initial_start = state.earliest_data_time();
+
+            let batch_start = start_time_for_sorted_batch(&key_batch, &self.config.input_schema_ref);
+            state
+                .add_data(batch_start, key_batch, watermark)
+                .await?;
+
+            let new_action = state
+                .next_watermark_action_time()
+                .ok_or_else(|| anyhow!("missing next watermark action after add_data"))?;
+            let new_start = state
+                .earliest_data_time()
+                .ok_or_else(|| anyhow!("missing earliest data after add_data"))?;
+
+            // A key that had no state before has no old bucket in either map.
+            Self::refile_key(
+                &mut self.pq_watermark_actions,
+                initial_action,
+                new_action,
+                &row_key,
+                "pq watermark entry",
+            );
+            Self::refile_key(
+                &mut self.pq_start_times,
+                initial_start,
+                new_start,
+                &row_key,
+                "pq start entry",
+            );
+        }
+        Ok(())
+    }
+
+    /// Moves `key` from the `old` bucket of `queue` (if it had one) to the `new` bucket and
+    /// removes the old bucket once it is empty. `missing` is the panic message used when the
+    /// old bucket is unexpectedly absent.
+    fn refile_key(
+        queue: &mut BTreeMap<SystemTime, HashSet<Vec<u8>>>,
+        old: Option<SystemTime>,
+        new: SystemTime,
+        key: &[u8],
+        missing: &str,
+    ) {
+        if old == Some(new) {
+            return;
+        }
+        if let Some(old) = old {
+            let bucket = queue.get_mut(&old).expect(missing);
+            bucket.remove(key);
+            if bucket.is_empty() {
+                queue.remove(&old);
+            }
+        }
+        queue.entry(new).or_default().insert(key.to_vec());
+    }
+
+    /// Processes every key whose next watermark action lies strictly before `watermark`.
+    ///
+    /// Each such key is advanced by the watermark; sessions that close are collected and
+    /// returned as one formatted batch (or no batch when nothing closed). Keys left with no
+    /// state are dropped, the others are re-filed under their new start and action times.
+    /// Start-time buckets that become empty are removed so they neither leak nor make
+    /// `earliest_batch_time` report a time that no key holds any more.
+    async fn evaluate_watermark(&mut self, watermark: SystemTime) -> Result<Vec<RecordBatch>> {
+        let mut emit_results: Vec<(Vec<u8>, Vec<SessionWindowResult>)> = Vec::new();
+
+        loop {
+            let popped_action_time = match self.pq_watermark_actions.first_key_value() {
+                Some((t, _)) if *t < watermark => *t,
+                _ => break,
+            };
+            let keys = self
+                .pq_watermark_actions
+                .remove(&popped_action_time)
+                .expect("pop watermark pq");
+
+            for key in keys {
+                let state = self
+                    .session_states
+                    .get_mut(&key)
+                    .ok_or_else(|| anyhow!("missing session state for key"))?;
+                let initial_start = state
+                    .earliest_data_time()
+                    .ok_or_else(|| anyhow!("missing earliest data in evaluate_watermark"))?;
+
+                let completed_sessions = state.advance_by_watermark(watermark).await?;
+                if !completed_sessions.is_empty() {
+                    emit_results.push((key.clone(), completed_sessions));
+                }
+
+                // Unfile the key from its old start time and drop the bucket if it is now empty.
+                let start_bucket = self.pq_start_times.get_mut(&initial_start).expect("pq start");
+                start_bucket.remove(&key);
+                if start_bucket.is_empty() {
+                    self.pq_start_times.remove(&initial_start);
+                }
+
+                if state.is_empty() {
+                    self.session_states.remove(&key);
+                    continue;
+                }
+                let new_start = state.earliest_data_time().expect("earliest after advance");
+                self.pq_start_times.entry(new_start).or_default().insert(key.clone());
+
+                let new_next_action = state
+                    .next_watermark_action_time()
+                    .expect("next action after advance");
+                if new_next_action == popped_action_time {
+                    bail!(
+                        "processed watermark at {:?} but next watermark action stayed at {:?}",
+                        watermark, popped_action_time
+                    );
+                }
+                self.pq_watermark_actions.entry(new_next_action).or_default().insert(key);
+            }
+        }
+
+        if emit_results.is_empty() {
+            return Ok(vec![]);
+        }
+        Ok(vec![self.format_to_arrow(emit_results)?])
+    }
+
+    /// Assembles closed sessions into one output batch.
+    ///
+    /// Every session contributes one row: its key columns (decoded from the encoded key), the
+    /// window struct built from start/end inserted at `window_index`, its aggregate columns,
+    /// and a timestamp one nanosecond before the window end. `results` must hold at least one
+    /// session, because the first one supplies the schema used for concatenation.
+    fn format_to_arrow(&self, results: Vec<(Vec<u8>, Vec<SessionWindowResult>)>) -> Result<RecordBatch> {
+        let mut rows = Vec::new();
+        let mut session_results = Vec::new();
+        for (row, sessions) in results {
+            for session in sessions {
+                rows.push(row.clone());
+                session_results.push(session);
+            }
+        }
+
+        let key_columns = match self.row_converter.parser() {
+            Some(parser) => {
+                let parsed = rows.iter().map(|row| parser.parse(row.as_ref())).collect();
+                self.row_converter.convert_rows(parsed)?
+            }
+            None => vec![],
+        };
+
+        let nanos = |t: SystemTime| to_nanos(t) as i64;
+        let start_times: Vec<i64> = session_results.iter().map(|r| nanos(r.window_start)).collect();
+        let end_times: Vec<i64> = session_results.iter().map(|r| nanos(r.window_end)).collect();
+        let event_times: Vec<i64> = end_times.iter().map(|t| t - 1).collect();
+        let window_start_array = TimestampNanosecondArray::from(start_times);
+        let window_end_array = TimestampNanosecondArray::from(end_times);
+        let timestamp_array = TimestampNanosecondArray::from(event_times);
+
+        let aggregates: Vec<&RecordBatch> = session_results.iter().map(|res| &res.batch).collect();
+        let merged_batch = concat_batches(&session_results[0].batch.schema(), aggregates)?;
+
+        let DataType::Struct(window_fields) = self.config.window_field.data_type() else {
+            bail!("expected window field to be a struct");
+        };
+        let window_struct_array = StructArray::try_new(
+            window_fields.clone(),
+            vec![Arc::new(window_start_array), Arc::new(window_end_array)],
+            None,
+        )?;
+
+        let mut columns = key_columns;
+        columns.insert(self.config.window_index, Arc::new(window_struct_array));
+        columns.extend_from_slice(merged_batch.columns());
+        columns.push(Arc::new(timestamp_array));
+
+        RecordBatch::try_new(self.config.output_schema.clone(), columns)
+            .context("failed to create session window output batch")
+    }
+
+    /// Earliest start time that still has at least one key filed under it (any empty
+    /// buckets are skipped), or `None` when no key is tracked.
+    fn earliest_batch_time(&self) -> Option<SystemTime> {
+        self.pq_start_times.iter().find(|(_, keys)| !keys.is_empty()).map(|(start, _)| *start)
+    }
+}
+
+#[async_trait]
+impl MessageOperator for SessionWindowOperator {
+    fn name(&self) -> &str { // Fixed name identifying this operator.
+        "SessionWindow"
+    }
+
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { // Rebuilds in-memory session state from the state tables.
+        let mut tm = ctx.table_manager_guard().await?;
+        let start_time_opt = tm
+            .get_global_keyed_state::<u32, Option<SystemTime>>("e") // Entries are written per subtask by `snapshot_state`.
+            .await
+            .map_err(|e| anyhow!("global keyed state e: {e}"))?
+            .get_all()
+            .values()
+            .filter_map(|e| *e)
+            .min(); // Earliest recorded start time across all entries.
+
+        let Some(start_time) = start_time_opt else {
+            return Ok(()); // No start time recorded, so there is nothing to replay.
+        };
+
+        let state_table = tm
+            .get_expiring_time_key_table("s", Some(start_time)) // Table "s" holds the batches stored by `process_data`.
+            .await
+            .map_err(|e| anyhow!("expiring time key table s: {e}"))?;
+        for (_, batches) in state_table.all_batches_for_watermark(Some(start_time)) {
+            for batch in batches {
+                let filtered = self.filter_batch_by_time(batch.clone(), Some(start_time))?; // Keep rows at or after `start_time`.
+                if filtered.num_rows() > 0 {
+                    let sorted = self.sort_batch(&filtered)?;
+                    self.ingest_sorted_batch(sorted, Some(start_time)).await?;
+                }
+            }
+        }
+
+        if let Some(ts) = ctx.last_present_watermark() { // Re-apply the last known watermark to the restored state.
+            let evicted = self.evaluate_watermark(ts).await?;
+            if !evicted.is_empty() { // Batches closed here are only logged, not emitted.
+                warn!(
+                    "evicted {} session result batch(es) when restoring from state",
+                    evicted.len()
+                );
+            }
+        }
+        Ok(())
+    }
+
+    /// Buffers an input batch for session processing; no output is produced here.
+    ///
+    /// Rows earlier than the last watermark are dropped. What remains is sorted, stored in
+    /// the expiring table "s" under the batch's maximum timestamp, and then fed to the
+    /// per-key session states.
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let watermark_time = ctx.last_present_watermark();
+        let on_time = self.filter_batch_by_time(batch, watermark_time)?;
+        if on_time.num_rows() == 0 {
+            return Ok(vec![]);
+        }
+        let sorted_batch = self.sort_batch(&on_time)?;
+
+        let ts_idx = self.config.input_schema_ref.timestamp_index;
+        let timestamps = sorted_batch
+            .column(ts_idx)
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| anyhow!("expected timestamp column"))?;
+        let max_timestamp = max(timestamps).ok_or_else(|| anyhow!("expected max timestamp"))?;
+        {
+            let mut tm = ctx.table_manager_guard().await?;
+            let table = tm
+                .get_expiring_time_key_table("s", ctx.last_present_watermark())
+                .await
+                .map_err(|e| anyhow!("expiring time key table s: {e}"))?;
+            table.insert(from_nanos(max_timestamp as u128), sorted_batch.clone());
+        }
+        self.ingest_sorted_batch(sorted_batch, watermark_time).await?;
+        Ok(vec![])
+    }
+
+    /// Handles an incoming watermark.
+    ///
+    /// Event-time watermarks are evaluated and the resulting batches are forwarded; any
+    /// other watermark produces no output.
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let Watermark::EventTime(event_time) = watermark else {
+            return Ok(Vec::new());
+        };
+        let closed = self.evaluate_watermark(event_time).await?;
+        Ok(closed.into_iter().map(StreamOutput::Forward).collect())
+    }
+
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> { // Persists operator state for a checkpoint.
+        let watermark = ctx.last_present_watermark();
+        let mut tm = ctx.table_manager_guard().await?;
+
+        tm.get_expiring_time_key_table("s", watermark) // Flush table "s", which holds the batches stored by `process_data`.
+            .await
+            .map_err(|e| anyhow!("expiring time key table s: {e}"))?
+            .flush(watermark)
+            .await?;
+
+        tm.get_global_keyed_state::<u32, Option<SystemTime>>("e") // Record this subtask's earliest batch time; read back in `on_start`.
+            .await
+            .map_err(|e| anyhow!("global keyed state e: {e}"))?
+            .insert(ctx.subtask_idx, self.earliest_batch_time())
+            .await;
+
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> { // Produces no output on close.
+        Ok(Vec::new())
+    }
+}
+
+// ============================================================================
+// Constructor (returns a [`SessionWindowOperator`] that an actor subtask can `Box::new` directly)
+// ============================================================================
+
+pub struct SessionAggregatingWindowConstructor; // Stateless factory; see `with_config`.
+
+impl SessionAggregatingWindowConstructor {
+    /// Builds a [`SessionWindowOperator`] from its protobuf configuration.
+    ///
+    /// # Errors
+    /// Fails when the aggregation plan or input schema is missing or cannot be decoded, or
+    /// when `window_index` does not fit within the routing-key columns.
+    pub fn with_config(
+        &self,
+        config: SessionWindowAggregateOperator,
+        registry: Arc<Registry>,
+    ) -> anyhow::Result<SessionWindowOperator> {
+        let window_field = Arc::new(Field::new(config.window_field_name, window_arrow_struct(), true));
+        // Shared with the codec (presumably so the decoded plan's source can take the receiver).
+        let receiver_hook = Arc::new(RwLock::new(None));
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::UnboundedBatchStream(receiver_hook.clone()),
+        };
+
+        let final_plan = PhysicalPlanNode::decode(&mut config.final_aggregation_plan.as_slice())
+            .context("decode final aggregation plan")?;
+        let runtime = RuntimeEnvBuilder::new().build().context("build runtime env")?;
+        let final_execution_plan = final_plan
+            .try_into_physical_plan(registry.as_ref(), &runtime, &codec)
+            .context("build final aggregation physical plan")?;
+
+        let input_schema: FsSchema = config
+            .input_schema
+            .ok_or_else(|| anyhow!("missing input schema"))?
+            .try_into()
+            .map_err(|e| anyhow!("input schema: {e}"))?;
+
+        // The window column is inserted among the key columns; reject an index that would
+        // otherwise panic in `Vec::insert` when the schema and output batches are assembled.
+        let key_count = input_schema.routing_keys().map_or(0, |keys| keys.len());
+        let window_index = config.window_index as usize;
+        if window_index > key_count {
+            bail!("window index {window_index} exceeds the {key_count} routing key column(s)");
+        }
+
+        let row_converter = match input_schema.routing_keys() {
+            None => {
+                let array = Arc::new(BooleanArray::from(vec![false]));
+                Converter::Empty(RowConverter::new(vec![SortField::new(DataType::Boolean)])?, array)
+            }
+            Some(keys) => {
+                let key_fields = input_schema.schema.fields().iter().take(keys.len());
+                let sort_fields = key_fields.map(|f| SortField::new(f.data_type().clone())).collect();
+                Converter::RowConverter(RowConverter::new(sort_fields)?)
+            }
+        };
+
+        let output_schema = build_session_output_schema(
+            &input_schema,
+            window_field.clone(),
+            window_index,
+            final_execution_plan.schema().as_ref(),
+        )?;
+        let session_config = Arc::new(SessionWindowConfig {
+            gap: Duration::from_micros(config.gap_micros),
+            window_field,
+            window_index,
+            input_schema_ref: Arc::new(input_schema),
+            final_physical_exec: final_execution_plan,
+            receiver_hook,
+            output_schema,
+        });
+        Ok(SessionWindowOperator {
+            config: session_config,
+            session_states: HashMap::new(),
+            pq_start_times: BTreeMap::new(),
+            pq_watermark_actions: BTreeMap::new(),
+            row_converter,
+        })
+    }
+}
diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
new file mode 100644
index 00000000..29bad05a
--- /dev/null
+++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
@@ -0,0 +1,578 @@
+//! Sliding-window aggregation: mirrors the worker's `arrow/sliding_aggregating_window` and implements [`MessageOperator`].
+
+use anyhow::{anyhow, bail, Result};
+use arrow::compute::{partition, sort_to_indices, take};
+use arrow_array::{Array, PrimitiveArray, RecordBatch, types::TimestampNanosecondType};
+use arrow_schema::SchemaRef;
+use datafusion::common::ScalarValue;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::execution::SendableRecordBatchStream;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::{
+    physical_plan::{from_proto::parse_physical_expr, AsExecutionPlan},
+    protobuf::{PhysicalExprNode, PhysicalPlanNode},
+};
+use futures::StreamExt;
+use prost::Message;
+use std::collections::{BTreeMap, VecDeque};
+use std::sync::{Arc, RwLock};
+use std::time::{Duration, SystemTime};
+use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use async_trait::async_trait;
+use tracing_subscriber::Registry;
+use protocol::grpc::api::SlidingWindowAggregateOperator;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
+use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+// ============================================================================
+// Tiered panes
+// ============================================================================
+
+/// Record batches buffered for a single time bin of a tier.
+#[derive(Default, Debug)]
+struct RecordBatchPane {
+    batches: Vec<RecordBatch>,
+}
+
+/// A contiguous run of fixed-width time bins ("panes") holding buffered batches.
+/// `panes[i]` covers `[start_time + i * width, start_time + (i + 1) * width)`;
+/// `start_time` stays `None` until the first batch is inserted.
+#[derive(Debug)]
+struct RecordBatchTier {
+    width: Duration,
+    start_time: Option<SystemTime>,
+    panes: VecDeque<RecordBatchPane>,
+}
+
+impl RecordBatchTier {
+    /// Creates an empty tier whose bins are `width` wide.
+    fn new(width: Duration) -> Self {
+        Self {
+            width,
+            start_time: None,
+            panes: VecDeque::new(),
+        }
+    }
+
+    /// Rounds `timestamp` down to a multiple of `width`; a zero width returns it unchanged.
+    fn bin_start(&self, timestamp: SystemTime) -> SystemTime {
+        if self.width == Duration::ZERO {
+            return timestamp;
+        }
+        let nanos = to_nanos(timestamp);
+        from_nanos(nanos - nanos % self.width.as_nanos())
+    }
+
+    /// Index of the pane starting at `bin_start`, counted from `origin`.
+    ///
+    /// Returns an error instead of panicking when the width is zero, when
+    /// `bin_start` precedes `origin`, or when the index does not fit in `usize`.
+    fn pane_index(&self, origin: SystemTime, bin_start: SystemTime) -> Result<usize> {
+        let width = self.width.as_nanos();
+        if width == 0 {
+            bail!("cannot index panes of a zero-width tier");
+        }
+        let index = bin_start.duration_since(origin)?.as_nanos() / width;
+        usize::try_from(index).map_err(|_| anyhow!("pane index {index} overflows usize"))
+    }
+
+    /// Appends `batch` to the pane containing `timestamp`, growing the tier as needed.
+    fn insert(&mut self, batch: RecordBatch, timestamp: SystemTime) -> Result<()> {
+        let bin_start = self.bin_start(timestamp);
+        let Some(origin) = self.start_time else {
+            // First batch ever: its bin becomes the tier origin.
+            self.start_time = Some(bin_start);
+            self.panes.push_back(RecordBatchPane { batches: vec![batch] });
+            return Ok(());
+        };
+        let index = self.pane_index(origin, bin_start)?;
+        while self.panes.len() <= index {
+            self.panes.push_back(RecordBatchPane::default());
+        }
+        self.panes[index].batches.push(batch);
+        Ok(())
+    }
+
+    /// Returns the batches of the pane starting at `bin_start` (empty if none are held).
+    fn batches_for_timestamp(&self, bin_start: SystemTime) -> Result<Vec<RecordBatch>> {
+        let Some(origin) = self.start_time.filter(|st| *st <= bin_start) else {
+            return Ok(vec![]);
+        };
+        let index = self.pane_index(origin, bin_start)?;
+        Ok(self.panes.get(index).map(|pane| pane.batches.clone()).unwrap_or_default())
+    }
+
+    /// Drops all panes that start before the bin containing `cutoff` and moves the origin
+    /// to that bin; a no-op when there is no origin yet or it is not before that bin.
+    fn delete_before(&mut self, cutoff: SystemTime) -> Result<()> {
+        let bin_start = self.bin_start(cutoff);
+        let Some(origin) = self.start_time.filter(|st| *st < bin_start) else {
+            return Ok(());
+        };
+        let index = self.pane_index(origin, bin_start)?;
+        if index >= self.panes.len() {
+            self.panes.clear();
+        } else {
+            self.panes.drain(..index);
+        }
+        self.start_time = Some(bin_start);
+        Ok(())
+    }
+}
+
+/// Partial-aggregate batches bucketed at several bin widths ("tiers").
+#[derive(Debug)]
+struct TieredRecordBatchHolder {
+    tier_widths: Vec<Duration>,
+    tiers: Vec<RecordBatchTier>,
+}
+
+impl TieredRecordBatchHolder {
+    /// Builds one empty tier per width. Fails when no width is given, when a width is
+    /// zero (an interval walk could never advance), or when a width does not evenly
+    /// divide the next one.
+    fn new(tier_widths: Vec<Duration>) -> Result<Self> {
+        if tier_widths.is_empty() || tier_widths.contains(&Duration::ZERO) {
+            bail!("tier widths must be non-empty and non-zero, got {tier_widths:?}");
+        }
+        for pair in tier_widths.windows(2) {
+            let (fine, coarse) = (pair[0].as_nanos(), pair[1].as_nanos());
+            if !coarse.is_multiple_of(fine) {
+                bail!("tier width {fine} does not evenly divide next {coarse}");
+            }
+        }
+        let tiers = tier_widths.iter().map(|w| RecordBatchTier::new(*w)).collect();
+        Ok(Self { tier_widths, tiers })
+    }
+
+    /// Stores `batch` under `timestamp` in every tier.
+    fn insert(&mut self, batch: RecordBatch, timestamp: SystemTime) -> Result<()> {
+        self.tiers.iter_mut().try_for_each(|tier| tier.insert(batch.clone(), timestamp))
+    }
+
+    /// Collects the batches covering `[interval_start, interval_end)`, walking left to
+    /// right and using the coarsest tier whose bin is aligned and still fits. Fails
+    /// when the interval cannot be tiled exactly by the tier widths.
+    fn batches_for_interval(
+        &self,
+        interval_start: SystemTime,
+        interval_end: SystemTime,
+    ) -> Result<Vec<RecordBatch>> {
+        let mut batches = Vec::new();
+        let mut current_tier = 0usize;
+        let mut current_start = interval_start;
+        while current_start < interval_end {
+            let width = self.tier_widths[current_tier];
+            if current_start + width > interval_end {
+                // Overshooting at the finest tier means the remainder cannot be tiled;
+                // retrying tier 0 would spin forever, so report it instead.
+                if current_tier == 0 {
+                    bail!("interval end {interval_end:?} unreachable from {current_start:?} by {width:?}");
+                }
+                current_tier -= 1;
+                continue;
+            }
+            if let Some(next_tier) = self.tiers.get(current_tier + 1) {
+                if next_tier.bin_start(current_start) == current_start
+                    && current_start + next_tier.width <= interval_end
+                {
+                    current_tier += 1;
+                    continue;
+                }
+            }
+            batches.extend(self.tiers[current_tier].batches_for_timestamp(current_start)?);
+            current_start += width;
+        }
+        if current_start != interval_end {
+            bail!("interval end {interval_end:?} does not match current start {current_start:?}");
+        }
+        Ok(batches)
+    }
+
+    /// Drops, in every tier, the panes that lie before the bin containing `cutoff`.
+    fn delete_before(&mut self, cutoff: SystemTime) -> Result<()> {
+        self.tiers.iter_mut().try_for_each(|tier| tier.delete_before(cutoff))
+    }
+}
+
+// ============================================================================
+// Per-bin partial aggregation
+// ============================================================================
+
+/// Partial aggregation state for one bin: the running plan (if any) plus the
+/// partial results already produced for it.
+#[derive(Default)]
+struct ActiveBin {
+    /// Sending half of the plan's input channel; taken by `close_and_drain`.
+    sender: Option<UnboundedSender<RecordBatch>>,
+    /// Output of the running partial aggregation, `None` when nothing is running.
+    result_stream: Option<SendableRecordBatchStream>,
+    /// Partial results drained so far or restored from state.
+    finished_batches: Vec<RecordBatch>,
+}
+
+impl ActiveBin {
+    /// Starts a fresh execution of `plan` for one bin.
+    ///
+    /// A new channel is created and its receiver is parked in `hook` before the plan
+    /// is reset and executed; the returned bin keeps the sending half.
+    ///
+    /// Fails if the hook lock is poisoned or the plan cannot be reset or executed.
+    fn start_partial(
+        plan: Arc<dyn ExecutionPlan>,
+        hook: &Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    ) -> Result<Self> {
+        let (tx, rx) = unbounded_channel();
+        *hook.write().map_err(|_| anyhow!("receiver hook lock poisoned"))? = Some(rx);
+        plan.reset()?;
+        let result_stream = plan.execute(0, SessionContext::new().task_ctx())?;
+        Ok(Self { sender: Some(tx), result_stream: Some(result_stream), finished_batches: Vec::new() })
+    }
+
+    /// Drops the sender, then appends every remaining result batch of the stream to
+    /// `finished_batches`. Only the sender is cleared when no stream is present.
+    async fn close_and_drain(&mut self) -> Result<()> {
+        self.sender.take();
+        if let Some(mut stream) = self.result_stream.take() {
+            while let Some(batch) = stream.next().await {
+                self.finished_batches.push(batch?);
+            }
+        }
+        Ok(())
+    }
+}
+
+// ============================================================================
+// Operator
+// ============================================================================
+
+/// Sliding-window aggregate operator: partials per `slide` bin, combined over `width`.
+pub struct SlidingWindowOperator {
+    slide: Duration,
+    width: Duration,
+    binning_function: Arc<dyn PhysicalExpr>,
+    // Partial aggregation, run once per active bin.
+    partial_aggregation_plan: Arc<dyn ExecutionPlan>,
+    partial_schema: FsSchema,
+    // Final aggregation and projection, run once per emitted window.
+    finish_execution_plan: Arc<dyn ExecutionPlan>,
+    final_projection: Arc<dyn ExecutionPlan>,
+    projection_input_schema: SchemaRef,
+    // Hand-off slots through which batches are passed to the plans above.
+    receiver_hook: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    final_batches_passer: Arc<RwLock<Vec<RecordBatch>>>,
+    // Bins not yet closed by a watermark, and the stored partials of older bins.
+    active_bins: BTreeMap<SystemTime, ActiveBin>,
+    tiered_record_batches: TieredRecordBatchHolder,
+}
+
+impl SlidingWindowOperator {
+    /// Rounds `timestamp` down to a multiple of `slide` (unchanged when `slide` is zero).
+    fn bin_start(&self, timestamp: SystemTime) -> SystemTime {
+        let slide = self.slide.as_nanos();
+        if slide == 0 {
+            return timestamp;
+        }
+        let nanos = to_nanos(timestamp);
+        from_nanos(nanos - nanos % slide)
+    }
+
+    /// Returns `batch` with one extra nanosecond-timestamp column, filled with
+    /// `bin_start`, appended; the resulting columns must match `schema`.
+    fn add_bin_start_as_timestamp(batch: &RecordBatch, bin_start: SystemTime, schema: SchemaRef) -> Result<RecordBatch> {
+        let stamp = ScalarValue::TimestampNanosecond(Some(to_nanos(bin_start) as i64), None);
+        let mut columns = batch.columns().to_vec();
+        columns.push(stamp.to_array_of_size(batch.num_rows())?);
+        Ok(RecordBatch::try_new(schema, columns)?)
+    }
+
+    /// Starts a partial aggregation for `slot` unless one is running, keeping its finished batches.
+    fn ensure_bin_running(
+        slot: &mut ActiveBin,
+        plan: Arc<dyn ExecutionPlan>,
+        hook: &Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    ) -> Result<()> {
+        if slot.sender.is_none() {
+            let carried_over = std::mem::take(&mut slot.finished_batches);
+            let mut restarted = ActiveBin::start_partial(plan, hook)?;
+            restarted.finished_batches = carried_over;
+            *slot = restarted;
+        }
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl MessageOperator for SlidingWindowOperator {
+    /// Fixed name under which this operator identifies itself; always returns
+    /// `"SlidingWindow"`.
+    fn name(&self) -> &str { "SlidingWindow" }
+
+    /// Rebuilds state from table "t": batches whose bin precedes the last watermark's
+    /// bin go to the tiered store, the rest become finished partials of their bin.
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
+        let watermark = ctx.last_present_watermark();
+        let mut tm = ctx.table_manager_guard().await?;
+        let table = tm
+            .get_expiring_time_key_table("t", watermark)
+            .await
+            .map_err(|e| anyhow!("expiring time key table t: {e}"))?;
+        let watermark_bin = self.bin_start(watermark.unwrap_or(SystemTime::UNIX_EPOCH));
+        for (timestamp, batches) in table.all_batches_for_watermark(watermark) {
+            let bin = self.bin_start(*timestamp);
+            if bin >= watermark_bin {
+                let open_bin = self.active_bins.entry(bin).or_default();
+                for batch in batches {
+                    open_bin.finished_batches.push(batch.clone());
+                }
+                continue;
+            }
+            for batch in batches {
+                self.tiered_record_batches.insert(batch.clone(), bin)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Routes each row of `batch` to the partial aggregation of its bin.
+    ///
+    /// Rows are sorted by the value of the binning expression and split into runs of
+    /// equal bin; each run is sent to that bin's partial plan, which is started on
+    /// demand. Runs whose bin precedes the bin of the last watermark are dropped.
+    /// Never emits output directly.
+    ///
+    /// # Errors
+    /// Fails if binning, sorting or reordering fails, if the binning expression does
+    /// not yield `TimestampNanosecond`, or if a partial plan cannot be started or fed.
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let bin_array = self
+            .binning_function
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let indices = sort_to_indices(bin_array.as_ref(), None, None)?;
+
+        // Propagate reorder failures instead of panicking inside the closure.
+        let columns = batch
+            .columns()
+            .iter()
+            .map(|column| take(column, &indices, None))
+            .collect::<Result<Vec<_>, _>>()?;
+        let sorted = RecordBatch::try_new(batch.schema(), columns)?;
+        let sorted_bins = take(bin_array.as_ref(), &indices, None)?;
+
+        let typed_bin = sorted_bins
+            .as_any()
+            .downcast_ref::<PrimitiveArray<TimestampNanosecondType>>()
+            .ok_or_else(|| anyhow!("binning function must produce TimestampNanosecond"))?;
+        let partition_ranges = partition(std::slice::from_ref(&sorted_bins))?.ranges();
+
+        // The late-data cutoff is the same for every run, so compute it once.
+        let earliest_open_bin = ctx.last_present_watermark().map(|wm| self.bin_start(wm));
+
+        for range in partition_ranges {
+            let bin_start = from_nanos(typed_bin.value(range.start) as u128);
+            if earliest_open_bin.is_some_and(|cutoff| bin_start < cutoff) {
+                continue;
+            }
+
+            let slot = self.active_bins.entry(bin_start).or_default();
+            Self::ensure_bin_running(slot, self.partial_aggregation_plan.clone(), &self.receiver_hook)?;
+            slot.sender
+                .as_ref()
+                .ok_or_else(|| anyhow!("partial bin sender missing after ensure"))?
+                .send(sorted.slice(range.start, range.end - range.start))
+                .map_err(|e| anyhow!("partial channel send: {e}"))?;
+        }
+
+        Ok(vec![])
+    }
+
+    /// Closes every bin that ends at or before the watermark's bin and emits one
+    /// window result per closed bin.
+    ///
+    /// For each closed bin the partial plan is drained into the tiered store, the
+    /// partials of `[bin_end - width, bin_end)` go through the final aggregation, get
+    /// the window start appended as a timestamp column, and are projected.
+    /// Watermarks that are not event-time are ignored.
+    // NOTE(review): windows are only produced for bins present in `active_bins`; a
+    // window whose newest slide received no rows looks never emitted — confirm intended.
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let Watermark::EventTime(current_time) = watermark else {
+            return Ok(vec![]);
+        };
+        let watermark_bin = self.bin_start(current_time);
+        // Keys are ordered, so the closable bins form a prefix of the map.
+        let expired_bins: Vec<SystemTime> = self
+            .active_bins
+            .keys()
+            .copied()
+            .take_while(|bin| *bin + self.slide <= watermark_bin)
+            .collect();
+
+        let mut final_outputs = Vec::new();
+        for bin_start in expired_bins {
+            let mut bin = self
+                .active_bins
+                .remove(&bin_start)
+                .ok_or_else(|| anyhow!("missing active bin"))?;
+            bin.close_and_drain().await?;
+            for partial in bin.finished_batches {
+                self.tiered_record_batches.insert(partial, bin_start)?;
+            }
+
+            let interval_end = bin_start + self.slide;
+            let interval_start = interval_end - self.width;
+            let partials = self
+                .tiered_record_batches
+                .batches_for_interval(interval_start, interval_end)?;
+            *self.final_batches_passer.write().unwrap() = partials;
+            self.finish_execution_plan.reset()?;
+            let mut final_exec = self
+                .finish_execution_plan
+                .execute(0, SessionContext::new().task_ctx())?;
+            let mut aggregate_results = Vec::new();
+            while let Some(batch) = final_exec.next().await {
+                aggregate_results.push(Self::add_bin_start_as_timestamp(
+                    &batch?,
+                    interval_start,
+                    self.projection_input_schema.clone(),
+                )?);
+            }
+
+            *self.final_batches_passer.write().unwrap() = aggregate_results;
+            self.final_projection.reset()?;
+            let mut proj_exec = self
+                .final_projection
+                .execute(0, SessionContext::new().task_ctx())?;
+            while let Some(batch) = proj_exec.next().await {
+                final_outputs.push(StreamOutput::Forward(batch?));
+            }
+
+            self.tiered_record_batches
+                .delete_before(interval_end + self.slide - self.width)?;
+        }
+
+        Ok(final_outputs)
+    }
+
+    /// Drains every active bin and writes its partial results to table "t".
+    ///
+    /// Each finished batch is stamped with its bin start using `partial_schema`
+    /// before insertion; the table is then flushed at the last present watermark.
+    // NOTE(review): every call re-inserts all finished batches of a bin, including
+    // ones queued by `on_start` — confirm the table tolerates repeats and that
+    // restored batches do not already carry the timestamp column.
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> {
+        let watermark = ctx.last_present_watermark();
+        let mut tm = ctx.table_manager_guard().await?;
+        let table = tm
+            .get_expiring_time_key_table("t", watermark)
+            .await
+            .map_err(|e| anyhow!("expiring time key table t: {e}"))?;
+        for (&bin_start, active_bin) in self.active_bins.iter_mut() {
+            active_bin.close_and_drain().await?;
+            let schema = &self.partial_schema.schema;
+            for batch in &active_bin.finished_batches {
+                table.insert(bin_start, Self::add_bin_start_as_timestamp(batch, bin_start, schema.clone())?);
+            }
+        }
+        table.flush(watermark).await?;
+        Ok(())
+    }
+
+    /// Produces no output on close: always returns an empty list and leaves
+    /// `active_bins` untouched.
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> { Ok(Vec::new()) }
+}
+
+// ============================================================================
+
+/// Builds [`SlidingWindowOperator`]s from their protobuf configuration.
+pub struct SlidingAggregatingWindowConstructor;
+
+impl SlidingAggregatingWindowConstructor {
+    /// Decodes `config` into a ready-to-run sliding window operator.
+    ///
+    /// The binning expression and the partial-aggregation, final-aggregation and
+    /// projection plans are decoded against `registry`.
+    ///
+    /// # Errors
+    /// Fails when a schema is missing, when `slide` is zero or does not evenly divide
+    /// `width` (window intervals could not be tiled by slide-wide bins), when a
+    /// protobuf payload cannot be decoded, or when the projection plan has no input.
+    pub fn with_config(
+        &self,
+        config: SlidingWindowAggregateOperator,
+        registry: Arc<Registry>,
+    ) -> anyhow::Result<SlidingWindowOperator> {
+        let width = Duration::from_micros(config.width_micros);
+        let slide = Duration::from_micros(config.slide_micros);
+        if slide.is_zero() || !width.as_nanos().is_multiple_of(slide.as_nanos()) {
+            bail!("sliding window slide {slide:?} must be non-zero and evenly divide width {width:?}");
+        }
+        let input_schema: FsSchema = config
+            .input_schema
+            .ok_or_else(|| anyhow!("missing input schema"))?
+            .try_into()?;
+        let binning_function = parse_physical_expr(
+            &PhysicalExprNode::decode(&mut config.binning_function.as_slice())?,
+            registry.as_ref(),
+            &input_schema.schema,
+            &DefaultPhysicalExtensionCodec {},
+        )?;
+        let receiver_hook = Arc::new(RwLock::new(None));
+        let final_batches_passer = Arc::new(RwLock::new(Vec::new()));
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::UnboundedBatchStream(receiver_hook.clone()),
+        };
+        let final_codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::LockedBatchVec(final_batches_passer.clone()),
+        };
+        // One runtime environment is enough for decoding all three plans.
+        let runtime = RuntimeEnvBuilder::new().build()?;
+        let partial_plan = PhysicalPlanNode::decode(&mut config.partial_aggregation_plan.as_slice())?
+            .try_into_physical_plan(registry.as_ref(), &runtime, &codec)?;
+        let finish_plan = PhysicalPlanNode::decode(&mut config.final_aggregation_plan.as_slice())?
+            .try_into_physical_plan(registry.as_ref(), &runtime, &final_codec)?;
+        let final_proj = PhysicalPlanNode::decode(&mut config.final_projection.as_slice())?
+            .try_into_physical_plan(registry.as_ref(), &runtime, &final_codec)?;
+        let projection_input_schema = final_proj
+            .children()
+            .first()
+            .ok_or_else(|| anyhow!("final projection plan has no input"))?
+            .schema();
+        let partial_schema: FsSchema = config
+            .partial_schema
+            .ok_or_else(|| anyhow!("missing partial schema"))?
+            .try_into()?;
+        Ok(SlidingWindowOperator {
+            slide,
+            width,
+            binning_function,
+            partial_aggregation_plan: partial_plan,
+            partial_schema,
+            finish_execution_plan: finish_plan,
+            final_projection: final_proj,
+            projection_input_schema,
+            receiver_hook,
+            final_batches_passer,
+            active_bins: BTreeMap::new(),
+            tiered_record_batches: TieredRecordBatchHolder::new(vec![slide])?,
+        })
+    }
+}
diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
new file mode 100644
index 00000000..c30950cb
--- /dev/null
+++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
@@ -0,0 +1,399 @@
+//! Tumbling-window aggregation: mirrors the worker's `arrow/tumbling_aggregating_window` and implements [`MessageOperator`].
+
+use anyhow::{anyhow, Result};
+use arrow::compute::{partition, sort_to_indices, take};
+use arrow_array::{Array, PrimitiveArray, RecordBatch, types::TimestampNanosecondType};
+use arrow_schema::SchemaRef;
+use datafusion::common::ScalarValue;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::execution::SendableRecordBatchStream;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::{
+    physical_plan::{from_proto::parse_physical_expr, AsExecutionPlan},
+    protobuf::{PhysicalExprNode, PhysicalPlanNode},
+};
+use futures::StreamExt;
+use prost::Message;
+use std::collections::BTreeMap;
+use std::mem;
+use std::sync::{Arc, RwLock};
+use std::time::{Duration, SystemTime};
+use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
+use tracing::warn;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use async_trait::async_trait;
+use tracing_subscriber::Registry;
+use protocol::grpc::api::TumblingWindowAggregateOperator;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
+use crate::sql::common::time_utils::print_time;
+use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::schema::utils::add_timestamp_field_arrow;
+
+/// Partial aggregation state for one bin: the running plan (if any) plus the
+/// partial results already produced for it.
+#[derive(Default)]
+struct ActiveBin {
+    /// Sending half of the plan's input channel; taken by `close_and_drain`.
+    sender: Option<UnboundedSender<RecordBatch>>,
+    /// Output of the running partial aggregation, `None` when nothing is running.
+    result_stream: Option<SendableRecordBatchStream>,
+    /// Partial results drained so far or restored from state.
+    finished_batches: Vec<RecordBatch>,
+}
+
+impl ActiveBin {
+    /// Starts a fresh execution of `plan` for one bin.
+    ///
+    /// A new channel is created and its receiver is parked in `hook` before the plan
+    /// is reset and executed; the returned bin keeps the sending half.
+    ///
+    /// Fails if the hook lock is poisoned or the plan cannot be reset or executed.
+    fn start_partial(
+        plan: Arc<dyn ExecutionPlan>,
+        hook: &Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    ) -> Result<Self> {
+        let (tx, rx) = unbounded_channel();
+        *hook.write().map_err(|_| anyhow!("receiver hook lock poisoned"))? = Some(rx);
+        plan.reset()?;
+        let result_stream = plan.execute(0, SessionContext::new().task_ctx())?;
+        Ok(Self { sender: Some(tx), result_stream: Some(result_stream), finished_batches: Vec::new() })
+    }
+
+    /// Drops the sender, then appends every remaining result batch of the stream to
+    /// `finished_batches`. Only the sender is cleared when no stream is present.
+    async fn close_and_drain(&mut self) -> Result<()> {
+        self.sender.take();
+        if let Some(mut stream) = self.result_stream.take() {
+            while let Some(batch) = stream.next().await {
+                self.finished_batches.push(batch?);
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Tumbling-window aggregate operator: one partial aggregation per `width`-wide bin.
+pub struct TumblingWindowOperator {
+    width: Duration,
+    binning_function: Arc<dyn PhysicalExpr>,
+    // Partial aggregation, run once per active bin.
+    partial_aggregation_plan: Arc<dyn ExecutionPlan>,
+    partial_schema: FsSchema,
+    // Final aggregation plus optional projection, run once per closed bin.
+    finish_execution_plan: Arc<dyn ExecutionPlan>,
+    aggregate_with_timestamp_schema: SchemaRef,
+    final_projection: Option<Arc<dyn ExecutionPlan>>,
+    // Hand-off slots through which batches are passed to the plans above.
+    receiver_hook: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    final_batches_passer: Arc<RwLock<Vec<RecordBatch>>>,
+    // Bins that have not been closed by a watermark yet.
+    active_bins: BTreeMap<SystemTime, ActiveBin>,
+}
+
+impl TumblingWindowOperator {
+    /// Rounds `timestamp` down to a multiple of `width` (unchanged when `width` is zero).
+    fn bin_start(&self, timestamp: SystemTime) -> SystemTime {
+        let width = self.width.as_nanos();
+        if width == 0 {
+            return timestamp;
+        }
+        let nanos = to_nanos(timestamp);
+        from_nanos(nanos - nanos % width)
+    }
+
+    /// Returns `batch` with one extra nanosecond-timestamp column, filled with
+    /// `bin_start`, appended; the resulting columns must match `schema`.
+    fn add_bin_start_as_timestamp(batch: &RecordBatch, bin_start: SystemTime, schema: SchemaRef) -> Result<RecordBatch> {
+        let stamp = ScalarValue::TimestampNanosecond(Some(to_nanos(bin_start) as i64), None);
+        let mut columns = batch.columns().to_vec();
+        columns.push(stamp.to_array_of_size(batch.num_rows())?);
+        RecordBatch::try_new(schema, columns)
+            .map_err(|e| anyhow!("add _timestamp column: {e}"))
+    }
+
+    /// Starts a partial aggregation for `slot` unless one is running, keeping its finished batches.
+    fn ensure_bin_running(
+        slot: &mut ActiveBin,
+        plan: Arc<dyn ExecutionPlan>,
+        hook: &Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    ) -> Result<()> {
+        if slot.sender.is_none() {
+            let carried_over = mem::take(&mut slot.finished_batches);
+            let mut restarted = ActiveBin::start_partial(plan, hook)?;
+            restarted.finished_batches = carried_over;
+            *slot = restarted;
+        }
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl MessageOperator for TumblingWindowOperator {
+    /// Fixed name under which this operator identifies itself; always returns
+    /// `"TumblingWindow"`.
+    fn name(&self) -> &str { "TumblingWindow" }
+
+    /// Restores finished partials from table "t", grouping stored batches by bin.
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
+        let watermark = ctx.last_present_watermark();
+        let mut tm = ctx.table_manager_guard().await?;
+        let table = tm
+            .get_expiring_time_key_table("t", watermark)
+            .await
+            .map_err(|e| anyhow!("expiring time key table t: {e}"))?;
+        for (timestamp, batches) in table.all_batches_for_watermark(watermark) {
+            let bin = self.bin_start(*timestamp);
+            let restored_bin = self.active_bins.entry(bin).or_default();
+            for batch in batches {
+                restored_bin.finished_batches.push(batch.clone());
+            }
+        }
+        Ok(())
+    }
+
+    /// Routes each row of `batch` to the partial aggregation of its bin.
+    ///
+    /// Rows are sorted by the binning expression and split into runs of equal bin;
+    /// each run is sent to that bin's partial plan, started on demand. Runs whose bin
+    /// precedes the bin of the last watermark are logged and dropped. Fails if
+    /// binning or reordering fails, or if a partial plan cannot be started or fed.
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let bin_array = self
+            .binning_function
+            .evaluate(&batch)?
+            .into_array(batch.num_rows())?;
+        let indices = sort_to_indices(bin_array.as_ref(), None, None)?;
+
+        // Propagate reorder failures instead of panicking inside the closure.
+        let columns = batch
+            .columns()
+            .iter()
+            .map(|column| take(column, &indices, None))
+            .collect::<Result<Vec<_>, _>>()?;
+        let sorted = RecordBatch::try_new(batch.schema(), columns)?;
+        let sorted_bins = take(bin_array.as_ref(), &indices, None)?;
+
+        let typed_bin = sorted_bins
+            .as_any()
+            .downcast_ref::<PrimitiveArray<TimestampNanosecondType>>()
+            .ok_or_else(|| anyhow!("binning function must produce TimestampNanosecond"))?;
+        let partition_ranges = partition(std::slice::from_ref(&sorted_bins))?.ranges();
+
+        // The watermark and its bin are the same for every run, so look them up once.
+        let late_cutoff = ctx.last_present_watermark().map(|wm| (wm, self.bin_start(wm)));
+
+        for range in partition_ranges {
+            let bin_start = from_nanos(typed_bin.value(range.start) as u128);
+            if let Some((watermark, cutoff)) = late_cutoff {
+                if bin_start < cutoff {
+                    warn!(
+                        "late data dropped: bin {} < watermark {}",
+                        print_time(bin_start),
+                        print_time(watermark)
+                    );
+                    continue;
+                }
+            }
+
+            let slot = self.active_bins.entry(bin_start).or_default();
+            Self::ensure_bin_running(slot, self.partial_aggregation_plan.clone(), &self.receiver_hook)?;
+            slot.sender
+                .as_ref()
+                .ok_or_else(|| anyhow!("tumbling bin sender missing after ensure"))?
+                .send(sorted.slice(range.start, range.end - range.start))
+                .map_err(|e| anyhow!("partial channel send: {e}"))?;
+        }
+
+        Ok(vec![])
+    }
+
+    /// Finalizes every bin whose end is at or before the event-time watermark.
+    ///
+    /// Each closed bin's partials go through the final aggregation; results get the
+    /// bin start appended as a timestamp column and are either forwarded directly
+    /// or, when a final projection is configured, projected first. Bins without
+    /// partial results are skipped; watermarks that are not event-time are ignored.
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let Watermark::EventTime(current_time) = watermark else {
+            return Ok(vec![]);
+        };
+        // Keys are ordered, so the closable bins form a prefix of the map.
+        let expired_bins: Vec<SystemTime> = self
+            .active_bins
+            .keys()
+            .copied()
+            .take_while(|bin| *bin + self.width <= current_time)
+            .collect();
+
+        let mut final_outputs = Vec::new();
+        for bin_start in expired_bins {
+            let mut bin = self
+                .active_bins
+                .remove(&bin_start)
+                .ok_or_else(|| anyhow!("missing tumbling bin"))?;
+            bin.close_and_drain().await?;
+            // Nothing was aggregated for this bin, so there is nothing to emit.
+            if bin.finished_batches.is_empty() {
+                continue;
+            }
+
+            // Hand the partials to the final aggregation through the shared vector.
+            *self.final_batches_passer.write().unwrap() = bin.finished_batches;
+            self.finish_execution_plan.reset()?;
+            let mut final_exec = self
+                .finish_execution_plan
+                .execute(0, SessionContext::new().task_ctx())?;
+
+            let mut aggregate_results = Vec::new();
+            while let Some(batch) = final_exec.next().await {
+                let with_timestamp = Self::add_bin_start_as_timestamp(
+                    &batch?,
+                    bin_start,
+                    self.aggregate_with_timestamp_schema.clone(),
+                )?;
+                match self.final_projection {
+                    Some(_) => aggregate_results.push(with_timestamp),
+                    None => final_outputs.push(StreamOutput::Forward(with_timestamp)),
+                }
+            }
+
+            // Without a projection the stamped results were already forwarded above.
+            let Some(final_projection) = &self.final_projection else {
+                continue;
+            };
+            *self.final_batches_passer.write().unwrap() = aggregate_results;
+            final_projection.reset()?;
+            let mut proj_exec = final_projection.execute(0, SessionContext::new().task_ctx())?;
+            while let Some(batch) = proj_exec.next().await {
+                final_outputs.push(StreamOutput::Forward(batch?));
+            }
+        }
+
+        Ok(final_outputs)
+    }
+
+    /// Drains every active bin and writes its partial results to table "t".
+    ///
+    /// Each finished batch is stamped with its bin start using `partial_schema`
+    /// before insertion; the table is then flushed at the last present watermark.
+    // NOTE(review): every call re-inserts all finished batches of a bin, including
+    // ones queued by `on_start` — confirm the table tolerates repeats and that
+    // restored batches do not already carry the timestamp column.
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> {
+        let watermark = ctx.last_present_watermark();
+        let mut tm = ctx.table_manager_guard().await?;
+        let table = tm
+            .get_expiring_time_key_table("t", watermark)
+            .await
+            .map_err(|e| anyhow!("expiring time key table t: {e}"))?;
+        for (&bin_start, active_bin) in self.active_bins.iter_mut() {
+            active_bin.close_and_drain().await?;
+            let schema = &self.partial_schema.schema;
+            for batch in &active_bin.finished_batches {
+                table.insert(bin_start, Self::add_bin_start_as_timestamp(batch, bin_start, schema.clone())?);
+            }
+        }
+        table.flush(watermark).await?;
+        Ok(())
+    }
+
+    /// Produces no output on close: always returns an empty list and leaves
+    /// `active_bins` untouched.
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> { Ok(Vec::new()) }
+}
+
+/// Builds [`TumblingWindowOperator`]s from their protobuf configuration.
+pub struct TumblingAggregateWindowConstructor;
+
+impl TumblingAggregateWindowConstructor {
+    /// Decodes `config` into a ready-to-run tumbling window operator.
+    ///
+    /// The binning expression, the partial and final aggregation plans and, when
+    /// present and non-empty, the final projection are decoded against `registry`.
+    /// The schema used for stamped aggregate results is the final plan's schema
+    /// passed through `add_timestamp_field_arrow`.
+    ///
+    /// # Errors
+    /// Fails when a schema is missing or any protobuf payload cannot be decoded.
+    pub fn with_config(
+        &self,
+        config: TumblingWindowAggregateOperator,
+        registry: Arc<Registry>,
+    ) -> anyhow::Result<TumblingWindowOperator> {
+        let width = Duration::from_micros(config.width_micros);
+        let input_schema: FsSchema = config
+            .input_schema
+            .ok_or_else(|| anyhow!("missing input schema"))?
+            .try_into()?;
+        let binning_node = PhysicalExprNode::decode(&mut config.binning_function.as_slice())?;
+        let binning_function = parse_physical_expr(
+            &binning_node,
+            registry.as_ref(),
+            &input_schema.schema,
+            &DefaultPhysicalExtensionCodec {},
+        )?;
+
+        let receiver_hook = Arc::new(RwLock::new(None));
+        let final_batches_passer = Arc::new(RwLock::new(Vec::new()));
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::UnboundedBatchStream(receiver_hook.clone()),
+        };
+        let final_codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::LockedBatchVec(final_batches_passer.clone()),
+        };
+
+        let runtime = RuntimeEnvBuilder::new().build()?;
+        let partial_aggregation_plan =
+            PhysicalPlanNode::decode(&mut config.partial_aggregation_plan.as_slice())?
+                .try_into_physical_plan(registry.as_ref(), &runtime, &codec)?;
+        let partial_schema: FsSchema = config
+            .partial_schema
+            .ok_or_else(|| anyhow!("missing partial schema"))?
+            .try_into()?;
+        let finish_execution_plan =
+            PhysicalPlanNode::decode(&mut config.final_aggregation_plan.as_slice())?
+                .try_into_physical_plan(registry.as_ref(), &runtime, &final_codec)?;
+
+        // An absent or empty payload means "no final projection".
+        let final_projection = config
+            .final_projection
+            .as_ref()
+            .filter(|proto| !proto.is_empty())
+            .map(|proto| -> anyhow::Result<_> {
+                let node = PhysicalPlanNode::decode(&mut proto.as_slice())
+                    .map_err(|e| anyhow!("decode final_projection: {e}"))?;
+                Ok(node.try_into_physical_plan(registry.as_ref(), &runtime, &final_codec)?)
+            })
+            .transpose()?;
+        let aggregate_with_timestamp_schema =
+            add_timestamp_field_arrow((*finish_execution_plan.schema()).clone());
+
+        Ok(TumblingWindowOperator {
+            width,
+            binning_function,
+            partial_aggregation_plan,
+            partial_schema,
+            finish_execution_plan,
+            aggregate_with_timestamp_schema,
+            final_projection,
+            receiver_hook,
+            final_batches_passer,
+            active_bins: BTreeMap::new(),
+        })
+    }
+}
diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs
new file mode 100644
index 00000000..cc51b820
--- /dev/null
+++ b/src/runtime/streaming/operators/windows/window_function.rs
@@ -0,0 +1,292 @@
+//! Window function operator (instant execution bucketed by event time): mirrors the worker's `arrow/window_fn` and implements [`MessageOperator`].
+
+use anyhow::{anyhow, Result};
+use arrow::compute::{max, min};
+use arrow_array::RecordBatch;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::execution::SendableRecordBatchStream;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_proto::physical_plan::AsExecutionPlan;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use futures::StreamExt;
+use prost::Message;
+use std::collections::BTreeMap;
+use std::sync::{Arc, RwLock};
+use std::time::SystemTime;
+use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
+use tracing::warn;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use async_trait::async_trait;
+use tracing_subscriber::Registry;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
+use crate::sql::common::time_utils::print_time;
+use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+
+struct ActiveWindowExec { // one running execution of the window plan plus the channel that feeds it
+    sender: Option<UnboundedSender<RecordBatch>>, // input side of the channel; taken in `close_and_drain`
+    result_stream: Option<SendableRecordBatchStream>, // output of `plan.execute`; taken in `close_and_drain`
+}
+
+impl ActiveWindowExec {
+    fn new( // creates a fresh channel, stores its receiver in `hook`, then starts the plan
+        plan: Arc<dyn ExecutionPlan>,
+        hook: &Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    ) -> Result<Self> {
+        let (tx, rx) = unbounded_channel();
+        *hook.write().unwrap() = Some(rx); // panics if the lock is poisoned; presumably the plan's source takes `rx` from the hook on execute — confirm
+        plan.reset()?;
+        let result_stream = plan.execute(0, SessionContext::new().task_ctx())?; // partition 0, task context of a new default `SessionContext`
+        Ok(Self {
+            sender: Some(tx),
+            result_stream: Some(result_stream),
+        })
+    }
+
+    async fn close_and_drain(&mut self) -> Result<Vec<RecordBatch>> { // ends the input and collects every output batch
+        self.sender.take(); // drop our sender; assumes no other sender clone keeps the channel open
+        let mut results = Vec::new();
+        if let Some(mut stream) = self.result_stream.take() {
+            while let Some(batch) = stream.next().await {
+                results.push(batch?); // the first stream error aborts the drain
+            }
+        }
+        Ok(results)
+    }
+}
+
+pub struct WindowFunctionOperator { // runs one execution of the window plan per distinct event timestamp
+    input_schema: FsSchemaRef, // used to locate the timestamp column of incoming batches
+    input_schema_unkeyed: FsSchemaRef, // built with `from_schema_unkeyed`; used for sort / filter_by_time / partition
+    window_exec_plan: Arc<dyn ExecutionPlan>, // decoded window plan; the `Arc` is cloned into every new execution
+    receiver_hook: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>, // shared with the extension codec; each new execution stores its receiver here
+    active_execs: BTreeMap<SystemTime, ActiveWindowExec>, // executions keyed by event timestamp, iterated in ascending order
+}
+
+impl WindowFunctionOperator {
+    /// Discards a batch lying entirely behind `watermark`, applies `filter_by_time`, then slices the
+    /// rest along the ranges from `partition` into `(sub_batch, timestamp)` pairs.
+    fn filter_and_split_batches(
+        &self,
+        batch: RecordBatch,
+        watermark: Option<SystemTime>,
+    ) -> Result<Vec<(RecordBatch, SystemTime)>> {
+        if batch.num_rows() == 0 {
+            return Ok(vec![]);
+        }
+        let timestamp_column = self.input_schema.timestamp_column(&batch);
+        let no_ts = || anyhow!("window fn: batch has no non-null timestamp"); // `min`/`max` yield `None` for an all-null column
+        let min_timestamp = from_nanos(min(timestamp_column).ok_or_else(no_ts)? as u128);
+        let max_timestamp = from_nanos(max(timestamp_column).ok_or_else(no_ts)? as u128);
+
+        if let Some(wm) = watermark.filter(|wm| max_timestamp < *wm) {
+            warn!(
+                "dropped late batch: max_ts {} < watermark {}",
+                print_time(max_timestamp),
+                print_time(wm)
+            );
+            return Ok(vec![]);
+        }
+
+        if min_timestamp == max_timestamp {
+            return Ok(vec![(batch, max_timestamp)]);
+        }
+
+        let sorted_batch = self
+            .input_schema_unkeyed
+            .sort(batch, true)
+            .map_err(|e| anyhow!("sort for window fn: {e}"))?;
+        let filtered_batch = self
+            .input_schema_unkeyed
+            .filter_by_time(sorted_batch, watermark)
+            .map_err(|e| anyhow!("filter_by_time: {e}"))?;
+        if filtered_batch.num_rows() == 0 {
+            return Ok(vec![]);
+        }
+
+        let filtered_timestamps = self.input_schema.timestamp_column(&filtered_batch);
+        let ranges = self
+            .input_schema_unkeyed
+            .partition(&filtered_batch, true)
+            .map_err(|e| anyhow!("partition by time: {e}"))?;
+
+        let mut batches = Vec::with_capacity(ranges.len());
+        for range in ranges {
+            let slice = filtered_batch.slice(range.start, range.end - range.start);
+            let ts = from_nanos(filtered_timestamps.value(range.start) as u128);
+            batches.push((slice, ts));
+        }
+        Ok(batches)
+    }
+
+    /// Returns the execution registered for `timestamp`, starting a new one if none exists yet.
+    fn get_or_create_exec(&mut self, timestamp: SystemTime) -> Result<&mut ActiveWindowExec> {
+        use std::collections::btree_map::Entry;
+        match self.active_execs.entry(timestamp) {
+            Entry::Vacant(v) => Ok(v.insert(ActiveWindowExec::new(
+                self.window_exec_plan.clone(),
+                &self.receiver_hook,
+            )?)),
+            Entry::Occupied(o) => Ok(o.into_mut()),
+        }
+    }
+}
+
+#[async_trait]
+impl MessageOperator for WindowFunctionOperator {
+    fn name(&self) -> &str {
+        "WindowFunction"
+    }
+
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> { // replays batches stored in the "input" table
+        let watermark = ctx.last_present_watermark();
+        let mut tm = ctx.table_manager_guard().await?;
+        let table = tm
+            .get_expiring_time_key_table("input", watermark)
+            .await
+            .map_err(|e| anyhow!("expiring time key table input: {e}"))?;
+
+        for (timestamp, batches) in table.all_batches_for_watermark(watermark) {
+            let exec = self.get_or_create_exec(*timestamp)?; // one execution per stored timestamp
+            for batch in batches {
+                exec
+                    .sender
+                    .as_ref()
+                    .ok_or_else(|| anyhow!("window exec sender missing on restore"))?
+                    .send(batch.clone())
+                    .map_err(|e| anyhow!("restore send: {e}"))?;
+            }
+        }
+        Ok(())
+    }
+
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let current_watermark = ctx.last_present_watermark();
+        let split_batches = self.filter_and_split_batches(batch, current_watermark)?;
+
+        let mut tm = ctx.table_manager_guard().await?;
+        let table = tm
+            .get_expiring_time_key_table("input", current_watermark)
+            .await
+            .map_err(|e| anyhow!("expiring time key table input: {e}"))?;
+
+        for (sub_batch, timestamp) in split_batches {
+            table.insert(timestamp, sub_batch.clone()); // recorded in the "input" table before being fed to the plan
+            let exec = self.get_or_create_exec(timestamp)?;
+            exec
+                .sender
+                .as_ref()
+                .ok_or_else(|| anyhow!("window exec sender missing"))?
+                .send(sub_batch)
+                .map_err(|e| anyhow!("route batch to plan: {e}"))?;
+        }
+
+        Ok(vec![]) // nothing is emitted here; results are returned from `process_watermark`
+    }
+
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let Watermark::EventTime(current_time) = watermark else {
+            return Ok(vec![]); // non-event-time watermarks finalize nothing
+        };
+
+        let mut final_outputs = Vec::new();
+
+        // Same as the worker: a bucket is finalized only when its timestamp is **strictly less than** the current event-time watermark (kept while `watermark <= ts`).
+        let mut expired_timestamps = Vec::new();
+        for &k in self.active_execs.keys() {
+            if k < current_time {
+                expired_timestamps.push(k);
+            } else {
+                break; // keys are ordered, so no later key can be expired
+            }
+        }
+
+        for ts in expired_timestamps {
+            let mut exec = self
+                .active_execs
+                .remove(&ts)
+                .ok_or_else(|| anyhow!("missing window exec"))?;
+            let result_batches = exec.close_and_drain().await?;
+            for batch in result_batches {
+                final_outputs.push(StreamOutput::Forward(batch));
+            }
+        }
+
+        Ok(final_outputs)
+    }
+
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> {
+        let watermark = ctx.last_present_watermark();
+        let mut tm = ctx.table_manager_guard().await?;
+        tm.get_expiring_time_key_table("input", watermark)
+            .await
+            .map_err(|e| anyhow!("expiring time key table input: {e}"))?
+            .flush(watermark) // flushes the "input" table at the current watermark
+            .await?;
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
+        Ok(vec![]) // NOTE(review): executions still in `active_execs` are not drained on close — confirm this is intended
+    }
+}
+
+pub struct WindowFunctionConstructor; // builds a `WindowFunctionOperator` from its protobuf configuration
+
+impl WindowFunctionConstructor {
+    pub fn with_config(
+        &self,
+        config: protocol::grpc::api::WindowFunctionOperator,
+        registry: Arc<Registry>, // NOTE(review): `Registry` is imported from `tracing_subscriber` in this file — confirm this is the intended function registry type
+    ) -> anyhow::Result<WindowFunctionOperator> {
+        let input_schema = Arc::new(
+            FsSchema::try_from(
+                config
+                    .input_schema
+                    .ok_or_else(|| anyhow!("missing input schema"))?,
+            )
+            .map_err(|e| anyhow!("input schema: {e}"))?,
+        );
+
+        let input_schema_unkeyed = Arc::new(
+            FsSchema::from_schema_unkeyed(input_schema.schema.clone())
+                .map_err(|e| anyhow!("unkeyed schema: {e}"))?,
+        );
+
+        let receiver_hook = Arc::new(RwLock::new(None)); // starts empty; `ActiveWindowExec::new` stores a receiver per execution
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::UnboundedBatchStream(receiver_hook.clone()), // the decoded plan shares this hook
+        };
+
+        let window_exec_node =
+            PhysicalPlanNode::decode(&mut config.window_function_plan.as_slice())
+                .map_err(|e| anyhow!("decode window_function_plan: {e}"))?;
+        let window_exec_plan = window_exec_node
+            .try_into_physical_plan(
+                registry.as_ref(),
+                &RuntimeEnvBuilder::new().build()?,
+                &codec,
+            )
+            .map_err(|e| anyhow!("window physical plan: {e}"))?;
+
+        Ok(WindowFunctionOperator {
+            input_schema,
+            input_schema_unkeyed,
+            window_exec_plan,
+            receiver_hook,
+            active_execs: BTreeMap::new(), // no executions until data arrives or state is restored
+        })
+    }
+}
diff --git a/src/runtime/streaming/protocol/control.rs b/src/runtime/streaming/protocol/control.rs
new file mode 100644
index 00000000..a7a9da57
--- /dev/null
+++ b/src/runtime/streaming/protocol/control.rs
@@ -0,0 +1,74 @@
+//! Control plane: high-priority commands kept separate from the [`super::event::StreamEvent`] queue.
+
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+use tokio::sync::mpsc::{self, Receiver, Sender};
+use crate::sql::common::CheckpointBarrier;
+
+/// Serializable barrier payload (`CheckpointBarrier` itself does not implement `serde`); intended for RPC / persistence.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct CheckpointBarrierWire {
+    pub epoch: u32,
+    pub min_epoch: u32,
+    pub timestamp_secs: u64, // whole seconds since the Unix epoch
+    pub timestamp_subsec_nanos: u32, // sub-second remainder of the timestamp, in nanoseconds
+    pub then_stop: bool,
+}
+
+impl From<CheckpointBarrier> for CheckpointBarrierWire { // splits the barrier timestamp into seconds + nanoseconds
+    fn from(b: CheckpointBarrier) -> Self {
+        let d = b
+            .timestamp
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default(); // a timestamp before the Unix epoch becomes a zero duration
+        Self {
+            epoch: b.epoch,
+            min_epoch: b.min_epoch,
+            timestamp_secs: d.as_secs(),
+            timestamp_subsec_nanos: d.subsec_nanos(),
+            then_stop: b.then_stop,
+        }
+    }
+}
+
+impl From<CheckpointBarrierWire> for CheckpointBarrier { // rebuilds the barrier timestamp from seconds + nanoseconds
+    fn from(w: CheckpointBarrierWire) -> Self {
+        Self {
+            epoch: w.epoch,
+            min_epoch: w.min_epoch,
+            timestamp: std::time::UNIX_EPOCH
+                + Duration::new(w.timestamp_secs, w.timestamp_subsec_nanos), // NOTE(review): `Duration::new` and `SystemTime + Duration` panic on overflow — confirm wire values are trusted
+            then_stop: w.then_stop,
+        }
+    }
+}
+
+/// High-priority control commands issued by the JobManager / scheduler.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum ControlCommand {
+    Start,
+    Stop { mode: StopMode },
+    DropState,
+    Commit { epoch: u32 },
+    UpdateConfig { config_json: String },
+    /// Normally received by [`crate::runtime::streaming::SourceRunner`]; after the source persists its state it injects a `Barrier` downstream.
+    TriggerCheckpoint { barrier: CheckpointBarrierWire },
+}
+
+impl ControlCommand {
+    pub fn trigger_checkpoint(barrier: CheckpointBarrier) -> Self { // convenience constructor for `TriggerCheckpoint`
+        Self::TriggerCheckpoint {
+            barrier: barrier.into(), // converted to the serializable wire form
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum StopMode {
+    Graceful,
+    Immediate,
+}
+
+pub fn control_channel(capacity: usize) -> (Sender<ControlCommand>, Receiver<ControlCommand>) { // bounded tokio mpsc channel for control commands
+    mpsc::channel(capacity) // tokio's `mpsc::channel` panics when `capacity` is 0
+}
diff --git a/src/runtime/streaming/protocol/event.rs b/src/runtime/streaming/protocol/event.rs
new file mode 100644
index 00000000..ee974e7e
--- /dev/null
+++ b/src/runtime/streaming/protocol/event.rs
@@ -0,0 +1,11 @@
+use arrow_array::RecordBatch;
+use crate::sql::common::{CheckpointBarrier, Watermark};
+
+/// Core data-plane events.
+#[derive(Debug, Clone)]
+pub enum StreamEvent {
+    Data(RecordBatch),
+    Watermark(Watermark),
+    Barrier(CheckpointBarrier),
+    EndOfStream,
+}
diff --git a/src/runtime/streaming/protocol/mod.rs b/src/runtime/streaming/protocol/mod.rs
new file mode 100644
index 00000000..852562de
--- /dev/null
+++ b/src/runtime/streaming/protocol/mod.rs
@@ -0,0 +1,15 @@
+//! Protocol layer: data events, control commands, and watermark merge / comparison semantics.
+
+pub mod control;
+pub mod event;
+pub mod stream_out;
+pub mod tracked;
+pub mod watermark;
+
+pub use control::{
+    control_channel, CheckpointBarrierWire, ControlCommand, StopMode,
+};
+pub use event::StreamEvent;
+pub use stream_out::StreamOutput;
+pub use tracked::TrackedEvent;
+pub use watermark::{merge_watermarks, watermark_strictly_advances};
diff --git a/src/runtime/streaming/protocol/stream_out.rs b/src/runtime/streaming/protocol/stream_out.rs
new file mode 100644
index 00000000..49d963df
--- /dev/null
+++ b/src/runtime/streaming/protocol/stream_out.rs
@@ -0,0 +1,15 @@
+use arrow_array::RecordBatch;
+use crate::sql::common::Watermark;
+
+/// Data produced by an operator together with its downstream **routing intent** (the `SubtaskRunner` picks `collect` / `collect_keyed` / `broadcast` / watermark broadcast).
+#[derive(Debug, Clone)]
+pub enum StreamOutput {
+    /// Sent to every downstream (same as `TaskContext::collect`: the current implementation sends one `Data` per edge).
+    Forward(RecordBatch),
+    /// Sent to a single partition chosen by `key_hash % outboxes.len()` (KeyBy / Shuffle).
+    Keyed(u64, RecordBatch),
+    /// Broadcasts the same data to all downstream edges (e.g. broadcast join).
+    Broadcast(RecordBatch),
+    /// Broadcasts a watermark to all downstreams (e.g. from an expression watermark generator).
+    Watermark(Watermark),
+}
diff --git a/src/runtime/streaming/protocol/tracked.rs b/src/runtime/streaming/protocol/tracked.rs
new file mode 100644
index 00000000..c675b5bd
--- /dev/null
+++ b/src/runtime/streaming/protocol/tracked.rs
@@ -0,0 +1,31 @@
+use std::sync::Arc;
+
+use crate::runtime::streaming::memory::MemoryTicket;
+use crate::runtime::streaming::protocol::event::StreamEvent;
+
+/// The event actually carried on a channel; handles memory accounting for multi-way broadcast.
+///
+/// The `MemoryTicket` is wrapped in an `Arc`: when an event is sent to N downstream partitions
+/// (broadcast routing), cloning this `TrackedEvent` is enough — the ticket is shared and the `Arc`
+/// count grows by N. The memory goes back to the pool only after every downstream has dropped it.
+#[derive(Debug, Clone)]
+pub struct TrackedEvent {
+    pub event: StreamEvent,
+    pub _ticket: Option<Arc<MemoryTicket>>,
+}
+
+impl TrackedEvent {
+    pub fn new(event: StreamEvent, ticket: Option<MemoryTicket>) -> Self { // wraps the optional ticket in an `Arc`
+        Self {
+            event,
+            _ticket: ticket.map(Arc::new),
+        }
+    }
+
+    pub fn control(event: StreamEvent) -> Self { // builds an event that carries no memory ticket
+        Self {
+            event,
+            _ticket: None,
+        }
+    }
+}
diff --git a/src/runtime/streaming/protocol/watermark.rs b/src/runtime/streaming/protocol/watermark.rs
new file mode 100644
index 00000000..43baeabb
--- /dev/null
+++ b/src/runtime/streaming/protocol/watermark.rs
@@ -0,0 +1,80 @@
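+//! The watermark type follows `arroyo_types::Watermark` (imported here from `crate::sql::common`); this module provides **multi-input alignment (merge)** and **monotonic-advance** checks.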
+
+use crate::sql::common::Watermark;
+
+/// Merges the per-input watermarks of a multi-input operator into one aligned watermark.
+///
+/// Returns:
+/// * `None` while any input has not reported a watermark yet;
+/// * `Some(Watermark::Idle)` when no input carries an event time (all idle, or an empty slice);
+/// * `Some(Watermark::EventTime(t))` otherwise, with `t` the smallest event time seen. Idle
+///   inputs do not take part in the minimum.
+pub fn merge_watermarks(per_input: &[Option<Watermark>]) -> Option<Watermark> {
+    let mut earliest: Option<std::time::SystemTime> = None;
+
+    for slot in per_input {
+        match slot {
+            // One input is still silent: no aligned watermark can be produced.
+            None => return None,
+            // Idle inputs never hold the aligned event time back.
+            Some(Watermark::Idle) => {}
+            Some(Watermark::EventTime(t)) => {
+                earliest = Some(match earliest {
+                    Some(current) if current <= *t => current,
+                    _ => *t,
+                });
+            }
+        }
+    }
+
+    // `earliest` is still unset only when every input was idle (or there were none).
+    match earliest {
+        Some(t) => Some(Watermark::EventTime(t)),
+        None => Some(Watermark::Idle),
+    }
+}
+
+/// Whether `new` is a **strict advance** over `previous`; always `true` when `previous` is `None`.
+pub fn watermark_strictly_advances(new: Watermark, previous: Option<Watermark>) -> bool {
+    let Some(last) = previous else {
+        return true;
+    };
+    match (new, last) {
+        (Watermark::Idle, Watermark::Idle) => false,
+        (Watermark::EventTime(now), Watermark::EventTime(before)) => now > before,
+        // Any switch between idle and event time counts as an advance.
+        (Watermark::Idle, Watermark::EventTime(_)) | (Watermark::EventTime(_), Watermark::Idle) => true,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::{Duration, SystemTime};
+
+    #[test]
+    fn merge_waits_for_all_channels() {
+        let wms = vec![Some(Watermark::EventTime(SystemTime::UNIX_EPOCH)), None];
+        assert!(merge_watermarks(&wms).is_none()); // the second input has no watermark yet
+    }
+
+    #[test]
+    fn merge_min_event_time_ignores_idle() {
+        let t1 = SystemTime::UNIX_EPOCH + Duration::from_secs(10);
+        let t2 = SystemTime::UNIX_EPOCH + Duration::from_secs(5);
+        let wms = vec![Some(Watermark::EventTime(t1)), Some(Watermark::Idle)];
+        assert_eq!(merge_watermarks(&wms), Some(Watermark::EventTime(t1))); // the idle input is ignored
+
+        let wms = vec![
+            Some(Watermark::EventTime(t1)),
+            Some(Watermark::EventTime(t2)),
+        ];
+        assert_eq!(merge_watermarks(&wms), Some(Watermark::EventTime(t2))); // minimum of the two event times
+    }
+
+    #[test]
+    fn merge_all_idle() {
+        let wms = vec![Some(Watermark::Idle), Some(Watermark::Idle)];
+        assert_eq!(merge_watermarks(&wms), Some(Watermark::Idle)); // no event time anywhere: result is idle
+    }
+}
diff --git a/src/runtime/streaming/state/mod.rs b/src/runtime/streaming/state/mod.rs
new file mode 100644
index 00000000..e69de29b
diff --git a/src/runtime/streaming/state/table_manager.rs b/src/runtime/streaming/state/table_manager.rs
new file mode 100644
index 00000000..e69de29b
diff --git a/src/runtime/input/input_protocol.rs b/src/runtime/wasm/input/input_protocol.rs
similarity index 100%
rename from src/runtime/input/input_protocol.rs
rename to src/runtime/wasm/input/input_protocol.rs
diff --git a/src/runtime/input/input_provider.rs b/src/runtime/wasm/input/input_provider.rs
similarity index 100%
rename from src/runtime/input/input_provider.rs
rename to src/runtime/wasm/input/input_provider.rs
diff --git a/src/runtime/input/input_runner.rs b/src/runtime/wasm/input/input_runner.rs
similarity index 100%
rename from src/runtime/input/input_runner.rs
rename to src/runtime/wasm/input/input_runner.rs
diff --git a/src/runtime/input/interface.rs b/src/runtime/wasm/input/interface.rs
similarity index 100%
rename from src/runtime/input/interface.rs
rename to src/runtime/wasm/input/interface.rs
diff --git a/src/runtime/input/mod.rs b/src/runtime/wasm/input/mod.rs
similarity index 100%
rename from src/runtime/input/mod.rs
rename to src/runtime/wasm/input/mod.rs
diff --git a/src/runtime/input/protocol/kafka/config.rs b/src/runtime/wasm/input/protocol/kafka/config.rs
similarity index 100%
rename from src/runtime/input/protocol/kafka/config.rs
rename to src/runtime/wasm/input/protocol/kafka/config.rs
diff --git a/src/runtime/input/protocol/kafka/kafka_protocol.rs b/src/runtime/wasm/input/protocol/kafka/kafka_protocol.rs
similarity index 100%
rename from src/runtime/input/protocol/kafka/kafka_protocol.rs
rename to src/runtime/wasm/input/protocol/kafka/kafka_protocol.rs
diff --git a/src/runtime/input/protocol/kafka/mod.rs b/src/runtime/wasm/input/protocol/kafka/mod.rs
similarity index 100%
rename from src/runtime/input/protocol/kafka/mod.rs
rename to src/runtime/wasm/input/protocol/kafka/mod.rs
diff --git a/src/runtime/input/protocol/mod.rs b/src/runtime/wasm/input/protocol/mod.rs
similarity index 100%
rename from src/runtime/input/protocol/mod.rs
rename to src/runtime/wasm/input/protocol/mod.rs
diff --git a/src/runtime/sink/mod.rs b/src/runtime/wasm/mod.rs
similarity index 86%
rename from src/runtime/sink/mod.rs
rename to src/runtime/wasm/mod.rs
index a0a2a6fc..b1c82f4c 100644
--- a/src/runtime/sink/mod.rs
+++ b/src/runtime/wasm/mod.rs
@@ -1,5 +1,6 @@
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
+//
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
@@ -10,6 +11,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Sink module
+//! WebAssembly runtime integration.
 
-// TODO: Add sink implementation here
+pub mod input;
+pub mod output;
+pub mod processor;
diff --git a/src/runtime/output/interface.rs b/src/runtime/wasm/output/interface.rs
similarity index 100%
rename from src/runtime/output/interface.rs
rename to src/runtime/wasm/output/interface.rs
diff --git a/src/runtime/output/mod.rs b/src/runtime/wasm/output/mod.rs
similarity index 100%
rename from src/runtime/output/mod.rs
rename to src/runtime/wasm/output/mod.rs
diff --git a/src/runtime/output/output_protocol.rs b/src/runtime/wasm/output/output_protocol.rs
similarity index 100%
rename from src/runtime/output/output_protocol.rs
rename to src/runtime/wasm/output/output_protocol.rs
diff --git a/src/runtime/output/output_provider.rs b/src/runtime/wasm/output/output_provider.rs
similarity index 100%
rename from src/runtime/output/output_provider.rs
rename to src/runtime/wasm/output/output_provider.rs
diff --git a/src/runtime/output/output_runner.rs b/src/runtime/wasm/output/output_runner.rs
similarity index 100%
rename from src/runtime/output/output_runner.rs
rename to src/runtime/wasm/output/output_runner.rs
diff --git a/src/runtime/output/protocol/kafka/kafka_protocol.rs b/src/runtime/wasm/output/protocol/kafka/kafka_protocol.rs
similarity index 100%
rename from src/runtime/output/protocol/kafka/kafka_protocol.rs
rename to src/runtime/wasm/output/protocol/kafka/kafka_protocol.rs
diff --git a/src/runtime/output/protocol/kafka/mod.rs b/src/runtime/wasm/output/protocol/kafka/mod.rs
similarity index 100%
rename from src/runtime/output/protocol/kafka/mod.rs
rename to src/runtime/wasm/output/protocol/kafka/mod.rs
diff --git a/src/runtime/output/protocol/kafka/producer_config.rs b/src/runtime/wasm/output/protocol/kafka/producer_config.rs
similarity index 100%
rename from src/runtime/output/protocol/kafka/producer_config.rs
rename to src/runtime/wasm/output/protocol/kafka/producer_config.rs
diff --git a/src/runtime/output/protocol/mod.rs b/src/runtime/wasm/output/protocol/mod.rs
similarity index 100%
rename from src/runtime/output/protocol/mod.rs
rename to src/runtime/wasm/output/protocol/mod.rs
diff --git a/src/runtime/processor/function_error.rs b/src/runtime/wasm/processor/function_error.rs
similarity index 100%
rename from src/runtime/processor/function_error.rs
rename to src/runtime/wasm/processor/function_error.rs
diff --git a/src/runtime/processor/mod.rs b/src/runtime/wasm/processor/mod.rs
similarity index 100%
rename from src/runtime/processor/mod.rs
rename to src/runtime/wasm/processor/mod.rs
diff --git a/src/runtime/processor/python/mod.rs b/src/runtime/wasm/processor/python/mod.rs
similarity index 100%
rename from src/runtime/processor/python/mod.rs
rename to src/runtime/wasm/processor/python/mod.rs
diff --git a/src/runtime/processor/python/python_host.rs b/src/runtime/wasm/processor/python/python_host.rs
similarity index 100%
rename from src/runtime/processor/python/python_host.rs
rename to src/runtime/wasm/processor/python/python_host.rs
diff --git a/src/runtime/processor/python/python_service.rs b/src/runtime/wasm/processor/python/python_service.rs
similarity index 100%
rename from src/runtime/processor/python/python_service.rs
rename to src/runtime/wasm/processor/python/python_service.rs
diff --git a/src/runtime/processor/wasm/input_strategy.rs b/src/runtime/wasm/processor/wasm/input_strategy.rs
similarity index 100%
rename from src/runtime/processor/wasm/input_strategy.rs
rename to src/runtime/wasm/processor/wasm/input_strategy.rs
diff --git a/src/runtime/processor/wasm/mod.rs b/src/runtime/wasm/processor/wasm/mod.rs
similarity index 100%
rename from src/runtime/processor/wasm/mod.rs
rename to src/runtime/wasm/processor/wasm/mod.rs
diff --git a/src/runtime/processor/wasm/thread_pool.rs b/src/runtime/wasm/processor/wasm/thread_pool.rs
similarity index 100%
rename from src/runtime/processor/wasm/thread_pool.rs
rename to src/runtime/wasm/processor/wasm/thread_pool.rs
diff --git a/src/runtime/processor/wasm/wasm_cache.rs b/src/runtime/wasm/processor/wasm/wasm_cache.rs
similarity index 100%
rename from src/runtime/processor/wasm/wasm_cache.rs
rename to src/runtime/wasm/processor/wasm/wasm_cache.rs
diff --git a/src/runtime/processor/wasm/wasm_host.rs b/src/runtime/wasm/processor/wasm/wasm_host.rs
similarity index 100%
rename from src/runtime/processor/wasm/wasm_host.rs
rename to src/runtime/wasm/processor/wasm/wasm_host.rs
diff --git a/src/runtime/processor/wasm/wasm_processor.rs b/src/runtime/wasm/processor/wasm/wasm_processor.rs
similarity index 100%
rename from src/runtime/processor/wasm/wasm_processor.rs
rename to src/runtime/wasm/processor/wasm/wasm_processor.rs
diff --git a/src/runtime/processor/wasm/wasm_processor_trait.rs b/src/runtime/wasm/processor/wasm/wasm_processor_trait.rs
similarity index 100%
rename from src/runtime/processor/wasm/wasm_processor_trait.rs
rename to src/runtime/wasm/processor/wasm/wasm_processor_trait.rs
diff --git a/src/runtime/processor/wasm/wasm_task.rs b/src/runtime/wasm/processor/wasm/wasm_task.rs
similarity index 100%
rename from src/runtime/processor/wasm/wasm_task.rs
rename to src/runtime/wasm/processor/wasm/wasm_task.rs
diff --git a/src/sql/common/errors.rs b/src/sql/common/errors.rs
index bcda8667..507851bd 100644
--- a/src/sql/common/errors.rs
+++ b/src/sql/common/errors.rs
@@ -1,5 +1,8 @@
 use std::fmt;
 
+/// Result type for streaming operators and collectors.
+pub type DataflowResult<T> = std::result::Result<T, DataflowError>;
+
 /// Unified error type for streaming dataflow operations.
 #[derive(Debug)]
 pub enum DataflowError {
@@ -26,6 +29,16 @@ impl fmt::Display for DataflowError {
 
 impl std::error::Error for DataflowError {}
 
+impl DataflowError {
+    pub fn with_operator(self, operator_id: impl Into<String>) -> Self { // prefixes the error with an operator id; the result is always the `Operator` variant
+        let id = operator_id.into();
+        match self {
+            DataflowError::Operator(m) => DataflowError::Operator(format!("{id}: {m}")), // already an operator error: prefix its message
+            other => DataflowError::Operator(format!("{id}: {other}")), // other variants: rendered with `Display`, then wrapped
+        }
+    }
+}
+
 impl From<arrow_schema::ArrowError> for DataflowError {
     fn from(e: arrow_schema::ArrowError) -> Self {
         DataflowError::Arrow(e)
diff --git a/src/sql/common/fs_schema.rs b/src/sql/common/fs_schema.rs
index 5233bd0c..c99af1e5 100644
--- a/src/sql/common/fs_schema.rs
+++ b/src/sql/common/fs_schema.rs
@@ -21,6 +21,15 @@ use super::{to_nanos, TIMESTAMP_FIELD};
 use std::ops::Range;
 use crate::sql::common::converter::Converter;
 
+#[derive(Debug, Copy, Clone)]
+pub enum FieldValueType<'a> {
+    Int64(Option<i64>),
+    UInt64(Option<u64>),
+    Int32(Option<i32>),
+    String(Option<&'a str>),
+    Bytes(Option<&'a [u8]>),
+}
+
 pub type FsSchemaRef = Arc<FsSchema>;
 
 #[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index 730d6f37..cb833c8e 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -30,7 +30,7 @@ pub mod operator_config;
 pub mod task_info;
 pub mod time_utils;
 pub mod worker;
-mod converter;
+pub mod converter;
 
 // ── Re-exports from existing modules ──
 pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType};
@@ -45,11 +45,11 @@ pub use worker::{MachineId, WorkerId};
 // ── Re-exports from new modules ──
 pub use control::{
     CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp,
-    ErrorDomain, RetryHint, StopMode, TaskCheckpointEventType, TaskError,
+    ErrorDomain, RetryHint, StopMode, TableConfig, TaskCheckpointEventType, TaskError,
 };
 pub use fs_schema::{FsSchema, FsSchemaRef};
 pub use connector_options::{ConnectorOptions, FromOpts};
-pub use errors::DataflowError;
+pub use errors::{DataflowError, DataflowResult};
 pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat};
 pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
 

From 3b86ea0f8378ba371d012e6ac90ffcfe6163e337 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 25 Mar 2026 00:09:06 +0800
Subject: [PATCH 16/44] update

---
 Cargo.lock                                    | 3840 ++---------------
 Cargo.toml                                    |    6 +-
 src/runtime/mod.rs                            |    2 -
 src/runtime/streaming/api/context.rs          |   61 +-
 src/runtime/streaming/api/mod.rs              |    2 +-
 src/runtime/streaming/api/operator.rs         |   58 +
 src/runtime/streaming/execution/runner.rs     |    4 +-
 .../execution/tracker/watermark_tracker.rs    |    2 +-
 src/runtime/streaming/factory/mod.rs          |    5 +-
 src/runtime/streaming/factory/registry.rs     |  268 +-
 src/runtime/streaming/format/mod.rs           |    9 +
 src/runtime/streaming/mod.rs                  |    9 +-
 .../grouping/incremental_aggregate.rs         |  238 +-
 .../operators/joins/join_instance.rs          |   66 +-
 .../operators/joins/join_with_expiration.rs   |  120 +-
 .../streaming/operators/joins/lookup_join.rs  |   10 +-
 src/runtime/streaming/operators/mod.rs        |   17 +-
 .../streaming/operators/sink/kafka/mod.rs     |   11 +-
 .../streaming/operators/source/kafka/mod.rs   |   60 +-
 src/runtime/streaming/operators/source/mod.rs |    2 +-
 .../watermark/watermark_generator.rs          |    4 +-
 .../windows/session_aggregating_window.rs     |   97 +-
 .../windows/sliding_aggregating_window.rs     |   63 +-
 .../windows/tumbling_aggregating_window.rs    |    3 +-
 .../operators/windows/window_function.rs      |   59 +-
 src/runtime/streaming/storage/mod.rs          |   32 +
 src/sql/datastream/logical.rs                 |    2 +
 src/sql/logical_node/logical/operator_name.rs |    1 +
 28 files changed, 1178 insertions(+), 3873 deletions(-)
 create mode 100644 src/runtime/streaming/storage/mod.rs

diff --git a/Cargo.lock b/Cargo.lock
index 4cc46aef..fc3a898a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -132,30 +132,6 @@ version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
 
-[[package]]
-name = "apache-avro"
-version = "0.18.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61a81f4e6304e455a9d52cf8ab667cb2fcf792f2cee2a31c28800901a335ecd5"
-dependencies = [
- "bigdecimal",
- "bon",
- "digest",
- "log",
- "miniz_oxide",
- "num-bigint",
- "quad-rand",
- "rand 0.9.2",
- "regex-lite",
- "serde",
- "serde_bytes",
- "serde_json",
- "strum 0.27.2",
- "strum_macros 0.27.2",
- "thiserror 2.0.17",
- "uuid",
-]
-
 [[package]]
 name = "ar_archive_writer"
 version = "0.5.1"
@@ -171,15 +147,6 @@ version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
 
-[[package]]
-name = "arc-swap"
-version = "1.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6"
-dependencies = [
- "rustversion",
-]
-
 [[package]]
 name = "arrayref"
 version = "0.3.9"
@@ -521,133 +488,6 @@ dependencies = [
  "regex-syntax",
 ]
 
-[[package]]
-name = "arroyo-datastream"
-version = "0.16.0-dev"
-dependencies = [
- "anyhow",
- "arrow-schema 55.2.0",
- "arroyo-rpc",
- "bincode",
- "datafusion-proto 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "itertools 0.14.0",
- "petgraph 0.8.3",
- "prost",
- "rand 0.9.2",
- "serde",
- "serde_json",
- "strum 0.27.2",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "arroyo-rpc"
-version = "0.16.0-dev"
-dependencies = [
- "ahash",
- "anyhow",
- "apache-avro",
- "arc-swap",
- "arrow",
- "arrow-array 55.2.0",
- "arrow-ord",
- "arrow-schema 55.2.0",
- "arroyo-types",
- "async-trait",
- "base64",
- "bincode",
- "bytes",
- "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "dirs",
- "figment",
- "futures",
- "k8s-openapi",
- "local-ip-address",
- "log",
- "nanoid",
- "object_store",
- "percent-encoding",
- "prost",
- "rand 0.9.2",
- "regex",
- "reqwest",
- "rustls",
- "rustls-native-certs",
- "schemars 1.2.1",
- "serde",
- "serde_json",
- "smallvec",
- "strum 0.27.2",
- "strum_macros 0.27.2",
- "thiserror 2.0.17",
- "tokio",
- "tonic 0.13.1",
- "tonic-build 0.13.1",
- "tracing",
- "url",
- "utoipa",
-]
-
-[[package]]
-name = "arroyo-state"
-version = "0.16.0-dev"
-dependencies = [
- "anyhow",
- "arrow",
- "arrow-array 55.2.0",
- "arrow-ord",
- "arrow-schema 55.2.0",
- "arroyo-datastream",
- "arroyo-rpc",
- "arroyo-storage",
- "arroyo-types",
- "async-trait",
- "bincode",
- "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "lazy_static",
- "object_store",
- "once_cell",
- "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "prometheus",
- "prost",
- "serde",
- "serde_json",
- "tokio",
- "tracing",
-]
-
-[[package]]
-name = "arroyo-storage"
-version = "0.16.0-dev"
-dependencies = [
- "arroyo-rpc",
- "arroyo-types",
- "async-trait",
- "aws-config",
- "aws-credential-types",
- "bytes",
- "futures",
- "object_store",
- "rand 0.9.2",
- "regex",
- "thiserror 2.0.17",
- "tokio",
- "tokio-util",
- "tracing",
-]
-
-[[package]]
-name = "arroyo-types"
-version = "0.16.0-dev"
-dependencies = [
- "arrow",
- "arrow-array 55.2.0",
- "bincode",
- "chrono",
- "serde",
-]
-
 [[package]]
 name = "async-compression"
 version = "0.4.19"
@@ -684,7 +524,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -695,7 +535,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -707,15 +547,6 @@ dependencies = [
  "num-traits",
 ]
 
-[[package]]
-name = "atomic"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340"
-dependencies = [
- "bytemuck",
-]
-
 [[package]]
 name = "atomic-waker"
 version = "1.1.2"
@@ -729,627 +560,165 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
 [[package]]
-name = "aws-config"
-version = "1.6.3"
+name = "axum"
+version = "0.7.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02a18fd934af6ae7ca52410d4548b98eb895aab0f1ea417d168d85db1434a141"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
 dependencies = [
- "aws-credential-types",
- "aws-runtime",
- "aws-sdk-sso",
- "aws-sdk-ssooidc",
- "aws-sdk-sts",
- "aws-smithy-async",
- "aws-smithy-http 0.62.6",
- "aws-smithy-json",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-types",
+ "async-trait",
+ "axum-core",
  "bytes",
- "fastrand",
- "hex",
- "http 1.4.0",
- "ring",
- "time",
- "tokio",
- "tracing",
- "url",
- "zeroize",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "itoa",
+ "matchit",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "sync_wrapper",
+ "tower 0.5.2",
+ "tower-layer",
+ "tower-service",
 ]
 
 [[package]]
-name = "aws-credential-types"
-version = "1.2.13"
+name = "axum-core"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0"
+checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
 dependencies = [
- "aws-smithy-async",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "zeroize",
+ "async-trait",
+ "bytes",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
 ]
 
 [[package]]
-name = "aws-lc-rs"
-version = "1.16.2"
+name = "base64"
+version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc"
-dependencies = [
- "aws-lc-sys",
- "zeroize",
-]
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
 [[package]]
-name = "aws-lc-sys"
-version = "0.39.0"
+name = "bigdecimal"
+version = "0.4.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a"
+checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695"
 dependencies = [
- "cc",
- "cmake",
- "dunce",
- "fs_extra",
+ "autocfg",
+ "libm",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
 ]
 
 [[package]]
-name = "aws-runtime"
-version = "1.7.1"
+name = "bincode"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75"
+checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
 dependencies = [
- "aws-credential-types",
- "aws-sigv4",
- "aws-smithy-async",
- "aws-smithy-http 0.63.5",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-types",
- "bytes",
- "bytes-utils",
- "fastrand",
- "http 1.4.0",
- "http-body 1.0.1",
- "percent-encoding",
- "pin-project-lite",
- "tracing",
- "uuid",
+ "bincode_derive",
+ "serde",
+ "unty",
 ]
 
 [[package]]
-name = "aws-sdk-sso"
-version = "1.72.0"
+name = "bincode_derive"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13118ad30741222f67b1a18e5071385863914da05124652b38e172d6d3d9ce31"
+checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
 dependencies = [
- "aws-credential-types",
- "aws-runtime",
- "aws-smithy-async",
- "aws-smithy-http 0.62.6",
- "aws-smithy-json",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-types",
- "bytes",
- "fastrand",
- "http 0.2.12",
- "regex-lite",
- "tracing",
+ "virtue",
 ]
 
 [[package]]
-name = "aws-sdk-ssooidc"
-version = "1.73.0"
+name = "bindgen"
+version = "0.65.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f879a8572b4683a8f84f781695bebf2f25cf11a81a2693c31fc0e0215c2c1726"
+checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"
 dependencies = [
- "aws-credential-types",
- "aws-runtime",
- "aws-smithy-async",
- "aws-smithy-http 0.62.6",
- "aws-smithy-json",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-types",
- "bytes",
- "fastrand",
- "http 0.2.12",
- "regex-lite",
- "tracing",
+ "bitflags 1.3.2",
+ "cexpr",
+ "clang-sys",
+ "lazy_static",
+ "lazycell",
+ "peeking_take_while",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash 1.1.0",
+ "shlex",
+ "syn",
 ]
 
 [[package]]
-name = "aws-sdk-sts"
-version = "1.73.0"
+name = "bindgen"
+version = "0.72.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1e9c3c24e36183e2f698235ed38dcfbbdff1d09b9232dc866c4be3011e0b47e"
+checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
 dependencies = [
- "aws-credential-types",
- "aws-runtime",
- "aws-smithy-async",
- "aws-smithy-http 0.62.6",
- "aws-smithy-json",
- "aws-smithy-query",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-smithy-xml",
- "aws-types",
- "fastrand",
- "http 0.2.12",
- "regex-lite",
- "tracing",
+ "bitflags 2.10.0",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.13.0",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash 2.1.1",
+ "shlex",
+ "syn",
 ]
 
 [[package]]
-name = "aws-sigv4"
-version = "1.4.1"
+name = "bitflags"
+version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9"
-dependencies = [
- "aws-credential-types",
- "aws-smithy-http 0.63.5",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "bytes",
- "form_urlencoded",
- "hex",
- "hmac",
- "http 0.2.12",
- "http 1.4.0",
- "percent-encoding",
- "sha2",
- "time",
- "tracing",
-]
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bitflags"
+version = "2.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
 
 [[package]]
-name = "aws-smithy-async"
-version = "1.2.13"
+name = "bitmaps"
+version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cc50d0f63e714784b84223abd7abbc8577de8c35d699e0edd19f0a88a08ae13"
+checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2"
 dependencies = [
- "futures-util",
- "pin-project-lite",
- "tokio",
+ "typenum",
 ]
 
 [[package]]
-name = "aws-smithy-http"
-version = "0.62.6"
+name = "blake2"
+version = "0.10.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b"
+checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe"
 dependencies = [
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "bytes",
- "bytes-utils",
- "futures-core",
- "futures-util",
- "http 0.2.12",
- "http 1.4.0",
- "http-body 0.4.6",
- "percent-encoding",
- "pin-project-lite",
- "pin-utils",
- "tracing",
+ "digest",
 ]
 
 [[package]]
-name = "aws-smithy-http"
-version = "0.63.5"
+name = "blake3"
+version = "1.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7"
-dependencies = [
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "bytes",
- "bytes-utils",
- "futures-core",
- "futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
- "http-body-util",
- "percent-encoding",
- "pin-project-lite",
- "pin-utils",
- "tracing",
-]
-
-[[package]]
-name = "aws-smithy-http-client"
-version = "1.1.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95"
-dependencies = [
- "aws-smithy-async",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "h2",
- "http 1.4.0",
- "hyper",
- "hyper-rustls",
- "hyper-util",
- "pin-project-lite",
- "rustls",
- "rustls-native-certs",
- "rustls-pki-types",
- "tokio",
- "tokio-rustls",
- "tower 0.5.2",
- "tracing",
-]
-
-[[package]]
-name = "aws-smithy-json"
-version = "0.61.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551"
-dependencies = [
- "aws-smithy-types",
-]
-
-[[package]]
-name = "aws-smithy-observability"
-version = "0.2.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b"
-dependencies = [
- "aws-smithy-runtime-api",
-]
-
-[[package]]
-name = "aws-smithy-query"
-version = "0.60.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0"
-dependencies = [
- "aws-smithy-types",
- "urlencoding",
-]
-
-[[package]]
-name = "aws-smithy-runtime"
-version = "1.10.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf"
-dependencies = [
- "aws-smithy-async",
- "aws-smithy-http 0.63.5",
- "aws-smithy-http-client",
- "aws-smithy-observability",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "bytes",
- "fastrand",
- "http 0.2.12",
- "http 1.4.0",
- "http-body 0.4.6",
- "http-body 1.0.1",
- "http-body-util",
- "pin-project-lite",
- "pin-utils",
- "tokio",
- "tracing",
-]
-
-[[package]]
-name = "aws-smithy-runtime-api"
-version = "1.11.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516"
-dependencies = [
- "aws-smithy-async",
- "aws-smithy-types",
- "bytes",
- "http 0.2.12",
- "http 1.4.0",
- "pin-project-lite",
- "tokio",
- "tracing",
- "zeroize",
-]
-
-[[package]]
-name = "aws-smithy-types"
-version = "1.4.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3"
-dependencies = [
- "base64-simd",
- "bytes",
- "bytes-utils",
- "http 0.2.12",
- "http 1.4.0",
- "http-body 0.4.6",
- "http-body 1.0.1",
- "http-body-util",
- "itoa",
- "num-integer",
- "pin-project-lite",
- "pin-utils",
- "ryu",
- "serde",
- "time",
-]
-
-[[package]]
-name = "aws-smithy-xml"
-version = "0.60.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa"
-dependencies = [
- "xmlparser",
-]
-
-[[package]]
-name = "aws-types"
-version = "1.3.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96"
-dependencies = [
- "aws-credential-types",
- "aws-smithy-async",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "rustc_version",
- "tracing",
-]
-
-[[package]]
-name = "axum"
-version = "0.7.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
-dependencies = [
- "async-trait",
- "axum-core 0.4.5",
- "bytes",
- "futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
- "http-body-util",
- "itoa",
- "matchit 0.7.3",
- "memchr",
- "mime",
- "percent-encoding",
- "pin-project-lite",
- "rustversion",
- "serde",
- "sync_wrapper",
- "tower 0.5.2",
- "tower-layer",
- "tower-service",
-]
-
-[[package]]
-name = "axum"
-version = "0.8.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
-dependencies = [
- "axum-core 0.5.6",
- "bytes",
- "futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
- "http-body-util",
- "itoa",
- "matchit 0.8.4",
- "memchr",
- "mime",
- "percent-encoding",
- "pin-project-lite",
- "serde_core",
- "sync_wrapper",
- "tower 0.5.2",
- "tower-layer",
- "tower-service",
-]
-
-[[package]]
-name = "axum-core"
-version = "0.4.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
-dependencies = [
- "async-trait",
- "bytes",
- "futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
- "http-body-util",
- "mime",
- "pin-project-lite",
- "rustversion",
- "sync_wrapper",
- "tower-layer",
- "tower-service",
-]
-
-[[package]]
-name = "axum-core"
-version = "0.5.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
-dependencies = [
- "bytes",
- "futures-core",
- "http 1.4.0",
- "http-body 1.0.1",
- "http-body-util",
- "mime",
- "pin-project-lite",
- "sync_wrapper",
- "tower-layer",
- "tower-service",
-]
-
-[[package]]
-name = "backtrace"
-version = "0.3.76"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
-dependencies = [
- "addr2line",
- "cfg-if",
- "libc",
- "miniz_oxide",
- "object",
- "rustc-demangle",
- "windows-link",
-]
-
-[[package]]
-name = "backtrace-ext"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50"
-dependencies = [
- "backtrace",
-]
-
-[[package]]
-name = "base64"
-version = "0.22.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
-
-[[package]]
-name = "base64-simd"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195"
-dependencies = [
- "outref",
- "vsimd",
-]
-
-[[package]]
-name = "bigdecimal"
-version = "0.4.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695"
-dependencies = [
- "autocfg",
- "libm",
- "num-bigint",
- "num-integer",
- "num-traits",
- "serde",
- "serde_json",
-]
-
-[[package]]
-name = "bincode"
-version = "2.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
-dependencies = [
- "bincode_derive",
- "serde",
- "unty",
-]
-
-[[package]]
-name = "bincode_derive"
-version = "2.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
-dependencies = [
- "virtue",
-]
-
-[[package]]
-name = "bindgen"
-version = "0.65.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"
-dependencies = [
- "bitflags 1.3.2",
- "cexpr",
- "clang-sys",
- "lazy_static",
- "lazycell",
- "peeking_take_while",
- "prettyplease",
- "proc-macro2",
- "quote",
- "regex",
- "rustc-hash 1.1.0",
- "shlex",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "bindgen"
-version = "0.72.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
-dependencies = [
- "bitflags 2.10.0",
- "cexpr",
- "clang-sys",
- "itertools 0.13.0",
- "proc-macro2",
- "quote",
- "regex",
- "rustc-hash 2.1.1",
- "shlex",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "bitflags"
-version = "1.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
-
-[[package]]
-name = "bitflags"
-version = "2.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
-
-[[package]]
-name = "bitmaps"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2"
-dependencies = [
- "typenum",
-]
-
-[[package]]
-name = "bitvec"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
-dependencies = [
- "funty",
- "radium",
- "tap",
- "wyz",
-]
-
-[[package]]
-name = "blake2"
-version = "0.10.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe"
-dependencies = [
- "digest",
-]
-
-[[package]]
-name = "blake3"
-version = "1.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
+checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
 dependencies = [
  "arrayref",
  "arrayvec",
@@ -1368,31 +737,6 @@ dependencies = [
  "generic-array",
 ]
 
-[[package]]
-name = "bon"
-version = "3.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe"
-dependencies = [
- "bon-macros",
- "rustversion",
-]
-
-[[package]]
-name = "bon-macros"
-version = "3.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c"
-dependencies = [
- "darling 0.23.0",
- "ident_case",
- "prettyplease",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn 2.0.113",
-]
-
 [[package]]
 name = "brotli"
 version = "8.0.2"
@@ -1429,12 +773,6 @@ version = "0.6.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
 
-[[package]]
-name = "bytemuck"
-version = "1.25.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
-
 [[package]]
 name = "byteorder"
 version = "1.5.0"
@@ -1447,16 +785,6 @@ version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
 
-[[package]]
-name = "bytes-utils"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35"
-dependencies = [
- "bytes",
- "either",
-]
-
 [[package]]
 name = "bzip2"
 version = "0.5.2"
@@ -1618,12 +946,6 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
 
-[[package]]
-name = "cfg_aliases"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
-
 [[package]]
 name = "chrono"
 version = "0.4.42"
@@ -1633,7 +955,6 @@ dependencies = [
  "iana-time-zone",
  "js-sys",
  "num-traits",
- "serde",
  "wasm-bindgen",
  "windows-link",
 ]
@@ -1645,17 +966,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3"
 dependencies = [
  "chrono",
- "phf 0.12.1",
-]
-
-[[package]]
-name = "chumsky"
-version = "0.9.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
-dependencies = [
- "hashbrown 0.14.5",
- "stacker",
+ "phf",
 ]
 
 [[package]]
@@ -1700,7 +1011,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -1736,15 +1047,6 @@ dependencies = [
  "thiserror 2.0.17",
 ]
 
-[[package]]
-name = "codegen_template"
-version = "0.1.0"
-source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209"
-dependencies = [
- "unicode-xid",
- "unscanny",
-]
-
 [[package]]
 name = "colorchoice"
 version = "1.0.4"
@@ -1788,73 +1090,12 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
 
-[[package]]
-name = "core-foundation"
-version = "0.9.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
-dependencies = [
- "core-foundation-sys",
- "libc",
-]
-
-[[package]]
-name = "core-foundation"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
-dependencies = [
- "core-foundation-sys",
- "libc",
-]
-
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
-[[package]]
-name = "cornucopia"
-version = "0.9.0"
-source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209"
-dependencies = [
- "chumsky",
- "clap",
- "codegen_template",
- "heck 0.4.1",
- "indexmap 2.12.1",
- "miette",
- "postgres",
- "postgres-types",
- "prettyplease",
- "rusqlite",
- "syn 2.0.113",
- "thiserror 1.0.69",
-]
-
-[[package]]
-name = "cornucopia_async"
-version = "0.6.0"
-source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209"
-dependencies = [
- "async-trait",
- "cornucopia_client_core",
- "deadpool-postgres",
- "rusqlite",
- "tokio-postgres",
-]
-
-[[package]]
-name = "cornucopia_client_core"
-version = "0.4.0"
-source = "git+https://github.com/ArroyoSystems/cornucopia?branch=sqlite#6a1a87a8bab82068d4a41525995ed0e715382209"
-dependencies = [
- "fallible-iterator 0.2.0",
- "postgres-protocol",
- "postgres-types",
-]
-
 [[package]]
 name = "cpp_demangle"
 version = "0.4.5"
@@ -2117,75 +1358,6 @@ dependencies = [
  "memchr",
 ]
 
-[[package]]
-name = "darling"
-version = "0.20.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
-dependencies = [
- "darling_core 0.20.11",
- "darling_macro 0.20.11",
-]
-
-[[package]]
-name = "darling"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
-dependencies = [
- "darling_core 0.23.0",
- "darling_macro 0.23.0",
-]
-
-[[package]]
-name = "darling_core"
-version = "0.20.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
-dependencies = [
- "fnv",
- "ident_case",
- "proc-macro2",
- "quote",
- "strsim",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "darling_core"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
-dependencies = [
- "ident_case",
- "proc-macro2",
- "quote",
- "strsim",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "darling_macro"
-version = "0.20.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
-dependencies = [
- "darling_core 0.20.11",
- "quote",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "darling_macro"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
-dependencies = [
- "darling_core 0.23.0",
- "quote",
- "syn 2.0.113",
-]
-
 [[package]]
 name = "dashmap"
 version = "5.5.3"
@@ -2193,78 +1365,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
 dependencies = [
  "cfg-if",
- "hashbrown 0.14.5",
- "lock_api",
- "once_cell",
- "parking_lot_core",
-]
-
-[[package]]
-name = "dashmap"
-version = "6.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
-dependencies = [
- "cfg-if",
- "crossbeam-utils",
- "hashbrown 0.14.5",
- "lock_api",
- "once_cell",
- "parking_lot_core",
-]
-
-[[package]]
-name = "datafusion"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a11e19a7ccc5bb979c95c1dceef663eab39c9061b3bbf8d1937faf0f03bf41f"
-dependencies = [
- "arrow",
- "arrow-ipc 55.2.0",
- "arrow-schema 55.2.0",
- "async-trait",
- "bytes",
- "bzip2",
- "chrono",
- "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-catalog-listing 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource-csv 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource-json 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource-parquet 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-nested 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-table 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-window 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "flate2",
- "futures",
- "itertools 0.14.0",
- "log",
- "object_store",
- "parking_lot",
- "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "rand 0.9.2",
- "regex",
- "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "tempfile",
- "tokio",
- "url",
- "uuid",
- "xz2",
- "zstd",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
 ]
 
 [[package]]
@@ -2279,29 +1397,29 @@ dependencies = [
  "bytes",
  "bzip2",
  "chrono",
- "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-catalog-listing 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource-csv 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource-json 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource-parquet 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-nested 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-table 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-window 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-catalog",
+ "datafusion-catalog-listing",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-datasource-csv",
+ "datafusion-datasource-json",
+ "datafusion-datasource-parquet",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-expr-common",
+ "datafusion-functions",
+ "datafusion-functions-aggregate",
+ "datafusion-functions-nested",
+ "datafusion-functions-table",
+ "datafusion-functions-window",
+ "datafusion-optimizer",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-optimizer",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "datafusion-sql",
  "flate2",
  "futures",
  "itertools 0.14.0",
@@ -2311,7 +1429,7 @@ dependencies = [
  "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.9.2",
  "regex",
- "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
+ "sqlparser",
  "tempfile",
  "tokio",
  "url",
@@ -2320,32 +1438,6 @@ dependencies = [
  "zstd",
 ]
 
-[[package]]
-name = "datafusion-catalog"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94985e67cab97b1099db2a7af11f31a45008b282aba921c1e1d35327c212ec18"
-dependencies = [
- "arrow",
- "async-trait",
- "dashmap 6.1.0",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "itertools 0.14.0",
- "log",
- "object_store",
- "parking_lot",
- "tokio",
-]
-
 [[package]]
 name = "datafusion-catalog"
 version = "48.0.1"
@@ -2354,15 +1446,15 @@ dependencies = [
  "arrow",
  "async-trait",
  "dashmap 6.1.0",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "datafusion-sql",
  "futures",
  "itertools 0.14.0",
  "log",
@@ -2371,29 +1463,6 @@ dependencies = [
  "tokio",
 ]
 
-[[package]]
-name = "datafusion-catalog-listing"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e002df133bdb7b0b9b429d89a69aa77b35caeadee4498b2ce1c7c23a99516988"
-dependencies = [
- "arrow",
- "async-trait",
- "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "log",
- "object_store",
- "tokio",
-]
-
 [[package]]
 name = "datafusion-catalog-listing"
 version = "48.0.1"
@@ -2401,45 +1470,21 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "async-trait",
- "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
  "futures",
  "log",
  "object_store",
  "tokio",
 ]
 
-[[package]]
-name = "datafusion-common"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e13242fc58fd753787b0a538e5ae77d356cb9d0656fa85a591a33c5f106267f6"
-dependencies = [
- "ahash",
- "arrow",
- "arrow-ipc 55.2.0",
- "base64",
- "half",
- "hashbrown 0.14.5",
- "indexmap 2.12.1",
- "libc",
- "log",
- "object_store",
- "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "paste",
- "recursive",
- "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "tokio",
- "web-time",
-]
-
 [[package]]
 name = "datafusion-common"
 version = "48.0.1"
@@ -2458,22 +1503,11 @@ dependencies = [
  "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "paste",
  "recursive",
- "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
+ "sqlparser",
  "tokio",
  "web-time",
 ]
 
-[[package]]
-name = "datafusion-common-runtime"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2239f964e95c3a5d6b4a8cde07e646de8995c1396a7fd62c6e784f5341db499"
-dependencies = [
- "futures",
- "log",
- "tokio",
-]
-
 [[package]]
 name = "datafusion-common-runtime"
 version = "48.0.1"
@@ -2484,42 +1518,6 @@ dependencies = [
  "tokio",
 ]
 
-[[package]]
-name = "datafusion-datasource"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2cf792579bc8bf07d1b2f68c2d5382f8a63679cce8fbebfd4ba95742b6e08864"
-dependencies = [
- "arrow",
- "async-compression",
- "async-trait",
- "bytes",
- "bzip2",
- "chrono",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "flate2",
- "futures",
- "glob",
- "itertools 0.14.0",
- "log",
- "object_store",
- "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "rand 0.9.2",
- "tempfile",
- "tokio",
- "tokio-util",
- "url",
- "xz2",
- "zstd",
-]
-
 [[package]]
 name = "datafusion-datasource"
 version = "48.0.1"
@@ -2531,14 +1529,14 @@ dependencies = [
  "bytes",
  "bzip2",
  "chrono",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
  "flate2",
  "futures",
  "glob",
@@ -2555,31 +1553,6 @@ dependencies = [
  "zstd",
 ]
 
-[[package]]
-name = "datafusion-datasource-csv"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfc114f9a1415174f3e8d2719c371fc72092ef2195a7955404cfe6b2ba29a706"
-dependencies = [
- "arrow",
- "async-trait",
- "bytes",
- "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "object_store",
- "regex",
- "tokio",
-]
-
 [[package]]
 name = "datafusion-datasource-csv"
 version = "48.0.1"
@@ -2588,47 +1561,22 @@ dependencies = [
  "arrow",
  "async-trait",
  "bytes",
- "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
  "futures",
  "object_store",
  "regex",
  "tokio",
 ]
 
-[[package]]
-name = "datafusion-datasource-json"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d88dd5e215c420a52362b9988ecd4cefd71081b730663d4f7d886f706111fc75"
-dependencies = [
- "arrow",
- "async-trait",
- "bytes",
- "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "object_store",
- "serde_json",
- "tokio",
-]
-
 [[package]]
 name = "datafusion-datasource-json"
 version = "48.0.1"
@@ -2637,53 +1585,22 @@ dependencies = [
  "arrow",
  "async-trait",
  "bytes",
- "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
  "futures",
  "object_store",
  "serde_json",
  "tokio",
 ]
 
-[[package]]
-name = "datafusion-datasource-parquet"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33692acdd1fbe75280d14f4676fe43f39e9cb36296df56575aa2cac9a819e4cf"
-dependencies = [
- "arrow",
- "async-trait",
- "bytes",
- "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-datasource 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-optimizer 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-session 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "itertools 0.14.0",
- "log",
- "object_store",
- "parking_lot",
- "parquet 55.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "rand 0.9.2",
- "tokio",
-]
-
 [[package]]
 name = "datafusion-datasource-parquet"
 version = "48.0.1"
@@ -2692,18 +1609,18 @@ dependencies = [
  "arrow",
  "async-trait",
  "bytes",
- "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-datasource 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-optimizer 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-session 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions-aggregate",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-optimizer",
+ "datafusion-physical-plan",
+ "datafusion-session",
  "futures",
  "itertools 0.14.0",
  "log",
@@ -2714,36 +1631,11 @@ dependencies = [
  "tokio",
 ]
 
-[[package]]
-name = "datafusion-doc"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0e7b648387b0c1937b83cb328533c06c923799e73a9e3750b762667f32662c0"
-
 [[package]]
 name = "datafusion-doc"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 
-[[package]]
-name = "datafusion-execution"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9609d83d52ff8315283c6dad3b97566e877d8f366fab4c3297742f33dcd636c7"
-dependencies = [
- "arrow",
- "dashmap 6.1.0",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "log",
- "object_store",
- "parking_lot",
- "rand 0.9.2",
- "tempfile",
- "url",
-]
-
 [[package]]
 name = "datafusion-execution"
 version = "48.0.1"
@@ -2751,8 +1643,8 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "dashmap 6.1.0",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-expr",
  "futures",
  "log",
  "object_store",
@@ -2762,27 +1654,6 @@ dependencies = [
  "url",
 ]
 
-[[package]]
-name = "datafusion-expr"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e75230cd67f650ef0399eb00f54d4a073698f2c0262948298e5299fc7324da63"
-dependencies = [
- "arrow",
- "chrono",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "indexmap 2.12.1",
- "paste",
- "recursive",
- "serde_json",
- "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)",
-]
-
 [[package]]
 name = "datafusion-expr"
 version = "48.0.1"
@@ -2790,30 +1661,17 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "chrono",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-expr-common",
+ "datafusion-functions-aggregate-common",
+ "datafusion-functions-window-common",
+ "datafusion-physical-expr-common",
  "indexmap 2.12.1",
  "paste",
  "recursive",
  "serde_json",
- "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
-]
-
-[[package]]
-name = "datafusion-expr-common"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70fafb3a045ed6c49cfca0cd090f62cf871ca6326cc3355cb0aaf1260fa760b6"
-dependencies = [
- "arrow",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "indexmap 2.12.1",
- "itertools 0.14.0",
- "paste",
+ "sqlparser",
 ]
 
 [[package]]
@@ -2822,41 +1680,12 @@ version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
  "indexmap 2.12.1",
  "itertools 0.14.0",
  "paste",
 ]
 
-[[package]]
-name = "datafusion-functions"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cdf9a9cf655265861a20453b1e58357147eab59bdc90ce7f2f68f1f35104d3bb"
-dependencies = [
- "arrow",
- "arrow-buffer 55.2.0",
- "base64",
- "blake2",
- "blake3",
- "chrono",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "hex",
- "itertools 0.14.0",
- "log",
- "md-5",
- "rand 0.9.2",
- "regex",
- "sha2",
- "unicode-segmentation",
- "uuid",
-]
-
 [[package]]
 name = "datafusion-functions"
 version = "48.0.1"
@@ -2868,108 +1697,53 @@ dependencies = [
  "blake2",
  "blake3",
  "chrono",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-expr-common",
+ "datafusion-macros",
  "hex",
  "itertools 0.14.0",
  "log",
  "md-5",
  "rand 0.9.2",
- "regex",
- "sha2",
- "unicode-segmentation",
- "uuid",
-]
-
-[[package]]
-name = "datafusion-functions-aggregate"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f07e49733d847be0a05235e17b884d326a2fd402c97a89fe8bcf0bfba310005"
-dependencies = [
- "ahash",
- "arrow",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "half",
- "log",
- "paste",
-]
-
-[[package]]
-name = "datafusion-functions-aggregate"
-version = "48.0.1"
-source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
-dependencies = [
- "ahash",
- "arrow",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "half",
- "log",
- "paste",
-]
-
-[[package]]
-name = "datafusion-functions-aggregate-common"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4512607e10d72b0b0a1dc08f42cb5bd5284cb8348b7fea49dc83409493e32b1b"
-dependencies = [
- "ahash",
- "arrow",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex",
+ "sha2",
+ "unicode-segmentation",
+ "uuid",
 ]
 
 [[package]]
-name = "datafusion-functions-aggregate-common"
+name = "datafusion-functions-aggregate"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "ahash",
  "arrow",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions-aggregate-common",
+ "datafusion-macros",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "half",
+ "log",
+ "paste",
 ]
 
 [[package]]
-name = "datafusion-functions-nested"
+name = "datafusion-functions-aggregate-common"
 version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ab331806e34f5545e5f03396e4d5068077395b1665795d8f88c14ec4f1e0b7a"
+source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
+ "ahash",
  "arrow",
- "arrow-ord",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-aggregate 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "itertools 0.14.0",
- "log",
- "paste",
+ "datafusion-common",
+ "datafusion-expr-common",
+ "datafusion-physical-expr-common",
 ]
 
 [[package]]
@@ -2979,35 +1753,19 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "arrow-ord",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions",
+ "datafusion-functions-aggregate",
+ "datafusion-macros",
+ "datafusion-physical-expr-common",
  "itertools 0.14.0",
  "log",
  "paste",
 ]
 
-[[package]]
-name = "datafusion-functions-table"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4ac2c0be983a06950ef077e34e0174aa0cb9e346f3aeae459823158037ade37"
-dependencies = [
- "arrow",
- "async-trait",
- "datafusion-catalog 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "parking_lot",
- "paste",
-]
-
 [[package]]
 name = "datafusion-functions-table"
 version = "48.0.1"
@@ -3015,77 +1773,38 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "async-trait",
- "datafusion-catalog 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-physical-plan",
  "parking_lot",
  "paste",
 ]
 
-[[package]]
-name = "datafusion-functions-window"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36f3d92731de384c90906941d36dcadf6a86d4128409a9c5cd916662baed5f53"
-dependencies = [
- "arrow",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-doc 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-macros 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "log",
- "paste",
-]
-
 [[package]]
 name = "datafusion-functions-window"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-doc 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-macros 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-doc",
+ "datafusion-expr",
+ "datafusion-functions-window-common",
+ "datafusion-macros",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
  "log",
  "paste",
 ]
 
-[[package]]
-name = "datafusion-functions-window-common"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c679f8bf0971704ec8fd4249fcbb2eb49d6a12cc3e7a840ac047b4928d3541b5"
-dependencies = [
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
-]
-
 [[package]]
 name = "datafusion-functions-window-common"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
-]
-
-[[package]]
-name = "datafusion-macros"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2821de7cb0362d12e75a5196b636a59ea3584ec1e1cc7dc6f5e34b9e8389d251"
-dependencies = [
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "quote",
- "syn 2.0.113",
+ "datafusion-common",
+ "datafusion-physical-expr-common",
 ]
 
 [[package]]
@@ -3093,28 +1812,9 @@ name = "datafusion-macros"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-expr",
  "quote",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "datafusion-optimizer"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1594c7a97219ede334f25347ad8d57056621e7f4f35a0693c8da876e10dd6a53"
-dependencies = [
- "arrow",
- "chrono",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "indexmap 2.12.1",
- "itertools 0.14.0",
- "log",
- "recursive",
- "regex",
- "regex-syntax",
+ "syn",
 ]
 
 [[package]]
@@ -3124,9 +1824,9 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "chrono",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-physical-expr",
  "indexmap 2.12.1",
  "itertools 0.14.0",
  "log",
@@ -3135,28 +1835,6 @@ dependencies = [
  "regex-syntax",
 ]
 
-[[package]]
-name = "datafusion-physical-expr"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc6da0f2412088d23f6b01929dedd687b5aee63b19b674eb73d00c3eb3c883b7"
-dependencies = [
- "ahash",
- "arrow",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-aggregate-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "half",
- "hashbrown 0.14.5",
- "indexmap 2.12.1",
- "itertools 0.14.0",
- "log",
- "paste",
- "petgraph 0.8.3",
-]
-
 [[package]]
 name = "datafusion-physical-expr"
 version = "48.0.1"
@@ -3164,11 +1842,11 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "ahash",
  "arrow",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-aggregate-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-expr-common",
+ "datafusion-functions-aggregate-common",
+ "datafusion-physical-expr-common",
  "half",
  "hashbrown 0.14.5",
  "indexmap 2.12.1",
@@ -3178,20 +1856,6 @@ dependencies = [
  "petgraph 0.8.3",
 ]
 
-[[package]]
-name = "datafusion-physical-expr-common"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcb0dbd9213078a593c3fe28783beaa625a4e6c6a6c797856ee2ba234311fb96"
-dependencies = [
- "ahash",
- "arrow",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "hashbrown 0.14.5",
- "itertools 0.14.0",
-]
-
 [[package]]
 name = "datafusion-physical-expr-common"
 version = "48.0.1"
@@ -3199,79 +1863,30 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "ahash",
  "arrow",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-expr-common",
  "hashbrown 0.14.5",
  "itertools 0.14.0",
 ]
 
-[[package]]
-name = "datafusion-physical-optimizer"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d140854b2db3ef8ac611caad12bfb2e1e1de827077429322a6188f18fc0026a"
-dependencies = [
- "arrow",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "itertools 0.14.0",
- "log",
- "recursive",
-]
-
 [[package]]
 name = "datafusion-physical-optimizer"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-expr-common",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
  "itertools 0.14.0",
  "log",
  "recursive",
 ]
 
-[[package]]
-name = "datafusion-physical-plan"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b46cbdf21a01206be76d467f325273b22c559c744a012ead5018dfe79597de08"
-dependencies = [
- "ahash",
- "arrow",
- "arrow-ord",
- "arrow-schema 55.2.0",
- "async-trait",
- "chrono",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-functions-window-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "half",
- "hashbrown 0.14.5",
- "indexmap 2.12.1",
- "itertools 0.14.0",
- "log",
- "parking_lot",
- "pin-project-lite",
- "tokio",
-]
-
 [[package]]
 name = "datafusion-physical-plan"
 version = "48.0.1"
@@ -3283,13 +1898,13 @@ dependencies = [
  "arrow-schema 55.2.0",
  "async-trait",
  "chrono",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-window-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions-window-common",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
  "futures",
  "half",
  "hashbrown 0.14.5",
@@ -3301,22 +1916,6 @@ dependencies = [
  "tokio",
 ]
 
-[[package]]
-name = "datafusion-proto"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3fc7a2744332c2ef8804274c21f9fa664b4ca5889169250a6fd6b649ee5d16c"
-dependencies = [
- "arrow",
- "chrono",
- "datafusion 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-proto-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "object_store",
- "prost",
-]
-
 [[package]]
 name = "datafusion-proto"
 version = "48.0.1"
@@ -3324,59 +1923,24 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "chrono",
- "datafusion 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-proto-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion",
+ "datafusion-common",
+ "datafusion-expr",
+ "datafusion-proto-common",
  "object_store",
  "prost",
 ]
 
-[[package]]
-name = "datafusion-proto-common"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "800add86852f12e3d249867425de2224c1e9fb7adc2930460548868781fbeded"
-dependencies = [
- "arrow",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "prost",
-]
-
 [[package]]
 name = "datafusion-proto-common"
 version = "48.0.1"
 source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo#916b45f5c28d94765ae4a6393c5e126b2ea55e1c"
 dependencies = [
  "arrow",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
  "prost",
 ]
 
-[[package]]
-name = "datafusion-session"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a72733766ddb5b41534910926e8da5836622316f6283307fd9fb7e19811a59c"
-dependencies = [
- "arrow",
- "async-trait",
- "dashmap 6.1.0",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-common-runtime 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-execution 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-physical-plan 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-sql 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "futures",
- "itertools 0.14.0",
- "log",
- "object_store",
- "parking_lot",
- "tokio",
-]
-
 [[package]]
 name = "datafusion-session"
 version = "48.0.1"
@@ -3385,13 +1949,13 @@ dependencies = [
  "arrow",
  "async-trait",
  "dashmap 6.1.0",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common-runtime 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-sql 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-plan",
+ "datafusion-sql",
  "futures",
  "itertools 0.14.0",
  "log",
@@ -3400,23 +1964,6 @@ dependencies = [
  "tokio",
 ]
 
-[[package]]
-name = "datafusion-sql"
-version = "48.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5162338cdec9cc7ea13a0e6015c361acad5ec1d88d83f7c86301f789473971f"
-dependencies = [
- "arrow",
- "bigdecimal",
- "datafusion-common 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "datafusion-expr 48.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "indexmap 2.12.1",
- "log",
- "recursive",
- "regex",
- "sqlparser 0.55.0 (registry+https://github.com/rust-lang/crates.io-index)",
-]
-
 [[package]]
 name = "datafusion-sql"
 version = "48.0.1"
@@ -3424,48 +1971,13 @@ source = "git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2F
 dependencies = [
  "arrow",
  "bigdecimal",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion-common",
+ "datafusion-expr",
  "indexmap 2.12.1",
  "log",
  "recursive",
  "regex",
- "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
-]
-
-[[package]]
-name = "deadpool"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b"
-dependencies = [
- "deadpool-runtime",
- "lazy_static",
- "num_cpus",
- "tokio",
-]
-
-[[package]]
-name = "deadpool-postgres"
-version = "0.14.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d697d376cbfa018c23eb4caab1fd1883dd9c906a8c034e8d9a3cb06a7e0bef9"
-dependencies = [
- "async-trait",
- "deadpool",
- "getrandom 0.2.16",
- "tokio",
- "tokio-postgres",
- "tracing",
-]
-
-[[package]]
-name = "deadpool-runtime"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b"
-dependencies = [
- "tokio",
+ "sqlparser",
 ]
 
 [[package]]
@@ -3486,37 +1998,6 @@ dependencies = [
  "powerfmt",
 ]
 
-[[package]]
-name = "derive_builder"
-version = "0.20.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
-dependencies = [
- "derive_builder_macro",
-]
-
-[[package]]
-name = "derive_builder_core"
-version = "0.20.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
-dependencies = [
- "darling 0.20.11",
- "proc-macro2",
- "quote",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "derive_builder_macro"
-version = "0.20.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
-dependencies = [
- "derive_builder_core",
- "syn 2.0.113",
-]
-
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -3538,27 +2019,6 @@ dependencies = [
  "dirs-sys-next",
 ]
 
-[[package]]
-name = "dirs"
-version = "6.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
-dependencies = [
- "dirs-sys",
-]
-
-[[package]]
-name = "dirs-sys"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
-dependencies = [
- "libc",
- "option-ext",
- "redox_users 0.5.2",
- "windows-sys 0.61.2",
-]
-
 [[package]]
 name = "dirs-sys-next"
 version = "0.1.2"
@@ -3566,7 +2026,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
 dependencies = [
  "libc",
- "redox_users 0.4.6",
+ "redox_users",
  "winapi",
 ]
 
@@ -3578,7 +2038,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -3602,12 +2062,6 @@ dependencies = [
  "shared_child",
 ]
 
-[[package]]
-name = "dunce"
-version = "1.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
-
 [[package]]
 name = "dyn-clone"
 version = "1.0.20"
@@ -3691,24 +2145,12 @@ version = "3.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59"
 
-[[package]]
-name = "fallible-iterator"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
-
 [[package]]
 name = "fallible-iterator"
 version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
 
-[[package]]
-name = "fallible-streaming-iterator"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
-
 [[package]]
 name = "fastrand"
 version = "2.3.0"
@@ -3722,24 +2164,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
 dependencies = [
  "cfg-if",
- "rustix 1.1.3",
- "windows-sys 0.59.0",
-]
-
-[[package]]
-name = "figment"
-version = "0.10.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
-dependencies = [
- "atomic",
- "pear",
- "serde",
- "serde_json",
- "serde_yaml",
- "toml 0.8.23",
- "uncased",
- "version_check",
+ "rustix 1.1.3",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -3803,21 +2229,6 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
-[[package]]
-name = "foreign-types"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
-dependencies = [
- "foreign-types-shared",
-]
-
-[[package]]
-name = "foreign-types-shared"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
-
 [[package]]
 name = "form_urlencoded"
 version = "1.2.2"
@@ -3838,46 +2249,37 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
-[[package]]
-name = "fs_extra"
-version = "1.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
-
 [[package]]
 name = "function-stream"
 version = "0.6.0"
 dependencies = [
+ "ahash",
  "anyhow",
  "arrow",
  "arrow-array 55.2.0",
  "arrow-ipc 55.2.0",
  "arrow-json 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fjson)",
  "arrow-schema 55.2.0",
- "arroyo-state",
  "async-trait",
  "base64",
  "bincode",
  "chrono",
  "clap",
- "cornucopia",
- "cornucopia_async",
  "crossbeam-channel",
- "datafusion 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-common 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-execution 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-aggregate 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-functions-window 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-expr 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-physical-plan 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
- "datafusion-proto 48.0.1 (git+https://github.com/ArroyoSystems/arrow-datafusion?branch=48.0.1%2Farroyo)",
+ "datafusion",
+ "datafusion-common",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-functions",
+ "datafusion-functions-aggregate",
+ "datafusion-functions-window",
+ "datafusion-physical-expr",
+ "datafusion-physical-plan",
+ "datafusion-proto",
  "futures",
  "governor",
  "hex",
  "itertools 0.14.0",
- "jiter",
  "log",
  "lru",
  "mini-moka",
@@ -3898,12 +2300,12 @@ dependencies = [
  "serde_json_path",
  "serde_yaml",
  "sha2",
- "sqlparser 0.55.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
- "strum 0.26.3",
+ "sqlparser",
+ "strum",
  "thiserror 2.0.17",
  "tokio",
  "tokio-stream",
- "tonic 0.12.3",
+ "tonic",
  "tracing",
  "tracing-appender",
  "tracing-subscriber",
@@ -3929,15 +2331,9 @@ dependencies = [
  "rustyline",
  "thiserror 2.0.17",
  "tokio",
- "tonic 0.12.3",
+ "tonic",
 ]
 
-[[package]]
-name = "funty"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
-
 [[package]]
 name = "futures"
 version = "0.3.31"
@@ -3994,7 +2390,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -4064,10 +2460,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
 dependencies = [
  "cfg-if",
- "js-sys",
  "libc",
- "wasi 0.11.1+wasi-snapshot-preview1",
- "wasm-bindgen",
+ "wasi",
 ]
 
 [[package]]
@@ -4084,25 +2478,13 @@ dependencies = [
  "wasm-bindgen",
 ]
 
-[[package]]
-name = "getset"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912"
-dependencies = [
- "proc-macro-error2",
- "proc-macro2",
- "quote",
- "syn 2.0.113",
-]
-
 [[package]]
 name = "gimli"
 version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
 dependencies = [
- "fallible-iterator 0.3.0",
+ "fallible-iterator",
  "indexmap 2.12.1",
  "stable_deref_trait",
 ]
@@ -4147,7 +2529,7 @@ dependencies = [
  "fnv",
  "futures-core",
  "futures-sink",
- "http 1.4.0",
+ "http",
  "indexmap 2.12.1",
  "slab",
  "tokio",
@@ -4210,15 +2592,6 @@ version = "0.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
 
-[[package]]
-name = "hashlink"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af"
-dependencies = [
- "hashbrown 0.14.5",
-]
-
 [[package]]
 name = "heck"
 version = "0.4.1"
@@ -4243,15 +2616,6 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
 
-[[package]]
-name = "hmac"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
-dependencies = [
- "digest",
-]
-
 [[package]]
 name = "home"
 version = "0.5.12"
@@ -4261,17 +2625,6 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
-[[package]]
-name = "http"
-version = "0.2.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
-dependencies = [
- "bytes",
- "fnv",
- "itoa",
-]
-
 [[package]]
 name = "http"
 version = "1.4.0"
@@ -4282,17 +2635,6 @@ dependencies = [
  "itoa",
 ]
 
-[[package]]
-name = "http-body"
-version = "0.4.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
-dependencies = [
- "bytes",
- "http 0.2.12",
- "pin-project-lite",
-]
-
 [[package]]
 name = "http-body"
 version = "1.0.1"
@@ -4300,7 +2642,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
 dependencies = [
  "bytes",
- "http 1.4.0",
+ "http",
 ]
 
 [[package]]
@@ -4311,8 +2653,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
 dependencies = [
  "bytes",
  "futures-core",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
  "pin-project-lite",
 ]
 
@@ -4345,8 +2687,8 @@ dependencies = [
  "futures-channel",
  "futures-core",
  "h2",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
  "httparse",
  "httpdate",
  "itoa",
@@ -4357,23 +2699,6 @@ dependencies = [
  "want",
 ]
 
-[[package]]
-name = "hyper-rustls"
-version = "0.27.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
-dependencies = [
- "http 1.4.0",
- "hyper",
- "hyper-util",
- "rustls",
- "rustls-native-certs",
- "rustls-pki-types",
- "tokio",
- "tokio-rustls",
- "tower-service",
-]
-
 [[package]]
 name = "hyper-timeout"
 version = "0.5.2"
@@ -4387,46 +2712,25 @@ dependencies = [
  "tower-service",
 ]
 
-[[package]]
-name = "hyper-tls"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
-dependencies = [
- "bytes",
- "http-body-util",
- "hyper",
- "hyper-util",
- "native-tls",
- "tokio",
- "tokio-native-tls",
- "tower-service",
-]
-
 [[package]]
 name = "hyper-util"
 version = "0.1.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f"
 dependencies = [
- "base64",
  "bytes",
  "futures-channel",
  "futures-core",
  "futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
  "hyper",
- "ipnet",
  "libc",
- "percent-encoding",
  "pin-project-lite",
  "socket2 0.6.1",
- "system-configuration",
  "tokio",
  "tower-service",
  "tracing",
- "windows-registry",
 ]
 
 [[package]]
@@ -4540,12 +2844,6 @@ version = "2.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
 
-[[package]]
-name = "ident_case"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
-
 [[package]]
 name = "idna"
 version = "1.1.0"
@@ -4603,12 +2901,6 @@ dependencies = [
  "serde_core",
 ]
 
-[[package]]
-name = "inlinable_string"
-version = "0.1.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
-
 [[package]]
 name = "integer-encoding"
 version = "3.0.4"
@@ -4646,16 +2938,6 @@ version = "2.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
 
-[[package]]
-name = "iri-string"
-version = "0.7.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
-dependencies = [
- "memchr",
- "serde",
-]
-
 [[package]]
 name = "is-terminal"
 version = "0.4.17"
@@ -4667,12 +2949,6 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
-[[package]]
-name = "is_ci"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45"
-
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.2"
@@ -4723,19 +2999,6 @@ dependencies = [
  "cc",
 ]
 
-[[package]]
-name = "jiter"
-version = "0.10.0"
-source = "git+https://github.com/ArroyoSystems/jiter?branch=disable_python#e5a90990780433a5972031a62eff87555d98884d"
-dependencies = [
- "ahash",
- "bitvec",
- "lexical-parse-float 1.0.6",
- "num-bigint",
- "num-traits",
- "smallvec",
-]
-
 [[package]]
 name = "jobserver"
 version = "0.1.34"
@@ -4756,19 +3019,6 @@ dependencies = [
  "wasm-bindgen",
 ]
 
-[[package]]
-name = "k8s-openapi"
-version = "0.24.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c75b990324f09bef15e791606b7b7a296d02fc88a344f6eba9390970a870ad5"
-dependencies = [
- "base64",
- "chrono",
- "serde",
- "serde-value",
- "serde_json",
-]
-
 [[package]]
 name = "lazy_static"
 version = "1.5.0"
@@ -4962,17 +3212,6 @@ dependencies = [
  "zstd-sys",
 ]
 
-[[package]]
-name = "libsqlite3-sys"
-version = "0.28.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f"
-dependencies = [
- "cc",
- "pkg-config",
- "vcpkg",
-]
-
 [[package]]
 name = "libz-sys"
 version = "1.1.23"
@@ -5009,17 +3248,6 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
 
-[[package]]
-name = "local-ip-address"
-version = "0.6.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79ef8c257c92ade496781a32a581d43e3d512cf8ce714ecf04ea80f93ed0ff4a"
-dependencies = [
- "libc",
- "neli",
- "windows-sys 0.61.2",
-]
-
 [[package]]
 name = "lock_api"
 version = "0.4.14"
@@ -5044,12 +3272,6 @@ dependencies = [
  "hashbrown 0.15.5",
 ]
 
-[[package]]
-name = "lru-slab"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
-
 [[package]]
 name = "lz4-sys"
 version = "1.11.1+lz4-1.10.0"
@@ -5104,12 +3326,6 @@ version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
 
-[[package]]
-name = "matchit"
-version = "0.8.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
-
 [[package]]
 name = "maybe-owned"
 version = "0.3.4"
@@ -5141,38 +3357,6 @@ dependencies = [
  "rustix 1.1.3",
 ]
 
-[[package]]
-name = "miette"
-version = "5.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59bb584eaeeab6bd0226ccf3509a69d7936d148cf3d036ad350abe35e8c6856e"
-dependencies = [
- "backtrace",
- "backtrace-ext",
- "is-terminal",
- "miette-derive",
- "once_cell",
- "owo-colors",
- "supports-color",
- "supports-hyperlinks",
- "supports-unicode",
- "terminal_size",
- "textwrap",
- "thiserror 1.0.69",
- "unicode-width 0.1.14",
-]
-
-[[package]]
-name = "miette-derive"
-version = "5.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.113",
-]
-
 [[package]]
 name = "mime"
 version = "0.3.17"
@@ -5217,7 +3401,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
 dependencies = [
  "libc",
- "wasi 0.11.1+wasi-snapshot-preview1",
+ "wasi",
  "windows-sys 0.61.2",
 ]
 
@@ -5227,61 +3411,6 @@ version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
 
-[[package]]
-name = "nanoid"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ffa00dec017b5b1a8b7cf5e2c008bfda1aa7e0697ac1508b491fdf2622fb4d8"
-dependencies = [
- "rand 0.8.5",
-]
-
-[[package]]
-name = "native-tls"
-version = "0.2.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2"
-dependencies = [
- "libc",
- "log",
- "openssl",
- "openssl-probe",
- "openssl-sys",
- "schannel",
- "security-framework",
- "security-framework-sys",
- "tempfile",
-]
-
-[[package]]
-name = "neli"
-version = "0.7.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87"
-dependencies = [
- "bitflags 2.10.0",
- "byteorder",
- "derive_builder",
- "getset",
- "libc",
- "log",
- "neli-proc-macros",
- "parking_lot",
-]
-
-[[package]]
-name = "neli-proc-macros"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05d8d08c6e98f20a62417478ebf7be8e1425ec9acecc6f63e22da633f6b71609"
-dependencies = [
- "either",
- "proc-macro2",
- "quote",
- "serde",
- "syn 2.0.113",
-]
-
 [[package]]
 name = "nibble_vec"
 version = "0.1.0"
@@ -5299,7 +3428,7 @@ checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
 dependencies = [
  "bitflags 2.10.0",
  "cfg-if",
- "cfg_aliases 0.1.1",
+ "cfg_aliases",
  "libc",
 ]
 
@@ -5356,7 +3485,6 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
 dependencies = [
  "num-integer",
  "num-traits",
- "serde",
 ]
 
 [[package]]
@@ -5444,7 +3572,7 @@ dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -5466,28 +3594,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00"
 dependencies = [
  "async-trait",
- "base64",
  "bytes",
  "chrono",
- "form_urlencoded",
  "futures",
- "http 1.4.0",
- "http-body-util",
- "httparse",
+ "http",
  "humantime",
- "hyper",
  "itertools 0.14.0",
- "md-5",
  "parking_lot",
  "percent-encoding",
- "quick-xml",
- "rand 0.9.2",
- "reqwest",
- "ring",
- "rustls-pemfile",
- "serde",
- "serde_json",
- "serde_urlencoded",
  "thiserror 2.0.17",
  "tokio",
  "tracing",
@@ -5509,38 +3623,6 @@ version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
 
-[[package]]
-name = "openssl"
-version = "0.10.75"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328"
-dependencies = [
- "bitflags 2.10.0",
- "cfg-if",
- "foreign-types",
- "libc",
- "once_cell",
- "openssl-macros",
- "openssl-sys",
-]
-
-[[package]]
-name = "openssl-macros"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "openssl-probe"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
-
 [[package]]
 name = "openssl-sys"
 version = "0.9.111"
@@ -5553,12 +3635,6 @@ dependencies = [
  "vcpkg",
 ]
 
-[[package]]
-name = "option-ext"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
-
 [[package]]
 name = "ordered-float"
 version = "2.10.1"
@@ -5578,18 +3654,6 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
-[[package]]
-name = "outref"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
-
-[[package]]
-name = "owo-colors"
-version = "3.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
-
 [[package]]
 name = "parking_lot"
 version = "0.12.5"
@@ -5677,38 +3741,15 @@ dependencies = [
  "simdutf8",
  "snap",
  "thrift",
- "twox-hash",
- "zstd",
-]
-
-[[package]]
-name = "paste"
-version = "1.0.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
-
-[[package]]
-name = "pear"
-version = "0.2.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
-dependencies = [
- "inlinable_string",
- "pear_codegen",
- "yansi",
+ "twox-hash",
+ "zstd",
 ]
 
 [[package]]
-name = "pear_codegen"
-version = "0.2.9"
+name = "paste"
+version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
-dependencies = [
- "proc-macro2",
- "proc-macro2-diagnostics",
- "quote",
- "syn 2.0.113",
-]
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
 [[package]]
 name = "peeking_take_while"
@@ -5752,7 +3793,7 @@ dependencies = [
  "pest_meta",
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -5795,7 +3836,6 @@ dependencies = [
  "hashbrown 0.15.5",
  "indexmap 2.12.1",
  "serde",
- "serde_derive",
 ]
 
 [[package]]
@@ -5804,17 +3844,7 @@ version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
 dependencies = [
- "phf_shared 0.12.1",
-]
-
-[[package]]
-name = "phf"
-version = "0.13.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
-dependencies = [
- "phf_shared 0.13.1",
- "serde",
+ "phf_shared",
 ]
 
 [[package]]
@@ -5826,15 +3856,6 @@ dependencies = [
  "siphasher",
 ]
 
-[[package]]
-name = "phf_shared"
-version = "0.13.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
-dependencies = [
- "siphasher",
-]
-
 [[package]]
 name = "pin-project"
 version = "1.1.10"
@@ -5852,7 +3873,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -5891,49 +3912,6 @@ dependencies = [
  "serde",
 ]
 
-[[package]]
-name = "postgres"
-version = "0.19.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7c48ece1c6cda0db61b058c1721378da76855140e9214339fa1317decacb176"
-dependencies = [
- "bytes",
- "fallible-iterator 0.2.0",
- "futures-util",
- "log",
- "tokio",
- "tokio-postgres",
-]
-
-[[package]]
-name = "postgres-protocol"
-version = "0.6.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491"
-dependencies = [
- "base64",
- "byteorder",
- "bytes",
- "fallible-iterator 0.2.0",
- "hmac",
- "md-5",
- "memchr",
- "rand 0.9.2",
- "sha2",
- "stringprep",
-]
-
-[[package]]
-name = "postgres-types"
-version = "0.2.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54b858f82211e84682fecd373f68e1ceae642d8d751a1ebd13f33de6257b3e20"
-dependencies = [
- "bytes",
- "fallible-iterator 0.2.0",
- "postgres-protocol",
-]
-
 [[package]]
 name = "potential_utf"
 version = "0.1.4"
@@ -5965,7 +3943,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
 dependencies = [
  "proc-macro2",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -5974,53 +3952,7 @@ version = "3.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983"
 dependencies = [
- "toml_edit 0.23.10+spec-1.0.0",
-]
-
-[[package]]
-name = "proc-macro-error"
-version = "1.0.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
-dependencies = [
- "proc-macro-error-attr",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
- "version_check",
-]
-
-[[package]]
-name = "proc-macro-error-attr"
-version = "1.0.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
-dependencies = [
- "proc-macro2",
- "quote",
- "version_check",
-]
-
-[[package]]
-name = "proc-macro-error-attr2"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5"
-dependencies = [
- "proc-macro2",
- "quote",
-]
-
-[[package]]
-name = "proc-macro-error2"
-version = "2.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802"
-dependencies = [
- "proc-macro-error-attr2",
- "proc-macro2",
- "quote",
- "syn 2.0.113",
+ "toml_edit",
 ]
 
 [[package]]
@@ -6032,19 +3964,6 @@ dependencies = [
  "unicode-ident",
 ]
 
-[[package]]
-name = "proc-macro2-diagnostics"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.113",
- "version_check",
- "yansi",
-]
-
 [[package]]
 name = "proctitle"
 version = "0.1.1"
@@ -6056,21 +3975,6 @@ dependencies = [
  "winapi",
 ]
 
-[[package]]
-name = "prometheus"
-version = "0.14.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
-dependencies = [
- "cfg-if",
- "fnv",
- "lazy_static",
- "memchr",
- "parking_lot",
- "protobuf",
- "thiserror 2.0.17",
-]
-
 [[package]]
 name = "prost"
 version = "0.13.5"
@@ -6097,7 +4001,7 @@ dependencies = [
  "prost",
  "prost-types",
  "regex",
- "syn 2.0.113",
+ "syn",
  "tempfile",
 ]
 
@@ -6111,7 +4015,7 @@ dependencies = [
  "itertools 0.14.0",
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -6123,26 +4027,6 @@ dependencies = [
  "prost",
 ]
 
-[[package]]
-name = "protobuf"
-version = "3.7.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4"
-dependencies = [
- "once_cell",
- "protobuf-support",
- "thiserror 1.0.69",
-]
-
-[[package]]
-name = "protobuf-support"
-version = "3.7.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6"
-dependencies = [
- "thiserror 1.0.69",
-]
-
 [[package]]
 name = "protocol"
 version = "0.1.0"
@@ -6151,8 +4035,8 @@ dependencies = [
  "log",
  "prost",
  "serde",
- "tonic 0.12.3",
- "tonic-build 0.12.3",
+ "tonic",
+ "tonic-build",
 ]
 
 [[package]]
@@ -6196,15 +4080,9 @@ checksum = "2cf194f5b1a415ef3a44ee35056f4009092cc4038a9f7e3c7c1e392f48ee7dbb"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
-[[package]]
-name = "quad-rand"
-version = "0.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40"
-
 [[package]]
 name = "quanta"
 version = "0.12.6"
@@ -6215,76 +4093,11 @@ dependencies = [
  "libc",
  "once_cell",
  "raw-cpuid",
- "wasi 0.11.1+wasi-snapshot-preview1",
+ "wasi",
  "web-sys",
  "winapi",
 ]
 
-[[package]]
-name = "quick-xml"
-version = "0.38.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
-dependencies = [
- "memchr",
- "serde",
-]
-
-[[package]]
-name = "quinn"
-version = "0.11.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
-dependencies = [
- "bytes",
- "cfg_aliases 0.2.1",
- "pin-project-lite",
- "quinn-proto",
- "quinn-udp",
- "rustc-hash 2.1.1",
- "rustls",
- "socket2 0.6.1",
- "thiserror 2.0.17",
- "tokio",
- "tracing",
- "web-time",
-]
-
-[[package]]
-name = "quinn-proto"
-version = "0.11.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
-dependencies = [
- "bytes",
- "getrandom 0.3.4",
- "lru-slab",
- "rand 0.9.2",
- "ring",
- "rustc-hash 2.1.1",
- "rustls",
- "rustls-pki-types",
- "slab",
- "thiserror 2.0.17",
- "tinyvec",
- "tracing",
- "web-time",
-]
-
-[[package]]
-name = "quinn-udp"
-version = "0.5.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
-dependencies = [
- "cfg_aliases 0.2.1",
- "libc",
- "once_cell",
- "socket2 0.6.1",
- "tracing",
- "windows-sys 0.60.2",
-]
-
 [[package]]
 name = "quote"
 version = "1.0.42"
@@ -6300,12 +4113,6 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
 
-[[package]]
-name = "radium"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
-
 [[package]]
 name = "radix_trie"
 version = "0.2.1"
@@ -6463,7 +4270,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
 dependencies = [
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -6486,37 +4293,6 @@ dependencies = [
  "thiserror 1.0.69",
 ]
 
-[[package]]
-name = "redox_users"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
-dependencies = [
- "getrandom 0.2.16",
- "libredox",
- "thiserror 2.0.17",
-]
-
-[[package]]
-name = "ref-cast"
-version = "1.0.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d"
-dependencies = [
- "ref-cast-impl",
-]
-
-[[package]]
-name = "ref-cast-impl"
-version = "1.0.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.113",
-]
-
 [[package]]
 name = "regalloc2"
 version = "0.13.5"
@@ -6539,128 +4315,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
 dependencies = [
  "aho-corasick",
- "memchr",
- "regex-automata",
- "regex-syntax",
-]
-
-[[package]]
-name = "regex-automata"
-version = "0.4.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
-dependencies = [
- "aho-corasick",
- "memchr",
- "regex-syntax",
-]
-
-[[package]]
-name = "regex-lite"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973"
-
-[[package]]
-name = "regex-syntax"
-version = "0.8.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
-
-[[package]]
-name = "regress"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb"
-dependencies = [
- "hashbrown 0.13.2",
- "memchr",
-]
-
-[[package]]
-name = "reqwest"
-version = "0.12.28"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
-dependencies = [
- "base64",
- "bytes",
- "encoding_rs",
- "futures-core",
- "futures-util",
- "h2",
- "http 1.4.0",
- "http-body 1.0.1",
- "http-body-util",
- "hyper",
- "hyper-rustls",
- "hyper-tls",
- "hyper-util",
- "js-sys",
- "log",
- "mime",
- "native-tls",
- "percent-encoding",
- "pin-project-lite",
- "quinn",
- "rustls",
- "rustls-native-certs",
- "rustls-pki-types",
- "serde",
- "serde_json",
- "serde_urlencoded",
- "sync_wrapper",
- "tokio",
- "tokio-native-tls",
- "tokio-rustls",
- "tokio-util",
- "tower 0.5.2",
- "tower-http",
- "tower-service",
- "url",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "wasm-streams",
- "web-sys",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
 ]
 
 [[package]]
-name = "ring"
-version = "0.17.14"
+name = "regex-automata"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
+checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
 dependencies = [
- "cc",
- "cfg-if",
- "getrandom 0.2.16",
- "libc",
- "untrusted",
- "windows-sys 0.52.0",
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
 ]
 
 [[package]]
-name = "rocksdb"
-version = "0.21.0"
+name = "regex-syntax"
+version = "0.8.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe"
+checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
+
+[[package]]
+name = "regress"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82a9ecfa0cb04d0b04dddb99b8ccf4f66bc8dfd23df694b398570bd8ae3a50fb"
 dependencies = [
- "libc",
- "librocksdb-sys",
+ "hashbrown 0.13.2",
+ "memchr",
 ]
 
 [[package]]
-name = "rusqlite"
-version = "0.31.0"
+name = "rocksdb"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae"
+checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe"
 dependencies = [
- "bitflags 2.10.0",
- "fallible-iterator 0.3.0",
- "fallible-streaming-iterator",
- "hashlink",
- "libsqlite3-sys",
- "serde_json",
- "smallvec",
+ "libc",
+ "librocksdb-sys",
 ]
 
 [[package]]
@@ -6726,65 +4420,6 @@ dependencies = [
  "rustix 1.1.3",
 ]
 
-[[package]]
-name = "rustls"
-version = "0.23.37"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
-dependencies = [
- "aws-lc-rs",
- "log",
- "once_cell",
- "ring",
- "rustls-pki-types",
- "rustls-webpki",
- "subtle",
- "zeroize",
-]
-
-[[package]]
-name = "rustls-native-certs"
-version = "0.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
-dependencies = [
- "openssl-probe",
- "rustls-pki-types",
- "schannel",
- "security-framework",
-]
-
-[[package]]
-name = "rustls-pemfile"
-version = "2.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
-dependencies = [
- "rustls-pki-types",
-]
-
-[[package]]
-name = "rustls-pki-types"
-version = "1.14.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
-dependencies = [
- "web-time",
- "zeroize",
-]
-
-[[package]]
-name = "rustls-webpki"
-version = "0.103.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
-dependencies = [
- "aws-lc-rs",
- "ring",
- "rustls-pki-types",
- "untrusted",
-]
-
 [[package]]
 name = "rustversion"
 version = "1.0.22"
@@ -6840,15 +4475,6 @@ dependencies = [
  "pkg-config",
 ]
 
-[[package]]
-name = "schannel"
-version = "0.1.29"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939"
-dependencies = [
- "windows-sys 0.61.2",
-]
-
 [[package]]
 name = "schemars"
 version = "0.8.22"
@@ -6856,20 +4482,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
 dependencies = [
  "dyn-clone",
- "schemars_derive 0.8.22",
- "serde",
- "serde_json",
-]
-
-[[package]]
-name = "schemars"
-version = "1.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc"
-dependencies = [
- "dyn-clone",
- "ref-cast",
- "schemars_derive 1.2.1",
+ "schemars_derive",
  "serde",
  "serde_json",
 ]
@@ -6883,19 +4496,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "serde_derive_internals",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "schemars_derive"
-version = "1.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f"
-dependencies = [
- "proc-macro2",
- "quote",
- "serde_derive_internals",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -6904,29 +4505,6 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
-[[package]]
-name = "security-framework"
-version = "3.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38"
-dependencies = [
- "bitflags 2.10.0",
- "core-foundation 0.10.1",
- "core-foundation-sys",
- "libc",
- "security-framework-sys",
-]
-
-[[package]]
-name = "security-framework-sys"
-version = "2.17.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3"
-dependencies = [
- "core-foundation-sys",
- "libc",
-]
-
 [[package]]
 name = "semver"
 version = "1.0.27"
@@ -6953,26 +4531,6 @@ dependencies = [
  "serde_derive",
 ]
 
-[[package]]
-name = "serde-value"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c"
-dependencies = [
- "ordered-float",
- "serde",
-]
-
-[[package]]
-name = "serde_bytes"
-version = "0.11.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8"
-dependencies = [
- "serde",
- "serde_core",
-]
-
 [[package]]
 name = "serde_core"
 version = "1.0.228"
@@ -6990,7 +4548,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -7001,7 +4559,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -7064,16 +4622,7 @@ checksum = "aafbefbe175fa9bf03ca83ef89beecff7d2a95aaacd5732325b90ac8c3bd7b90"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "serde_spanned"
-version = "0.6.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
-dependencies = [
- "serde",
+ "syn",
 ]
 
 [[package]]
@@ -7094,19 +4643,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "serde",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "serde_urlencoded"
-version = "0.7.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
-dependencies = [
- "form_urlencoded",
- "itoa",
- "ryu",
- "serde",
+ "syn",
 ]
 
 [[package]]
@@ -7248,12 +4785,6 @@ dependencies = [
  "serde",
 ]
 
-[[package]]
-name = "smawk"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
-
 [[package]]
 name = "snap"
 version = "1.1.1"
@@ -7289,17 +4820,6 @@ dependencies = [
  "lock_api",
 ]
 
-[[package]]
-name = "sqlparser"
-version = "0.55.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11"
-dependencies = [
- "log",
- "recursive",
- "sqlparser_derive 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
-]
-
 [[package]]
 name = "sqlparser"
 version = "0.55.0"
@@ -7307,18 +4827,7 @@ source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunc
 dependencies = [
  "log",
  "recursive",
- "sqlparser_derive 0.3.0 (git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunction-sql-parser)",
-]
-
-[[package]]
-name = "sqlparser_derive"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.113",
+ "sqlparser_derive",
 ]
 
 [[package]]
@@ -7328,7 +4837,7 @@ source = "git+https://github.com/FunctionStream/sqlparser-rs?branch=0.6.0%2Ffunc
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -7356,17 +4865,6 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
 
-[[package]]
-name = "stringprep"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1"
-dependencies = [
- "unicode-bidi",
- "unicode-normalization",
- "unicode-properties",
-]
-
 [[package]]
 name = "strsim"
 version = "0.11.1"
@@ -7379,16 +4877,7 @@ version = "0.26.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
 dependencies = [
- "strum_macros 0.26.4",
-]
-
-[[package]]
-name = "strum"
-version = "0.27.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
-dependencies = [
- "strum_macros 0.27.2",
+ "strum_macros",
 ]
 
 [[package]]
@@ -7401,19 +4890,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "strum_macros"
-version = "0.27.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
-dependencies = [
- "heck 0.5.0",
- "proc-macro2",
- "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -7422,44 +4899,6 @@ version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
 
-[[package]]
-name = "supports-color"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89"
-dependencies = [
- "is-terminal",
- "is_ci",
-]
-
-[[package]]
-name = "supports-hyperlinks"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f84231692eb0d4d41e4cdd0cabfdd2e6cd9e255e65f80c9aa7c98dd502b4233d"
-dependencies = [
- "is-terminal",
-]
-
-[[package]]
-name = "supports-unicode"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f850c19edd184a205e883199a261ed44471c81e39bd95b1357f5febbef00e77a"
-dependencies = [
- "is-terminal",
-]
-
-[[package]]
-name = "syn"
-version = "1.0.109"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
-dependencies = [
- "proc-macro2",
- "unicode-ident",
-]
-
 [[package]]
 name = "syn"
 version = "2.0.113"
@@ -7476,9 +4915,6 @@ name = "sync_wrapper"
 version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
-dependencies = [
- "futures-core",
-]
 
 [[package]]
 name = "synstructure"
@@ -7488,28 +4924,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "system-configuration"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
-dependencies = [
- "bitflags 2.10.0",
- "core-foundation 0.9.4",
- "system-configuration-sys",
-]
-
-[[package]]
-name = "system-configuration-sys"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
-dependencies = [
- "core-foundation-sys",
- "libc",
+ "syn",
 ]
 
 [[package]]
@@ -7534,12 +4949,6 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
 
-[[package]]
-name = "tap"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
-
 [[package]]
 name = "target-lexicon"
 version = "0.13.4"
@@ -7551,42 +4960,21 @@ name = "tempfile"
 version = "3.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c"
-dependencies = [
- "fastrand",
- "getrandom 0.3.4",
- "once_cell",
- "rustix 1.1.3",
- "windows-sys 0.61.2",
-]
-
-[[package]]
-name = "termcolor"
-version = "1.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
-dependencies = [
- "winapi-util",
-]
-
-[[package]]
-name = "terminal_size"
-version = "0.1.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
-dependencies = [
- "libc",
- "winapi",
+dependencies = [
+ "fastrand",
+ "getrandom 0.3.4",
+ "once_cell",
+ "rustix 1.1.3",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
-name = "textwrap"
-version = "0.15.2"
+name = "termcolor"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d"
+checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
 dependencies = [
- "smawk",
- "unicode-linebreak",
- "unicode-width 0.1.14",
+ "winapi-util",
 ]
 
 [[package]]
@@ -7615,7 +5003,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -7626,7 +5014,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -7699,21 +5087,6 @@ dependencies = [
  "zerovec",
 ]
 
-[[package]]
-name = "tinyvec"
-version = "1.11.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
-dependencies = [
- "tinyvec_macros",
-]
-
-[[package]]
-name = "tinyvec_macros"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
-
 [[package]]
 name = "tokio"
 version = "1.49.0"
@@ -7728,7 +5101,6 @@ dependencies = [
  "signal-hook-registry",
  "socket2 0.6.1",
  "tokio-macros",
- "tracing",
  "windows-sys 0.61.2",
 ]
 
@@ -7740,53 +5112,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "tokio-native-tls"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
-dependencies = [
- "native-tls",
- "tokio",
-]
-
-[[package]]
-name = "tokio-postgres"
-version = "0.7.16"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcea47c8f71744367793f16c2db1f11cb859d28f436bdb4ca9193eb1f787ee42"
-dependencies = [
- "async-trait",
- "byteorder",
- "bytes",
- "fallible-iterator 0.2.0",
- "futures-channel",
- "futures-util",
- "log",
- "parking_lot",
- "percent-encoding",
- "phf 0.13.1",
- "pin-project-lite",
- "postgres-protocol",
- "postgres-types",
- "rand 0.9.2",
- "socket2 0.6.1",
- "tokio",
- "tokio-util",
- "whoami",
-]
-
-[[package]]
-name = "tokio-rustls"
-version = "0.26.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
-dependencies = [
- "rustls",
- "tokio",
+ "syn",
 ]
 
 [[package]]
@@ -7813,18 +5139,6 @@ dependencies = [
  "tokio",
 ]
 
-[[package]]
-name = "toml"
-version = "0.8.23"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
-dependencies = [
- "serde",
- "serde_spanned 0.6.9",
- "toml_datetime 0.6.11",
- "toml_edit 0.22.27",
-]
-
 [[package]]
 name = "toml"
 version = "0.9.11+spec-1.1.0"
@@ -7833,22 +5147,13 @@ checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46"
 dependencies = [
  "indexmap 2.12.1",
  "serde_core",
- "serde_spanned 1.0.4",
- "toml_datetime 0.7.5+spec-1.1.0",
+ "serde_spanned",
+ "toml_datetime",
  "toml_parser",
  "toml_writer",
  "winnow",
 ]
 
-[[package]]
-name = "toml_datetime"
-version = "0.6.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
-dependencies = [
- "serde",
-]
-
 [[package]]
 name = "toml_datetime"
 version = "0.7.5+spec-1.1.0"
@@ -7858,20 +5163,6 @@ dependencies = [
  "serde_core",
 ]
 
-[[package]]
-name = "toml_edit"
-version = "0.22.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
-dependencies = [
- "indexmap 2.12.1",
- "serde",
- "serde_spanned 0.6.9",
- "toml_datetime 0.6.11",
- "toml_write",
- "winnow",
-]
-
 [[package]]
 name = "toml_edit"
 version = "0.23.10+spec-1.0.0"
@@ -7879,7 +5170,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269"
 dependencies = [
  "indexmap 2.12.1",
- "toml_datetime 0.7.5+spec-1.1.0",
+ "toml_datetime",
  "toml_parser",
  "winnow",
 ]
@@ -7893,12 +5184,6 @@ dependencies = [
  "winnow",
 ]
 
-[[package]]
-name = "toml_write"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
-
 [[package]]
 name = "toml_writer"
 version = "1.0.6+spec-1.1.0"
@@ -7913,12 +5198,12 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
 dependencies = [
  "async-stream",
  "async-trait",
- "axum 0.7.9",
+ "axum",
  "base64",
  "bytes",
  "h2",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
  "http-body-util",
  "hyper",
  "hyper-timeout",
@@ -7935,39 +5220,6 @@ dependencies = [
  "tracing",
 ]
 
-[[package]]
-name = "tonic"
-version = "0.13.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9"
-dependencies = [
- "async-trait",
- "axum 0.8.8",
- "base64",
- "bytes",
- "h2",
- "http 1.4.0",
- "http-body 1.0.1",
- "http-body-util",
- "hyper",
- "hyper-timeout",
- "hyper-util",
- "percent-encoding",
- "pin-project",
- "prost",
- "rustls-native-certs",
- "socket2 0.5.10",
- "tokio",
- "tokio-rustls",
- "tokio-stream",
- "tower 0.5.2",
- "tower-layer",
- "tower-service",
- "tracing",
- "webpki-roots 0.26.11",
- "zstd",
-]
-
 [[package]]
 name = "tonic-build"
 version = "0.12.3"
@@ -7979,21 +5231,7 @@ dependencies = [
  "prost-build",
  "prost-types",
  "quote",
- "syn 2.0.113",
-]
-
-[[package]]
-name = "tonic-build"
-version = "0.13.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847"
-dependencies = [
- "prettyplease",
- "proc-macro2",
- "prost-build",
- "prost-types",
- "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -8024,31 +5262,8 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
 dependencies = [
  "futures-core",
  "futures-util",
- "indexmap 2.12.1",
  "pin-project-lite",
- "slab",
  "sync_wrapper",
- "tokio",
- "tokio-util",
- "tower-layer",
- "tower-service",
- "tracing",
-]
-
-[[package]]
-name = "tower-http"
-version = "0.6.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
-dependencies = [
- "bitflags 2.10.0",
- "bytes",
- "futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
- "iri-string",
- "pin-project-lite",
- "tower 0.5.2",
  "tower-layer",
  "tower-service",
 ]
@@ -8096,7 +5311,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -8195,9 +5410,9 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regress",
- "schemars 0.8.22",
+ "schemars",
  "serde_json",
- "syn 2.0.113",
+ "syn",
  "thiserror 1.0.69",
  "unicode-ident",
 ]
@@ -8209,11 +5424,11 @@ source = "git+https://github.com/ArroyoSystems/typify.git?branch=arroyo#d14b6fc0
 dependencies = [
  "proc-macro2",
  "quote",
- "schemars 0.8.22",
+ "schemars",
  "serde",
  "serde_json",
  "serde_tokenstream",
- "syn 2.0.113",
+ "syn",
  "typify-impl",
 ]
 
@@ -8223,54 +5438,18 @@ version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
 
-[[package]]
-name = "uncased"
-version = "0.9.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
-dependencies = [
- "version_check",
-]
-
 [[package]]
 name = "unicase"
 version = "2.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
 
-[[package]]
-name = "unicode-bidi"
-version = "0.3.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
-
 [[package]]
 name = "unicode-ident"
 version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
 
-[[package]]
-name = "unicode-linebreak"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f"
-
-[[package]]
-name = "unicode-normalization"
-version = "0.1.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
-dependencies = [
- "tinyvec",
-]
-
-[[package]]
-name = "unicode-properties"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
-
 [[package]]
 name = "unicode-segmentation"
 version = "1.12.0"
@@ -8301,18 +5480,6 @@ version = "0.2.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
 
-[[package]]
-name = "unscanny"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47"
-
-[[package]]
-name = "untrusted"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
-
 [[package]]
 name = "unty"
 version = "0.0.4"
@@ -8331,12 +5498,6 @@ dependencies = [
  "serde",
 ]
 
-[[package]]
-name = "urlencoding"
-version = "2.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
-
 [[package]]
 name = "utf8_iter"
 version = "1.0.4"
@@ -8349,30 +5510,6 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
-[[package]]
-name = "utoipa"
-version = "4.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23"
-dependencies = [
- "indexmap 2.12.1",
- "serde",
- "serde_json",
- "utoipa-gen",
-]
-
-[[package]]
-name = "utoipa-gen"
-version = "4.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20c24e8ab68ff9ee746aad22d39b5535601e6416d1b0feeabf78be986a5c4392"
-dependencies = [
- "proc-macro-error",
- "proc-macro2",
- "quote",
- "syn 2.0.113",
-]
-
 [[package]]
 name = "uuid"
 version = "1.19.0"
@@ -8381,7 +5518,6 @@ checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a"
 dependencies = [
  "getrandom 0.3.4",
  "js-sys",
- "serde_core",
  "wasm-bindgen",
 ]
 
@@ -8409,12 +5545,6 @@ version = "0.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
 
-[[package]]
-name = "vsimd"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
-
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -8440,15 +5570,6 @@ version = "0.11.1+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
-[[package]]
-name = "wasi"
-version = "0.14.7+wasi-0.2.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
-dependencies = [
- "wasip2",
-]
-
 [[package]]
 name = "wasip2"
 version = "1.0.1+wasi-0.2.4"
@@ -8458,15 +5579,6 @@ dependencies = [
  "wit-bindgen",
 ]
 
-[[package]]
-name = "wasite"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42"
-dependencies = [
- "wasi 0.14.7+wasi-0.2.4",
-]
-
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.106"
@@ -8512,7 +5624,7 @@ dependencies = [
  "bumpalo",
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
  "wasm-bindgen-shared",
 ]
 
@@ -8556,19 +5668,6 @@ dependencies = [
  "wasmparser",
 ]
 
-[[package]]
-name = "wasm-streams"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
-dependencies = [
- "futures-util",
- "js-sys",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "web-sys",
-]
-
 [[package]]
 name = "wasmparser"
 version = "0.243.0"
@@ -8691,7 +5790,7 @@ dependencies = [
  "serde",
  "serde_derive",
  "sha2",
- "toml 0.9.11+spec-1.1.0",
+ "toml",
  "wasmtime-environ",
  "windows-sys 0.61.2",
  "zstd",
@@ -8706,7 +5805,7 @@ dependencies = [
  "anyhow",
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
  "wasmtime-internal-component-util",
  "wasmtime-internal-wit-bindgen",
  "wit-parser",
@@ -8820,7 +5919,7 @@ checksum = "63ba3124cc2cbcd362672f9f077303ccc4cd61daa908f73447b7fdaece75ff9f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -8948,35 +6047,6 @@ dependencies = [
  "wasm-bindgen",
 ]
 
-[[package]]
-name = "webpki-roots"
-version = "0.26.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
-dependencies = [
- "webpki-roots 1.0.6",
-]
-
-[[package]]
-name = "webpki-roots"
-version = "1.0.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed"
-dependencies = [
- "rustls-pki-types",
-]
-
-[[package]]
-name = "whoami"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fae98cf96deed1b7572272dfc777713c249ae40aa1cf8862e091e8b745f5361"
-dependencies = [
- "libredox",
- "wasite",
- "web-sys",
-]
-
 [[package]]
 name = "wiggle"
 version = "41.0.3"
@@ -9001,7 +6071,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
  "witx",
 ]
 
@@ -9013,7 +6083,7 @@ checksum = "0e976fe0cecd60041f66b15ad45ebc997952af13da9bf9d90261c7b025057edc"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
  "wiggle-generate",
 ]
 
@@ -9089,7 +6159,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -9100,7 +6170,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -9109,17 +6179,6 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 
-[[package]]
-name = "windows-registry"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720"
-dependencies = [
- "windows-link",
- "windows-result",
- "windows-strings",
-]
-
 [[package]]
 name = "windows-result"
 version = "0.4.1"
@@ -9364,21 +6423,6 @@ version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
 
-[[package]]
-name = "wyz"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
-dependencies = [
- "tap",
-]
-
-[[package]]
-name = "xmlparser"
-version = "0.13.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
-
 [[package]]
 name = "xxhash-rust"
 version = "0.8.15"
@@ -9394,12 +6438,6 @@ dependencies = [
  "lzma-sys",
 ]
 
-[[package]]
-name = "yansi"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
-
 [[package]]
 name = "yoke"
 version = "0.8.1"
@@ -9419,7 +6457,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
  "synstructure",
 ]
 
@@ -9449,7 +6487,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -9460,7 +6498,7 @@ checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
@@ -9480,16 +6518,10 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
  "synstructure",
 ]
 
-[[package]]
-name = "zeroize"
-version = "1.8.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
-
 [[package]]
 name = "zerotrie"
 version = "0.2.3"
@@ -9520,7 +6552,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.113",
+ "syn",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 2c62a473..8e343baa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -80,11 +80,7 @@ datafusion-functions-window = {git = 'https://github.com/ArroyoSystems/arrow-dat
 
 sqlparser = { git = "https://github.com/FunctionStream/sqlparser-rs", branch = "0.6.0/function-sql-parser" }
 
-cornucopia_async = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" }
-cornucopia = { git = "https://github.com/ArroyoSystems/cornucopia", branch = "sqlite" }
-jiter = {git = "https://github.com/ArroyoSystems/jiter", branch = "disable_python" }
-
-arroyo-state = { path = "../arroyo/crates/arroyo-state" }
+ahash = "0.8"
 governor = "0.8.0"
 mini-moka = "0.10"
 sha2 = "0.10"
diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs
index 814358ad..0dce921e 100644
--- a/src/runtime/mod.rs
+++ b/src/runtime/mod.rs
@@ -14,8 +14,6 @@
 
 pub mod buffer_and_event;
 pub mod common;
-pub mod sink;
-pub mod source;
 pub mod streaming;
 pub mod task;
 pub mod taskexecutor;
diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs
index e81bd03a..e838b06e 100644
--- a/src/runtime/streaming/api/context.rs
+++ b/src/runtime/streaming/api/context.rs
@@ -2,21 +2,24 @@ use crate::runtime::streaming::memory::MemoryPool;
 use crate::runtime::streaming::protocol::event::StreamEvent;
 use crate::runtime::streaming::protocol::tracked::TrackedEvent;
 use crate::runtime::streaming::network::endpoint::PhysicalSender;
+use crate::runtime::streaming::storage::manager::TableManager;
+
 use arrow_array::RecordBatch;
-use arroyo_state::tables::table_manager::TableManager;
 use std::sync::Arc;
 use tokio::sync::Mutex;
-use tracing::error;
 
 pub struct TaskContext {
     pub job_id: String,
     pub vertex_id: u32,
     pub subtask_idx: u32,
     pub parallelism: u32,
+
     pub outboxes: Vec<PhysicalSender>,
+
     memory_pool: Arc<MemoryPool>,
     table_manager: Option<Arc<Mutex<TableManager>>>,
-    pub last_present_watermark: Option<std::time::SystemTime>,
+
+    current_watermark: Option<std::time::SystemTime>,
 }
 
 impl TaskContext {
@@ -37,10 +40,46 @@ impl TaskContext {
             outboxes,
             memory_pool,
             table_manager,
-            last_present_watermark: None,
+            current_watermark: None,
+        }
+    }
+
+    // ========================================================================
+    // Watermark and event-time progress API
+    // ========================================================================
+
+    /// Called by operators to read this task's current safe watermark (`None` until one has been recorded).
+    pub fn last_present_watermark(&self) -> Option<std::time::SystemTime> {
+        self.current_watermark
+    }
+
+    /// Called by the runtime framework (SubtaskRunner) to advance local time; the stored watermark never moves backwards.
+    pub fn advance_watermark(&mut self, watermark: std::time::SystemTime) {
+        if let Some(current) = self.current_watermark {
+            if watermark > current {
+                self.current_watermark = Some(watermark);
+            }
+        } else {
+            self.current_watermark = Some(watermark);
         }
     }
 
+    // ========================================================================
+    // Observability API
+    // ========================================================================
+
+    /// Formats this task's unique identity string, for use in distributed tracing and log output.
+    pub fn task_identity(&self) -> String {
+        format!(
+            "Job[{}], Vertex[{}], Subtask[{}/{}]",
+            self.job_id, self.vertex_id, self.subtask_idx, self.parallelism
+        )
+    }
+
+    // ========================================================================
+    // State management and back-pressured network send API
+    // ========================================================================
+
     pub async fn table_manager(&self) -> tokio::sync::MutexGuard<'_, TableManager> {
         self.table_manager
             .as_ref()
@@ -49,6 +88,16 @@ impl TaskContext {
             .await
     }
 
+    pub async fn table_manager_guard(
+        &self,
+    ) -> anyhow::Result<tokio::sync::MutexGuard<'_, TableManager>> {
+        let arc = self
+            .table_manager
+            .as_ref()
+            .ok_or_else(|| anyhow::anyhow!("table_manager is not configured on TaskContext"))?;
+        Ok(arc.lock().await)
+    }
+
     /// 受内存池管控的数据发送：申请精准字节的内存船票后广播到所有下游
     pub async fn collect(&self, batch: RecordBatch) -> anyhow::Result<()> {
         if self.outboxes.is_empty() {
@@ -65,7 +114,7 @@ impl TaskContext {
         Ok(())
     }
 
-    /// 按 Key 哈希路由到单分区（Shuffle / GroupBy）
+    /// Routes the batch to a single partition chosen by key hash (used for Shuffle / KeyBy).
     pub async fn collect_keyed(
         &self,
         key_hash: u64,
@@ -84,7 +133,7 @@ impl TaskContext {
         Ok(())
     }
 
-    /// 广播控制信号（不申请内存船票，保证在拥堵时畅通无阻）
+    /// Broadcasts a control signal (e.g. Watermark, Barrier) without acquiring a memory ticket, so it is never held up by congestion.
     pub async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> {
         let tracked_event = TrackedEvent::control(event);
         for outbox in &self.outboxes {
diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs
index e6bf674d..8115b0fe 100644
--- a/src/runtime/streaming/api/mod.rs
+++ b/src/runtime/streaming/api/mod.rs
@@ -5,5 +5,5 @@ pub mod operator;
 pub mod source;
 
 pub use context::TaskContext;
-pub use operator::{ConstructedOperator, MessageOperator};
+pub use operator::{ConstructedOperator, MessageOperator, Registry};
 pub use source::{SourceEvent, SourceOffset, SourceOperator};
diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs
index 3974307b..3cd5a316 100644
--- a/src/runtime/streaming/api/operator.rs
+++ b/src/runtime/streaming/api/operator.rs
@@ -4,9 +4,67 @@ use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
 use crate::runtime::streaming::protocol::stream_out::StreamOutput;
 use arrow_array::RecordBatch;
 use async_trait::async_trait;
+use datafusion::common::Result as DfResult;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::FunctionRegistry;
+use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF};
+use std::collections::HashSet;
+use std::sync::Arc;
 use std::time::Duration;
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
+// ---------------------------------------------------------------------------
+// Registry — operator / UDF registry (replaces tracing_subscriber::Registry)
+// ---------------------------------------------------------------------------
+
+/// Runtime registry of functions and state.
+///
+/// Wraps a DataFusion [`SessionContext`] to provide UDF / UDAF / UDWF lookup for physical-plan deserialization.
+/// An `Arc<Registry>` is created in the factory and then shared by the individual constructors.
+pub struct Registry {
+    ctx: SessionContext,
+}
+
+impl Registry {
+    pub fn new() -> Self {
+        Self {
+            ctx: SessionContext::new(),
+        }
+    }
+
+    pub fn session_context(&self) -> &SessionContext {
+        &self.ctx
+    }
+}
+
+impl Default for Registry {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl FunctionRegistry for Registry {
+    fn udfs(&self) -> HashSet<String> {
+        self.ctx.udfs()
+    }
+
+    fn udf(&self, name: &str) -> DfResult<Arc<ScalarUDF>> {
+        self.ctx.udf(name)
+    }
+
+    fn udaf(&self, name: &str) -> DfResult<Arc<AggregateUDF>> {
+        self.ctx.udaf(name)
+    }
+
+    fn udwf(&self, name: &str) -> DfResult<Arc<WindowUDF>> {
+        self.ctx.udwf(name)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// ConstructedOperator
+// ---------------------------------------------------------------------------
+
 /// 工厂反射产出的具体算子实例
 pub enum ConstructedOperator {
     Source(Box<dyn SourceOperator>),
diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs
index f1733b29..d824d025 100644
--- a/src/runtime/streaming/execution/runner.rs
+++ b/src/runtime/streaming/execution/runner.rs
@@ -5,11 +5,9 @@ use crate::runtime::streaming::protocol::control::ControlCommand;
 use crate::runtime::streaming::protocol::event::StreamEvent;
 use crate::runtime::streaming::protocol::stream_out::StreamOutput;
 use crate::runtime::streaming::protocol::tracked::TrackedEvent;
-use crate::runtime::streaming::protocol::Watermark;
 use super::tracker::barrier_aligner::{AlignmentStatus, BarrierAligner};
 use super::tracker::watermark_tracker::WatermarkTracker;
 use crate::runtime::streaming::network::endpoint::BoxedEventStream;
-use arroyo_types::CheckpointBarrier;
 use std::collections::VecDeque;
 use std::pin::Pin;
 use tokio::sync::mpsc::Receiver;
@@ -245,7 +243,7 @@ impl SubtaskRunner {
             StreamEvent::Watermark(wm) => {
                 if let Some(aligned_wm) = st.wm_tracker.update(input_idx, wm) {
                     if let Watermark::EventTime(t) = aligned_wm {
-                        st.ctx.last_present_watermark = Some(t);
+                        st.ctx.advance_watermark(t);
                     }
                     let outputs = st
                         .operator
diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/runtime/streaming/execution/tracker/watermark_tracker.rs
index be7043b9..29233fc3 100644
--- a/src/runtime/streaming/execution/tracker/watermark_tracker.rs
+++ b/src/runtime/streaming/execution/tracker/watermark_tracker.rs
@@ -1,4 +1,4 @@
-use crate::runtime::streaming::protocol::watermark::{merge_watermarks, watermark_strictly_advances, Watermark};
+use crate::runtime::streaming::protocol::watermark::{merge_watermarks, watermark_strictly_advances};
 use crate::sql::common::Watermark;
 
 #[derive(Debug)]
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
index 4cd52bf3..2cc0cfba 100644
--- a/src/runtime/streaming/factory/mod.rs
+++ b/src/runtime/streaming/factory/mod.rs
@@ -1,3 +1,6 @@
 pub mod registry;
 
-pub use registry:: OperatorFactory;
+pub use registry::{
+    ConnectorSinkDispatcher, ConnectorSourceDispatcher, OperatorConstructor, OperatorFactory,
+    PassthroughConstructor,
+};
diff --git a/src/runtime/streaming/factory/registry.rs b/src/runtime/streaming/factory/registry.rs
index 5b53b920..b8b45fff 100644
--- a/src/runtime/streaming/factory/registry.rs
+++ b/src/runtime/streaming/factory/registry.rs
@@ -1,24 +1,64 @@
 use anyhow::{anyhow, Result};
-use crate::runtime::streaming::api::operator::ConstructedOperator;
+use prost::Message;
 use std::collections::HashMap;
+use std::sync::Arc;
+
+use crate::runtime::streaming::api::operator::Registry;
+
+use crate::runtime::streaming::api::operator::ConstructedOperator;
+use crate::runtime::streaming::operators::PassthroughOperator;
+use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor;
+use crate::runtime::streaming::operators::joins::{
+    InstantJoinConstructor, JoinWithExpirationConstructor, LookupJoinConstructor,
+};
+use crate::runtime::streaming::operators::key_by::KeyByConstructor;
+use crate::runtime::streaming::operators::watermark::WatermarkGeneratorConstructor;
+use crate::runtime::streaming::operators::windows::{
+    SessionAggregatingWindowConstructor, SlidingAggregatingWindowConstructor,
+    TumblingAggregateWindowConstructor, WindowFunctionConstructor,
+};
+
+use protocol::grpc::api::{
+    ConnectorOp, ExpressionWatermarkConfig,
+    JoinOperator as JoinOperatorProto,
+    KeyPlanOperator as KeyByProto,
+    LookupJoinOperator as LookupJoinProto,
+    SessionWindowAggregateOperator, SlidingWindowAggregateOperator,
+    TumblingWindowAggregateOperator, UpdatingAggregateOperator,
+    WindowFunctionOperator as WindowFunctionProto,
+};
 
+// ---------------------------------------------------------------------------
+// 1. Core trait (factory contract)
+// ---------------------------------------------------------------------------
 
-/// 工业级算子注册表与工厂
+/// Operator constructor trait: each implementor deserializes its configuration from protobuf bytes and builds a [`ConstructedOperator`].
+///
+/// External plugins can implement this trait and inject it via [`OperatorFactory::register`].
+pub trait OperatorConstructor: Send + Sync {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator>;
+}
+
+// ---------------------------------------------------------------------------
+// 2. Production-grade factory registry
+// ---------------------------------------------------------------------------
+
+/// Holds the `name → OperatorConstructor` map and the shared [`Registry`].
+///
+/// [`TaskManager`] calls [`create_operator`] when deploying a TDD, completing the
+/// reflection-style instantiation from a byte stream to a runtime operator.
 pub struct OperatorFactory {
     constructors: HashMap<String, Box<dyn OperatorConstructor>>,
+    registry: Arc<Registry>,
 }
 
 impl OperatorFactory {
-    pub fn new() -> Self {
-        let factory = Self {
+    pub fn new(registry: Arc<Registry>) -> Self {
+        let mut factory = Self {
             constructors: HashMap::new(),
+            registry,
         };
-
-        // TODO: 在此注册具体算子构造器
-        factory.register("TumblingWindowAggregate", Box::new(TumblingWindowAggregateConstructor));
-        factory.register("ExpressionWatermark", Box::new(WatermarkGeneratorConstructor));
-        factory.register("KafkaSource", Box::new(KafkaSourceConstructor));
-
+        factory.register_builtins(); // install every built-in constructor before handing the factory out
         factory
     }
 
@@ -39,6 +79,212 @@ impl OperatorFactory {
                 )
             })?;
 
-        ctor.with_config(payload)
+        ctor.with_config(payload, self.registry.clone())
+    }
+
+    /// Lists the names of all registered operators (for debugging); the order is whatever the `HashMap` yields.
+    pub fn registered_operators(&self) -> Vec<&str> {
+        self.constructors.keys().map(|s| s.as_str()).collect()
+    }
+
+    fn register_builtins(&mut self) {
+        // ─── Window aggregation ───
+        self.register("TumblingWindowAggregate", Box::new(TumblingWindowBridge));
+        self.register("SlidingWindowAggregate", Box::new(SlidingWindowBridge));
+        self.register("SessionWindowAggregate", Box::new(SessionWindowBridge));
+
+        // ─── Watermark ───
+        self.register("ExpressionWatermark", Box::new(WatermarkBridge));
+
+        // ─── SQL Window Function ───
+        self.register("WindowFunction", Box::new(WindowFunctionBridge));
+
+        // ─── Join ───
+        self.register("Join", Box::new(JoinWithExpirationBridge));
+        self.register("InstantJoin", Box::new(InstantJoinBridge));
+        self.register("LookupJoin", Box::new(LookupJoinBridge));
+
+        // ─── Incremental aggregation ───
+        self.register("UpdatingAggregate", Box::new(IncrementalAggregateBridge));
+
+        // ─── Physical network routing ───
+        self.register("KeyBy", Box::new(KeyByBridge));
+
+        // ─── Connector source / sink (dispatcher pattern; no concrete connector is hard-coded) ───
+        self.register("ConnectorSource", Box::new(ConnectorSourceDispatcher));
+        self.register("ConnectorSink", Box::new(ConnectorSinkDispatcher));
+
+        // ─── Pass-through operators ───
+        self.register("Projection", Box::new(PassthroughConstructor("Projection")));
+        self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue")));
+        self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey")));
+    }
+}
+
+// ---------------------------------------------------------------------------
+// 3. Constructor adapters — decode the protobuf, then delegate to each operator module's Constructor
+// ---------------------------------------------------------------------------
+
+struct TumblingWindowBridge; // decodes `TumblingWindowAggregateOperator` bytes, then delegates to `TumblingAggregateWindowConstructor`
+impl OperatorConstructor for TumblingWindowBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = TumblingWindowAggregateOperator::decode(config)
+            .map_err(|e| anyhow!("Decode TumblingWindowAggregateOperator failed: {e}"))?;
+        let op = TumblingAggregateWindowConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct SlidingWindowBridge; // decodes `SlidingWindowAggregateOperator` bytes, then delegates to `SlidingAggregatingWindowConstructor`
+impl OperatorConstructor for SlidingWindowBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = SlidingWindowAggregateOperator::decode(config)
+            .map_err(|e| anyhow!("Decode SlidingWindowAggregateOperator failed: {e}"))?;
+        let op = SlidingAggregatingWindowConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct SessionWindowBridge; // decodes `SessionWindowAggregateOperator` bytes, then delegates to `SessionAggregatingWindowConstructor`
+impl OperatorConstructor for SessionWindowBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = SessionWindowAggregateOperator::decode(config)
+            .map_err(|e| anyhow!("Decode SessionWindowAggregateOperator failed: {e}"))?;
+        let op = SessionAggregatingWindowConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct WatermarkBridge; // decodes `ExpressionWatermarkConfig` bytes, then delegates to `WatermarkGeneratorConstructor`
+impl OperatorConstructor for WatermarkBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = ExpressionWatermarkConfig::decode(config)
+            .map_err(|e| anyhow!("Decode ExpressionWatermarkConfig failed: {e}"))?;
+        let op = WatermarkGeneratorConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct WindowFunctionBridge; // decodes `WindowFunctionOperator` bytes (imported as `WindowFunctionProto`), then delegates to `WindowFunctionConstructor`
+impl OperatorConstructor for WindowFunctionBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = WindowFunctionProto::decode(config)
+            .map_err(|e| anyhow!("Decode WindowFunctionOperator failed: {e}"))?;
+        let op = WindowFunctionConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct JoinWithExpirationBridge; // decodes `JoinOperator` bytes (same proto as `InstantJoinBridge`), then delegates to `JoinWithExpirationConstructor`
+impl OperatorConstructor for JoinWithExpirationBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = JoinOperatorProto::decode(config)
+            .map_err(|e| anyhow!("Decode JoinOperator (expiration) failed: {e}"))?;
+        let op = JoinWithExpirationConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct InstantJoinBridge; // decodes `JoinOperator` bytes (same proto as `JoinWithExpirationBridge`), then delegates to `InstantJoinConstructor`
+impl OperatorConstructor for InstantJoinBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = JoinOperatorProto::decode(config)
+            .map_err(|e| anyhow!("Decode JoinOperator (instant) failed: {e}"))?;
+        let op = InstantJoinConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct LookupJoinBridge; // decodes `LookupJoinOperator` bytes (imported as `LookupJoinProto`), then delegates to `LookupJoinConstructor`
+impl OperatorConstructor for LookupJoinBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = LookupJoinProto::decode(config)
+            .map_err(|e| anyhow!("Decode LookupJoinOperator failed: {e}"))?;
+        let op = LookupJoinConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct IncrementalAggregateBridge; // decodes `UpdatingAggregateOperator` bytes, then delegates to `IncrementalAggregatingConstructor`
+impl OperatorConstructor for IncrementalAggregateBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = UpdatingAggregateOperator::decode(config)
+            .map_err(|e| anyhow!("Decode UpdatingAggregateOperator failed: {e}"))?;
+        let op = IncrementalAggregatingConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+struct KeyByBridge; // decodes `KeyPlanOperator` bytes (imported as `KeyByProto`), then delegates to `KeyByConstructor`; the registry is not used
+impl OperatorConstructor for KeyByBridge {
+    fn with_config(&self, config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = KeyByProto::decode(config)
+            .map_err(|e| anyhow!("Decode KeyPlanOperator failed: {e}"))?;
+        let op = KeyByConstructor.with_config(proto)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// 4. Connector dispatch abstraction (Connector Dispatcher) — no concrete connector is hard-coded
+// ---------------------------------------------------------------------------
+
+pub struct ConnectorSourceDispatcher; // dispatches on `ConnectorOp.connector`; every arm currently returns an error (no source is wired up yet)
+
+impl OperatorConstructor for ConnectorSourceDispatcher {
+    fn with_config(&self, config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let op = ConnectorOp::decode(config)
+            .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?;
+
+        match op.connector.as_str() {
+            "kafka" => {
+                // TODO: delegate to crate::connectors::kafka::build_kafka_source(&op.config)
+                Err(anyhow!(
+                    "ConnectorSource '{}' factory wiring not yet implemented",
+                    op.connector
+                ))
+            }
+            "redis" => {
+                Err(anyhow!(
+                    "ConnectorSource '{}' factory wiring not yet implemented",
+                    op.connector
+                ))
+            }
+            other => Err(anyhow!("Unsupported source connector type: {}", other)),
+        }
+    }
+}
+
+pub struct ConnectorSinkDispatcher; // dispatches on `ConnectorOp.connector`; every arm currently returns an error (no sink is wired up yet)
+
+impl OperatorConstructor for ConnectorSinkDispatcher {
+    fn with_config(&self, config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let op = ConnectorOp::decode(config)
+            .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?;
+
+        match op.connector.as_str() {
+            "kafka" => {
+                // TODO: delegate to crate::connectors::kafka::build_kafka_sink(&op.config)
+                Err(anyhow!(
+                    "ConnectorSink '{}' factory wiring not yet implemented",
+                    op.connector
+                ))
+            }
+            other => Err(anyhow!("Unsupported sink connector type: {}", other)),
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// 5. Pass-through operators
+// ---------------------------------------------------------------------------
+
+pub struct PassthroughConstructor(pub &'static str); // the wrapped name is handed to `PassthroughOperator::new`; config bytes and registry are ignored
+
+impl OperatorConstructor for PassthroughConstructor {
+    fn with_config(&self, _config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        Ok(ConstructedOperator::Operator(Box::new(
+            PassthroughOperator::new(self.0),
+        )))
     }
 }
diff --git a/src/runtime/streaming/format/mod.rs b/src/runtime/streaming/format/mod.rs
index e69de29b..c4dbbeda 100644
--- a/src/runtime/streaming/format/mod.rs
+++ b/src/runtime/streaming/format/mod.rs
@@ -0,0 +1,9 @@
+pub mod config;
+pub mod deserializer;
+pub mod json_encoder;
+pub mod serializer;
+
+pub use config::{BadDataPolicy, DecimalEncoding, Format, JsonFormat, TimestampFormat};
+pub use deserializer::DataDeserializer;
+pub use json_encoder::CustomEncoderFactory;
+pub use serializer::DataSerializer;
diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs
index 0edc0d2e..5997623e 100644
--- a/src/runtime/streaming/mod.rs
+++ b/src/runtime/streaming/mod.rs
@@ -16,18 +16,21 @@
 pub mod api;
 pub mod arrow;
 pub mod cluster;
+pub mod connectors;
 pub mod error;
 pub mod execution;
 pub mod factory;
+pub mod format;
 pub mod memory;
 pub mod network;
 pub mod operators;
 pub mod protocol;
 pub mod state;
-mod format;
+pub mod storage;
 
 pub use api::{
-    ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
+    ConstructedOperator, MessageOperator, Registry, SourceEvent, SourceOffset, SourceOperator,
+    TaskContext,
 };
 pub use cluster::{
     CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy,
@@ -36,7 +39,7 @@ pub use cluster::{
 };
 pub use error::RunError;
 pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
-pub use factory:: OperatorFactory;
+pub use factory::{OperatorConstructor, OperatorFactory};
 pub use memory::{MemoryPool, MemoryTicket};
 pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
 pub use protocol::{
diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
index c76111c5..ac2cd585 100644
--- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -26,13 +26,12 @@ use std::sync::LazyLock;
 use std::time::{Duration, Instant, SystemTime};
 use std::{collections::HashMap, mem, sync::Arc};
 use tracing::{debug, warn};
-use tracing_subscriber::Registry;
 use protocol::grpc::api::UpdatingAggregateOperator;
 // =========================================================================
 // 引入全新的 Actor 框架核心协议 (取代了老旧的 ArrowOperator 和 Collector)
 // =========================================================================
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
 use crate::runtime::streaming::arrow::decode_aggregate;
 use crate::runtime::streaming::operators::{Key, UpdatingCache};
 use crate::runtime::streaming::StreamOutput;
@@ -476,96 +475,95 @@ impl IncrementalAggregatingFunc {
 
     async fn initialize(&mut self, ctx: &mut TaskContext) -> Result<()> {
         let mut tm = ctx.table_manager_guard().await?;
-
-        let table = tm
-            .get_uncached_key_value_view("a")
-            .await
-            .map_err(|e| anyhow!("state table a: {e}"))?;
-        let mut stream = Box::pin(table.get_all());
-        let key_converter = RowConverter::new(self.sliding_state_schema.sort_fields(false))?;
-
-        while let Some(batch) = stream.next().await {
-            let batch = batch?;
-            if batch.num_rows() == 0 { continue; }
-
-            let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect();
-            let aggregate_states = self.aggregates.iter().map(|agg| {
-                agg.state_cols.iter().map(|idx| batch.column(*idx).clone()).collect_vec()
-            }).collect_vec();
-            let generations = batch.columns().last().unwrap().as_primitive::<UInt64Type>();
-            let now = Instant::now();
-
-            if key_cols.is_empty() {
-                self.restore_sliding(
-                    GLOBAL_KEY.as_ref().as_slice(),
-                    now,
-                    0,
-                    &aggregate_states,
-                    generations.value(0),
-                )?;
-            } else {
-                let key_rows = key_converter.convert_columns(&key_cols)?;
-                for (i, row) in key_rows.iter().enumerate() {
-                    if generations.is_null(i) {
-                        bail!("generation is null at row {i}");
-                    }
-                    let generation = generations.value(i);
-                    self.restore_sliding(
-                        row.as_ref(),
-                        now,
-                        i,
-                        &aggregate_states,
-                        generation,
-                    )?;
-                }
-            }
-        }
-        drop(stream);
-
-        // 初始化 Batch Accumulator
-        if self.aggregates.iter().any(|agg| agg.accumulator_type == AccumulatorType::Batch) {
-            let table = tm
-                .get_uncached_key_value_view("b")
-                .await
-                .map_err(|e| anyhow!("state table b: {e}"))?;
-            let mut stream = Box::pin(table.get_all());
-            while let Some(batch) = stream.next().await {
-                let batch = batch?;
-                if batch.num_rows() == 0 { continue; }
-
-                let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect();
-                let count_column = batch.column(self.batch_state_schema.schema.index_of("count").unwrap()).as_any().downcast_ref::<UInt64Array>().unwrap();
-                let accumulator_column = batch.column(self.batch_state_schema.schema.index_of("accumulator").unwrap()).as_any().downcast_ref::<UInt32Array>().unwrap();
-                let args_row_column = batch.column(self.batch_state_schema.schema.index_of("args_row").unwrap()).as_any().downcast_ref::<BinaryArray>().unwrap();
-                let generations = batch.columns().last().unwrap().as_primitive::<UInt64Type>();
-
-                let key_rows = if key_cols.is_empty() {
-                    vec![GLOBAL_KEY.as_ref().clone()]
-                } else {
-                    self.key_converter
-                        .convert_columns(&key_cols)?
-                        .iter()
-                        .map(|k| k.as_ref().to_vec())
-                        .collect()
-                };
-
-                for (i, row) in key_rows.iter().enumerate() {
-                    let Some(accumulators) = self.accumulators.get_mut(row.as_ref()) else { continue; };
-                    let count = count_column.value(i);
-                    let accumulator_idx = accumulator_column.value(i) as usize;
-                    let args_row = args_row_column.value(i);
-                    let generation = generations.value(i);
-
-                    let IncrementalState::Batch { data, .. } = &mut accumulators[accumulator_idx] else { bail!("expected batch accumulator"); };
-
-                    if let Some(existing) = data.get_mut(args_row) {
-                        if existing.generation < generation { existing.count = count; existing.generation = generation; }
-                    } else {
-                        data.insert(Key(Arc::new(args_row.to_vec())), BatchData { count, generation });
-                    }
-                }
-            }
-        }
+        // let table = tm
+        //     .get_uncached_key_value_view("a")
+        //     .await
+        //     .map_err(|e| anyhow!("state table a: {e}"))?;
+        // let mut stream = Box::pin(table.get_all());
+        // let key_converter = RowConverter::new(self.sliding_state_schema.sort_fields(false))?;
+        //
+        // while let Some(batch) = stream.next().await {
+        //     let batch = batch?;
+        //     if batch.num_rows() == 0 { continue; }
+        //
+        //     let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect();
+        //     let aggregate_states = self.aggregates.iter().map(|agg| {
+        //         agg.state_cols.iter().map(|idx| batch.column(*idx).clone()).collect_vec()
+        //     }).collect_vec();
+        //     let generations = batch.columns().last().unwrap().as_primitive::<UInt64Type>();
+        //     let now = Instant::now();
+        //
+        //     if key_cols.is_empty() {
+        //         self.restore_sliding(
+        //             GLOBAL_KEY.as_ref().as_slice(),
+        //             now,
+        //             0,
+        //             &aggregate_states,
+        //             generations.value(0),
+        //         )?;
+        //     } else {
+        //         let key_rows = key_converter.convert_columns(&key_cols)?;
+        //         for (i, row) in key_rows.iter().enumerate() {
+        //             if generations.is_null(i) {
+        //                 bail!("generation is null at row {i}");
+        //             }
+        //             let generation = generations.value(i);
+        //             self.restore_sliding(
+        //                 row.as_ref(),
+        //                 now,
+        //                 i,
+        //                 &aggregate_states,
+        //                 generation,
+        //             )?;
+        //         }
+        //     }
+        // }
+        // drop(stream);
+
+        //
+        // if self.aggregates.iter().any(|agg| agg.accumulator_type == AccumulatorType::Batch) {
+        //     let table = tm
+        //         .get_uncached_key_value_view("b")
+        //         .await
+        //         .map_err(|e| anyhow!("state table b: {e}"))?;
+        //     let mut stream = Box::pin(table.get_all());
+        //     while let Some(batch) = stream.next().await {
+        //         let batch = batch?;
+        //         if batch.num_rows() == 0 { continue; }
+        //
+        //         let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect();
+        //         let count_column = batch.column(self.batch_state_schema.schema.index_of("count").unwrap()).as_any().downcast_ref::<UInt64Array>().unwrap();
+        //         let accumulator_column = batch.column(self.batch_state_schema.schema.index_of("accumulator").unwrap()).as_any().downcast_ref::<UInt32Array>().unwrap();
+        //         let args_row_column = batch.column(self.batch_state_schema.schema.index_of("args_row").unwrap()).as_any().downcast_ref::<BinaryArray>().unwrap();
+        //         let generations = batch.columns().last().unwrap().as_primitive::<UInt64Type>();
+        //
+        //         let key_rows = if key_cols.is_empty() {
+        //             vec![GLOBAL_KEY.as_ref().clone()]
+        //         } else {
+        //             self.key_converter
+        //                 .convert_columns(&key_cols)?
+        //                 .iter()
+        //                 .map(|k| k.as_ref().to_vec())
+        //                 .collect()
+        //         };
+        //
+        //         for (i, row) in key_rows.iter().enumerate() {
+        //             let Some(accumulators) = self.accumulators.get_mut(row.as_ref()) else { continue; };
+        //             let count = count_column.value(i);
+        //             let accumulator_idx = accumulator_column.value(i) as usize;
+        //             let args_row = args_row_column.value(i);
+        //             let generation = generations.value(i);
+        //
+        //             let IncrementalState::Batch { data, .. } = &mut accumulators[accumulator_idx] else { bail!("expected batch accumulator"); };
+        //
+        //             if let Some(existing) = data.get_mut(args_row) {
+        //                 if existing.generation < generation { existing.count = count; existing.generation = generation; }
+        //             } else {
+        //                 data.insert(Key(Arc::new(args_row.to_vec())), BatchData { count, generation });
+        //             }
+        //         }
+        //     }
+        // }
 
         let mut deleted_keys = vec![];
         for (k, v) in self.accumulators.iter_mut() {
@@ -613,7 +611,6 @@ impl IncrementalAggregatingFunc {
 
         for k in deleted_keys { self.accumulators.remove(&k.0); }
 
-        // 处理 TTL 过期的键
         let mut ttld_keys = vec![];
         for (k, mut v) in self.accumulators.time_out(Instant::now()) {
             is_retracts.push(true);
@@ -639,7 +636,6 @@ impl IncrementalAggregatingFunc {
         let mut final_batch = record_batch.columns().to_vec();
         final_batch.push(metadata);
 
-        // 注意这里需要匹配最终向外发送的 Schema
         Ok(Some(RecordBatch::try_new(
             self.final_output_schema.clone(),
             final_batch,
@@ -703,33 +699,33 @@ impl MessageOperator for IncrementalAggregatingFunc {
         _barrier: CheckpointBarrier,
         ctx: &mut TaskContext,
     ) -> Result<()> {
-        let mut tm = ctx.table_manager_guard().await?;
-
-        if let Some(sliding) = self.checkpoint_sliding()? {
-            let table = tm
-                .get_uncached_key_value_view("a")
-                .await
-                .map_err(|e| anyhow!("state table a: {e}"))?;
-            table
-                .insert_batch(sliding)
-                .await
-                .map_err(|e| anyhow!("insert_batch a: {e}"))?;
-        }
-
-        if let Some(batch) = self.checkpoint_batch()? {
-            let table = tm
-                .get_uncached_key_value_view("b")
-                .await
-                .map_err(|e| anyhow!("state table b: {e}"))?;
-            table
-                .insert_batch(batch)
-                .await
-                .map_err(|e| anyhow!("insert_batch b: {e}"))?;
-        }
-
-        // 清理已生成的 changelog 痕迹
-        self.updated_keys.clear();
-        Ok(())
+        // let mut tm = ctx.table_manager_guard().await?;
+        //
+        // if let Some(sliding) = self.checkpoint_sliding()? {
+        //     let table = tm
+        //         .get_uncached_key_value_view("a")
+        //         .await
+        //         .map_err(|e| anyhow!("state table a: {e}"))?;
+        //     table
+        //         .insert_batch(sliding)
+        //         .await
+        //         .map_err(|e| anyhow!("insert_batch a: {e}"))?;
+        // }
+        //
+        // if let Some(batch) = self.checkpoint_batch()? {
+        //     let table = tm
+        //         .get_uncached_key_value_view("b")
+        //         .await
+        //         .map_err(|e| anyhow!("state table b: {e}"))?;
+        //     table
+        //         .insert_batch(batch)
+        //         .await
+        //         .map_err(|e| anyhow!("insert_batch b: {e}"))?;
+        // }
+        //
+        //
+        // self.updated_keys.clear();
+         Ok(())
     }
 
     async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs
index dbde4d8e..639876bf 100644
--- a/src/runtime/streaming/operators/joins/join_instance.rs
+++ b/src/runtime/streaming/operators/joins/join_instance.rs
@@ -1,4 +1,4 @@
-//! 瞬时 JOIN：双通道喂入 DataFusion 物理计划，水位线推进时闭合实例并抽干结果。
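+//! Instant JOIN: both inputs are fed into a DataFusion physical plan; when the watermark advances, the instance is closed and its results are drained (in-memory-only version).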
 
 use anyhow::{anyhow, Result};
 use arrow::compute::{max, min, partition, sort_to_indices, take};
@@ -18,9 +18,8 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
 use async_trait::async_trait;
-use tracing_subscriber::Registry;
 use protocol::grpc::api::JoinOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
@@ -33,6 +32,7 @@ enum JoinSide {
 }
 
 impl JoinSide {
+    #[allow(dead_code)]
     fn name(&self) -> &'static str {
         match self {
             JoinSide::Left => "left",
@@ -149,16 +149,6 @@ impl InstantJoinOperator {
             }
         }
 
-        let wm = ctx.last_present_watermark();
-        {
-            let mut tm = ctx.table_manager_guard().await?;
-            let table = tm
-                .get_expiring_time_key_table(side.name(), wm)
-                .await
-                .map_err(|e| anyhow!("{e:?}"))?;
-            table.insert(from_nanos(max_timestamp as u128), batch.clone());
-        }
-
         let unkeyed_batch = self.input_schema(side).unkeyed_batch(&batch)?;
 
         if max_timestamp == min_timestamp {
@@ -201,39 +191,7 @@ impl MessageOperator for InstantJoinOperator {
         "InstantJoin"
     }
 
-    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-
-        let left_batches: Vec<_> = {
-            let mut tm = ctx.table_manager_guard().await?;
-            let left_table = tm
-                .get_expiring_time_key_table("left", watermark)
-                .await
-                .map_err(|e| anyhow!("{e:?}"))?;
-            left_table
-                .all_batches_for_watermark(watermark)
-                .flat_map(|(_time, batches)| batches.iter().cloned())
-                .collect()
-        };
-        for batch in left_batches {
-            self.process_side_internal(JoinSide::Left, batch, ctx).await?;
-        }
-
-        let right_batches: Vec<_> = {
-            let mut tm = ctx.table_manager_guard().await?;
-            let right_table = tm
-                .get_expiring_time_key_table("right", watermark)
-                .await
-                .map_err(|e| anyhow!("{e:?}"))?;
-            right_table
-                .all_batches_for_watermark(watermark)
-                .flat_map(|(_time, batches)| batches.iter().cloned())
-                .collect()
-        };
-        for batch in right_batches {
-            self.process_side_internal(JoinSide::Right, batch, ctx).await?;
-        }
-
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
@@ -286,22 +244,8 @@ impl MessageOperator for InstantJoinOperator {
     async fn snapshot_state(
         &mut self,
         _barrier: CheckpointBarrier,
-        ctx: &mut TaskContext,
+        _ctx: &mut TaskContext,
     ) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-        let mut tm = ctx.table_manager_guard().await?;
-        tm.get_expiring_time_key_table("left", watermark)
-            .await
-            .map_err(|e| anyhow!("{e:?}"))?
-            .flush(watermark)
-            .await
-            .map_err(|e| anyhow!("{e:?}"))?;
-        tm.get_expiring_time_key_table("right", watermark)
-            .await
-            .map_err(|e| anyhow!("{e:?}"))?
-            .flush(watermark)
-            .await
-            .map_err(|e| anyhow!("{e:?}"))?;
         Ok(())
     }
 }
diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs
index d115ac10..c2bb6259 100644
--- a/src/runtime/streaming/operators/joins/join_with_expiration.rs
+++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs
@@ -1,4 +1,5 @@
-//! 带 TTL 的 Key-Time Join：两侧状态表 + DataFusion 物理计划成对计算。
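+//! Key-time join with TTL: in-memory state plus a DataFusion physical plan evaluated per batch pair.
+//! The underlying TableManager and persistent-state dependencies have been removed entirely.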
 
 use anyhow::{anyhow, Result};
 use arrow::compute::concat_batches;
@@ -9,14 +10,14 @@ use datafusion::physical_plan::ExecutionPlan;
 use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode};
 use futures::StreamExt;
 use prost::Message;
+use std::collections::VecDeque;
 use std::sync::{Arc, RwLock};
-use std::time::Duration;
+use std::time::{Duration, SystemTime};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
 use async_trait::async_trait;
-use tracing_subscriber::Registry;
 use protocol::grpc::api::JoinOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark};
@@ -28,32 +29,64 @@ enum JoinSide {
     Right,
 }
 
-impl JoinSide {
-    fn table_name(&self) -> &'static str {
-        match self {
-            JoinSide::Left => "left",
-            JoinSide::Right => "right",
+// ============================================================================
+// In-memory TTL state buffer
+/// Queue of `(time, batch)` entries in insertion order, plus the TTL that `expire` enforces.
+
+struct StateBuffer {
+    batches: VecDeque<(SystemTime, RecordBatch)>,
+    ttl: Duration,
+}
+
+impl StateBuffer {
+    fn new(ttl: Duration) -> Self {
+        Self {
+            batches: VecDeque::new(),
+            ttl,
+        }
+    }
+    /// Appends `batch` at the back of the queue, tagged with `time`.
+    fn insert(&mut self, batch: RecordBatch, time: SystemTime) {
+        self.batches.push_back((time, batch));
+    }
+    /// Pops entries from the front while their time is strictly before `current_time - ttl`.
+    fn expire(&mut self, current_time: SystemTime) {
+        let cutoff = current_time
+            .checked_sub(self.ttl)
+            .unwrap_or(SystemTime::UNIX_EPOCH); // subtraction not representable: use the epoch
+        while let Some((time, _)) = self.batches.front() {
+            if *time < cutoff {
+                self.batches.pop_front();
+            } else {
+                break; // stop at the first entry still within the TTL; later entries are not inspected
+            }
         }
     }
+    /// Returns a clone of every buffered batch, in front-to-back (insertion) order.
+    fn get_all_batches(&self) -> Vec<RecordBatch> {
+        self.batches.iter().map(|(_, b)| b.clone()).collect()
+    }
+}
 
+// ============================================================================
+// Operator
+// ============================================================================
+
 pub struct JoinWithExpirationOperator {
-    /// 保留与配置/表注册语义一致；实际 TTL 由状态表配置决定。
-    #[allow(dead_code)]
-    left_expiration: Duration,
-    #[allow(dead_code)]
-    right_expiration: Duration,
     left_input_schema: FsSchema,
     right_input_schema: FsSchema,
     left_schema: FsSchema,
     right_schema: FsSchema,
+
     left_passer: Arc<RwLock<Option<RecordBatch>>>,
     right_passer: Arc<RwLock<Option<RecordBatch>>>,
     join_exec_plan: Arc<dyn ExecutionPlan>,
+
+    left_state: StateBuffer, // in-memory TTL buffer of batches received on the left side
+    right_state: StateBuffer, // in-memory TTL buffer of batches received on the right side
 }
 
 impl JoinWithExpirationOperator {
-    /// 执行 DataFusion 物理计划，返回 JOIN 结果批次（不经过 Collector）。
     async fn compute_pair(
         &mut self,
         left: RecordBatch,
@@ -71,6 +104,7 @@ impl JoinWithExpirationOperator {
         self.join_exec_plan
             .reset()
             .map_err(|e| anyhow!("join plan reset: {e}"))?;
+
         let mut result_stream = self
             .join_exec_plan
             .execute(0, SessionContext::new().task_ctx())
@@ -90,42 +124,22 @@ impl JoinWithExpirationOperator {
         batch: RecordBatch,
         ctx: &mut TaskContext,
     ) -> Result<Vec<StreamOutput>> {
-        let watermark = ctx.last_present_watermark();
-        let target_name = side.table_name();
-        let opposite_name = match side {
-            JoinSide::Left => JoinSide::Right.table_name(),
-            JoinSide::Right => JoinSide::Left.table_name(),
-        };
+        let current_time = ctx
+            .last_present_watermark()
+            .unwrap_or_else(SystemTime::now);
 
-        let mut tm = ctx.table_manager_guard().await?;
-
-        let inserted_rows = {
-            let target_table = tm
-                .get_key_time_table(target_name, watermark)
-                .await
-                .map_err(|e| anyhow!("{e:?}"))?;
-            target_table
-                .insert(batch.clone())
-                .await
-                .map_err(|e| anyhow!("{e:?}"))?
-        };
+        self.left_state.expire(current_time);
+        self.right_state.expire(current_time);
 
-        let opposite_table = tm
-            .get_key_time_table(opposite_name, watermark)
-            .await
-            .map_err(|e| anyhow!("{e:?}"))?;
-
-        let mut opposite_batches = Vec::new();
-        for row in inserted_rows {
-            if let Some(matched_batch) = opposite_table
-                .get_batch(row.as_ref())
-                .map_err(|e| anyhow!("{e:?}"))?
-            {
-                opposite_batches.push(matched_batch.clone());
-            }
+        match side {
+            JoinSide::Left => self.left_state.insert(batch.clone(), current_time),
+            JoinSide::Right => self.right_state.insert(batch.clone(), current_time),
         }
 
-        drop(tm);
+        let opposite_batches = match side {
+            JoinSide::Left => self.right_state.get_all_batches(),
+            JoinSide::Right => self.left_state.get_all_batches(),
+        };
 
         if opposite_batches.is_empty() {
             return Ok(vec![]);
@@ -193,8 +207,6 @@ impl MessageOperator for JoinWithExpirationOperator {
         _barrier: CheckpointBarrier,
         _ctx: &mut TaskContext,
     ) -> Result<()> {
-        // `KeyTimeView` 无 `flush`；写入已通过 `insert` 经 `state_tx` 进入后端刷写管线，
-        // 与 worker 侧 `JoinWithExpiration` 未单独实现 `handle_checkpoint` 一致。
         Ok(())
     }
 
@@ -203,8 +215,10 @@ impl MessageOperator for JoinWithExpirationOperator {
     }
 }
 
-/// 从配置构造 [`JoinWithExpirationOperator`]（实现 [`MessageOperator`]）。
-/// 注意：`ConstructedOperator` 仅包装 `ArrowOperator`，此处不返回该类型。
+// ============================================================================
+// Constructor
+// ============================================================================
+
 pub struct JoinWithExpirationConstructor;
 
 impl JoinWithExpirationConstructor {
@@ -247,8 +261,6 @@ impl JoinWithExpirationConstructor {
         }
 
         Ok(JoinWithExpirationOperator {
-            left_expiration: ttl,
-            right_expiration: ttl,
             left_input_schema,
             right_input_schema,
             left_schema,
@@ -256,6 +268,8 @@ impl JoinWithExpirationConstructor {
             left_passer,
             right_passer,
             join_exec_plan,
+            left_state: StateBuffer::new(ttl),
+            right_state: StateBuffer::new(ttl),
         })
     }
 }
diff --git a/src/runtime/streaming/operators/joins/lookup_join.rs b/src/runtime/streaming/operators/joins/lookup_join.rs
index b302d198..c6458174 100644
--- a/src/runtime/streaming/operators/joins/lookup_join.rs
+++ b/src/runtime/streaming/operators/joins/lookup_join.rs
@@ -14,13 +14,14 @@ use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
 use datafusion_proto::protobuf::PhysicalExprNode;
 use mini_moka::sync::Cache;
 use prost::Message;
+use protocol::grpc::api::{JoinType, LookupJoinOperator as LookupJoinProto};
 use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Duration;
-use protocol::grpc::api::JoinType;
+
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
-use crate::runtime::streaming::protocol::stream_output::StreamOutput;
+use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
+use crate::runtime::streaming::connectors::{LookupConnector, connectors};
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, FsSchema, MetadataField, OperatorConfig, Watermark, LOOKUP_KEY_INDEX_FIELD};
 
@@ -273,7 +274,7 @@ pub struct LookupJoinConstructor;
 impl LookupJoinConstructor {
     pub fn with_config(
         &self,
-        config: LookupJoinOperator,
+        config: LookupJoinProto,
         registry: Arc<Registry>,
     ) -> anyhow::Result<LookupJoinOperator> {
         let join_type = config.join_type();
@@ -361,3 +362,4 @@ impl LookupJoinConstructor {
         })
     }
 }
+
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
index fe2a7d9e..66d3e892 100644
--- a/src/runtime/streaming/operators/mod.rs
+++ b/src/runtime/streaming/operators/mod.rs
@@ -2,25 +2,22 @@
 
 pub mod grouping;
 pub mod joins;
+pub mod key_by;
 pub mod sink;
 pub mod source;
 pub mod watermark;
 pub mod windows;
 
-pub use grouping::{
-    IncrementalAggregatingConstructor, IncrementalAggregatingFunc, Key, UpdatingCache,
-};
+pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache};
 pub use joins::{
-    InstantJoinConstructor, InstantJoinOperator, JoinWithExpirationConstructor,
-    JoinWithExpirationOperator, LookupJoinConstructor, LookupJoinOperator, LookupJoinType,
+    InstantJoinOperator, JoinWithExpirationOperator, LookupJoinOperator, LookupJoinType,
 };
+pub use key_by::KeyByOperator;
 pub use sink::{ConsistencyMode, KafkaSinkOperator};
-pub use source::{BatchDeserializer, KafkaSourceOperator, KafkaState};
-pub use watermark::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState};
+pub use source::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState};
+pub use watermark::{WatermarkGeneratorOperator, WatermarkGeneratorState};
 pub use windows::{
-    SessionAggregatingWindowConstructor, SessionWindowOperator,
-    SlidingAggregatingWindowConstructor, SlidingWindowOperator,
-    TumblingAggregateWindowConstructor, TumblingWindowOperator, WindowFunctionConstructor,
+    SessionWindowOperator, SlidingWindowOperator, TumblingWindowOperator,
     WindowFunctionOperator,
 };
 
diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs
index 9161ac7b..0b68b88b 100644
--- a/src/runtime/streaming/operators/sink/kafka/mod.rs
+++ b/src/runtime/streaming/operators/sink/kafka/mod.rs
@@ -17,6 +17,7 @@ use tracing::{info, warn};
 
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::format::DataSerializer;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark};
 // ============================================================================
@@ -49,7 +50,7 @@ pub struct KafkaSinkOperator {
     pub timestamp_col_idx: Option<usize>,
     pub key_col_idx: Option<usize>,
 
-    pub serializer: ArrowSerializer,
+    pub serializer: DataSerializer,
 
     at_least_once_producer: Option<FutureProducer>,
     transactional_state: Option<TransactionalState>,
@@ -64,7 +65,7 @@ impl KafkaSinkOperator {
         consistency_mode: ConsistencyMode,
         client_config: HashMap<String, String>,
         input_schema: FsSchema,
-        serializer: ArrowSerializer,
+        serializer: DataSerializer,
     ) -> Self {
         Self {
             topic,
@@ -102,7 +103,7 @@ impl KafkaSinkOperator {
         if let Some(idx) = tx_index {
             config.set("enable.idempotence", "true");
             let transactional_id = format!(
-                "arroyo-tx-{}-{}-{}-{}",
+                "fs-tx-{}-{}-{}-{}",
                 ctx.job_id, self.topic, ctx.subtask_idx, idx
             );
             config.set("transactional.id", &transactional_id);
@@ -236,10 +237,10 @@ impl MessageOperator for KafkaSinkOperator {
         batch: RecordBatch,
         _ctx: &mut TaskContext,
     ) -> Result<Vec<StreamOutput>> {
-        let payload_iter = self.serializer.serialize(&batch);
+        let payloads = self.serializer.serialize(&batch)?;
         let producer = self.current_producer().clone();
 
-        for (i, payload) in payload_iter.enumerate() {
+        for (i, payload) in payloads.iter().enumerate() {
             let ts_millis = self
                 .timestamp_col_idx
                 .and_then(|idx| event_timestamp_ms(&batch, i, idx));
diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs
index d0c67972..595fbcc3 100644
--- a/src/runtime/streaming/operators/source/kafka/mod.rs
+++ b/src/runtime/streaming/operators/source/kafka/mod.rs
@@ -1,6 +1,8 @@
 //! Kafka 源算子：实现 [`crate::runtime::streaming::api::source::SourceOperator`]，由 [`crate::runtime::streaming::execution::SourceRunner`] 轮询 `fetch_next`。
 
 use anyhow::{anyhow, Context as _, Result};
+use arrow_array::RecordBatch;
+use arrow_schema::SchemaRef;
 use async_trait::async_trait;
 use bincode::{Decode, Encode};
 use governor::{DefaultDirectRateLimiter, Quota, RateLimiter as GovernorRateLimiter};
@@ -10,11 +12,12 @@ use std::collections::HashMap;
 use std::num::NonZeroU32;
 use std::time::Duration;
 use tracing::{debug, error, info, warn};
-use arrow_array::RecordBatch;
 
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::source::{SourceEvent, SourceOffset, SourceOperator};
+use crate::runtime::streaming::format::{BadDataPolicy, DataDeserializer, Format};
 use crate::sql::common::{CheckpointBarrier, MetadataField};
+use crate::sql::common::fs_schema::FieldValueType;
 // ============================================================================
 // 1. 领域模型：Kafka 状态与配置
 // ============================================================================
@@ -25,8 +28,7 @@ pub struct KafkaState {
     offset: i64,
 }
 
-/// 模拟 Arroyo 原版的 Deserializer Buffer
-/// （工业实现中，反序列化常带 buffer，满 N 条或超时后吐出一个 [`RecordBatch`]）。
+/// Incremental buffering deserializer trait: the source calls `deserialize_slice` per record and, once the buffer is full, `flush_buffer` emits a [`RecordBatch`].
 pub trait BatchDeserializer: Send + 'static {
     fn deserialize_slice(
         &mut self,
@@ -40,6 +42,54 @@ pub trait BatchDeserializer: Send + 'static {
     fn flush_buffer(&mut self) -> Result<Option<RecordBatch>>;
 }
 
+// ---------------------------------------------------------------------------
+// BufferedDeserializer — default BatchDeserializer implementation built on DataDeserializer
+// ---------------------------------------------------------------------------
+
+/// Adapts a [`DataDeserializer`] to [`BatchDeserializer`]: payloads are buffered one at a time and deserialized as a batch once the threshold is reached.
+pub struct BufferedDeserializer {
+    inner: DataDeserializer, // performs the actual batch deserialization in `flush_buffer`
+    buffer: Vec<Vec<u8>>, // owned copies of payloads waiting to be deserialized
+    batch_size: usize, // buffered-payload count at which `should_flush` turns true
+}
+
+impl BufferedDeserializer {
+    /// Wraps a new `DataDeserializer` and pre-allocates room for `batch_size` buffered payloads.
+    pub fn new(format: Format, schema: SchemaRef, bad_data_policy: BadDataPolicy, batch_size: usize) -> Self {
+        let inner = DataDeserializer::new(format, schema, bad_data_policy);
+        // `batch_size` is both the initial capacity and the threshold checked by `should_flush`.
+        let buffer = Vec::with_capacity(batch_size);
+        Self { inner, buffer, batch_size }
+    }
+}
+
+impl BatchDeserializer for BufferedDeserializer {
+    /// Copies `payload` into the buffer; `_timestamp` and `_metadata` are not used by this implementation.
+    fn deserialize_slice(
+        &mut self,
+        payload: &[u8],
+        _timestamp: u64,
+        _metadata: Option<HashMap<&str, FieldValueType<'_>>>,
+    ) -> Result<()> {
+        self.buffer.push(payload.to_vec());
+        Ok(())
+    }
+    /// Reports whether at least `batch_size` payloads are buffered.
+    fn should_flush(&self) -> bool {
+        self.buffer.len() >= self.batch_size
+    }
+    /// Deserializes all buffered payloads into one batch (`None` if empty); the buffer is emptied even on error.
+    fn flush_buffer(&mut self) -> Result<Option<RecordBatch>> {
+        if self.buffer.is_empty() {
+            return Ok(None);
+        }
+        let refs: Vec<&[u8]> = self.buffer.iter().map(|v| v.as_slice()).collect();
+        let result = self.inner.deserialize_batch(&refs);
+        self.buffer.clear(); // before `?`: a retained bad batch would fail every later flush and keep growing
+        Ok(Some(result?))
+    }
+}
+
 impl SourceOffset {
     fn rdkafka_offset(self) -> Offset {
         match self {
@@ -109,9 +159,9 @@ impl KafkaSourceOperator {
         let group_id = match (&self.group_id, &self.group_id_prefix) {
             (Some(gid), _) => gid.clone(),
             (None, Some(prefix)) => {
-                format!("{}-arroyo-{}-{}", prefix, ctx.job_id, ctx.subtask_idx)
+                format!("{}-fs-{}-{}", prefix, ctx.job_id, ctx.subtask_idx)
             }
-            (None, None) => format!("arroyo-{}-{}-consumer", ctx.job_id, ctx.subtask_idx),
+            (None, None) => format!("fs-{}-{}-consumer", ctx.job_id, ctx.subtask_idx),
         };
 
         for (key, value) in &self.client_configs {
diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs
index ef4e3cb6..59b3ff7c 100644
--- a/src/runtime/streaming/operators/source/mod.rs
+++ b/src/runtime/streaming/operators/source/mod.rs
@@ -2,4 +2,4 @@
 
 pub mod kafka;
 
-pub use kafka::{BatchDeserializer, KafkaSourceOperator, KafkaState};
+pub use kafka::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState};
diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs
index fa97b3d9..f210c95a 100644
--- a/src/runtime/streaming/operators/watermark/watermark_generator.rs
+++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs
@@ -16,9 +16,8 @@ use std::time::{Duration, SystemTime};
 use tracing::{debug, info};
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
 use async_trait::async_trait;
-use tracing_subscriber::Registry;
 use protocol::grpc::api::ExpressionWatermarkConfig;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_millis, CheckpointBarrier, FsSchema, Watermark};
@@ -242,3 +241,4 @@ impl WatermarkGeneratorConstructor {
         ))
     }
 }
+
diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
index ebe75c4c..cae0935c 100644
--- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
@@ -1,8 +1,9 @@
-//! 会话窗口聚合：与 worker `arrow/session_aggregating_window` 对齐，实现 [`MessageOperator`]。
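+//! Session window aggregation: in-memory version, fully decoupled from persistent state storage.
+//! A BTreeMap serves as the priority queue, so gap merging and triggering happen directly in memory.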
 
 use anyhow::{anyhow, bail, Context, Result};
 use arrow::compute::{
-    concat_batches, filter_record_batch, kernels::cmp::gt_eq, lexsort_to_indices, max, partition, take,
+    concat_batches, filter_record_batch, kernels::cmp::gt_eq, lexsort_to_indices, partition, take,
 };
 use arrow::row::{RowConverter, SortField};
 use arrow_array::types::TimestampNanosecondType;
@@ -22,12 +23,11 @@ use std::collections::{BTreeMap, HashMap, HashSet};
 use std::sync::{Arc, RwLock};
 use std::time::{Duration, SystemTime};
 use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
-use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
 use async_trait::async_trait;
-use tracing_subscriber::Registry;
+use crate::runtime::streaming::api::operator::Registry;
 use protocol::grpc::api::SessionWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
@@ -35,7 +35,7 @@ use crate::sql::common::converter::Converter;
 use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
 use crate::sql::schema::utils::window_arrow_struct;
 // ============================================================================
-// 领域模型
+// Domain model and in-memory state
 // ============================================================================
 
 struct SessionWindowConfig {
@@ -167,7 +167,7 @@ struct SessionWindowResult {
 struct KeySessionState {
     config: Arc<SessionWindowConfig>,
     active_session: Option<ActiveSession>,
-    buffered_batches: BTreeMap<SystemTime, Vec<RecordBatch>>,
+    buffered_batches: BTreeMap<SystemTime, Vec<RecordBatch>>, // 纯内存缓冲
 }
 
 impl KeySessionState {
@@ -323,7 +323,7 @@ fn build_session_output_schema(
 }
 
 // ============================================================================
-// 算子
+// Operator: handles incoming data and time progress, routing each to the matching KeySessionState
 // ============================================================================
 
 pub struct SessionWindowOperator {
@@ -564,10 +564,7 @@ impl SessionWindowOperator {
 
         let window_start_array = PrimitiveArray::<TimestampNanosecondType>::from(start_times);
         let window_end_array = PrimitiveArray::<TimestampNanosecondType>::from(end_times.clone());
-        let timestamp_array = PrimitiveArray::<TimestampNanosecondType>::from(
-            end_times.into_iter().map(|t| t - 1).collect::<Vec<_>>(),
-        );
-
+        
         let result_batches: Vec<&RecordBatch> = session_results.iter().map(|res| &res.batch).collect();
         let merged_batch = concat_batches(&session_results[0].batch.schema(), result_batches)?;
 
@@ -584,12 +581,12 @@ impl SessionWindowOperator {
         let mut columns = key_columns;
         columns.insert(self.config.window_index, Arc::new(window_struct_array));
         columns.extend_from_slice(merged_batch.columns());
-        columns.push(Arc::new(timestamp_array));
 
         RecordBatch::try_new(self.config.output_schema.clone(), columns)
             .context("failed to create session window output batch")
     }
 
+    #[allow(dead_code)]
     fn earliest_batch_time(&self) -> Option<SystemTime> {
         self.pq_start_times
             .first_key_value()
@@ -603,44 +600,7 @@ impl MessageOperator for SessionWindowOperator {
         "SessionWindow"
     }
 
-    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
-        let mut tm = ctx.table_manager_guard().await?;
-        let start_time_opt = tm
-            .get_global_keyed_state::<u32, Option<SystemTime>>("e")
-            .await
-            .map_err(|e| anyhow!("global keyed state e: {e}"))?
-            .get_all()
-            .values()
-            .filter_map(|e| *e)
-            .min();
-
-        let Some(start_time) = start_time_opt else {
-            return Ok(());
-        };
-
-        let state_table = tm
-            .get_expiring_time_key_table("s", Some(start_time))
-            .await
-            .map_err(|e| anyhow!("expiring time key table s: {e}"))?;
-        for (_, batches) in state_table.all_batches_for_watermark(Some(start_time)) {
-            for batch in batches {
-                let filtered = self.filter_batch_by_time(batch.clone(), Some(start_time))?;
-                if filtered.num_rows() > 0 {
-                    let sorted = self.sort_batch(&filtered)?;
-                    self.ingest_sorted_batch(sorted, Some(start_time)).await?;
-                }
-            }
-        }
-
-        if let Some(ts) = ctx.last_present_watermark() {
-            let evicted = self.evaluate_watermark(ts).await?;
-            if !evicted.is_empty() {
-                warn!(
-                    "evicted {} session result batch(es) when restoring from state",
-                    evicted.len()
-                );
-            }
-        }
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
@@ -659,23 +619,6 @@ impl MessageOperator for SessionWindowOperator {
 
         let sorted_batch = self.sort_batch(&filtered_batch)?;
 
-        let max_timestamp = max(
-            sorted_batch
-                .column(self.config.input_schema_ref.timestamp_index)
-                .as_any()
-                .downcast_ref::<TimestampNanosecondArray>()
-                .ok_or_else(|| anyhow!("expected timestamp column"))?,
-        )
-        .ok_or_else(|| anyhow!("expected max timestamp"))?;
-
-        let mut tm = ctx.table_manager_guard().await?;
-        let table = tm
-            .get_expiring_time_key_table("s", ctx.last_present_watermark())
-            .await
-            .map_err(|e| anyhow!("expiring time key table s: {e}"))?;
-        table.insert(from_nanos(max_timestamp as u128), sorted_batch.clone());
-        drop(tm);
-
         self.ingest_sorted_batch(sorted_batch, watermark_time).await?;
 
         Ok(vec![])
@@ -697,22 +640,7 @@ impl MessageOperator for SessionWindowOperator {
             .collect())
     }
 
-    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-        let mut tm = ctx.table_manager_guard().await?;
-
-        tm.get_expiring_time_key_table("s", watermark)
-            .await
-            .map_err(|e| anyhow!("expiring time key table s: {e}"))?
-            .flush(watermark)
-            .await?;
-
-        tm.get_global_keyed_state::<u32, Option<SystemTime>>("e")
-            .await
-            .map_err(|e| anyhow!("global keyed state e: {e}"))?
-            .insert(ctx.subtask_idx, self.earliest_batch_time())
-            .await;
-
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
@@ -722,7 +650,7 @@ impl MessageOperator for SessionWindowOperator {
 }
 
 // ============================================================================
-// 构造器（返回 [`SessionWindowOperator`]，供 Actor 子任务直接 `Box::new`）
+// 构造器
 // ============================================================================
 
 pub struct SessionAggregatingWindowConstructor;
@@ -802,3 +730,4 @@ impl SessionAggregatingWindowConstructor {
         })
     }
 }
+
diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
index 29bad05a..aa2e2474 100644
--- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
@@ -1,4 +1,6 @@
-//! 滑动窗口聚合：与 worker `arrow/sliding_aggregating_window` 对齐，实现 [`MessageOperator`]。
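+//! Sliding window aggregation: in-memory version.
+//! Computation relies entirely on the internal TieredRecordBatchHolder and ActiveBin, held in memory,
+//! with no TableManager dependency; barriers are passed through automatically.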
 
 use anyhow::{anyhow, bail, Result};
 use arrow::compute::{partition, sort_to_indices, take};
@@ -25,13 +27,14 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
 use async_trait::async_trait;
-use tracing_subscriber::Registry;
+use crate::runtime::streaming::api::operator::Registry;
 use protocol::grpc::api::SlidingWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
 use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
 // ============================================================================
-// Tiered panes
+// In-memory state: tiered time panes
+// Already a well-suited in-memory data structure, so it is kept unchanged.
 // ============================================================================
 
 #[derive(Default, Debug)]
@@ -202,7 +205,7 @@ impl TieredRecordBatchHolder {
 }
 
 // ============================================================================
-// Per-bin partial aggregation
+// Per-bin partial aggregation (in-memory buffer)
 // ============================================================================
 
 struct ActiveBin {
@@ -249,7 +252,7 @@ impl ActiveBin {
 }
 
 // ============================================================================
-// Operator
+// Operator
 // ============================================================================
 
 pub struct SlidingWindowOperator {
@@ -314,29 +317,7 @@ impl MessageOperator for SlidingWindowOperator {
         "SlidingWindow"
     }
 
-    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-        let mut tm = ctx.table_manager_guard().await?;
-        let table = tm
-            .get_expiring_time_key_table("t", watermark)
-            .await
-            .map_err(|e| anyhow!("expiring time key table t: {e}"))?;
-
-        let watermark_bin = self.bin_start(watermark.unwrap_or(SystemTime::UNIX_EPOCH));
-
-        for (timestamp, batches) in table.all_batches_for_watermark(watermark) {
-            let bin = self.bin_start(*timestamp);
-            if bin < watermark_bin {
-                for batch in batches {
-                    self.tiered_record_batches.insert(batch.clone(), bin)?;
-                }
-            } else {
-                let slot = self.active_bins.entry(bin).or_default();
-                for batch in batches {
-                    slot.finished_batches.push(batch.clone());
-                }
-            }
-        }
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
@@ -470,28 +451,7 @@ impl MessageOperator for SlidingWindowOperator {
         Ok(final_outputs)
     }
 
-    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-        let mut tm = ctx.table_manager_guard().await?;
-        let table = tm
-            .get_expiring_time_key_table("t", watermark)
-            .await
-            .map_err(|e| anyhow!("expiring time key table t: {e}"))?;
-
-        for (bin_start, active_bin) in self.active_bins.iter_mut() {
-            active_bin.close_and_drain().await?;
-
-            for batch in &active_bin.finished_batches {
-                let state_batch = Self::add_bin_start_as_timestamp(
-                    batch,
-                    *bin_start,
-                    self.partial_schema.schema.clone(),
-                )?;
-                table.insert(*bin_start, state_batch);
-            }
-        }
-
-        table.flush(watermark).await?;
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
@@ -500,6 +460,8 @@ impl MessageOperator for SlidingWindowOperator {
     }
 }
 
+// ============================================================================
+// 构造器
 // ============================================================================
 
 pub struct SlidingAggregatingWindowConstructor;
@@ -576,3 +538,4 @@ impl SlidingAggregatingWindowConstructor {
         })
     }
 }
+
diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
index c30950cb..c23da40a 100644
--- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
@@ -27,7 +27,7 @@ use tracing::warn;
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
 use async_trait::async_trait;
-use tracing_subscriber::Registry;
+use crate::runtime::streaming::api::operator::Registry;
 use protocol::grpc::api::TumblingWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
@@ -397,3 +397,4 @@ impl TumblingAggregateWindowConstructor {
         })
     }
 }
+
diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs
index cc51b820..03f02a19 100644
--- a/src/runtime/streaming/operators/windows/window_function.rs
+++ b/src/runtime/streaming/operators/windows/window_function.rs
@@ -1,4 +1,6 @@
-//! 窗口函数（按事件时间分桶的瞬时执行）：与 worker `arrow/window_fn` 对齐，实现 [`MessageOperator`]。
+//! 窗口函数（按事件时间分桶的瞬时执行）：纯内存版。
+//! 完全依赖内部的 ActiveWindowExec 通道在内存中缓冲数据，
+//! 摆脱持久化状态存储的依赖，遇到 Barrier 自动透传。
 
 use anyhow::{anyhow, Result};
 use arrow::compute::{max, min};
@@ -18,14 +20,17 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
 use async_trait::async_trait;
-use tracing_subscriber::Registry;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
 use crate::sql::common::time_utils::print_time;
 use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
 
+// ============================================================================
+// 纯内存执行缓冲区
+// ============================================================================
+
 struct ActiveWindowExec {
     sender: Option<UnboundedSender<RecordBatch>>,
     result_stream: Option<SendableRecordBatchStream>,
@@ -58,6 +63,10 @@ impl ActiveWindowExec {
     }
 }
 
+// ============================================================================
+// 算子主体
+// ============================================================================
+
 pub struct WindowFunctionOperator {
     input_schema: FsSchemaRef,
     input_schema_unkeyed: FsSchemaRef,
@@ -141,25 +150,7 @@ impl MessageOperator for WindowFunctionOperator {
         "WindowFunction"
     }
 
-    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-        let mut tm = ctx.table_manager_guard().await?;
-        let table = tm
-            .get_expiring_time_key_table("input", watermark)
-            .await
-            .map_err(|e| anyhow!("expiring time key table input: {e}"))?;
-
-        for (timestamp, batches) in table.all_batches_for_watermark(watermark) {
-            let exec = self.get_or_create_exec(*timestamp)?;
-            for batch in batches {
-                exec
-                    .sender
-                    .as_ref()
-                    .ok_or_else(|| anyhow!("window exec sender missing on restore"))?
-                    .send(batch.clone())
-                    .map_err(|e| anyhow!("restore send: {e}"))?;
-            }
-        }
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
@@ -172,17 +163,9 @@ impl MessageOperator for WindowFunctionOperator {
         let current_watermark = ctx.last_present_watermark();
         let split_batches = self.filter_and_split_batches(batch, current_watermark)?;
 
-        let mut tm = ctx.table_manager_guard().await?;
-        let table = tm
-            .get_expiring_time_key_table("input", current_watermark)
-            .await
-            .map_err(|e| anyhow!("expiring time key table input: {e}"))?;
-
         for (sub_batch, timestamp) in split_batches {
-            table.insert(timestamp, sub_batch.clone());
             let exec = self.get_or_create_exec(timestamp)?;
-            exec
-                .sender
+            exec.sender
                 .as_ref()
                 .ok_or_else(|| anyhow!("window exec sender missing"))?
                 .send(sub_batch)
@@ -227,14 +210,7 @@ impl MessageOperator for WindowFunctionOperator {
         Ok(final_outputs)
     }
 
-    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-        let mut tm = ctx.table_manager_guard().await?;
-        tm.get_expiring_time_key_table("input", watermark)
-            .await
-            .map_err(|e| anyhow!("expiring time key table input: {e}"))?
-            .flush(watermark)
-            .await?;
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
@@ -243,6 +219,10 @@ impl MessageOperator for WindowFunctionOperator {
     }
 }
 
+// ============================================================================
+// 构造器
+// ============================================================================
+
 pub struct WindowFunctionConstructor;
 
 impl WindowFunctionConstructor {
@@ -290,3 +270,4 @@ impl WindowFunctionConstructor {
         })
     }
 }
+
diff --git a/src/runtime/streaming/storage/mod.rs b/src/runtime/streaming/storage/mod.rs
new file mode 100644
index 00000000..c411b5ee
--- /dev/null
+++ b/src/runtime/streaming/storage/mod.rs
@@ -0,0 +1,32 @@
+use anyhow::Result;
+use async_trait::async_trait;
+use std::sync::Arc;
+
+pub mod backend;
+pub mod manager;
+pub mod table;
+
+#[async_trait]
+pub trait StorageProvider: Send + Sync + 'static {
+    async fn get(&self, _path: &str) -> Result<Vec<u8>>;
+    async fn put(&self, _path: &str, _data: Vec<u8>) -> Result<()>;
+    async fn delete_if_present(&self, _path: &str) -> Result<()>;
+}
+
+pub type StorageProviderRef = Arc<dyn StorageProvider>;
+
+/// No-op storage implementation for tests and placeholders: `get` yields no bytes, `put` and `delete_if_present` succeed without doing anything.
+pub struct DummyStorageProvider;
+
+#[async_trait]
+impl StorageProvider for DummyStorageProvider {
+    async fn get(&self, _path: &str) -> Result<Vec<u8>> {
+        Ok(vec![])
+    }
+    async fn put(&self, _path: &str, _data: Vec<u8>) -> Result<()> {
+        Ok(())
+    }
+    async fn delete_if_present(&self, _path: &str) -> Result<()> {
+        Ok(())
+    }
+}
diff --git a/src/sql/datastream/logical.rs b/src/sql/datastream/logical.rs
index 13560a3e..c0e5465e 100644
--- a/src/sql/datastream/logical.rs
+++ b/src/sql/datastream/logical.rs
@@ -28,6 +28,7 @@ pub enum OperatorName {
     SlidingWindowAggregate,
     SessionWindowAggregate,
     UpdatingAggregate,
+    KeyBy,
     ConnectorSource,
     ConnectorSink,
 }
@@ -320,6 +321,7 @@ impl LogicalProgram {
                         "sql-session-window-aggregate".to_string()
                     }
                     OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(),
+                    OperatorName::KeyBy => "key-by-routing".to_string(),
                     OperatorName::ConnectorSource => "connector-source".to_string(),
                     OperatorName::ConnectorSink => "connector-sink".to_string(),
                 };
diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs
index 22f58bbe..2fd9ad82 100644
--- a/src/sql/logical_node/logical/operator_name.rs
+++ b/src/sql/logical_node/logical/operator_name.rs
@@ -30,6 +30,7 @@ pub enum OperatorName {
     SlidingWindowAggregate,
     SessionWindowAggregate,
     UpdatingAggregate,
+    KeyBy,
     ConnectorSource,
     ConnectorSink,
 }

From f54301f6b25658907299620ed0a141d3d4a6c129 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 25 Mar 2026 00:09:22 +0800
Subject: [PATCH 17/44] update

---
 src/runtime/streaming/connectors/mod.rs      |  61 +++++++
 src/runtime/streaming/format/config.rs       |  38 +++++
 src/runtime/streaming/format/deserializer.rs |  85 ++++++++++
 src/runtime/streaming/format/json_encoder.rs | 170 +++++++++++++++++++
 src/runtime/streaming/format/serializer.rs   | 129 ++++++++++++++
 src/runtime/streaming/operators/key_by.rs    | 159 +++++++++++++++++
 src/runtime/streaming/storage/backend.rs     |  78 +++++++++
 src/runtime/streaming/storage/manager.rs     | 156 +++++++++++++++++
 src/runtime/streaming/storage/table.rs       |  91 ++++++++++
 9 files changed, 967 insertions(+)
 create mode 100644 src/runtime/streaming/connectors/mod.rs
 create mode 100644 src/runtime/streaming/format/config.rs
 create mode 100644 src/runtime/streaming/format/deserializer.rs
 create mode 100644 src/runtime/streaming/format/json_encoder.rs
 create mode 100644 src/runtime/streaming/format/serializer.rs
 create mode 100644 src/runtime/streaming/operators/key_by.rs
 create mode 100644 src/runtime/streaming/storage/backend.rs
 create mode 100644 src/runtime/streaming/storage/manager.rs
 create mode 100644 src/runtime/streaming/storage/table.rs

diff --git a/src/runtime/streaming/connectors/mod.rs b/src/runtime/streaming/connectors/mod.rs
new file mode 100644
index 00000000..d10a55a9
--- /dev/null
+++ b/src/runtime/streaming/connectors/mod.rs
@@ -0,0 +1,61 @@
+use anyhow::Result;
+use arrow_array::{ArrayRef, RecordBatch};
+use arrow_schema::Schema;
+use async_trait::async_trait;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use crate::sql::common::OperatorConfig;
+
+/// Dimension-table lookup interface, implemented by concrete connectors (e.g. Redis, MySQL).
+#[async_trait]
+pub trait LookupConnector: Send {
+    fn name(&self) -> &str;
+
+    /// Looks up a batch of key columns in the external system and returns the result batch (including a `_lookup_key_index` column).
+    /// Returns `None` when no rows match.
+    async fn lookup(&self, keys: &[ArrayRef]) -> Option<Result<RecordBatch>>;
+}
+
+/// Connector factory trait, implemented once per external system; intended to cover Source / Sink / Lookup construction (only lookup construction is declared here).
+pub trait Connector: Send + Sync {
+    fn name(&self) -> &str;
+
+    fn make_lookup(
+        &self,
+        config: OperatorConfig,
+        schema: Arc<Schema>,
+    ) -> Result<Box<dyn LookupConnector>>;
+}
+
+/// Global connector registry, keyed by each connector's `name()`.
+pub struct ConnectorRegistry {
+    connectors: HashMap<String, Box<dyn Connector>>,
+}
+
+impl ConnectorRegistry {
+    pub fn new() -> Self {
+        Self {
+            connectors: HashMap::new(),
+        }
+    }
+
+    pub fn register(&mut self, connector: Box<dyn Connector>) {
+        self.connectors
+            .insert(connector.name().to_string(), connector); // a later registration under the same name replaces the earlier one
+    }
+
+    pub fn get(&self, name: &str) -> Option<&dyn Connector> {
+        self.connectors.get(name).map(|c| c.as_ref())
+    }
+}
+
+/// Returns every connector currently registered.
+///
+/// For now this is an empty registry; register Kafka / Redis etc. here once they are integrated.
+pub fn connectors() -> ConnectorRegistry {
+    let registry = ConnectorRegistry::new();
+    // TODO: registry.register(Box::new(KafkaConnector));
+    // TODO: registry.register(Box::new(RedisConnector));
+    registry
+}
diff --git a/src/runtime/streaming/format/config.rs b/src/runtime/streaming/format/config.rs
new file mode 100644
index 00000000..e0ac61bb
--- /dev/null
+++ b/src/runtime/streaming/format/config.rs
@@ -0,0 +1,38 @@
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub enum TimestampFormat {
+    RFC3339,
+    UnixMillis,
+    UnixSeconds,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub enum DecimalEncoding {
+    String,
+    Number,
+    Bytes,
+}
+
+/// Policy for handling malformed input data.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub enum BadDataPolicy {
+    /// Report bad data as an error; this is expected to make the operator panic and restart (TODO confirm in the caller).
+    Fail,
+    /// Discard bad data; monitoring metrics are supposed to be recorded for it (TODO confirm where).
+    Drop,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct JsonFormat {
+    pub timestamp_format: TimestampFormat,
+    pub decimal_encoding: DecimalEncoding,
+    pub include_schema: bool,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum Format {
+    Json(JsonFormat),
+    RawString,
+    RawBytes,
+}
diff --git a/src/runtime/streaming/format/deserializer.rs b/src/runtime/streaming/format/deserializer.rs
new file mode 100644
index 00000000..83360bd8
--- /dev/null
+++ b/src/runtime/streaming/format/deserializer.rs
@@ -0,0 +1,85 @@
+//! Data deserializer: converts byte messages received from outside into a structured [`RecordBatch`].
+
+use anyhow::{anyhow, Result};
+use arrow_array::builder::StringBuilder;
+use arrow_array::RecordBatch;
+use arrow_json::reader::ReaderBuilder;
+use arrow_schema::SchemaRef;
+use std::sync::Arc;
+
+use super::config::{BadDataPolicy, Format};
+
+pub struct DataDeserializer {
+    format: Format,
+    schema: SchemaRef,
+    bad_data_policy: BadDataPolicy,
+}
+
+impl DataDeserializer {
+    /// Creates a deserializer that decodes `format` payloads into batches of `schema`.
+    pub fn new(format: Format, schema: SchemaRef, bad_data_policy: BadDataPolicy) -> Self {
+        Self { format, schema, bad_data_policy }
+    }
+
+    /// Decodes `messages` (one payload per element) into a single [`RecordBatch`],
+    /// dispatching on the configured [`Format`].
+    pub fn deserialize_batch(&self, messages: &[&[u8]]) -> Result<RecordBatch> {
+        match &self.format {
+            Format::Json(_) => self.deserialize_json(messages),
+            Format::RawString => self.deserialize_raw_string(messages),
+            Format::RawBytes => self.deserialize_raw_bytes(messages),
+        }
+    }
+
+    /// Decodes newline-delimited JSON. With `BadDataPolicy::Drop` the decoder is non-strict and
+    /// an absent batch becomes an empty one (no panic); otherwise an absent batch is an error.
+    fn deserialize_json(&self, messages: &[&[u8]]) -> Result<RecordBatch> {
+        let mut buffer = Vec::with_capacity(messages.len() * 256);
+        for msg in messages {
+            buffer.extend_from_slice(msg);
+            buffer.push(b'\n');
+        }
+
+        let allow_bad_data = self.bad_data_policy == BadDataPolicy::Drop;
+        let mut decoder = ReaderBuilder::new(self.schema.clone())
+            .with_strict_mode(!allow_bad_data)
+            .build_decoder()?;
+
+        decoder.decode(&buffer)?;
+
+        let batch = if allow_bad_data {
+            match decoder.flush_with_bad_data()? {
+                Some((batch, _mask, _, _errors)) => batch,
+                None => RecordBatch::new_empty(self.schema.clone()),
+            }
+        } else {
+            decoder.flush()?.ok_or_else(|| anyhow!("JSON decoder returned no batch"))?
+        };
+
+        Ok(batch)
+    }
+
+    /// Builds a one-column Utf8 batch, one row per payload; invalid UTF-8 is replaced lossily.
+    fn deserialize_raw_string(&self, messages: &[&[u8]]) -> Result<RecordBatch> {
+        let mut builder = StringBuilder::with_capacity(messages.len(), messages.len() * 64);
+        for msg in messages {
+            builder.append_value(String::from_utf8_lossy(msg));
+        }
+        let array = Arc::new(builder.finish());
+        RecordBatch::try_new(self.schema.clone(), vec![array])
+            .map_err(|e| anyhow!("build RawString batch: {e}"))
+    }
+
+    /// Builds a one-column Binary batch, one row per payload, with the bytes copied verbatim.
+    fn deserialize_raw_bytes(&self, messages: &[&[u8]]) -> Result<RecordBatch> {
+        use arrow_array::builder::BinaryBuilder;
+
+        let mut builder = BinaryBuilder::with_capacity(messages.len(), messages.len() * 64);
+        for msg in messages {
+            builder.append_value(msg);
+        }
+        let array = Arc::new(builder.finish());
+        RecordBatch::try_new(self.schema.clone(), vec![array])
+            .map_err(|e| anyhow!("build RawBytes batch: {e}"))
+    }
+}
diff --git a/src/runtime/streaming/format/json_encoder.rs b/src/runtime/streaming/format/json_encoder.rs
new file mode 100644
index 00000000..7721d9bc
--- /dev/null
+++ b/src/runtime/streaming/format/json_encoder.rs
@@ -0,0 +1,170 @@
+//! Custom Arrow JSON encoders.
+//!
+//! Addresses pain points of Arrow's native JSON output: Kafka compatibility and the encoding of timestamps and decimals.
+
+use arrow_array::{
+    Array, Decimal128Array, TimestampMicrosecondArray,
+    TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
+};
+use arrow_json::writer::NullableEncoder;
+use arrow_json::{Encoder, EncoderFactory, EncoderOptions};
+use arrow_schema::{ArrowError, DataType, FieldRef, TimeUnit};
+use base64::prelude::BASE64_STANDARD;
+use base64::Engine;
+
+use super::config::{DecimalEncoding, TimestampFormat};
+
+#[derive(Debug)]
+pub struct CustomEncoderFactory {
+    pub timestamp_format: TimestampFormat,
+    pub decimal_encoding: DecimalEncoding,
+}
+
+impl EncoderFactory for CustomEncoderFactory {
+    fn make_default_encoder<'a>(
+        &self,
+        _field: &'a FieldRef,
+        array: &'a dyn Array,
+        _options: &'a EncoderOptions,
+    ) -> Result<Option<NullableEncoder<'a>>, ArrowError> {
+        let encoder: Box<dyn Encoder> = match (
+            &self.decimal_encoding,
+            &self.timestamp_format,
+            array.data_type(),
+        ) {
+            // ── Timestamp → Unix milliseconds ──
+            (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Nanosecond, _)) => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<TimestampNanosecondArray>()
+                    .unwrap()
+                    .clone();
+                Box::new(UnixMillisEncoder::Nanos(arr))
+            }
+            (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Microsecond, _)) => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<TimestampMicrosecondArray>()
+                    .unwrap()
+                    .clone();
+                Box::new(UnixMillisEncoder::Micros(arr))
+            }
+            (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Millisecond, _)) => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<TimestampMillisecondArray>()
+                    .unwrap()
+                    .clone();
+                Box::new(UnixMillisEncoder::Millis(arr))
+            }
+            (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Second, _)) => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<TimestampSecondArray>()
+                    .unwrap()
+                    .clone();
+                Box::new(UnixMillisEncoder::Seconds(arr))
+            }
+
+            // ── Decimal128 → String / Bytes ──
+            (DecimalEncoding::String, _, DataType::Decimal128(_, _)) => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<Decimal128Array>()
+                    .unwrap()
+                    .clone();
+                Box::new(DecimalEncoder::StringEncoder(arr))
+            }
+            (DecimalEncoding::Bytes, _, DataType::Decimal128(_, _)) => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<Decimal128Array>()
+                    .unwrap()
+                    .clone();
+                Box::new(DecimalEncoder::BytesEncoder(arr))
+            }
+
+            // ── Binary → Base64 ──
+            (_, _, DataType::Binary) => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<arrow_array::BinaryArray>()
+                    .unwrap()
+                    .clone();
+                Box::new(BinaryEncoder(arr))
+            }
+
+            // Every other type/format combination: fall back to Arrow's built-in encoder
+            _ => return Ok(None),
+        };
+
+        Ok(Some(NullableEncoder::new(encoder, array.nulls().cloned())))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// UnixMillisEncoder — Timestamp of any precision → bare i64 Unix milliseconds
+// ---------------------------------------------------------------------------
+
+enum UnixMillisEncoder {
+    Nanos(TimestampNanosecondArray),
+    Micros(TimestampMicrosecondArray),
+    Millis(TimestampMillisecondArray),
+    Seconds(TimestampSecondArray),
+}
+
+impl Encoder for UnixMillisEncoder {
+    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+        let millis = match self {
+            Self::Nanos(arr) => arr.value(idx) / 1_000_000, // integer division truncates toward zero
+            Self::Micros(arr) => arr.value(idx) / 1_000,
+            Self::Millis(arr) => arr.value(idx),
+            Self::Seconds(arr) => arr.value(idx).saturating_mul(1_000), // clamp instead of overflowing i64
+        };
+        out.extend_from_slice(millis.to_string().as_bytes());
+    }
+}
+
+// ---------------------------------------------------------------------------
+// DecimalEncoder — Decimal128 → JSON string / Base64 bytes
+// ---------------------------------------------------------------------------
+
+enum DecimalEncoder {
+    StringEncoder(Decimal128Array),
+    BytesEncoder(Decimal128Array),
+}
+
+impl Encoder for DecimalEncoder {
+    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+        match self {
+            Self::StringEncoder(arr) => {
+                out.push(b'"');
+                out.extend_from_slice(arr.value_as_string(idx).as_bytes());
+                out.push(b'"');
+            }
+            Self::BytesEncoder(arr) => {
+                out.push(b'"');
+                out.extend_from_slice(
+                    BASE64_STANDARD
+                        .encode(arr.value(idx).to_be_bytes())
+                        .as_bytes(),
+                );
+                out.push(b'"');
+            }
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// BinaryEncoder — Binary → Base64 string
+// ---------------------------------------------------------------------------
+
+struct BinaryEncoder(arrow_array::BinaryArray);
+
+impl Encoder for BinaryEncoder {
+    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+        out.push(b'"');
+        out.extend_from_slice(BASE64_STANDARD.encode(self.0.value(idx)).as_bytes());
+        out.push(b'"');
+    }
+}
diff --git a/src/runtime/streaming/format/serializer.rs b/src/runtime/streaming/format/serializer.rs
new file mode 100644
index 00000000..80969ec6
--- /dev/null
+++ b/src/runtime/streaming/format/serializer.rs
@@ -0,0 +1,129 @@
+//! Data serializer: converts an in-memory [`RecordBatch`] into binary messages for sink connectors to send.
+
+use anyhow::{anyhow, Result};
+use arrow_array::{Array, RecordBatch, StructArray};
+use arrow_json::writer::make_encoder;
+use arrow_json::EncoderOptions;
+use arrow_schema::{DataType, Field, SchemaRef};
+use std::sync::Arc;
+
+use super::config::{Format, JsonFormat};
+use super::json_encoder::CustomEncoderFactory;
+
+pub struct DataSerializer {
+    format: Format,
+    projection_indices: Vec<usize>,
+}
+
+impl DataSerializer {
+    /// Creates a serializer for `format`.
+    ///
+    /// Columns of `schema` whose name starts with `_` are left out of the output; the
+    /// indices of the remaining columns are computed once here.
+    pub fn new(format: Format, schema: SchemaRef) -> Self {
+        let mut projection_indices = Vec::new();
+        for (position, field) in schema.fields().iter().enumerate() {
+            if !field.name().starts_with('_') {
+                projection_indices.push(position);
+            }
+        }
+        Self { format, projection_indices }
+    }
+
+    /// Projects `batch` onto the retained columns, then encodes it with the configured format.
+    ///
+    /// Returns the encoded messages in row order.
+    pub fn serialize(&self, batch: &RecordBatch) -> Result<Vec<Vec<u8>>> {
+        let visible = batch.project(&self.projection_indices)?;
+        match &self.format {
+            Format::Json(json) => self.serialize_json(json, &visible),
+            Format::RawString => self.serialize_raw_string(&visible),
+            Format::RawBytes => self.serialize_raw_bytes(&visible),
+        }
+    }
+
+    /// Encodes each row of `batch` as one JSON message.
+    ///
+    /// Nulls are written explicitly, and timestamps / decimals follow `config` through
+    /// [`CustomEncoderFactory`]. Rows whose encoding is empty are skipped.
+    fn serialize_json(&self, config: &JsonFormat, batch: &RecordBatch) -> Result<Vec<Vec<u8>>> {
+        let factory = CustomEncoderFactory {
+            timestamp_format: config.timestamp_format.clone(),
+            decimal_encoding: config.decimal_encoding.clone(),
+        };
+        let options = EncoderOptions::default()
+            .with_explicit_nulls(true)
+            .with_encoder_factory(Arc::new(factory));
+
+        // The whole batch is wrapped as one non-nullable struct column so that a single
+        // encoder handles every row.
+        let rows = StructArray::from(batch.clone());
+        let row_field = Arc::new(Field::new_struct("", batch.schema().fields().clone(), false));
+
+        let mut encoder = make_encoder(&row_field, &rows, &options)?;
+        let mut messages = Vec::with_capacity(batch.num_rows());
+        for row in 0..rows.len() {
+            let mut encoded = Vec::with_capacity(128);
+            encoder.encode(row, &mut encoded);
+            if encoded.is_empty() {
+                continue;
+            }
+            messages.push(encoded);
+        }
+        Ok(messages)
+    }
+
+    /// Emits the Utf8 `value` column as raw bytes, one message per row.
+    ///
+    /// Null rows become empty messages. Fails when the column is missing or is not Utf8.
+    fn serialize_raw_string(&self, batch: &RecordBatch) -> Result<Vec<Vec<u8>>> {
+        let schema = batch.schema();
+        let value_idx = schema
+            .index_of("value")
+            .map_err(|_| anyhow!("RawString format requires a 'value' column"))?;
+
+        if *schema.field(value_idx).data_type() != DataType::Utf8 {
+            return Err(anyhow!("RawString 'value' column must be Utf8"));
+        }
+
+        // The data-type check above is what makes this downcast safe to unwrap.
+        let strings = batch
+            .column(value_idx)
+            .as_any()
+            .downcast_ref::<arrow_array::StringArray>()
+            .unwrap();
+
+        let messages = strings
+            .iter()
+            .map(|cell| cell.map(|text| text.as_bytes().to_vec()).unwrap_or_default())
+            .collect();
+        Ok(messages)
+    }
+
+    /// Emits the Binary `value` column verbatim, one message per row.
+    ///
+    /// Null rows become empty messages. Fails when the column is missing or is not Binary.
+    fn serialize_raw_bytes(&self, batch: &RecordBatch) -> Result<Vec<Vec<u8>>> {
+        let schema = batch.schema();
+        let value_idx = schema
+            .index_of("value")
+            .map_err(|_| anyhow!("RawBytes format requires a 'value' column"))?;
+
+        if *schema.field(value_idx).data_type() != DataType::Binary {
+            return Err(anyhow!("RawBytes 'value' column must be Binary"));
+        }
+
+        // The data-type check above is what makes this downcast safe to unwrap.
+        let blobs = batch
+            .column(value_idx)
+            .as_any()
+            .downcast_ref::<arrow_array::BinaryArray>()
+            .unwrap();
+
+        let messages = blobs
+            .iter()
+            .map(|cell| cell.map(|raw| raw.to_vec()).unwrap_or_default())
+            .collect();
+        Ok(messages)
+    }
+}
diff --git a/src/runtime/streaming/operators/key_by.rs b/src/runtime/streaming/operators/key_by.rs
new file mode 100644
index 00000000..2c183577
--- /dev/null
+++ b/src/runtime/streaming/operators/key_by.rs
@@ -0,0 +1,159 @@
+//! Physical network routing operator: extracts keys with DataFusion physical expressions, sorts rows by key hash, and routes contiguous zero-copy slices.
+
+use anyhow::{anyhow, Result};
+use arrow_array::{Array, RecordBatch, UInt64Array};
+use arrow::compute::{sort_to_indices, take};
+use async_trait::async_trait;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion_physical_expr::expressions::Column;
+use datafusion_common::hash_utils::create_hashes;
+use std::sync::Arc;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{CheckpointBarrier, Watermark};
+
+use protocol::grpc::api::KeyPlanOperator;
+
+/// Routing operator that hashes the configured key expressions and emits hash-tagged slices.
+pub struct KeyByOperator {
+    name: String,
+    key_extractors: Vec<Arc<dyn PhysicalExpr>>,
+    random_state: ahash::RandomState,
+}
+
+impl KeyByOperator {
+    /// Creates the operator with a fixed-seed hasher: equal keys must hash identically in every
+    /// parallel instance and after restarts, otherwise one key is routed to several partitions.
+    pub fn new(name: String, key_extractors: Vec<Arc<dyn PhysicalExpr>>) -> Self {
+        let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
+        Self { name, key_extractors, random_state }
+    }
+}
+
+#[async_trait]
+impl MessageOperator for KeyByOperator {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
+        Ok(())
+    }
+
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let num_rows = batch.num_rows();
+        if num_rows == 0 {
+            return Ok(vec![]);
+        }
+
+        // 1. Evaluate the physical expressions to extract every key column
+        let mut key_columns = Vec::with_capacity(self.key_extractors.len());
+        for expr in &self.key_extractors {
+            let column_array = expr
+                .evaluate(&batch)
+                .map_err(|e| anyhow!("Failed to evaluate key expr: {}", e))?
+                .into_array(num_rows)
+                .map_err(|e| anyhow!("Failed to convert into array: {}", e))?;
+            key_columns.push(column_array);
+        }
+
+        // 2. Compute the per-row hash array in one vectorized pass
+        let mut hash_buffer = vec![0u64; num_rows];
+        create_hashes(&key_columns, &self.random_state, &mut hash_buffer)
+            .map_err(|e| anyhow!("Failed to compute hashes: {}", e))?;
+
+        let hash_array = UInt64Array::from(hash_buffer);
+
+        // 3. Sort by hash value to obtain the reordering indices
+        let sorted_indices = sort_to_indices(&hash_array, None, None)
+            .map_err(|e| anyhow!("Failed to sort hashes: {}", e))?;
+
+        // 4. Reorder the hash array and the original batch with the same indices
+        let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?;
+        let sorted_hashes = sorted_hashes_ref
+            .as_any()
+            .downcast_ref::<UInt64Array>()
+            .unwrap();
+
+        let sorted_columns: std::result::Result<Vec<_>, _> = batch
+            .columns()
+            .iter()
+            .map(|col| take(col, &sorted_indices, None))
+            .collect();
+        let sorted_batch = RecordBatch::try_new(batch.schema(), sorted_columns?)?;
+
+        // 5. Slice the sorted batch into runs of equal hash and tag each slice with its hash as the routing key
+        let mut outputs = Vec::new();
+        let mut start_idx = 0;
+
+        while start_idx < num_rows {
+            let current_hash = sorted_hashes.value(start_idx);
+            let mut end_idx = start_idx + 1;
+            while end_idx < num_rows && sorted_hashes.value(end_idx) == current_hash {
+                end_idx += 1;
+            }
+
+            let sub_batch = sorted_batch.slice(start_idx, end_idx - start_idx);
+            outputs.push(StreamOutput::Keyed(current_hash, sub_batch));
+            start_idx = end_idx;
+        }
+
+        Ok(outputs)
+    }
+
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![StreamOutput::Watermark(watermark)])
+    }
+
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        _ctx: &mut TaskContext,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Constructor
+// ---------------------------------------------------------------------------
+
+pub struct KeyByConstructor;
+
+impl KeyByConstructor {
+    /// Builds a [`KeyByOperator`] keyed on the columns at `config.key_fields` (each referenced
+    /// positionally as `col_<index>`); an empty `config.name` falls back to `"KeyBy"`.
+    pub fn with_config(&self, config: KeyPlanOperator) -> Result<KeyByOperator> {
+        let key_extractors = config
+            .key_fields
+            .iter()
+            .map(|&field| {
+                let position = field as usize;
+                let column = Column::new(&format!("col_{}", position), position);
+                Arc::new(column) as Arc<dyn PhysicalExpr>
+            })
+            .collect::<Vec<_>>();
+        let name = if !config.name.is_empty() {
+            config.name.clone()
+        } else {
+            "KeyBy".to_string()
+        };
+        Ok(KeyByOperator::new(name, key_extractors))
+    }
+}
+
diff --git a/src/runtime/streaming/storage/backend.rs b/src/runtime/streaming/storage/backend.rs
new file mode 100644
index 00000000..265b99ca
--- /dev/null
+++ b/src/runtime/streaming/storage/backend.rs
@@ -0,0 +1,78 @@
+use anyhow::Result;
+use async_trait::async_trait;
+
+#[derive(Default, Debug, Clone)]
+pub struct CheckpointMetadata {
+    pub job_id: String,
+    pub epoch: u32,
+    pub min_epoch: u32,
+    pub operator_ids: Vec<String>,
+}
+
+#[derive(Default, Debug, Clone)]
+pub struct OperatorCheckpointMetadata {
+    pub operator_id: String,
+    pub epoch: u32,
+}
+
+#[async_trait]
+pub trait BackingStore: Send + Sync + 'static {
+    fn name() -> &'static str;
+    async fn load_checkpoint_metadata(job_id: &str, epoch: u32) -> Result<CheckpointMetadata>;
+    async fn load_operator_metadata(
+        job_id: &str,
+        operator_id: &str,
+        epoch: u32,
+    ) -> Result<Option<OperatorCheckpointMetadata>>;
+    async fn write_operator_checkpoint_metadata(
+        metadata: OperatorCheckpointMetadata,
+    ) -> Result<()>;
+    async fn write_checkpoint_metadata(metadata: CheckpointMetadata) -> Result<()>;
+    async fn cleanup_checkpoint(
+        metadata: CheckpointMetadata,
+        old_min_epoch: u32,
+        new_min_epoch: u32,
+    ) -> Result<()>;
+}
+
+pub struct ParquetStateBackend;
+
+#[async_trait]
+impl BackingStore for ParquetStateBackend {
+    fn name() -> &'static str {
+        "parquet"
+    }
+
+    async fn load_checkpoint_metadata(
+        _job_id: &str,
+        _epoch: u32,
+    ) -> Result<CheckpointMetadata> {
+        Ok(CheckpointMetadata::default())
+    }
+
+    async fn load_operator_metadata(
+        _job_id: &str,
+        _operator_id: &str,
+        _epoch: u32,
+    ) -> Result<Option<OperatorCheckpointMetadata>> {
+        Ok(None)
+    }
+
+    async fn write_operator_checkpoint_metadata(
+        _metadata: OperatorCheckpointMetadata,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    async fn write_checkpoint_metadata(_metadata: CheckpointMetadata) -> Result<()> {
+        Ok(())
+    }
+
+    async fn cleanup_checkpoint(
+        _metadata: CheckpointMetadata,
+        _old_min_epoch: u32,
+        _new_min_epoch: u32,
+    ) -> Result<()> {
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/storage/manager.rs b/src/runtime/streaming/storage/manager.rs
new file mode 100644
index 00000000..2aa79e6b
--- /dev/null
+++ b/src/runtime/streaming/storage/manager.rs
@@ -0,0 +1,156 @@
+use anyhow::{Result, anyhow};
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::SystemTime;
+
+use super::table::TaskInfo;
+use super::{DummyStorageProvider, StorageProviderRef};
+
+#[derive(Default)]
+pub struct GlobalKeyedView<K, V> {
+    data: HashMap<K, V>,
+}
+
+impl<K: Eq + std::hash::Hash, V> GlobalKeyedView<K, V> {
+    pub async fn insert(&mut self, key: K, value: V) {
+        self.data.insert(key, value);
+    }
+
+    pub fn get(&self, key: &K) -> Option<&V> {
+        self.data.get(key)
+    }
+
+    pub fn get_all(&self) -> &HashMap<K, V> {
+        &self.data
+    }
+}
+
+#[derive(Default)]
+pub struct ExpiringTimeKeyView;
+
+impl ExpiringTimeKeyView {
+    pub fn insert(&mut self, _timestamp: SystemTime, _batch: arrow_array::RecordBatch) {}
+
+    pub fn all_batches_for_watermark(
+        &self,
+        _watermark: Option<SystemTime>,
+    ) -> std::iter::Empty<(&SystemTime, &Vec<arrow_array::RecordBatch>)> {
+        std::iter::empty()
+    }
+
+    pub async fn flush(&mut self, _watermark: Option<SystemTime>) -> Result<()> {
+        Ok(())
+    }
+}
+
+#[derive(Default)]
+pub struct KeyTimeView;
+
+impl KeyTimeView {
+    pub async fn insert(
+        &mut self,
+        _batch: arrow_array::RecordBatch,
+    ) -> Result<Vec<arrow_array::types::UInt64Type>> {
+        Ok(vec![])
+    }
+
+    pub fn get_batch(&self, _key: &[u8]) -> Result<Option<arrow_array::RecordBatch>> {
+        Ok(None)
+    }
+}
+
+pub struct BackendWriter {}
+
+pub struct TableManager {
+    epoch: u32,
+    min_epoch: u32,
+    writer: BackendWriter,
+    task_info: Arc<TaskInfo>,
+    storage: StorageProviderRef,
+    caches: HashMap<String, Box<dyn std::any::Any + Send>>,
+}
+
+impl TableManager {
+    /// Loads the state backend. Stub: returns a default, empty manager (epoch 1) paired with `None`.
+    pub async fn load(task_info: Arc<TaskInfo>) -> Result<(Self, Option<SystemTime>)> {
+        let manager = Self {
+            epoch: 1,
+            min_epoch: 1,
+            writer: BackendWriter {},
+            task_info,
+            storage: Arc::new(DummyStorageProvider),
+            caches: HashMap::new(),
+        };
+        Ok((manager, None))
+    }
+
+    /// Invoked when a CheckpointBarrier is received; currently a no-op.
+    pub async fn checkpoint(
+        &mut self,
+        _epoch: u32,
+        _watermark: Option<SystemTime>,
+        _then_stop: bool,
+    ) {
+    }
+
+    /// Operator-facing API: returns the global key-value table `table_name`, creating it on first use.
+    pub async fn get_global_keyed_state<
+        K: Eq + std::hash::Hash + Send + 'static,
+        V: Send + 'static,
+    >(
+        &mut self,
+        table_name: &str,
+    ) -> Result<&mut GlobalKeyedView<K, V>> {
+        if !self.caches.contains_key(table_name) {
+            let view: Box<dyn std::any::Any + Send> =
+                Box::new(GlobalKeyedView::<K, V> { data: HashMap::new() });
+            self.caches.insert(table_name.to_string(), view);
+        }
+        // The entry was inserted above when missing, so this lookup cannot fail.
+        let cache = self.caches.get_mut(table_name).unwrap();
+
+        let view = cache
+            .downcast_mut::<GlobalKeyedView<K, V>>()
+            .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?;
+
+        Ok(view)
+    }
+
+    /// Operator-facing API: returns the time-keyed table with TTL `table_name`, creating it on first use.
+    pub async fn get_expiring_time_key_table(
+        &mut self,
+        table_name: &str,
+        _watermark: Option<SystemTime>,
+    ) -> Result<&mut ExpiringTimeKeyView> {
+        if !self.caches.contains_key(table_name) {
+            let view: Box<dyn std::any::Any + Send> = Box::new(ExpiringTimeKeyView::default());
+            self.caches.insert(table_name.to_string(), view);
+        }
+        // The entry was inserted above when missing, so this lookup cannot fail.
+        let cache = self.caches.get_mut(table_name).unwrap();
+        let view = cache
+            .downcast_mut::<ExpiringTimeKeyView>()
+            .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?;
+
+        Ok(view)
+    }
+
+    /// Operator-facing API: returns the standard key-time two-level table `table_name`, creating it on first use.
+    pub async fn get_key_time_table(
+        &mut self,
+        table_name: &str,
+        _watermark: Option<SystemTime>,
+    ) -> Result<&mut KeyTimeView> {
+        if !self.caches.contains_key(table_name) {
+            let view: Box<dyn std::any::Any + Send> = Box::new(KeyTimeView::default());
+            self.caches.insert(table_name.to_string(), view);
+        }
+        // The entry was inserted above when missing, so this lookup cannot fail.
+        let cache = self.caches.get_mut(table_name).unwrap();
+        let view = cache
+            .downcast_mut::<KeyTimeView>()
+            .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?;
+
+        Ok(view)
+    }
+}
diff --git a/src/runtime/streaming/storage/table.rs b/src/runtime/streaming/storage/table.rs
new file mode 100644
index 00000000..4b37ec4a
--- /dev/null
+++ b/src/runtime/streaming/storage/table.rs
@@ -0,0 +1,91 @@
+use anyhow::Result;
+use arrow_array::RecordBatch;
+use async_trait::async_trait;
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+#[derive(Default)]
+pub struct TaskInfo {
+    pub job_id: String,
+    pub operator_id: String,
+    pub task_index: u32,
+}
+
+#[derive(Debug)]
+pub enum TableData {
+    RecordBatch(RecordBatch),
+    CommitData { data: Vec<u8> },
+    KeyedData { key: Vec<u8>, value: Vec<u8> },
+}
+
+pub struct CheckpointMessage {
+    pub epoch: u32,
+    pub time: std::time::SystemTime,
+    pub watermark: Option<std::time::SystemTime>,
+    pub then_stop: bool,
+}
+
+#[async_trait]
+pub trait TableEpochCheckpointer: Send + 'static {
+    type SubTableCheckpointMessage: prost::Message + Default;
+
+    async fn insert_data(&mut self, _data: TableData) -> Result<()> {
+        Ok(())
+    }
+
+    async fn finish(
+        self: Box<Self>,
+        _checkpoint: &CheckpointMessage,
+    ) -> Result<Option<(Self::SubTableCheckpointMessage, usize)>> {
+        Ok(None)
+    }
+
+    fn subtask_index(&self) -> u32;
+}
+
+#[async_trait]
+pub trait Table: Send + Sync + 'static + Clone {
+    type Checkpointer: TableEpochCheckpointer<
+        SubTableCheckpointMessage = Self::TableSubtaskCheckpointMetadata,
+    >;
+    type ConfigMessage: prost::Message + Default;
+    type TableCheckpointMessage: prost::Message + Default + Clone;
+    type TableSubtaskCheckpointMetadata: prost::Message + Default + Clone;
+
+    fn from_config(
+        _config: Self::ConfigMessage,
+        _task_info: Arc<TaskInfo>,
+        _storage_provider: super::StorageProviderRef,
+        _checkpoint_message: Option<Self::TableCheckpointMessage>,
+        _state_version: u32,
+    ) -> Result<Self>
+    where
+        Self: Sized;
+
+    fn epoch_checkpointer(
+        &self,
+        _epoch: u32,
+        _previous_metadata: Option<Self::TableSubtaskCheckpointMetadata>,
+    ) -> Result<Self::Checkpointer>;
+
+    fn merge_checkpoint_metadata(
+        _config: Self::ConfigMessage,
+        _subtask_metadata: HashMap<u32, Self::TableSubtaskCheckpointMetadata>,
+    ) -> Result<Option<Self::TableCheckpointMessage>> {
+        Ok(None)
+    }
+
+    fn subtask_metadata_from_table(
+        &self,
+        _table_metadata: Self::TableCheckpointMessage,
+    ) -> Result<Option<Self::TableSubtaskCheckpointMetadata>> {
+        Ok(None)
+    }
+
+    fn files_to_keep(
+        _config: Self::ConfigMessage,
+        _checkpoint: Self::TableCheckpointMessage,
+    ) -> Result<HashSet<String>> {
+        Ok(HashSet::new())
+    }
+}

From ee03dc83b92edf11c701728368786c2b032d5f98 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 25 Mar 2026 00:34:58 +0800
Subject: [PATCH 18/44] update

---
 src/runtime/streaming/api/operator.rs    | 5 +++++
 src/runtime/streaming/cluster/manager.rs | 2 +-
 src/runtime/streaming/cluster/master.rs  | 5 ++---
 src/sql/logical_node/logical/mod.rs      | 1 +
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs
index 3cd5a316..6eb49d2c 100644
--- a/src/runtime/streaming/api/operator.rs
+++ b/src/runtime/streaming/api/operator.rs
@@ -8,6 +8,7 @@ use datafusion::common::Result as DfResult;
 use datafusion::execution::context::SessionContext;
 use datafusion::execution::FunctionRegistry;
 use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF};
+use datafusion::logical_expr::planner::ExprPlanner;
 use std::collections::HashSet;
 use std::sync::Arc;
 use std::time::Duration;
@@ -59,6 +60,10 @@ impl FunctionRegistry for Registry {
     fn udwf(&self, name: &str) -> DfResult<Arc<WindowUDF>> {
         self.ctx.udwf(name)
     }
+
+    fn expr_planners(&self) -> Vec<Arc<dyn ExprPlanner>> {
+        self.ctx.expr_planners()
+    }
 }
 
 // ---------------------------------------------------------------------------
diff --git a/src/runtime/streaming/cluster/manager.rs b/src/runtime/streaming/cluster/manager.rs
index ce8ec881..34045dee 100644
--- a/src/runtime/streaming/cluster/manager.rs
+++ b/src/runtime/streaming/cluster/manager.rs
@@ -7,7 +7,7 @@ use crate::runtime::streaming::factory::OperatorFactory;
 use crate::runtime::streaming::memory::MemoryPool;
 use crate::runtime::streaming::network::NetworkEnvironment;
 use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
-use arroyo_state::tables::table_manager::TableManager;
+use crate::runtime::streaming::storage::manager::TableManager;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::mpsc::{channel, Sender};
diff --git a/src/runtime/streaming/cluster/master.rs b/src/runtime/streaming/cluster/master.rs
index 5817643d..e456d8e3 100644
--- a/src/runtime/streaming/cluster/master.rs
+++ b/src/runtime/streaming/cluster/master.rs
@@ -6,10 +6,9 @@ use crate::runtime::streaming::cluster::graph::{
     PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId,
 };
 
-use arroyo_datastream::logical::{LogicalEdgeType, LogicalGraph, OperatorChain};
 use petgraph::Direction;
 use sha2::{Digest, Sha256};
-use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph};
+use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, OperatorChain};
 
 #[derive(thiserror::Error, Debug)]
 pub enum CompileError {
@@ -167,7 +166,7 @@ impl JobCompiler {
                 LogicalEdgeType::Shuffle
                 | LogicalEdgeType::LeftJoin
                 | LogicalEdgeType::RightJoin => {
-                    if let Some(key_indices) = edge.schema.key_indices.as_ref() {
+                    if let Some(key_indices) = edge.schema.storage_keys() {
                         if !key_indices.is_empty() {
                             PartitioningStrategy::HashByKeys(key_indices.clone())
                         } else {
diff --git a/src/sql/logical_node/logical/mod.rs b/src/sql/logical_node/logical/mod.rs
index 3a94d1f3..ab318804 100644
--- a/src/sql/logical_node/logical/mod.rs
+++ b/src/sql/logical_node/logical/mod.rs
@@ -26,6 +26,7 @@ pub use logical_edge::{LogicalEdge, LogicalEdgeType};
 pub use logical_graph::{LogicalGraph, Optimizer};
 pub use logical_node::LogicalNode;
 pub use logical_program::LogicalProgram;
+pub use operator_chain::OperatorChain;
 pub use operator_name::OperatorName;
 pub use program_config::ProgramConfig;
 pub use python_udf_config::PythonUdfConfig;

From 5dc090c2e09f5b0ac480d645ed1b768fc269f8fa Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Thu, 26 Mar 2026 00:53:26 +0800
Subject: [PATCH 19/44] update

---
 .../streaming/operators/key_operator.rs       | 289 ++++++++++++++++++
 src/runtime/streaming/operators/mod.rs        |   6 +
 src/runtime/streaming/operators/projection.rs |  81 +++++
 .../operators/stateless_physical_executor.rs  |  77 +++++
 .../streaming/operators/value_execution.rs    |  66 ++++
 5 files changed, 519 insertions(+)
 create mode 100644 src/runtime/streaming/operators/key_operator.rs
 create mode 100644 src/runtime/streaming/operators/projection.rs
 create mode 100644 src/runtime/streaming/operators/stateless_physical_executor.rs
 create mode 100644 src/runtime/streaming/operators/value_execution.rs

diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/runtime/streaming/operators/key_operator.rs
new file mode 100644
index 00000000..93a50db5
--- /dev/null
+++ b/src/runtime/streaming/operators/key_operator.rs
@@ -0,0 +1,289 @@
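+//! Physical routing operators: extract keys via DataFusion physical expressions and route zero-copy slices sorted by hash.
+//!
+//! Two operators are provided:
+//! - [`KeyByOperator`]: pure key extraction + hash routing, for simple GROUP BY / PARTITION BY.
+//! - [`KeyExecutionOperator`]: runs a full physical plan first, then routes by hash of the given
+//!   columns, for cases that must compute (e.g. map aggregate results) before partitioning.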
+
+use anyhow::{anyhow, Result};
+use arrow_array::{Array, ArrayRef, RecordBatch, UInt64Array};
+use arrow::compute::{sort_to_indices, take};
+use async_trait::async_trait;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion_physical_expr::expressions::Column;
+use datafusion_common::hash_utils::create_hashes;
+use futures::StreamExt;
+use std::sync::Arc;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::operators::StatelessPhysicalExecutor;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{CheckpointBarrier, Watermark};
+
+use protocol::grpc::api::KeyPlanOperator;
+
+pub struct KeyByOperator {
+    name: String,
+    key_extractors: Vec<Arc<dyn PhysicalExpr>>,
+    random_state: ahash::RandomState,
+}
+
+impl KeyByOperator {
+    /// Creates a key-by operator named `name` that hashes the values of `key_extractors`.
+    pub fn new(name: String, key_extractors: Vec<Arc<dyn PhysicalExpr>>) -> Self {
+        // Fixed seeds: `RandomState::new()` draws random keys per instance, so two subtasks
+        // (or a restarted job) would hash the same key to different routing values.
+        let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
+        Self { name, key_extractors, random_state }
+    }
+}
+
+#[async_trait]
+impl MessageOperator for KeyByOperator {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
+        Ok(())
+    }
+
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let num_rows = batch.num_rows();
+        if num_rows == 0 {
+            return Ok(vec![]);
+        }
+
+        // 1. Evaluate the physical expressions to extract every key column
+        let mut key_columns = Vec::with_capacity(self.key_extractors.len());
+        for expr in &self.key_extractors {
+            let column_array = expr
+                .evaluate(&batch)
+                .map_err(|e| anyhow!("Failed to evaluate key expr: {}", e))?
+                .into_array(num_rows)
+                .map_err(|e| anyhow!("Failed to convert into array: {}", e))?;
+            key_columns.push(column_array);
+        }
+
+        // 2. Compute the per-row hash array in one vectorized pass
+        let mut hash_buffer = vec![0u64; num_rows];
+        create_hashes(&key_columns, &self.random_state, &mut hash_buffer)
+            .map_err(|e| anyhow!("Failed to compute hashes: {}", e))?;
+
+        let hash_array = UInt64Array::from(hash_buffer);
+
+        // 3. Sort by hash value to obtain the reordering indices
+        let sorted_indices = sort_to_indices(&hash_array, None, None)
+            .map_err(|e| anyhow!("Failed to sort hashes: {}", e))?;
+
+        // 4. Reorder the hash array and the original batch with those indices so they stay aligned
+        let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?;
+        let sorted_hashes = sorted_hashes_ref
+            .as_any()
+            .downcast_ref::<UInt64Array>()
+            .unwrap();
+
+        let sorted_columns: std::result::Result<Vec<_>, _> = batch
+            .columns()
+            .iter()
+            .map(|col| take(col, &sorted_indices, None))
+            .collect();
+        let sorted_batch = RecordBatch::try_new(batch.schema(), sorted_columns?)?;
+
+        // 5. Zero-copy micro-batch slicing: split on runs of equal hash and tag each slice as Keyed
+        let mut outputs = Vec::new();
+        let mut start_idx = 0;
+
+        while start_idx < num_rows {
+            let current_hash = sorted_hashes.value(start_idx);
+            let mut end_idx = start_idx + 1;
+            while end_idx < num_rows && sorted_hashes.value(end_idx) == current_hash {
+                end_idx += 1;
+            }
+
+            let sub_batch = sorted_batch.slice(start_idx, end_idx - start_idx);
+            outputs.push(StreamOutput::Keyed(current_hash, sub_batch));
+            start_idx = end_idx;
+        }
+
+        Ok(outputs)
+    }
+
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![StreamOutput::Watermark(watermark)])
+    }
+
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        _ctx: &mut TaskContext,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Constructor
+// ---------------------------------------------------------------------------
+
+pub struct KeyByConstructor;
+
+impl KeyByConstructor {
+    /// Builds a [`KeyByOperator`] whose key expressions are plain column references.
+    ///
+    /// Every index in `config.key_fields` becomes a `Column` named `col_<idx>`; an empty
+    /// `config.name` falls back to `"KeyBy"`.
+    pub fn with_config(&self, config: KeyPlanOperator) -> Result<KeyByOperator> {
+        let key_extractors: Vec<Arc<dyn PhysicalExpr>> = config
+            .key_fields
+            .iter()
+            .map(|&field| {
+                let idx = field as usize;
+                Arc::new(Column::new(&format!("col_{}", idx), idx)) as Arc<dyn PhysicalExpr>
+            })
+            .collect();
+        let name = match config.name.as_str() {
+            "" => "KeyBy".to_string(),
+            custom => custom.to_string(),
+        };
+        Ok(KeyByOperator::new(name, key_extractors))
+    }
+}
+
+// ===========================================================================
+// KeyExecutionOperator — run the physical plan first, then route by hash of the key columns
+// ===========================================================================
+
+/// Keyed-routing execution operator: first drives a DataFusion physical plan to do the
+/// computation (e.g. mapping aggregate results), then hashes the columns named by `key_fields`
+/// and emits [`StreamOutput::Keyed`] outputs, partitioning inside the operator.
+pub struct KeyExecutionOperator {
+    name: String,
+    executor: StatelessPhysicalExecutor,
+    key_fields: Vec<usize>,
+    random_state: ahash::RandomState,
+}
+
+impl KeyExecutionOperator {
+    /// Creates an operator that runs `executor` and keys its output by the `key_fields` columns.
+    ///
+    /// The hasher uses fixed seeds: `ahash::RandomState::new()` is keyed randomly per instance,
+    /// so parallel subtasks or a restarted job would hash the same key to different values.
+    pub fn new(name: String, executor: StatelessPhysicalExecutor, key_fields: Vec<usize>) -> Self {
+        Self {
+            name,
+            executor,
+            key_fields,
+            random_state: ahash::RandomState::with_seeds(0, 0, 0, 0),
+        }
+    }
+}
+
+#[async_trait]
+impl MessageOperator for KeyExecutionOperator {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let mut outputs = Vec::new();
+
+        // 1. Run the physical transformation
+        let mut stream = self.executor.process_batch(batch).await?;
+
+        while let Some(batch_result) = stream.next().await {
+            let out_batch = batch_result?;
+            let num_rows = out_batch.num_rows();
+            if num_rows == 0 {
+                continue;
+            }
+
+            // 2. Extract the key columns (`column` panics on an out-of-range index) and hash them
+            let key_columns: Vec<ArrayRef> = self
+                .key_fields
+                .iter()
+                .map(|&idx| out_batch.column(idx).clone())
+                .collect();
+
+            let mut hash_buffer = vec![0u64; num_rows];
+            create_hashes(&key_columns, &self.random_state, &mut hash_buffer)
+                .map_err(|e| anyhow!("hash compute: {e}"))?;
+            let hash_array = UInt64Array::from(hash_buffer);
+
+            // 3. Sort by hash to obtain the reordering indices
+            let sorted_indices = sort_to_indices(&hash_array, None, None)
+                .map_err(|e| anyhow!("sort hashes: {e}"))?;
+
+            let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?;
+            let sorted_hashes = sorted_hashes_ref
+                .as_any()
+                .downcast_ref::<UInt64Array>()
+                .unwrap();
+
+            let sorted_columns: std::result::Result<Vec<_>, _> = out_batch
+                .columns()
+                .iter()
+                .map(|col| take(col, &sorted_indices, None))
+                .collect();
+            let sorted_batch =
+                RecordBatch::try_new(out_batch.schema(), sorted_columns?)?;
+
+            // 4. Zero-copy slicing: group runs of equal hash and tag each slice as Keyed
+            let mut start_idx = 0;
+            while start_idx < num_rows {
+                let current_hash = sorted_hashes.value(start_idx);
+                let mut end_idx = start_idx + 1;
+                while end_idx < num_rows
+                    && sorted_hashes.value(end_idx) == current_hash
+                {
+                    end_idx += 1;
+                }
+
+                let sub_batch = sorted_batch.slice(start_idx, end_idx - start_idx);
+                outputs.push(StreamOutput::Keyed(current_hash, sub_batch));
+                start_idx = end_idx;
+            }
+        }
+        Ok(outputs)
+    }
+
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![StreamOutput::Watermark(watermark)])
+    }
+
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        _ctx: &mut TaskContext,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
+        Ok(vec![])
+    }
+}
+
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
index 66d3e892..e3c0f566 100644
--- a/src/runtime/streaming/operators/mod.rs
+++ b/src/runtime/streaming/operators/mod.rs
@@ -7,6 +7,12 @@ pub mod sink;
 pub mod source;
 pub mod watermark;
 pub mod windows;
+mod key_operator;
+mod projection;
+mod stateless_physical_executor;
+mod value_execution;
+
+pub use stateless_physical_executor::StatelessPhysicalExecutor;
 
 pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache};
 pub use joins::{
diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs
new file mode 100644
index 00000000..5e62afc6
--- /dev/null
+++ b/src/runtime/streaming/operators/projection.rs
@@ -0,0 +1,81 @@
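+//! High-performance projection operator: performs column mapping and scalar computation directly on Arrow
+//! arrays, avoiding DataFusion execution-tree overhead; suited to SELECT column pruning and simple scalar math.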
+
+use anyhow::Result;
+use arrow_array::RecordBatch;
+use async_trait::async_trait;
+use datafusion::physical_expr::PhysicalExpr;
+use std::sync::Arc;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{CheckpointBarrier, FsSchemaRef, Watermark};
+
+pub struct ProjectionOperator {
+    name: String,
+    output_schema: FsSchemaRef,
+    exprs: Vec<Arc<dyn PhysicalExpr>>,
+}
+
+impl ProjectionOperator {
+    pub fn new(
+        name: String,
+        output_schema: FsSchemaRef,
+        exprs: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Self {
+        Self {
+            name,
+            output_schema,
+            exprs,
+        }
+    }
+}
+
+#[async_trait]
+impl MessageOperator for ProjectionOperator {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        if batch.num_rows() == 0 {
+            return Ok(vec![]);
+        }
+
+        let projected_columns = self
+            .exprs
+            .iter()
+            .map(|expr| {
+                expr.evaluate(&batch)
+                    .and_then(|val| val.into_array(batch.num_rows()))
+            })
+            .collect::<datafusion::common::Result<Vec<_>>>()?;
+
+        let out_batch =
+            RecordBatch::try_new(self.output_schema.schema.clone(), projected_columns)?;
+
+        Ok(vec![StreamOutput::Forward(out_batch)])
+    }
+
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![StreamOutput::Watermark(watermark)])
+    }
+
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        _ctx: &mut TaskContext,
+    ) -> Result<()> {
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs
new file mode 100644
index 00000000..9e801188
--- /dev/null
+++ b/src/runtime/streaming/operators/stateless_physical_executor.rs
@@ -0,0 +1,77 @@
+//! Stateless physical-plan executor: writes a single batch into `SingleLockedBatch` for a DataFusion plan to consume.
+
+use std::sync::{Arc, RwLock};
+
+use anyhow::{anyhow, Result};
+use arrow_array::RecordBatch;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::execution::SendableRecordBatchStream;
+use datafusion::execution::TaskContext;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion_proto::physical_plan::AsExecutionPlan;
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use futures::StreamExt;
+use prost::Message;
+
+use crate::runtime::streaming::api::operator::Registry;
+use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+
+pub struct StatelessPhysicalExecutor {
+    batch: Arc<RwLock<Option<RecordBatch>>>,
+    plan: Arc<dyn ExecutionPlan>,
+    task_context: Arc<TaskContext>,
+}
+
+impl StatelessPhysicalExecutor {
+    pub fn new(mut proto: &[u8], registry: &Registry) -> Result<Self> {
+        let batch = Arc::new(RwLock::default());
+
+        let plan_node = PhysicalPlanNode::decode(&mut proto)
+            .map_err(|e| anyhow!("decode PhysicalPlanNode: {e}"))?;
+        let codec = FsPhysicalExtensionCodec {
+            context: DecodingContext::SingleLockedBatch(batch.clone()),
+        };
+
+        let plan = plan_node.try_into_physical_plan(
+            registry,
+            &RuntimeEnvBuilder::new().build()?,
+            &codec,
+        )?;
+
+        Ok(Self {
+            batch,
+            plan,
+            task_context: SessionContext::new().task_ctx(),
+        })
+    }
+
+    pub async fn process_batch(&mut self, batch: RecordBatch) -> Result<SendableRecordBatchStream> {
+        {
+            let mut writer = self
+                .batch
+                .write()
+                .map_err(|e| anyhow!("SingleLockedBatch lock: {e}"))?;
+            *writer = Some(batch);
+        }
+        self.plan
+            .reset()
+            .map_err(|e| anyhow!("reset execution plan: {e}"))?;
+        self.plan
+            .execute(0, self.task_context.clone())
+            .map_err(|e| anyhow!("failed to compute plan: {e}"))
+    }
+
+    pub async fn process_single(&mut self, batch: RecordBatch) -> Result<RecordBatch> {
+        let mut stream = self.process_batch(batch).await?;
+        let result = stream
+            .next()
+            .await
+            .ok_or_else(|| anyhow!("empty output stream"))??;
+        anyhow::ensure!(
+            stream.next().await.is_none(),
+            "expected exactly one output batch"
+        );
+        Ok(result)
+    }
+}
diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/runtime/streaming/operators/value_execution.rs
new file mode 100644
index 00000000..b9fb0cd8
--- /dev/null
+++ b/src/runtime/streaming/operators/value_execution.rs
@@ -0,0 +1,66 @@
+//! General-purpose stateless execution operator: drives a DataFusion physical plan (Filter, CASE WHEN,
+//! scalar UDFs, etc.) without changing partitioning; suited to Map / Filter stages.
+
+use anyhow::Result;
+use arrow_array::RecordBatch;
+use async_trait::async_trait;
+use futures::StreamExt;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::operators::StatelessPhysicalExecutor;
+use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::{CheckpointBarrier, Watermark};
+
+pub struct ValueExecutionOperator {
+    name: String,
+    executor: StatelessPhysicalExecutor,
+}
+
+impl ValueExecutionOperator {
+    pub fn new(name: String, executor: StatelessPhysicalExecutor) -> Self {
+        Self { name, executor }
+    }
+}
+
+#[async_trait]
+impl MessageOperator for ValueExecutionOperator {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    async fn process_data(
+        &mut self,
+        _input_idx: usize,
+        batch: RecordBatch,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        let mut outputs = Vec::new();
+
+        let mut stream = self.executor.process_batch(batch).await?;
+
+        while let Some(batch_result) = stream.next().await {
+            let out_batch = batch_result?;
+            if out_batch.num_rows() > 0 {
+                outputs.push(StreamOutput::Forward(out_batch));
+            }
+        }
+        Ok(outputs)
+    }
+
+    async fn process_watermark(
+        &mut self,
+        watermark: Watermark,
+        _ctx: &mut TaskContext,
+    ) -> Result<Vec<StreamOutput>> {
+        Ok(vec![StreamOutput::Watermark(watermark)])
+    }
+
+    async fn snapshot_state(
+        &mut self,
+        _barrier: CheckpointBarrier,
+        _ctx: &mut TaskContext,
+    ) -> Result<()> {
+        Ok(())
+    }
+}

From 97b978e0b35afcd90e8e1149035939fd2ad346a0 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Thu, 26 Mar 2026 21:35:43 +0800
Subject: [PATCH 20/44] update

---
 src/runtime/streaming/job/edge_manager.rs    |  41 ++++
 src/runtime/streaming/job/job_manager.rs     | 198 +++++++++++++++
 src/runtime/streaming/job/mod.rs             |   7 +
 src/runtime/streaming/job/models.rs          |  35 +++
 src/runtime/streaming/job/pipeline_runner.rs | 242 +++++++++++++++++++
 src/runtime/streaming/lib.rs                 |   2 +
 src/runtime/streaming/mod.rs                 |   3 +-
 src/runtime/streaming/state/mod.rs           |   0
 src/runtime/streaming/state/table_manager.rs |   0
 9 files changed, 527 insertions(+), 1 deletion(-)
 create mode 100644 src/runtime/streaming/job/edge_manager.rs
 create mode 100644 src/runtime/streaming/job/job_manager.rs
 create mode 100644 src/runtime/streaming/job/mod.rs
 create mode 100644 src/runtime/streaming/job/models.rs
 create mode 100644 src/runtime/streaming/job/pipeline_runner.rs
 delete mode 100644 src/runtime/streaming/state/mod.rs
 delete mode 100644 src/runtime/streaming/state/table_manager.rs

diff --git a/src/runtime/streaming/job/edge_manager.rs b/src/runtime/streaming/job/edge_manager.rs
new file mode 100644
index 00000000..10ca97f1
--- /dev/null
+++ b/src/runtime/streaming/job/edge_manager.rs
@@ -0,0 +1,41 @@
+use std::collections::HashMap;
+
+use protocol::grpc::api::{FsEdge, FsNode};
+use tokio::sync::mpsc;
+
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+
+pub struct EdgeManager {
+    // PipelineID -> (input Receiver, list of output Senders)
+    endpoints: HashMap<u32, (Option<mpsc::Receiver<TrackedEvent>>, Vec<mpsc::Sender<TrackedEvent>>)>,
+}
+
+impl EdgeManager {
+    pub fn build(nodes: &[FsNode], edges: &[FsEdge]) -> Self {
+        let mut tx_map: HashMap<u32, Vec<mpsc::Sender<TrackedEvent>>> = HashMap::new();
+        let mut rx_map: HashMap<u32, mpsc::Receiver<TrackedEvent>> = HashMap::new();
+
+        for edge in edges {
+            let (tx, rx) = mpsc::channel(2048); // one bounded channel (capacity 2048) per edge
+            tx_map.entry(edge.source as u32).or_default().push(tx); // sender is kept under the source id
+            rx_map.insert(edge.target as u32, rx); // NOTE(review): overwrites an earlier receiver for the same target
+        }
+
+        let mut endpoints = HashMap::new();
+        for node in nodes { // a node that appears in no edge ends up with (None, empty Vec)
+            let id = node.node_index as u32;
+            endpoints.insert(id, (rx_map.remove(&id), tx_map.remove(&id).unwrap_or_default()));
+        }
+
+        Self { endpoints }
+    }
+
+    pub fn take_endpoints(
+        &mut self,
+        id: u32,
+    ) -> (Option<mpsc::Receiver<TrackedEvent>>, Vec<mpsc::Sender<TrackedEvent>>) {
+        self.endpoints
+            .remove(&id) // the entry is moved out, so each id can be taken only once
+            .expect("Critical: Execution Graph Inconsistent") // panics if `id` is absent or already taken
+    }
+}
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
new file mode 100644
index 00000000..82b02b3d
--- /dev/null
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -0,0 +1,198 @@
+use std::collections::HashMap;
+use std::sync::{Arc, RwLock};
+
+use protocol::grpc::api::{ChainedOperator, FsProgram};
+use tokio::sync::mpsc;
+use tracing::error;
+
+use crate::runtime::streaming::api::operator::ConstructedOperator;
+use crate::runtime::streaming::factory::OperatorFactory;
+use crate::runtime::streaming::job::edge_manager::EdgeManager;
+use crate::runtime::streaming::job::models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
+use crate::runtime::streaming::job::pipeline_runner::{FusionOperatorChain, PipelineRunner};
+use crate::runtime::streaming::memory::MemoryPool;
+use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
+use crate::runtime::streaming::storage::manager::TableManager;
+
+pub struct JobManager {
+    active_jobs: Arc<RwLock<HashMap<String, PhysicalExecutionGraph>>>, // job_id -> submitted graph
+    operator_factory: Arc<OperatorFactory>, // builds the operators of each pipeline chain
+    memory_pool: Arc<MemoryPool>, // cloned into every pipeline thread
+    table_manager: Option<Arc<tokio::sync::Mutex<TableManager>>>, // forwarded to each PipelineRunner
+}
+
+impl JobManager {
+    pub fn new(
+        operator_factory: Arc<OperatorFactory>,
+        max_memory_bytes: usize,
+        table_manager: Option<Arc<tokio::sync::Mutex<TableManager>>>,
+    ) -> Self {
+        Self {
+            active_jobs: Arc::new(RwLock::new(HashMap::new())), // starts with no registered jobs
+            operator_factory,
+            memory_pool: MemoryPool::new(max_memory_bytes), // one pool for the whole manager
+            table_manager,
+        }
+    }
+
+    /// Spawns one OS thread per node of `program`, registers the job and returns its generated id.
+    pub async fn submit_job(&self, program: FsProgram) -> anyhow::Result<String> {
+        let job_id = format!("job-{}", chrono::Utc::now().timestamp_millis()); // NOTE(review): ms timestamp, not guaranteed unique
+
+        let mut edge_manager = EdgeManager::build(&program.nodes, &program.edges);
+        let mut physical_pipelines = HashMap::new();
+
+        for node in &program.nodes {
+            let pipe_id = node.node_index as u32;
+            let (inbox, outboxes) = edge_manager.take_endpoints(pipe_id);
+            let chain = self.create_chain(&node.operators)?; // NOTE(review): early return leaves already-spawned threads detached
+            let (ctrl_tx, ctrl_rx) = mpsc::channel(64);
+            let status = Arc::new(RwLock::new(PipelineStatus::Initializing));
+
+            let thread_status = status.clone();
+            let job_id_for_thread = job_id.clone();
+            let exit_job_id = job_id_for_thread.clone();
+            let registry_ptr = self.active_jobs.clone();
+            let memory_pool = self.memory_pool.clone();
+            let table_manager = self.table_manager.clone();
+
+            let handle = std::thread::Builder::new()
+                .name(format!("Job-{}-Pipe-{}", job_id, pipe_id))
+                .spawn(move || {
+                    {
+                        let mut st = thread_status.write().unwrap();
+                        *st = PipelineStatus::Running;
+                    }
+
+                    let rt = tokio::runtime::Builder::new_current_thread() // one single-threaded runtime per pipeline
+                        .enable_all()
+                        .build()
+                        .expect("build current thread runtime"); // outside catch_unwind: a panic here skips on_pipeline_exit
+
+                    let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+                        rt.block_on(async move {
+                            let mut runner = PipelineRunner::new(
+                                pipe_id,
+                                chain,
+                                inbox,
+                                outboxes,
+                                ctrl_rx,
+                                job_id_for_thread.clone(),
+                                memory_pool,
+                                table_manager,
+                            );
+                            runner.run().await
+                        })
+                    }));
+
+                    Self::on_pipeline_exit(exit_job_id, pipe_id, result, thread_status, registry_ptr);
+                })?;
+
+            physical_pipelines.insert(
+                pipe_id,
+                PhysicalPipeline {
+                    pipeline_id: pipe_id,
+                    handle: Some(handle),
+                    status,
+                    control_tx: ctrl_tx,
+                },
+            );
+        }
+
+        let graph = PhysicalExecutionGraph {
+            job_id: job_id.clone(),
+            program,
+            pipelines: physical_pipelines,
+            start_time: std::time::Instant::now(),
+        };
+
+        self.active_jobs.write().unwrap().insert(job_id.clone(), graph); // registered only after every thread was spawned
+        Ok(job_id)
+    }
+
+    pub async fn stop_job(&self, job_id: &str, mode: StopMode) -> anyhow::Result<()> {
+        let controllers = { // scope the registry read lock so it is released before awaiting
+            let jobs = self.active_jobs.read().unwrap();
+            let graph = jobs
+                .get(job_id)
+                .ok_or_else(|| anyhow::anyhow!("job not found: {job_id}"))?;
+            graph
+                .pipelines
+                .values()
+                .map(|p| p.control_tx.clone())
+                .collect::<Vec<_>>()
+        };
+        // `send` fails only when a pipeline already dropped its control receiver (it is no longer running).
+        for tx in controllers {
+            let _ = tx.send(ControlCommand::Stop { mode: mode.clone() }).await; // keep notifying the rest
+        }
+        Ok(())
+    }
+
+    pub fn get_pipeline_statuses(&self, job_id: &str) -> Option<HashMap<u32, PipelineStatus>> {
+        let jobs = self.active_jobs.read().unwrap();
+        let graph = jobs.get(job_id)?; // None when the job id is not registered
+        Some(
+            graph
+                .pipelines
+                .iter()
+                .map(|(id, pipeline)| (*id, pipeline.status.read().unwrap().clone())) // cloned snapshot
+                .collect(),
+        )
+    }
+
+    fn create_chain(&self, operators: &[ChainedOperator]) -> anyhow::Result<FusionOperatorChain> {
+        let mut chain = Vec::with_capacity(operators.len());
+        for op in operators { // built in declaration order; the first factory error aborts the whole chain
+            match self
+                .operator_factory
+                .create_operator(&op.operator_name, &op.operator_config)?
+            {
+                ConstructedOperator::Operator(msg_op) => chain.push(msg_op),
+                ConstructedOperator::Source(_) => { // sources are rejected: only message operators are chained
+                    return Err(anyhow::anyhow!(
+                        "source operator '{}' cannot be used inside a physical pipeline chain",
+                        op.operator_name
+                    ));
+                }
+            }
+        }
+        Ok(FusionOperatorChain::new(chain))
+    }
+
+    fn on_pipeline_exit(
+        job_id: String,
+        pipe_id: u32,
+        result: std::thread::Result<anyhow::Result<()>>,
+        status: Arc<RwLock<PipelineStatus>>,
+        _registry: Arc<RwLock<HashMap<String, PhysicalExecutionGraph>>>, // currently unused
+    ) {
+        let mut needs_abort = false;
+        match result {
+            Ok(Err(e)) => { // the runner returned an error
+                *status.write().unwrap() = PipelineStatus::Failed {
+                    error: e.to_string(),
+                    is_panic: false,
+                };
+                needs_abort = true;
+            }
+            Err(_) => { // outer Err: a panic was caught; its payload is not inspected
+                *status.write().unwrap() = PipelineStatus::Failed {
+                    error: "panic".into(),
+                    is_panic: true,
+                };
+                needs_abort = true;
+            }
+            Ok(Ok(_)) => {
+                *status.write().unwrap() = PipelineStatus::Finished;
+            }
+        }
+
+        if needs_abort { // NOTE(review): only logs; no other pipeline of the job is stopped here
+            error!(
+                "Pipeline {}-{} failed. Initiating Job Abort.",
+                job_id, pipe_id
+            );
+        }
+    }
+}
diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs
new file mode 100644
index 00000000..9490e84e
--- /dev/null
+++ b/src/runtime/streaming/job/mod.rs
@@ -0,0 +1,7 @@
+pub mod edge_manager;
+pub mod job_manager;
+pub mod models;
+pub mod pipeline_runner;
+
+pub use job_manager::JobManager;
+pub use models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
diff --git a/src/runtime/streaming/job/models.rs b/src/runtime/streaming/job/models.rs
new file mode 100644
index 00000000..3e843ea4
--- /dev/null
+++ b/src/runtime/streaming/job/models.rs
@@ -0,0 +1,35 @@
+use std::collections::HashMap;
+use std::sync::{Arc, RwLock};
+use std::thread::JoinHandle;
+use std::time::Instant;
+
+use protocol::grpc::api::FsProgram;
+use tokio::sync::mpsc;
+
+use crate::runtime::streaming::protocol::control::ControlCommand;
+
+/// Live status of a physical pipeline.
+#[derive(Debug, Clone, PartialEq)]
+pub enum PipelineStatus {
+    Initializing,
+    Running,
+    Failed { error: String, is_panic: bool }, // is_panic separates a caught panic from a returned error
+    Finished,
+    Stopping,
+}
+
+/// One execution unit in the physical execution graph.
+pub struct PhysicalPipeline {
+    pub pipeline_id: u32,
+    pub handle: Option<JoinHandle<()>>, // join handle of the pipeline's thread
+    pub status: Arc<RwLock<PipelineStatus>>, // shared with the pipeline's thread
+    pub control_tx: mpsc::Sender<ControlCommand>, // sender side of the pipeline's control channel
+}
+
+/// Physical execution graph of a single SQL job.
+pub struct PhysicalExecutionGraph {
+    pub job_id: String,
+    pub program: FsProgram,
+    pub pipelines: HashMap<u32, PhysicalPipeline>, // keyed by pipeline id
+    pub start_time: Instant,
+}
diff --git a/src/runtime/streaming/job/pipeline_runner.rs b/src/runtime/streaming/job/pipeline_runner.rs
new file mode 100644
index 00000000..57c0fec8
--- /dev/null
+++ b/src/runtime/streaming/job/pipeline_runner.rs
@@ -0,0 +1,242 @@
+use std::future::pending;
+use std::sync::Arc;
+
+use tokio::sync::mpsc;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::memory::MemoryPool;
+use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
+use crate::runtime::streaming::protocol::event::StreamEvent;
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+use crate::runtime::streaming::storage::manager::TableManager;
+use crate::sql::common::CheckpointBarrier;
+
+pub struct PipelineRunner {
+    chain: FusionOperatorChain,
+    inbox: Option<mpsc::Receiver<TrackedEvent>>, // None => run() never receives stream events
+    outboxes: Vec<mpsc::Sender<TrackedEvent>>, // every outgoing event is sent to all of these
+    control_rx: mpsc::Receiver<ControlCommand>,
+    ctx: TaskContext,
+}
+
+impl PipelineRunner {
+    pub fn new(
+        pipeline_id: u32,
+        chain: FusionOperatorChain,
+        inbox: Option<mpsc::Receiver<TrackedEvent>>,
+        outboxes: Vec<mpsc::Sender<TrackedEvent>>,
+        control_rx: mpsc::Receiver<ControlCommand>,
+        job_id: String,
+        memory_pool: Arc<MemoryPool>,
+        table_manager: Option<Arc<tokio::sync::Mutex<TableManager>>>,
+    ) -> Self {
+        Self {
+            chain,
+            inbox,
+            outboxes, // kept on the runner itself; the TaskContext below receives an empty vec![]
+            control_rx,
+            ctx: TaskContext::new(job_id, pipeline_id, 0, 1, vec![], memory_pool, table_manager),
+        }
+    }
+
+    pub async fn run(&mut self) -> anyhow::Result<()> {
+        self.chain.on_start(&mut self.ctx).await?;
+
+        'main: loop { // NOTE(review): select! below has no `else` arm — confirm behavior once both channels are closed
+            tokio::select! {
+                biased; // control commands are polled before stream events
+                Some(cmd) = self.control_rx.recv() => {
+                    if self.handle_control(cmd).await? {
+                        break 'main;
+                    }
+                }
+                Some(event) = async {
+                    if let Some(ref mut rx) = self.inbox { rx.recv().await }
+                    else { pending().await } // no inbox: this branch never completes
+                } => {
+                    self.process_event(event).await?;
+                }
+            }
+        }
+
+        self.chain.on_close(&mut self.ctx).await?; // reached only via `break 'main`; `?` errors above skip it
+        Ok(())
+    }
+
+    async fn handle_control(&mut self, cmd: ControlCommand) -> anyhow::Result<bool> {
+        match &cmd {
+            ControlCommand::TriggerCheckpoint { barrier } => {
+                let barrier: CheckpointBarrier = barrier.clone().into();
+                self.chain.snapshot_state(barrier.clone(), &mut self.ctx).await?; // snapshot first
+                self.broadcast(StreamEvent::Barrier(barrier)).await?; // then forward the barrier downstream
+            }
+            ControlCommand::Commit { epoch } => {
+                self.chain.commit_checkpoint(*epoch, &mut self.ctx).await?;
+            }
+            ControlCommand::Stop { mode } if *mode == StopMode::Immediate => {
+                return Ok(true); // immediate stop: the chain's handle_control below is skipped
+            }
+            _ => {} // everything else (incl. non-immediate Stop) is left to the operators
+        }
+
+        self.chain.handle_control(cmd, &mut self.ctx).await // Ok(true) makes run() leave its loop
+    }
+
+    async fn process_event(&mut self, tracked: TrackedEvent) -> anyhow::Result<()> {
+        match tracked.event {
+            StreamEvent::Data(batch) => {
+                let outputs = self.chain.process_data(0, batch, &mut self.ctx).await?; // input index is always 0
+                self.emit_outputs(outputs).await?;
+            }
+            StreamEvent::Watermark(wm) => {
+                let outputs = self.chain.process_watermark(wm.clone(), &mut self.ctx).await?;
+                self.emit_outputs(outputs).await?;
+                self.broadcast(StreamEvent::Watermark(wm)).await?; // NOTE(review): sent in addition to Watermark outputs emitted above
+            }
+            StreamEvent::Barrier(barrier) => {
+                self.chain.snapshot_state(barrier.clone(), &mut self.ctx).await?;
+                self.broadcast(StreamEvent::Barrier(barrier)).await?;
+            }
+            StreamEvent::EndOfStream => {
+                self.broadcast(StreamEvent::EndOfStream).await?; // forwarded only; the operator chain is not called
+            }
+        }
+        Ok(())
+    }
+
+    async fn emit_outputs(
+        &mut self,
+        outputs: Vec<crate::runtime::streaming::protocol::stream_out::StreamOutput>,
+    ) -> anyhow::Result<()> {
+        for out in outputs {
+            match out {
+                crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward(batch)
+                | crate::runtime::streaming::protocol::stream_out::StreamOutput::Broadcast(batch)
+                | crate::runtime::streaming::protocol::stream_out::StreamOutput::Keyed(_, batch) => {
+                    self.broadcast(StreamEvent::Data(batch)).await?; // routing kind and key are ignored
+                }
+                crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(wm) => {
+                    self.broadcast(StreamEvent::Watermark(wm)).await?;
+                }
+            }
+        }
+        Ok(())
+    }
+
+    async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> {
+        let tracked = TrackedEvent::control(event); // NOTE(review): data events are wrapped via `control` too — confirm
+        for tx in &self.outboxes {
+            tx.send(tracked.clone()).await?; // a failed send ends the broadcast for the remaining outboxes
+        }
+        Ok(())
+    }
+}
+
+pub struct FusionOperatorChain {
+    operators: Vec<Box<dyn MessageOperator>>, // methods visit the operators in vector order
+}
+
+impl FusionOperatorChain {
+    pub fn new(operators: Vec<Box<dyn MessageOperator>>) -> Self {
+        Self { operators } // stored as given, without reordering
+    }
+
+    pub async fn on_start(&mut self, ctx: &mut TaskContext) -> anyhow::Result<()> {
+        for op in &mut self.operators {
+            op.on_start(ctx).await?; // the first Err returns early; later operators are not started
+        }
+        Ok(())
+    }
+
+    pub async fn process_data(
+        &mut self,
+        input_idx: usize,
+        batch: arrow_array::RecordBatch,
+        ctx: &mut TaskContext,
+    ) -> anyhow::Result<Vec<crate::runtime::streaming::protocol::stream_out::StreamOutput>> {
+        let mut data_batches = vec![batch]; // batches fed to the current operator
+        for (idx, op) in self.operators.iter_mut().enumerate() {
+            let mut next_batches = Vec::new();
+            for b in data_batches {
+                let outputs = op
+                    .process_data(if idx == 0 { input_idx } else { 0 }, b, ctx) // only the first operator sees input_idx
+                    .await?;
+                for out in outputs {
+                    match out { // data batches are passed on; Watermark outputs are dropped
+                        crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward(b)
+                        | crate::runtime::streaming::protocol::stream_out::StreamOutput::Broadcast(b)
+                        | crate::runtime::streaming::protocol::stream_out::StreamOutput::Keyed(_, b) => {
+                            next_batches.push(b); // routing kind and key are discarded between operators
+                        }
+                        crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(_) => {}
+                    }
+                }
+            }
+            data_batches = next_batches;
+        }
+        Ok(data_batches // results are always reported as Forward
+            .into_iter()
+            .map(crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward)
+            .collect())
+    }
+
+    pub async fn process_watermark(
+        &mut self,
+        watermark: crate::sql::common::Watermark,
+        ctx: &mut TaskContext,
+    ) -> anyhow::Result<Vec<crate::runtime::streaming::protocol::stream_out::StreamOutput>> {
+        let mut outs = vec![crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(watermark)];
+        for op in &mut self.operators {
+            let mut next = Vec::new();
+            for out in outs {
+                match out {
+                    crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(wm) => {
+                        let mut produced = op.process_watermark(wm, ctx).await?; // may yield any output kinds
+                        next.append(&mut produced);
+                    }
+                    other => next.push(other), // non-watermark outputs bypass the remaining operators
+                }
+            }
+            outs = next;
+        }
+        Ok(outs)
+    }
+
+    pub async fn snapshot_state(
+        &mut self,
+        barrier: CheckpointBarrier,
+        ctx: &mut TaskContext,
+    ) -> anyhow::Result<()> {
+        for op in &mut self.operators {
+            op.snapshot_state(barrier.clone(), ctx).await?; // every operator gets its own copy of the barrier
+        }
+        Ok(())
+    }
+
+    pub async fn commit_checkpoint(&mut self, epoch: u32, ctx: &mut TaskContext) -> anyhow::Result<()> {
+        for op in &mut self.operators {
+            op.commit_checkpoint(epoch, ctx).await?; // the first Err returns early; later operators are not called
+        }
+        Ok(())
+    }
+
+    pub async fn handle_control(
+        &mut self,
+        cmd: ControlCommand,
+        ctx: &mut TaskContext,
+    ) -> anyhow::Result<bool> {
+        let mut should_stop = false; // becomes true as soon as any operator asks to stop
+        for op in &mut self.operators {
+            should_stop |= op.handle_control(cmd.clone(), ctx).await?; // `|=` never short-circuits: every operator sees cmd
+        }
+        Ok(should_stop)
+    }
+
+    pub async fn on_close(&mut self, ctx: &mut TaskContext) -> anyhow::Result<()> {
+        for op in &mut self.operators {
+            let _ = op.on_close(ctx).await?; // the Ok value is discarded; the first Err skips the remaining operators
+        }
+        Ok(())
+    }
+}
diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs
index 67cd8f70..6b145cd7 100644
--- a/src/runtime/streaming/lib.rs
+++ b/src/runtime/streaming/lib.rs
@@ -19,6 +19,7 @@ pub mod cluster;
 pub mod error;
 pub mod execution;
 pub mod factory;
+pub mod job;
 pub mod memory;
 pub mod network;
 pub mod operators;
@@ -36,6 +37,7 @@ pub use cluster::{
 pub use error::RunError;
 pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
 pub use factory::{OperatorConstructor, OperatorFactory};
+pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
 pub use memory::{MemoryPool, MemoryTicket};
 pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
 pub use protocol::{
diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs
index 5997623e..237f3c06 100644
--- a/src/runtime/streaming/mod.rs
+++ b/src/runtime/streaming/mod.rs
@@ -21,11 +21,11 @@ pub mod error;
 pub mod execution;
 pub mod factory;
 pub mod format;
+pub mod job;
 pub mod memory;
 pub mod network;
 pub mod operators;
 pub mod protocol;
-pub mod state;
 pub mod storage;
 
 pub use api::{
@@ -40,6 +40,7 @@ pub use cluster::{
 pub use error::RunError;
 pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
 pub use factory::{OperatorConstructor, OperatorFactory};
+pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
 pub use memory::{MemoryPool, MemoryTicket};
 pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
 pub use protocol::{
diff --git a/src/runtime/streaming/state/mod.rs b/src/runtime/streaming/state/mod.rs
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/runtime/streaming/state/table_manager.rs b/src/runtime/streaming/state/table_manager.rs
deleted file mode 100644
index e69de29b..00000000

From 784299538423e8614d32c99383a30f10630b1ee1 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sat, 28 Mar 2026 21:45:02 +0800
Subject: [PATCH 21/44] update

---
 src/config/global_config.rs                   |   9 +
 src/coordinator/coordinator.rs                |   1 +
 src/coordinator/execution/executor.rs         |  26 +-
 src/coordinator/runtime_context.rs            |  12 +-
 src/runtime/streaming/api/context.rs          |  25 +-
 src/runtime/streaming/api/operator.rs         |  20 -
 src/runtime/streaming/api/source.rs           |   8 +
 src/runtime/streaming/cluster/graph.rs        | 136 -----
 src/runtime/streaming/cluster/manager.rs      | 164 ------
 src/runtime/streaming/cluster/master.rs       | 273 ---------
 src/runtime/streaming/cluster/mod.rs          |  11 -
 src/runtime/streaming/cluster/wiring.rs       |  46 --
 src/runtime/streaming/connectors/mod.rs       |  61 --
 src/runtime/streaming/driver.rs               | 254 ++++++++
 src/runtime/streaming/error.rs                |  39 +-
 src/runtime/streaming/execution/mod.rs        |   4 +-
 src/runtime/streaming/execution/runner.rs     | 546 ++++++++++--------
 src/runtime/streaming/execution/source.rs     | 192 +++---
 .../execution/tracker/barrier_aligner.rs      |  24 +-
 .../execution/tracker/watermark_tracker.rs    |  11 +
 .../factory/registry/kafka_factory.rs         | 328 +++++++++++
 .../factory/{registry.rs => registry/mod.rs}  |  48 +-
 src/runtime/streaming/job/job_manager.rs      | 280 +++++----
 src/runtime/streaming/job/mod.rs              |   1 -
 src/runtime/streaming/job/pipeline_runner.rs  | 242 --------
 src/runtime/streaming/lib.rs                  |  13 +-
 src/runtime/streaming/mod.rs                  |  12 +-
 src/runtime/streaming/network/environment.rs  |  50 +-
 .../grouping/incremental_aggregate.rs         |   3 +-
 .../streaming/operators/joins/lookup_join.rs  | 365 ------------
 src/runtime/streaming/operators/joins/mod.rs  |   2 -
 src/runtime/streaming/operators/mod.rs        |   4 +-
 .../streaming/operators/sink/kafka/mod.rs     |  20 +-
 .../streaming/operators/source/kafka/mod.rs   |  85 +--
 .../watermark/watermark_generator.rs          |  33 +-
 .../windows/tumbling_aggregating_window.rs    |  39 +-
 src/runtime/streaming/storage/backend.rs      |  78 ---
 src/runtime/streaming/storage/manager.rs      | 156 -----
 src/runtime/streaming/storage/mod.rs          |  32 -
 src/runtime/streaming/storage/table.rs        |  91 ---
 src/server/initializer.rs                     |  22 +-
 src/sql/common/fs_schema.rs                   |   8 +
 src/sql/common/kafka_catalog.rs               | 122 ++++
 src/sql/common/mod.rs                         |   5 +
 src/sql/common/operator_config.rs             |   4 +
 45 files changed, 1531 insertions(+), 2374 deletions(-)
 delete mode 100644 src/runtime/streaming/cluster/graph.rs
 delete mode 100644 src/runtime/streaming/cluster/manager.rs
 delete mode 100644 src/runtime/streaming/cluster/master.rs
 delete mode 100644 src/runtime/streaming/cluster/mod.rs
 delete mode 100644 src/runtime/streaming/cluster/wiring.rs
 delete mode 100644 src/runtime/streaming/connectors/mod.rs
 create mode 100644 src/runtime/streaming/driver.rs
 create mode 100644 src/runtime/streaming/factory/registry/kafka_factory.rs
 rename src/runtime/streaming/factory/{registry.rs => registry/mod.rs} (88%)
 delete mode 100644 src/runtime/streaming/job/pipeline_runner.rs
 delete mode 100644 src/runtime/streaming/operators/joins/lookup_join.rs
 delete mode 100644 src/runtime/streaming/storage/backend.rs
 delete mode 100644 src/runtime/streaming/storage/manager.rs
 delete mode 100644 src/runtime/streaming/storage/mod.rs
 delete mode 100644 src/runtime/streaming/storage/table.rs
 create mode 100644 src/sql/common/kafka_catalog.rs

diff --git a/src/config/global_config.rs b/src/config/global_config.rs
index b4f92edd..33676125 100644
--- a/src/config/global_config.rs
+++ b/src/config/global_config.rs
@@ -19,6 +19,13 @@ use crate::config::python_config::PythonConfig;
 use crate::config::service_config::ServiceConfig;
 use crate::config::wasm_config::WasmConfig;
 
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct StreamingConfig {
+    /// Maximum heap memory (in bytes) available to the streaming runtime's memory pool.
+    /// Defaults to 256 MiB when absent.
+    pub max_memory_bytes: Option<usize>,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct GlobalConfig {
     pub service: ServiceConfig,
@@ -31,6 +38,8 @@ pub struct GlobalConfig {
     pub state_storage: crate::config::storage::StateStorageConfig,
     #[serde(default)]
     pub task_storage: crate::config::storage::TaskStorageConfig,
+    #[serde(default)]
+    pub streaming: StreamingConfig,
 }
 
 impl GlobalConfig {
diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs
index ec81132a..f21b12ca 100644
--- a/src/coordinator/coordinator.rs
+++ b/src/coordinator/coordinator.rs
@@ -111,6 +111,7 @@ impl Coordinator {
             let res = Executor::new(
                 Arc::clone(&runtime.task_manager),
                 runtime.catalog_manager.clone(),
+                Arc::clone(&runtime.job_manager),
             )
             .execute(plan.as_ref())
             .map_err(|e| anyhow::anyhow!(e))
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 3639ee7a..28082abe 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -12,6 +12,7 @@
 
 use std::sync::Arc;
 
+use protocol::grpc::api::FsProgram;
 use thiserror::Error;
 use tracing::{debug, info};
 
@@ -23,6 +24,7 @@ use crate::coordinator::plan::{
     StreamingTableConnectorPlan,
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
+use crate::runtime::streaming::job::JobManager;
 use crate::runtime::taskexecutor::TaskManager;
 use crate::sql::schema::StreamTable;
 use crate::storage::stream_catalog::CatalogManager;
@@ -42,13 +44,19 @@ pub enum ExecuteError {
 pub struct Executor {
     task_manager: Arc<TaskManager>,
     catalog_manager: Arc<CatalogManager>,
+    job_manager: Arc<JobManager>,
 }
 
 impl Executor {
-    pub fn new(task_manager: Arc<TaskManager>, catalog_manager: Arc<CatalogManager>) -> Self {
+    pub fn new(
+        task_manager: Arc<TaskManager>,
+        catalog_manager: Arc<CatalogManager>,
+        job_manager: Arc<JobManager>,
+    ) -> Self {
         Self {
             task_manager,
             catalog_manager,
+            job_manager,
         }
     }
 
@@ -273,8 +281,22 @@ impl PlanVisitor for Executor {
                 .add_table(sink)
                 .map_err(|e| ExecuteError::Internal(e.to_string()))?;
 
+            let fs_program: FsProgram = plan.program.clone().into();
+            let job_manager: Arc<JobManager> = Arc::clone(&self.job_manager);
+
+            let job_id = tokio::task::block_in_place(|| {
+                tokio::runtime::Handle::current().block_on(job_manager.submit_job(fs_program))
+            })
+            .map_err(|e| ExecuteError::Internal(format!("Failed to submit streaming job: {e}")))?;
+
+            info!(
+                job_id = %job_id,
+                table = %plan.name,
+                "Streaming table registered and job submitted"
+            );
+
             Ok(ExecuteResult::ok_with_data(
-                format!("Registered streaming table '{}'", plan.name),
+                format!("Streaming table '{}' created, job_id = {}", plan.name, job_id),
                 empty_record_batch(),
             ))
         };
diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs
index 7b1d82dc..d0f80786 100644
--- a/src/coordinator/runtime_context.rs
+++ b/src/coordinator/runtime_context.rs
@@ -10,34 +10,36 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Runtime resources for a single coordinator run: [`TaskManager`] and [`CatalogManager`].
+//! Runtime resources for a single coordinator run: [`TaskManager`], [`CatalogManager`], and [`JobManager`].
 
 use std::sync::Arc;
 
 use anyhow::Result;
 
+use crate::runtime::streaming::job::JobManager;
 use crate::runtime::taskexecutor::TaskManager;
 use crate::sql::schema::StreamSchemaProvider;
 use crate::storage::stream_catalog::CatalogManager;
 
 /// Dependencies shared by analyze / plan / execute, analogous to installing globals in
-/// [`TaskManager`] and [`CatalogManager`].
+/// [`TaskManager`], [`CatalogManager`], and [`JobManager`].
 #[derive(Clone)]
 pub struct CoordinatorRuntimeContext {
     pub task_manager: Arc<TaskManager>,
     pub catalog_manager: Arc<CatalogManager>,
-    /// When set (e.g. unit tests), used for SQL planning instead of a catalog snapshot.
+    pub job_manager: Arc<JobManager>,
     planning_schema_override: Option<StreamSchemaProvider>,
 }
 
 impl CoordinatorRuntimeContext {
-    /// Resolve [`TaskManager`] and global stream catalog (same pattern as server startup).
     pub fn try_from_globals() -> Result<Self> {
         Ok(Self {
             task_manager: TaskManager::get()
                 .map_err(|e| anyhow::anyhow!("Failed to get TaskManager: {}", e))?,
             catalog_manager: CatalogManager::global()
                 .map_err(|e| anyhow::anyhow!("Failed to get CatalogManager: {}", e))?,
+            job_manager: JobManager::global()
+                .map_err(|e| anyhow::anyhow!("Failed to get JobManager: {}", e))?,
             planning_schema_override: None,
         })
     }
@@ -45,11 +47,13 @@ impl CoordinatorRuntimeContext {
     pub fn new(
         task_manager: Arc<TaskManager>,
         catalog_manager: Arc<CatalogManager>,
+        job_manager: Arc<JobManager>,
         planning_schema_override: Option<StreamSchemaProvider>,
     ) -> Self {
         Self {
             task_manager,
             catalog_manager,
+            job_manager,
             planning_schema_override,
         }
     }
diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs
index e838b06e..77038bf3 100644
--- a/src/runtime/streaming/api/context.rs
+++ b/src/runtime/streaming/api/context.rs
@@ -2,11 +2,9 @@ use crate::runtime::streaming::memory::MemoryPool;
 use crate::runtime::streaming::protocol::event::StreamEvent;
 use crate::runtime::streaming::protocol::tracked::TrackedEvent;
 use crate::runtime::streaming::network::endpoint::PhysicalSender;
-use crate::runtime::streaming::storage::manager::TableManager;
 
 use arrow_array::RecordBatch;
 use std::sync::Arc;
-use tokio::sync::Mutex;
 
 pub struct TaskContext {
     pub job_id: String,
@@ -17,7 +15,6 @@ pub struct TaskContext {
     pub outboxes: Vec<PhysicalSender>,
 
     memory_pool: Arc<MemoryPool>,
-    table_manager: Option<Arc<Mutex<TableManager>>>,
 
     current_watermark: Option<std::time::SystemTime>,
 }
@@ -30,7 +27,6 @@ impl TaskContext {
         parallelism: u32,
         outboxes: Vec<PhysicalSender>,
         memory_pool: Arc<MemoryPool>,
-        table_manager: Option<Arc<Mutex<TableManager>>>,
     ) -> Self {
         Self {
             job_id,
@@ -39,7 +35,6 @@ impl TaskContext {
             parallelism,
             outboxes,
             memory_pool,
-            table_manager,
             current_watermark: None,
         }
     }
@@ -77,27 +72,9 @@ impl TaskContext {
     }
 
     // ========================================================================
-    // 状态管理与背压网络发送 API
+    // Backpressure-aware network send API
     // ========================================================================
 
-    pub async fn table_manager(&self) -> tokio::sync::MutexGuard<'_, TableManager> {
-        self.table_manager
-            .as_ref()
-            .expect("State backend not initialized")
-            .lock()
-            .await
-    }
-
-    pub async fn table_manager_guard(
-        &self,
-    ) -> anyhow::Result<tokio::sync::MutexGuard<'_, TableManager>> {
-        let arc = self
-            .table_manager
-            .as_ref()
-            .ok_or_else(|| anyhow::anyhow!("table_manager is not configured on TaskContext"))?;
-        Ok(arc.lock().await)
-    }
-
     /// 受内存池管控的数据发送：申请精准字节的内存船票后广播到所有下游
     pub async fn collect(&self, batch: RecordBatch) -> anyhow::Result<()> {
         if self.outboxes.is_empty() {
diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs
index 6eb49d2c..3c088e3c 100644
--- a/src/runtime/streaming/api/operator.rs
+++ b/src/runtime/streaming/api/operator.rs
@@ -1,6 +1,5 @@
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::source::SourceOperator;
-use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
 use crate::runtime::streaming::protocol::stream_out::StreamOutput;
 use arrow_array::RecordBatch;
 use async_trait::async_trait;
@@ -128,25 +127,6 @@ pub trait MessageOperator: Send + 'static {
         Ok(vec![])
     }
 
-    /// 返回 `true` 时应立即结束运行循环（如 `StopMode::Immediate`）。
-    async fn handle_control(
-        &mut self,
-        command: ControlCommand,
-        _ctx: &mut TaskContext,
-    ) -> anyhow::Result<bool> {
-        match command {
-            ControlCommand::Stop { mode } => {
-                if mode == StopMode::Immediate {
-                    return Ok(true);
-                }
-                Ok(false)
-            }
-            ControlCommand::DropState | ControlCommand::Commit { .. } => Ok(false),
-            ControlCommand::Start | ControlCommand::UpdateConfig { .. } => Ok(false),
-            ControlCommand::TriggerCheckpoint { .. } => Ok(false),
-        }
-    }
-
     async fn on_close(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<Vec<StreamOutput>> {
         Ok(vec![])
     }
diff --git a/src/runtime/streaming/api/source.rs b/src/runtime/streaming/api/source.rs
index 8ddeb3cf..a4ff46c4 100644
--- a/src/runtime/streaming/api/source.rs
+++ b/src/runtime/streaming/api/source.rs
@@ -18,7 +18,9 @@ pub enum SourceOffset {
 pub enum SourceEvent {
     Data(RecordBatch),
     Watermark(Watermark),
+    /// No data to read: the Runner must schedule the back-off; never block for long in `fetch_next`.
     Idle,
+    EndOfStream,
 }
 
 #[async_trait]
@@ -29,8 +31,14 @@ pub trait SourceOperator: Send + 'static {
         Ok(())
     }
 
+    /// Core pull: return [`SourceEvent::Idle`] when there is no data; never block the control plane.
     async fn fetch_next(&mut self, ctx: &mut TaskContext) -> anyhow::Result<SourceEvent>;
 
+    /// Watermark heartbeat independent of `fetch_next` (e.g. to keep advancing it while Idle).
+    fn poll_watermark(&mut self) -> Option<Watermark> {
+        None // default implementation reports no watermark
+    }
+
     async fn snapshot_state(
         &mut self,
         barrier: CheckpointBarrier,
diff --git a/src/runtime/streaming/cluster/graph.rs b/src/runtime/streaming/cluster/graph.rs
deleted file mode 100644
index 1ee8f8f7..00000000
--- a/src/runtime/streaming/cluster/graph.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-use std::fmt;
-use std::sync::Arc;
-
-use crate::sql::common::FsSchema;
-// ============ 强类型 ID (Strong-type IDs) ============
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct JobId(pub String);
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub struct VertexId(pub u32);
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub struct SubtaskIndex(pub u32);
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct OperatorUid(pub String);
-
-impl fmt::Display for JobId {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
-impl fmt::Display for VertexId {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
-impl fmt::Display for SubtaskIndex {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
-impl fmt::Display for OperatorUid {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
-// ============ 资源画像 (Resource Profile) ============
-
-#[derive(Debug, Clone)]
-pub struct ResourceProfile {
-    pub managed_memory_bytes: u64,
-    pub cpu_cores: f64,
-    pub network_memory_bytes: u64,
-}
-
-impl Default for ResourceProfile {
-    fn default() -> Self {
-        Self {
-            managed_memory_bytes: 64 * 1024 * 1024,
-            cpu_cores: 1.0,
-            network_memory_bytes: 32 * 1024 * 1024,
-        }
-    }
-}
-
-// ============ 分区策略 (Partitioning Strategy) ============
-
-#[derive(Debug, Clone)]
-pub enum PartitioningStrategy {
-    Forward,
-    HashByKeys(Vec<usize>),
-    Rebalance,
-}
-
-// ============ 交换模式 (Exchange Mode) ============
-
-#[derive(Debug, Clone)]
-pub enum ExchangeMode {
-    LocalThread,
-    RemoteNetwork { target_addr: String },
-}
-
-// ============ 部署描述符 (Deployment Descriptors) ============
-
-#[derive(Debug, Clone)]
-pub struct TaskDeploymentDescriptor {
-    pub job_id: JobId,
-    pub vertex_id: VertexId,
-    pub subtask_idx: SubtaskIndex,
-    pub parallelism: u32,
-    pub operator_name: String,
-    pub operator_uid: OperatorUid,
-    pub is_source: bool,
-    pub operator_config_payload: Vec<u8>,
-    pub resources: ResourceProfile,
-    pub in_schemas: Vec<Arc<FsSchema>>,
-    pub out_schema: Option<Arc<FsSchema>>,
-    pub input_gates_count: usize,
-    pub output_gates_count: usize,
-}
-
-#[derive(Debug, Clone)]
-pub struct PhysicalEdgeDescriptor {
-    pub src_vertex: VertexId,
-    pub src_subtask: SubtaskIndex,
-    pub dst_vertex: VertexId,
-    pub dst_subtask: SubtaskIndex,
-    pub partitioning: PartitioningStrategy,
-    pub exchange_mode: ExchangeMode,
-}
-
-// ============ 执行图 (Execution Graph) ============
-
-#[derive(Debug, Clone)]
-pub struct ExecutionGraph {
-    pub job_id: JobId,
-    pub tasks: Vec<TaskDeploymentDescriptor>,
-    pub edges: Vec<PhysicalEdgeDescriptor>,
-}
-
-impl ExecutionGraph {
-    pub fn validate(&self) -> Result<(), String> {
-        if self.tasks.is_empty() {
-            return Err("Execution graph has no tasks".into());
-        }
-        if self.edges.is_empty() && self.tasks.len() > 1 {
-            return Err("Multi-task graph has no edges".into());
-        }
-        let mut seen = std::collections::HashSet::new();
-        for tdd in &self.tasks {
-            if !seen.insert((tdd.vertex_id, tdd.subtask_idx)) {
-                return Err(format!(
-                    "Duplicate subtask: vertex={}, subtask={}",
-                    tdd.vertex_id, tdd.subtask_idx
-                ));
-            }
-        }
-        Ok(())
-    }
-}
diff --git a/src/runtime/streaming/cluster/manager.rs b/src/runtime/streaming/cluster/manager.rs
deleted file mode 100644
index 34045dee..00000000
--- a/src/runtime/streaming/cluster/manager.rs
+++ /dev/null
@@ -1,164 +0,0 @@
-use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::ConstructedOperator;
-use crate::runtime::streaming::cluster::graph::ExecutionGraph;
-use crate::runtime::streaming::execution::runner::SubtaskRunner;
-use crate::runtime::streaming::execution::source::SourceRunner;
-use crate::runtime::streaming::factory::OperatorFactory;
-use crate::runtime::streaming::memory::MemoryPool;
-use crate::runtime::streaming::network::NetworkEnvironment;
-use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
-use crate::runtime::streaming::storage::manager::TableManager;
-use std::collections::HashMap;
-use std::sync::Arc;
-use tokio::sync::mpsc::{channel, Sender};
-use tokio::task::JoinSet;
-use tracing::{error, info, instrument, warn};
-
-pub struct TaskManager {
-    pub worker_id: String,
-    memory_pool: Arc<MemoryPool>,
-    table_manager: Arc<tokio::sync::Mutex<TableManager>>,
-    operator_factory: Arc<OperatorFactory>,
-    task_supervisors: JoinSet<()>,
-    pub controllers: HashMap<(u32, u32), Sender<ControlCommand>>,
-}
-
-impl TaskManager {
-    pub fn new(
-        worker_id: String,
-        max_memory_bytes: usize,
-        table_manager: Arc<tokio::sync::Mutex<TableManager>>,
-        operator_factory: Arc<OperatorFactory>,
-    ) -> Self {
-        Self {
-            worker_id,
-            memory_pool: MemoryPool::new(max_memory_bytes),
-            table_manager,
-            operator_factory,
-            task_supervisors: JoinSet::new(),
-            controllers: HashMap::new(),
-        }
-    }
-
-    #[instrument(skip(self, graph), fields(job_id = %graph.job_id))]
-    pub async fn deploy_and_start(&mut self, graph: ExecutionGraph) -> anyhow::Result<()> {
-        info!("TaskManager [{}] starting deployment...", self.worker_id);
-
-        graph
-            .validate()
-            .map_err(|e| anyhow::anyhow!("Graph validation failed: {}", e))?;
-
-        // 1. 网络连线期
-        let local_queue_size = 1024;
-        let mut network_env = NetworkEnvironment::build_from_graph(&graph, local_queue_size);
-
-        // 2. 控制通道初始化
-        let mut control_rxs = HashMap::new();
-        for tdd in &graph.tasks {
-            let key = (tdd.vertex_id.0, tdd.subtask_idx.0);
-            let (ctrl_tx, ctrl_rx) = channel(32);
-            self.controllers.insert(key, ctrl_tx);
-            control_rxs.insert(key, ctrl_rx);
-        }
-
-        // 3. 部署与算子实例化
-        for tdd in graph.tasks {
-            let v_id = tdd.vertex_id;
-            let s_idx = tdd.subtask_idx;
-            let key = (v_id.0, s_idx.0);
-
-            let ctrl_rx = control_rxs.remove(&key).unwrap();
-            let inboxes = network_env.take_inboxes(v_id, s_idx);
-            let outboxes = network_env.take_outboxes(v_id, s_idx);
-
-            let ctx = TaskContext::new(
-                tdd.job_id.0.clone(),
-                v_id.0,
-                s_idx.0,
-                tdd.parallelism,
-                outboxes,
-                self.memory_pool.clone(),
-                Some(self.table_manager.clone()),
-            );
-
-            let constructed_op = self.operator_factory.create_operator(
-                &tdd.operator_name,
-                &tdd.operator_config_payload,
-            )?;
-
-            // 4. 任务发射入监督树
-            let worker_id = self.worker_id.clone();
-            match constructed_op {
-                ConstructedOperator::Source(source_op) => {
-                    let runner = SourceRunner::new(source_op, ctx, ctrl_rx);
-                    self.task_supervisors.spawn(async move {
-                        if let Err(e) = runner.run().await {
-                            error!(
-                                worker = %worker_id,
-                                vertex = key.0,
-                                subtask = key.1,
-                                "SourceTask CRASHED: {:?}", e
-                            );
-                            panic!("SourceTask failed");
-                        }
-                    });
-                }
-                ConstructedOperator::Operator(msg_op) => {
-                    let runner = SubtaskRunner::new(msg_op, ctx, inboxes, ctrl_rx);
-                    self.task_supervisors.spawn(async move {
-                        if let Err(e) = runner.run().await {
-                            error!(
-                                worker = %worker_id,
-                                vertex = key.0,
-                                subtask = key.1,
-                                "StreamTask CRASHED: {:?}", e
-                            );
-                            panic!("StreamTask failed");
-                        }
-                    });
-                }
-            }
-        }
-
-        info!(
-            "TaskManager [{}] deployment complete. All tasks ignited.",
-            self.worker_id
-        );
-        Ok(())
-    }
-
-    /// 监控运行状态：Supervisor 模式防止级联崩溃
-    pub async fn wait_and_supervise(mut self) {
-        while let Some(result) = self.task_supervisors.join_next().await {
-            match result {
-                Ok(_) => {
-                    info!("A subtask finished successfully.");
-                }
-                Err(join_error) => {
-                    if join_error.is_panic() {
-                        error!(
-                            "FATAL: A subtask panicked! Initiating emergency shutdown \
-                             of the entire TaskManager to prevent data corruption."
-                        );
-                        self.task_supervisors.abort_all();
-                        break;
-                    } else if join_error.is_cancelled() {
-                        warn!("A subtask was cancelled.");
-                    }
-                }
-            }
-        }
-        info!("TaskManager shutdown process complete.");
-    }
-
-    pub async fn stop_all(&self, mode: StopMode) {
-        for (key, tx) in &self.controllers {
-            if let Err(e) = tx
-                .send(ControlCommand::Stop { mode: mode.clone() })
-                .await
-            {
-                warn!("Failed to send stop command to task {:?}: {}", key, e);
-            }
-        }
-    }
-}
diff --git a/src/runtime/streaming/cluster/master.rs b/src/runtime/streaming/cluster/master.rs
deleted file mode 100644
index e456d8e3..00000000
--- a/src/runtime/streaming/cluster/master.rs
+++ /dev/null
@@ -1,273 +0,0 @@
-use std::collections::HashMap;
-use anyhow::Result;
-
-use crate::runtime::streaming::cluster::graph::{
-    ExchangeMode, ExecutionGraph, JobId, OperatorUid, PartitioningStrategy,
-    PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId,
-};
-
-use petgraph::Direction;
-use sha2::{Digest, Sha256};
-use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, OperatorChain};
-
-#[derive(thiserror::Error, Debug)]
-pub enum CompileError {
-    #[error("Topology Error: Forward edge between Vertex {src} (p={src_p}) and {dst} (p={dst_p}) requires identical parallelism.")]
-    ParallelismMismatch {
-        src: u32,
-        src_p: usize,
-        dst: u32,
-        dst_p: usize,
-    },
-
-    #[error("Serialization Error: Failed to serialize operator chain for Vertex {vertex_id}. Error: {source}")]
-    SerializationFailed {
-        vertex_id: u32,
-        source: anyhow::Error,
-    },
-
-    #[error("Validation Error: {0}")]
-    ValidationError(String),
-}
-
-pub struct JobCompiler;
-
-impl JobCompiler {
-    pub fn compile(
-        job_id: String,
-        logical: &LogicalGraph,
-    ) -> Result<ExecutionGraph, CompileError> {
-        let mut tasks = Vec::new();
-        let mut edges = Vec::new();
-        let job_id_typed = JobId(job_id.clone());
-
-        // ====================================================================
-        // 阶段 1：预计算网络门数量 (Pre-compute Network Gates)
-        // ====================================================================
-        let mut in_degrees: HashMap<(u32, u32), usize> = HashMap::new();
-        let mut out_degrees: HashMap<(u32, u32), usize> = HashMap::new();
-
-        for edge_idx in logical.edge_indices() {
-            let edge = logical.edge_weight(edge_idx).unwrap();
-            let (src_idx, dst_idx) = logical.edge_endpoints(edge_idx).unwrap();
-            let src_node = logical.node_weight(src_idx).unwrap();
-            let dst_node = logical.node_weight(dst_idx).unwrap();
-
-            match edge.edge_type {
-                LogicalEdgeType::Forward => {
-                    if src_node.parallelism != dst_node.parallelism {
-                        return Err(CompileError::ParallelismMismatch {
-                            src: src_node.node_id,
-                            src_p: src_node.parallelism,
-                            dst: dst_node.node_id,
-                            dst_p: dst_node.parallelism,
-                        });
-                    }
-                    for i in 0..src_node.parallelism as u32 {
-                        *out_degrees.entry((src_node.node_id, i)).or_insert(0) += 1;
-                        *in_degrees.entry((dst_node.node_id, i)).or_insert(0) += 1;
-                    }
-                }
-                LogicalEdgeType::Shuffle
-                | LogicalEdgeType::LeftJoin
-                | LogicalEdgeType::RightJoin => {
-                    for s in 0..src_node.parallelism as u32 {
-                        *out_degrees.entry((src_node.node_id, s)).or_insert(0) +=
-                            dst_node.parallelism;
-                    }
-                    for d in 0..dst_node.parallelism as u32 {
-                        *in_degrees.entry((dst_node.node_id, d)).or_insert(0) +=
-                            src_node.parallelism;
-                    }
-                }
-            }
-        }
-
-        // ====================================================================
-        // 阶段 2：节点展开与算子融合 (Node Expansion & Operator Fusion)
-        // ====================================================================
-        for idx in logical.node_indices() {
-            let node = logical.node_weight(idx).unwrap();
-            let parallelism = node.parallelism as u32;
-
-            let in_schemas: Vec<_> = logical
-                .edges_directed(idx, Direction::Incoming)
-                .map(|e| e.weight().schema.clone())
-                .collect();
-            let out_schema = logical
-                .edges_directed(idx, Direction::Outgoing)
-                .map(|e| e.weight().schema.clone())
-                .next();
-
-            let is_source = node.operator_chain.is_source();
-            let (head_op, _) = node
-                .operator_chain
-                .iter()
-                .next()
-                .expect("operator chain is non-empty");
-
-            let chain_payload =
-                Self::serialize_operator_chain(&node.operator_chain).map_err(|e| {
-                    CompileError::SerializationFailed {
-                        vertex_id: node.node_id,
-                        source: e,
-                    }
-                })?;
-
-            let base_uid = Self::generate_deterministic_uid(
-                &job_id,
-                node.node_id,
-                &node.operator_chain,
-            );
-
-            let resource_profile =
-                Self::calculate_resource_profile(&node.operator_chain, parallelism);
-
-            for subtask_idx in 0..parallelism {
-                let s_idx = SubtaskIndex(subtask_idx);
-                let v_id = VertexId(node.node_id);
-
-                let input_gates_count = *in_degrees
-                    .get(&(node.node_id, subtask_idx))
-                    .unwrap_or(&0);
-                let output_gates_count = *out_degrees
-                    .get(&(node.node_id, subtask_idx))
-                    .unwrap_or(&0);
-
-                tasks.push(TaskDeploymentDescriptor {
-                    job_id: job_id_typed.clone(),
-                    vertex_id: v_id,
-                    subtask_idx: s_idx,
-                    parallelism,
-                    operator_name: head_op.operator_name.to_string(),
-                    operator_uid: OperatorUid(format!("{}-{}", base_uid, subtask_idx)),
-                    is_source,
-                    operator_config_payload: chain_payload.clone(),
-                    resources: resource_profile.clone(),
-                    in_schemas: in_schemas.clone(),
-                    out_schema: out_schema.clone(),
-                    input_gates_count,
-                    output_gates_count,
-                });
-            }
-        }
-
-        // ====================================================================
-        // 阶段 3：物理边展开与路由策略推断 (Edge Expansion & Partitioning)
-        // ====================================================================
-        for edge_idx in logical.edge_indices() {
-            let edge = logical.edge_weight(edge_idx).unwrap();
-            let (src_graph_idx, dst_graph_idx) = logical.edge_endpoints(edge_idx).unwrap();
-            let src_node = logical.node_weight(src_graph_idx).unwrap();
-            let dst_node = logical.node_weight(dst_graph_idx).unwrap();
-
-            let partitioning = match edge.edge_type {
-                LogicalEdgeType::Forward => PartitioningStrategy::Forward,
-                LogicalEdgeType::Shuffle
-                | LogicalEdgeType::LeftJoin
-                | LogicalEdgeType::RightJoin => {
-                    if let Some(key_indices) = edge.schema.storage_keys() {
-                        if !key_indices.is_empty() {
-                            PartitioningStrategy::HashByKeys(key_indices.clone())
-                        } else {
-                            PartitioningStrategy::Rebalance
-                        }
-                    } else {
-                        PartitioningStrategy::Rebalance
-                    }
-                }
-            };
-
-            let default_exchange = ExchangeMode::LocalThread;
-
-            match edge.edge_type {
-                LogicalEdgeType::Forward => {
-                    for i in 0..src_node.parallelism as u32 {
-                        edges.push(PhysicalEdgeDescriptor {
-                            src_vertex: VertexId(src_node.node_id),
-                            src_subtask: SubtaskIndex(i),
-                            dst_vertex: VertexId(dst_node.node_id),
-                            dst_subtask: SubtaskIndex(i),
-                            partitioning: partitioning.clone(),
-                            exchange_mode: default_exchange.clone(),
-                        });
-                    }
-                }
-                _ => {
-                    for src_idx in 0..src_node.parallelism as u32 {
-                        for dst_idx in 0..dst_node.parallelism as u32 {
-                            edges.push(PhysicalEdgeDescriptor {
-                                src_vertex: VertexId(src_node.node_id),
-                                src_subtask: SubtaskIndex(src_idx),
-                                dst_vertex: VertexId(dst_node.node_id),
-                                dst_subtask: SubtaskIndex(dst_idx),
-                                partitioning: partitioning.clone(),
-                                exchange_mode: default_exchange.clone(),
-                            });
-                        }
-                    }
-                }
-            }
-        }
-
-        let exec_graph = ExecutionGraph {
-            job_id: job_id_typed,
-            tasks,
-            edges,
-        };
-
-        // ====================================================================
-        // 阶段 4：执行拓扑图防御性自检 (Validation)
-        // ====================================================================
-        exec_graph
-            .validate()
-            .map_err(CompileError::ValidationError)?;
-
-        Ok(exec_graph)
-    }
-
-    /// 确定性状态 UID 生成器：哪怕拓扑变化，只要算子内部逻辑不变就能继承状态。
-    fn generate_deterministic_uid(
-        job_id: &str,
-        node_id: u32,
-        chain: &OperatorChain,
-    ) -> String {
-        let mut hasher = Sha256::new();
-        hasher.update(job_id.as_bytes());
-        hasher.update(&node_id.to_le_bytes());
-
-        for (op, _) in chain.iter() {
-            hasher.update(op.operator_name.to_string().as_bytes());
-            hasher.update(&op.operator_config);
-        }
-
-        let result = hasher.finalize();
-        hex::encode(&result[..8])
-    }
-
-    /// 序列化整条算子链 (Operator Fusion)
-    fn serialize_operator_chain(chain: &OperatorChain) -> Result<Vec<u8>> {
-        bincode::serde::encode_to_vec(chain, bincode::config::standard())
-            .map_err(|e| anyhow::anyhow!("bincode encode failed: {}", e))
-    }
-
-    /// 资源画像智能推算
-    fn calculate_resource_profile(
-        chain: &OperatorChain,
-        parallelism: u32,
-    ) -> ResourceProfile {
-        let mut profile = ResourceProfile::default();
-
-        for (op, _) in chain.iter() {
-            let name = op.operator_name.to_string();
-            if name.contains("Window") || name.contains("Join") || name.contains("Aggregate") {
-                profile.managed_memory_bytes += 512 * 1024 * 1024 / parallelism as u64;
-                profile.cpu_cores += 0.5;
-            }
-            if name.contains("Source") || name.contains("Sink") {
-                profile.network_memory_bytes += 128 * 1024 * 1024 / parallelism as u64;
-            }
-        }
-        profile
-    }
-}
diff --git a/src/runtime/streaming/cluster/mod.rs b/src/runtime/streaming/cluster/mod.rs
deleted file mode 100644
index f337078c..00000000
--- a/src/runtime/streaming/cluster/mod.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-pub mod graph;
-pub mod manager;
-pub mod master;
-mod wiring;
-
-pub use graph::{
-    ExchangeMode, ExecutionGraph, JobId, OperatorUid, PartitioningStrategy,
-    PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, VertexId,
-};
-pub use manager::TaskManager;
-pub use master::{CompileError, JobCompiler};
diff --git a/src/runtime/streaming/cluster/wiring.rs b/src/runtime/streaming/cluster/wiring.rs
deleted file mode 100644
index eb3b4162..00000000
--- a/src/runtime/streaming/cluster/wiring.rs
+++ /dev/null
@@ -1,46 +0,0 @@
-//! 物理拓扑构建：channel 与一对一子任务边。
-//!
-//! 将 `arroyo_datastream::LogicalGraph` 完整编译为 Task 管道属于上层 worker/planner；
-//! 此处提供 **与图无关** 的 channel 工厂与边展开，供适配层调用。
-
-use crate::runtime::streaming::protocol::tracked::TrackedEvent;
-use std::collections::HashMap;
-use tokio::sync::mpsc::{self, Receiver, Sender};
-
-pub type SubtaskKey = (String, u32);
-
-pub type SubtaskOutChannels = HashMap<SubtaskKey, Vec<Sender<TrackedEvent>>>;
-pub type SubtaskInChannels = HashMap<SubtaskKey, Vec<Receiver<TrackedEvent>>>;
-
-pub fn stream_channel(capacity: usize) -> (Sender<TrackedEvent>, Receiver<TrackedEvent>) {
-    mpsc::channel(capacity)
-}
-
-#[derive(Debug, Clone, Eq, PartialEq, Hash)]
-pub struct NodeSpec {
-    pub id: String,
-    pub parallelism: u32,
-}
-
-#[derive(Debug, Clone)]
-pub struct PhysicalEdge {
-    pub from: (String, u32),
-    pub to: (String, u32),
-}
-
-/// 为每条 `PhysicalEdge` 建一条独立 channel，并挂到对应子任务的 sender/receiver 列表。
-pub fn build_one_to_one_channels(
-    edges: &[PhysicalEdge],
-    capacity: usize,
-) -> (SubtaskOutChannels, SubtaskInChannels) {
-    let mut senders: SubtaskOutChannels = HashMap::new();
-    let mut receivers: SubtaskInChannels = HashMap::new();
-
-    for e in edges {
-        let (tx, rx) = stream_channel(capacity);
-        senders.entry(e.from.clone()).or_default().push(tx);
-        receivers.entry(e.to.clone()).or_default().push(rx);
-    }
-
-    (senders, receivers)
-}
diff --git a/src/runtime/streaming/connectors/mod.rs b/src/runtime/streaming/connectors/mod.rs
deleted file mode 100644
index d10a55a9..00000000
--- a/src/runtime/streaming/connectors/mod.rs
+++ /dev/null
@@ -1,61 +0,0 @@
-use anyhow::Result;
-use arrow_array::{ArrayRef, RecordBatch};
-use arrow_schema::Schema;
-use async_trait::async_trait;
-use std::collections::HashMap;
-use std::sync::Arc;
-
-use crate::sql::common::OperatorConfig;
-
-/// 维表查询接口：由具体 Connector（如 Redis、MySQL）实现。
-#[async_trait]
-pub trait LookupConnector: Send {
-    fn name(&self) -> &str;
-
-    /// 根据 key 列批量查询外部系统，返回结果 batch（含 `_lookup_key_index` 列）。
-    /// 返回 `None` 表示无匹配行。
-    async fn lookup(&self, keys: &[ArrayRef]) -> Option<Result<RecordBatch>>;
-}
-
-/// Connector 工厂 trait：每种外部系统实现此 trait 提供 Source / Sink / Lookup 构建能力。
-pub trait Connector: Send + Sync {
-    fn name(&self) -> &str;
-
-    fn make_lookup(
-        &self,
-        config: OperatorConfig,
-        schema: Arc<Schema>,
-    ) -> Result<Box<dyn LookupConnector>>;
-}
-
-/// 全局 Connector 注册表。
-pub struct ConnectorRegistry {
-    connectors: HashMap<String, Box<dyn Connector>>,
-}
-
-impl ConnectorRegistry {
-    pub fn new() -> Self {
-        Self {
-            connectors: HashMap::new(),
-        }
-    }
-
-    pub fn register(&mut self, connector: Box<dyn Connector>) {
-        self.connectors
-            .insert(connector.name().to_string(), connector);
-    }
-
-    pub fn get(&self, name: &str) -> Option<&dyn Connector> {
-        self.connectors.get(name).map(|c| c.as_ref())
-    }
-}
-
-/// 返回当前已注册的所有 Connector。
-///
-/// 目前返回空注册表，后续接入 Kafka / Redis 等时在此处注册。
-pub fn connectors() -> ConnectorRegistry {
-    let registry = ConnectorRegistry::new();
-    // TODO: registry.register(Box::new(KafkaConnector));
-    // TODO: registry.register(Box::new(RedisConnector));
-    registry
-}
diff --git a/src/runtime/streaming/driver.rs b/src/runtime/streaming/driver.rs
new file mode 100644
index 00000000..f2abec87
--- /dev/null
+++ b/src/runtime/streaming/driver.rs
@@ -0,0 +1,254 @@
+use std::future::pending;
+use std::sync::Arc;
+
+use arrow_array::RecordBatch;
+use tokio::sync::mpsc;
+
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::{MessageOperator, OperatorContext, StreamOperator};
+use crate::runtime::streaming::context::{ChainedOperatorContext, TerminalOutputContext};
+use crate::runtime::streaming::environment::TaskEnvironment;
+use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
+use crate::runtime::streaming::protocol::event::StreamEvent;
+use crate::runtime::streaming::protocol::stream_out::StreamOutput;
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+use crate::sql::common::CheckpointBarrier;
+
+pub struct StreamTaskDriver { // drives one operator chain from an optional inbox and a control channel
+    head_op: Box<dyn StreamOperator>, // first operator of the chain; receives every event
+    head_ctx: Box<dyn OperatorContext>, // context passed to `head_op`; wraps the rest of the chain
+    inbox: Option<mpsc::Receiver<TrackedEvent>>, // upstream events; `None` when there is no data input
+    control_rx: mpsc::Receiver<ControlCommand>, // control commands, polled before the inbox in `run`
+}
+
+impl StreamTaskDriver {
+    /// Builds a driver whose head is `operators[0]`; each later operator is wrapped into the context
+    /// of its predecessor and the last one gets a `TerminalOutputContext` over `outboxes`. Panics if empty.
+    pub fn new(
+        task_id: u32,
+        mut operators: Vec<Box<dyn StreamOperator>>,
+        inbox: Option<mpsc::Receiver<TrackedEvent>>,
+        outboxes: Vec<mpsc::Sender<TrackedEvent>>,
+        control_rx: mpsc::Receiver<ControlCommand>,
+        job_id: String,
+    ) -> Self {
+        let env = TaskEnvironment::new(job_id, task_id, 0, 1); // NOTE(review): 0 and 1 look like subtask index and parallelism — confirm
+        let mut current_op = operators.pop().expect("Operators pipeline cannot be empty");
+        let mut current_ctx: Box<dyn OperatorContext> =
+            Box::new(TerminalOutputContext::new(outboxes, env));
+        while let Some(prev_op) = operators.pop() {
+            let chained = ChainedOperatorContext::new(current_op, current_ctx);
+            current_op = prev_op;
+            current_ctx = Box::new(chained);
+        }
+        Self {
+            head_op: current_op,
+            head_ctx: current_ctx,
+            inbox,
+            control_rx,
+        }
+    }
+    /// Opens the head operator, then serves control commands (polled first) and inbox events until a
+    /// command requests a stop or the control channel and the inbox are both closed; then closes the head.
+    pub async fn run(&mut self) -> anyhow::Result<()> {
+        self.head_op.open(self.head_ctx.env()).await?;
+        'main_loop: loop {
+            tokio::select! {
+                biased;
+                Some(cmd) = self.control_rx.recv() => {
+                    if self.process_control_command(cmd).await? {
+                        break 'main_loop;
+                    }
+                }
+                Some(tracked) = async {
+                    if let Some(ref mut rx) = self.inbox { rx.recv().await }
+                    else { pending().await }
+                } => self.pump_event(tracked.event).await?,
+                // Both receivers returned `None`: without an `else` branch `select!` would panic here.
+                else => break 'main_loop,
+            }
+        }
+        self.head_op.close(self.head_ctx.env()).await?;
+        Ok(())
+    }
+    /// Applies one control command to the head operator and its context; `Ok(true)` means stop the loop.
+    async fn process_control_command(&mut self, cmd: ControlCommand) -> anyhow::Result<bool> {
+        match cmd {
+            ControlCommand::TriggerCheckpoint { barrier } => {
+                let barrier: CheckpointBarrier = barrier.into();
+                self.pump_event(StreamEvent::Barrier(barrier)).await?;
+                Ok(false)
+            }
+            ControlCommand::Commit { epoch } => {
+                self.head_op.commit_checkpoint(epoch, self.head_ctx.env()).await?;
+                self.head_ctx.commit_checkpoint(epoch).await?;
+                Ok(false)
+            }
+            ControlCommand::Stop { mode } if mode == StopMode::Immediate => Ok(true),
+            other_cmd => {
+                let stop_head = self
+                    .head_op
+                    .handle_control(other_cmd.clone(), self.head_ctx.env())
+                    .await?;
+                let stop_rest = self.head_ctx.handle_control(other_cmd).await?;
+                Ok(stop_head || stop_rest)
+            }
+        }
+    }
+    /// Routes one stream event into the head operator; barriers and end-of-stream are also broadcast.
+    async fn pump_event(&mut self, event: StreamEvent) -> anyhow::Result<()> {
+        match event {
+            StreamEvent::Data(batch) => self.head_op.process_data(batch, self.head_ctx.as_mut()).await,
+            StreamEvent::Watermark(wm) => {
+                self.head_op.process_watermark(wm, self.head_ctx.as_mut()).await
+            }
+            StreamEvent::Barrier(br) => {
+                self.head_op
+                    .snapshot_state(br.clone(), self.head_ctx.as_mut())
+                    .await?;
+                self.head_ctx.broadcast(StreamEvent::Barrier(br)).await
+            }
+            StreamEvent::EndOfStream => {
+                // NOTE(review): `run` closes the head again after its loop — confirm `close` is idempotent.
+                self.head_op.close(self.head_ctx.env()).await?;
+                self.head_ctx.broadcast(StreamEvent::EndOfStream).await
+            }
+        }
+    }
+}
+
+pub struct MessageOperatorAdapter { // wraps a `MessageOperator` behind the `StreamOperator` trait
+    inner: Box<dyn MessageOperator>, // the wrapped operator that calls are delegated to
+}
+
+impl MessageOperatorAdapter {
+    pub fn new(inner: Box<dyn MessageOperator>) -> Self { // stores `inner`; nothing else is set up
+        Self { inner }
+    }
+    /// Sends each output to `ctx`: data batches through `collect`, watermarks through `broadcast`.
+    async fn emit_outputs(
+        ctx: &mut dyn OperatorContext,
+        outputs: Vec<StreamOutput>,
+    ) -> anyhow::Result<()> {
+        for out in outputs {
+            match out {
+                StreamOutput::Forward(b) | StreamOutput::Broadcast(b) | StreamOutput::Keyed(_, b) => {
+                    ctx.collect(b).await?; // all three batch kinds take the same path; the `Keyed` hash is dropped
+                }
+                StreamOutput::Watermark(wm) => {
+                    ctx.broadcast(StreamEvent::Watermark(wm)).await?;
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+#[async_trait::async_trait(?Send)]
+impl StreamOperator for MessageOperatorAdapter { // most methods build a fresh `TaskContext` for the wrapped call
+    async fn open(&mut self, env: &mut TaskEnvironment) -> anyhow::Result<()> { // delegates to `on_start`
+        let mut ctx = TaskContext::new(
+            env.job_id.clone(),
+            env.task_id,
+            env.subtask_index,
+            env.parallelism,
+            vec![],
+            env.memory_pool.clone(),
+        );
+        self.inner.on_start(&mut ctx).await
+    }
+    /// Delegates to `on_close`; the value `on_close` yields on success is discarded.
+    async fn close(&mut self, env: &mut TaskEnvironment) -> anyhow::Result<()> {
+        let mut ctx = TaskContext::new(
+            env.job_id.clone(),
+            env.task_id,
+            env.subtask_index,
+            env.parallelism,
+            vec![],
+            env.memory_pool.clone(),
+        );
+        let _ = self.inner.on_close(&mut ctx).await?;
+        Ok(())
+    }
+    /// Passes `batch` to the wrapped operator as input index 0, then emits the returned outputs via `ctx`.
+    async fn process_data(
+        &mut self,
+        batch: RecordBatch,
+        ctx: &mut dyn OperatorContext,
+    ) -> anyhow::Result<()> {
+        let mut op_ctx = TaskContext::new(
+            ctx.env().job_id.clone(),
+            ctx.env().task_id,
+            ctx.env().subtask_index,
+            ctx.env().parallelism,
+            vec![],
+            ctx.env().memory_pool.clone(),
+        );
+        let outs = self.inner.process_data(0, batch, &mut op_ctx).await?;
+        Self::emit_outputs(ctx, outs).await
+    }
+    /// Passes the watermark to the wrapped operator, then emits the returned outputs via `ctx`.
+    async fn process_watermark(
+        &mut self,
+        wm: crate::sql::common::Watermark,
+        ctx: &mut dyn OperatorContext,
+    ) -> anyhow::Result<()> {
+        let mut op_ctx = TaskContext::new(
+            ctx.env().job_id.clone(),
+            ctx.env().task_id,
+            ctx.env().subtask_index,
+            ctx.env().parallelism,
+            vec![],
+            ctx.env().memory_pool.clone(),
+        );
+        let outs = self.inner.process_watermark(wm, &mut op_ctx).await?;
+        Self::emit_outputs(ctx, outs).await
+    }
+    /// Delegates the checkpoint barrier to the wrapped operator's `snapshot_state`.
+    async fn snapshot_state(
+        &mut self,
+        barrier: CheckpointBarrier,
+        ctx: &mut dyn OperatorContext,
+    ) -> anyhow::Result<()> {
+        let mut op_ctx = TaskContext::new(
+            ctx.env().job_id.clone(),
+            ctx.env().task_id,
+            ctx.env().subtask_index,
+            ctx.env().parallelism,
+            vec![],
+            ctx.env().memory_pool.clone(),
+        );
+        self.inner.snapshot_state(barrier, &mut op_ctx).await
+    }
+    /// Delegates the commit of checkpoint `epoch` to the wrapped operator's `commit_checkpoint`.
+    async fn commit_checkpoint(
+        &mut self,
+        epoch: u32,
+        env: &mut TaskEnvironment,
+    ) -> anyhow::Result<()> {
+        let mut ctx = TaskContext::new(
+            env.job_id.clone(),
+            env.task_id,
+            env.subtask_index,
+            env.parallelism,
+            vec![],
+            env.memory_pool.clone(),
+        );
+        self.inner.commit_checkpoint(epoch, &mut ctx).await
+    }
+    /// Returns `Ok(true)` only for `Stop` with `StopMode::Immediate`; the wrapped operator is not consulted.
+    async fn handle_control(
+        &mut self,
+        cmd: ControlCommand,
+        _env: &mut TaskEnvironment,
+    ) -> anyhow::Result<bool> {
+        match cmd {
+            ControlCommand::Stop { mode } => Ok(mode == StopMode::Immediate),
+            ControlCommand::DropState
+            | ControlCommand::Start
+            | ControlCommand::UpdateConfig { .. }
+            | ControlCommand::TriggerCheckpoint { .. }
+            | ControlCommand::Commit { .. } => Ok(false),
+        }
+    }
+}
diff --git a/src/runtime/streaming/error.rs b/src/runtime/streaming/error.rs
index f00bd9c4..3d8fba19 100644
--- a/src/runtime/streaming/error.rs
+++ b/src/runtime/streaming/error.rs
@@ -1,10 +1,43 @@
+use std::fmt::Display;
 use thiserror::Error;
 
-/// 子任务 / 源任务运行中的错误。
+/// Errors raised while a pipeline or subtask is running.
 #[derive(Debug, Error)]
 pub enum RunError {
-    #[error("operator error: {0:#}")]
+    /// Error raised by an operator's own processing logic.
+    #[error("Operator execution failed: {0:#}")]
     Operator(#[from] anyhow::Error),
-    #[error("downstream send: {0}")]
+
+    /// Sending data or a signal to a downstream task failed: the channel was blocked or disconnected.
+    #[error("Downstream send failed: {0}")]
     DownstreamSend(String),
+
+    /// Internal state-machine or topology-planning error (e.g. an empty DAG, or a shuffle in the middle of a chain).
+    #[error("Internal engine error: {0}")]
+    Internal(String),
+
+    /// Error while persisting or restoring checkpoint state.
+    #[error("State backend error: {0}")]
+    State(String),
+
+    /// Low-level network or file I/O error.
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error),
 }
+
+impl RunError {
+    /// Shortcut constructor for an internal engine error (typically used for defensive checks and boundary validation).
+    pub fn internal<T: Display>(msg: T) -> Self {
+        Self::Internal(msg.to_string())
+    }
+
+    /// Shortcut constructor for a downstream send failure.
+    pub fn downstream<T: Display>(msg: T) -> Self {
+        Self::DownstreamSend(msg.to_string())
+    }
+
+    /// Shortcut constructor for a state backend failure.
+    pub fn state<T: Display>(msg: T) -> Self {
+        Self::State(msg.to_string())
+    }
+}
\ No newline at end of file
diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs
index 34002193..4d55e361 100644
--- a/src/runtime/streaming/execution/mod.rs
+++ b/src/runtime/streaming/execution/mod.rs
@@ -4,5 +4,5 @@ pub mod runner;
 pub mod source;
 pub mod tracker;
 
-pub use runner::SubtaskRunner;
-pub use source::{SourceRunner, SOURCE_IDLE_SLEEP};
+pub use runner::{OperatorDrive, SubtaskRunner};
+pub use source::{SourceRunner, SOURCE_IDLE_SLEEP, WATERMARK_EMIT_INTERVAL};
diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs
index d824d025..fa907088 100644
--- a/src/runtime/streaming/execution/runner.rs
+++ b/src/runtime/streaming/execution/runner.rs
@@ -1,296 +1,368 @@
+use async_trait::async_trait;
+use tokio::sync::mpsc::Receiver;
+use tokio_stream::{StreamExt, StreamMap};
+use tracing::{info, info_span, Instrument};
+
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
 use crate::runtime::streaming::error::RunError;
-use crate::runtime::streaming::protocol::control::ControlCommand;
-use crate::runtime::streaming::protocol::event::StreamEvent;
-use crate::runtime::streaming::protocol::stream_out::StreamOutput;
-use crate::runtime::streaming::protocol::tracked::TrackedEvent;
-use super::tracker::barrier_aligner::{AlignmentStatus, BarrierAligner};
-use super::tracker::watermark_tracker::WatermarkTracker;
 use crate::runtime::streaming::network::endpoint::BoxedEventStream;
-use std::collections::VecDeque;
-use std::pin::Pin;
-use tokio::sync::mpsc::Receiver;
-use tokio_stream::{StreamExt, StreamMap};
-use tracing::{debug, error, info, warn};
+use crate::runtime::streaming::protocol::{
+    control::{ControlCommand, StopMode},
+    event::StreamEvent,
+    stream_out::StreamOutput,
+    tracked::TrackedEvent,
+};
+use crate::runtime::streaming::execution::tracker::{
+    barrier_aligner::{AlignmentStatus, BarrierAligner},
+    watermark_tracker::WatermarkTracker,
+};
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
-pub struct SubtaskRunner {
+// ==========================================
+// Part 1: logical processing layer - fused operator chain (Logical Driver)
+// ==========================================
+
+#[async_trait]
+pub trait OperatorDrive: Send { // one link of an operator chain; `Pipeline` drives the head link
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<(), RunError>; // `Pipeline::run` calls this before its event loop
+    async fn process_event(
+        &mut self,
+        input_idx: usize, // index of the input the event arrived on
+        event: TrackedEvent,
+        ctx: &mut TaskContext,
+    ) -> Result<bool, RunError>; // `ChainedDriver` returns true only for `EndOfStream`
+    async fn handle_control(
+        &mut self,
+        cmd: ControlCommand,
+        ctx: &mut TaskContext,
+    ) -> Result<bool, RunError>; // true asks the caller to stop; `Pipeline::run` then leaves its loop
+    async fn on_close(&mut self, ctx: &mut TaskContext) -> Result<(), RunError>; // `Pipeline::teardown` calls this after the loop
+}
+
+pub struct ChainedDriver {
     operator: Box<dyn MessageOperator>,
-    ctx: TaskContext,
-    inboxes: Vec<BoxedEventStream>,
-    control_rx: Receiver<ControlCommand>,
+    next: Option<Box<dyn OperatorDrive>>,
 }
 
-impl SubtaskRunner {
-    pub fn new(
-        operator: Box<dyn MessageOperator>,
-        ctx: TaskContext,
-        inboxes: Vec<BoxedEventStream>,
-        control_rx: Receiver<ControlCommand>,
-    ) -> Self {
-        Self { operator, ctx, inboxes, control_rx }
+impl ChainedDriver {
+    pub fn new(operator: Box<dyn MessageOperator>, next: Option<Box<dyn OperatorDrive>>) -> Self {
+        Self { operator, next }
     }
 
-    pub async fn run(mut self) -> Result<(), RunError> {
-        let input_count = self.inboxes.len();
-        info!(
-            job_id = %self.ctx.job_id,
-            vertex = self.ctx.vertex_id,
-            subtask = self.ctx.subtask_idx,
-            inputs = input_count,
-            operator = %self.operator.name(),
-            "subtask starting"
-        );
-
-        self.operator.on_start(&mut self.ctx).await?;
-
-        if input_count == 0 {
-            return self.run_source_loop().await;
+    /// Assembles the operators from back to front into a chain of responsibility.
+    pub fn build_chain(mut operators: Vec<Box<dyn MessageOperator>>) -> Option<Box<dyn OperatorDrive>> {
+        if operators.is_empty() {
+            return None;
         }
-
-        let mut stream_map: StreamMap<usize, Pin<Box<dyn tokio_stream::Stream<Item = TrackedEvent> + Send>>> = StreamMap::new();
-        for (i, inbox) in self.inboxes.into_iter().enumerate() {
-            stream_map.insert(i, inbox);
+        let mut next_driver: Option<Box<dyn OperatorDrive>> = None;
+        while let Some(op) = operators.pop() {
+            let current = ChainedDriver::new(op, next_driver);
+            next_driver = Some(Box::new(current));
         }
+        next_driver
+    }
 
-        let mut wm_tracker = WatermarkTracker::new(input_count);
-        let mut barrier_aligner = BarrierAligner::new(input_count);
-        let mut eof_count = 0usize;
-        let mut closed_on_full_eof = false;
-
-        let tick_interval = self.operator.tick_interval();
-        let mut tick_sleep: Option<Pin<Box<tokio::time::Sleep>>> =
-            tick_interval.map(|d| Box::pin(tokio::time::sleep(d)));
-        let mut tick_index: u64 = 0;
-
-        'run: loop {
-            tokio::select! {
-                biased;
-
-                cmd_opt = self.control_rx.recv() => {
-                    match cmd_opt {
-                        None => {
-                            debug!(
-                                vertex = self.ctx.vertex_id,
-                                subtask = self.ctx.subtask_idx,
-                                "control channel closed"
-                            );
-                            break 'run;
-                        }
-                        Some(cmd) => {
-                            info!(
-                                vertex = self.ctx.vertex_id,
-                                subtask = self.ctx.subtask_idx,
-                                ?cmd,
-                                "control command"
-                            );
-                            if Self::handle_control_command(&mut self.operator, &mut self.ctx, cmd)
-                                .await?
-                            {
-                                break 'run;
-                            }
-                        }
+    async fn dispatch_outputs(
+        &mut self,
+        outputs: Vec<StreamOutput>,
+        ctx: &mut TaskContext,
+    ) -> Result<(), RunError> {
+        for out in outputs {
+            match out {
+                StreamOutput::Forward(b) => {
+                    if let Some(next) = &mut self.next {
+                        next.process_event(0, TrackedEvent::control(StreamEvent::Data(b)), ctx)
+                            .await?;
+                    } else {
+                        ctx.collect(b).await?;
                     }
                 }
-
-                next_item = stream_map.next() => {
-                    let Some((input_idx, event)) = next_item else {
-                        break 'run;
-                    };
-
-                    if barrier_aligner.is_blocked(input_idx)
-                        && !matches!(event.event, StreamEvent::Barrier(_))
-                    {
-                        barrier_aligner.buffer_event(input_idx, event);
-                    } else {
-                        let mut work = VecDeque::new();
-                        work.push_back((input_idx, event));
-                        let mut exit_run = false;
-                        let mut dispatch = EventDispatchState {
-                            operator: &mut self.operator,
-                            ctx: &mut self.ctx,
-                            work: &mut work,
-                            wm_tracker: &mut wm_tracker,
-                            barrier_aligner: &mut barrier_aligner,
-                            eof_count: &mut eof_count,
-                            closed_on_full_eof: &mut closed_on_full_eof,
-                            input_count,
-                        };
-                        while let Some((idx, ev)) = dispatch.work.pop_front() {
-                            if Self::dispatch_stream_event(&mut dispatch, idx, ev).await? {
-                                exit_run = true;
-                                break;
-                            }
-                        }
-                        if exit_run {
-                            break 'run;
-                        }
+                StreamOutput::Keyed(hash, b) => {
+                    if self.next.is_some() {
+                        return Err(RunError::internal(format!(
+                            "Topology Error: Keyed output emitted in the middle of chain by '{}'",
+                            self.operator.name()
+                        )));
                     }
+                    ctx.collect_keyed(hash, b).await?;
                 }
-
-                _ = async {
-                    match tick_sleep.as_mut() {
-                        Some(s) => s.as_mut().await,
-                        None => std::future::pending().await,
+                StreamOutput::Broadcast(b) => {
+                    if self.next.is_some() {
+                        return Err(RunError::internal(format!(
+                            "Topology Error: Broadcast output emitted in the middle of chain by '{}'",
+                            self.operator.name()
+                        )));
                     }
-                }, if tick_interval.is_some() => {
-                    let outs = self
-                        .operator
-                        .process_tick(tick_index, &mut self.ctx)
+                    ctx.collect(b).await?;
+                }
+                StreamOutput::Watermark(wm) => {
+                    if let Some(next) = &mut self.next {
+                        next.process_event(
+                            0,
+                            TrackedEvent::control(StreamEvent::Watermark(wm)),
+                            ctx,
+                        )
                         .await?;
-                    tick_index = tick_index.wrapping_add(1);
-                    Self::dispatch_stream_outputs(&mut self.ctx, outs).await?;
-                    if let (Some(d), Some(s)) = (tick_interval, tick_sleep.as_mut()) {
-                        s.as_mut()
-                            .reset(tokio::time::Instant::now() + d);
+                    } else {
+                        ctx.broadcast(StreamEvent::Watermark(wm)).await?;
                     }
                 }
             }
         }
-
-        if !closed_on_full_eof {
-            let close_outs = self.operator.on_close(&mut self.ctx).await?;
-            Self::dispatch_stream_outputs(&mut self.ctx, close_outs).await?;
-        }
-
-        info!(
-            vertex = self.ctx.vertex_id,
-            subtask = self.ctx.subtask_idx,
-            "subtask shutdown"
-        );
         Ok(())
     }
 
-    async fn run_source_loop(mut self) -> Result<(), RunError> {
-        while let Some(cmd) = self.control_rx.recv().await {
-            if Self::handle_control_command(&mut self.operator, &mut self.ctx, cmd).await? {
-                break;
+    async fn forward_signal(
+        &mut self,
+        event: StreamEvent,
+        ctx: &mut TaskContext,
+    ) -> Result<(), RunError> {
+        if let Some(next) = &mut self.next {
+            next.process_event(0, TrackedEvent::control(event), ctx).await?;
+        } else {
+            match event {
+                StreamEvent::Watermark(wm) => ctx.broadcast(StreamEvent::Watermark(wm)).await?,
+                StreamEvent::Barrier(b) => ctx.broadcast(StreamEvent::Barrier(b)).await?,
+                StreamEvent::EndOfStream => ctx.broadcast(StreamEvent::EndOfStream).await?,
+                StreamEvent::Data(_) => unreachable!(),
             }
         }
-        let close_outs = self.operator.on_close(&mut self.ctx).await?;
-        Self::dispatch_stream_outputs(&mut self.ctx, close_outs).await?;
-        if !self.ctx.outboxes.is_empty() {
-            self.ctx.broadcast(StreamEvent::EndOfStream).await?;
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl OperatorDrive for ChainedDriver {
+    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<(), RunError> {
+        self.operator.on_start(ctx).await?;
+        if let Some(next) = &mut self.next {
+            next.on_start(ctx).await?;
         }
-        info!(
-            vertex = self.ctx.vertex_id,
-            subtask = self.ctx.subtask_idx,
-            "Source subtask finished"
-        );
         Ok(())
     }
 
-    async fn handle_control_command(
-        operator: &mut Box<dyn MessageOperator>,
+    async fn process_event(
+        &mut self,
+        input_idx: usize,
+        tracked: TrackedEvent,
         ctx: &mut TaskContext,
-        cmd: ControlCommand,
     ) -> Result<bool, RunError> {
-        if let ControlCommand::TriggerCheckpoint { barrier } = &cmd {
-            let barrier: CheckpointBarrier = barrier.clone().into();
-            if let Err(e) = operator.snapshot_state(barrier, ctx).await {
-                error!("Source snapshot failed: {}", e);
+        let mut should_stop = false;
+        match tracked.event {
+            StreamEvent::Data(batch) => {
+                let outputs = self.operator.process_data(input_idx, batch, ctx).await?;
+                self.dispatch_outputs(outputs, ctx).await?;
+            }
+            StreamEvent::Watermark(wm) => {
+                let outputs = self.operator.process_watermark(wm.clone(), ctx).await?;
+                self.dispatch_outputs(outputs, ctx).await?;
+                self.forward_signal(StreamEvent::Watermark(wm), ctx).await?;
+            }
+            StreamEvent::Barrier(barrier) => {
+                self.operator.snapshot_state(barrier.clone(), ctx).await?;
+                self.forward_signal(StreamEvent::Barrier(barrier), ctx).await?;
+            }
+            StreamEvent::EndOfStream => {
+                should_stop = true;
+                self.forward_signal(StreamEvent::EndOfStream, ctx).await?;
             }
-            ctx.broadcast(StreamEvent::Barrier(barrier)).await?;
         }
+        Ok(should_stop)
+    }
 
-        if let ControlCommand::Commit { epoch } = &cmd {
-            if let Err(e) = operator.commit_checkpoint(*epoch, ctx).await {
-                error!("commit_checkpoint failed: {}", e);
+    async fn handle_control(
+        &mut self,
+        cmd: ControlCommand,
+        ctx: &mut TaskContext,
+    ) -> Result<bool, RunError> {
+        let mut stop = false;
+        match &cmd {
+            ControlCommand::TriggerCheckpoint { barrier } => {
+                let b: CheckpointBarrier = barrier.clone().into();
+                self.operator.snapshot_state(b, ctx).await?;
+            }
+            ControlCommand::Commit { epoch } => {
+                self.operator.commit_checkpoint(*epoch, ctx).await?;
+            }
+            ControlCommand::Stop { mode } => {
+                if *mode == StopMode::Immediate {
+                    stop = true;
+                }
             }
+            ControlCommand::DropState | ControlCommand::Start | ControlCommand::UpdateConfig { .. } => {}
         }
 
-        match operator.handle_control(cmd, ctx).await {
-            Ok(should_stop) => Ok(should_stop),
-            Err(e) => {
-                warn!("handle_control error: {}", e);
-                Ok(false)
+        if let Some(next) = &mut self.next {
+            if next.handle_control(cmd, ctx).await? {
+                stop = true;
             }
+        } else if let ControlCommand::TriggerCheckpoint { barrier } = cmd {
+            ctx.broadcast(StreamEvent::Barrier(barrier.into())).await?;
         }
+
+        Ok(stop)
     }
 
-    async fn dispatch_stream_outputs(
-        ctx: &mut TaskContext,
-        outputs: Vec<StreamOutput>,
-    ) -> Result<(), RunError> {
-        for out in outputs {
-            match out {
-                StreamOutput::Forward(b) => ctx.collect(b).await?,
-                StreamOutput::Keyed(hash, b) => ctx.collect_keyed(hash, b).await?,
-                StreamOutput::Broadcast(b) => ctx.collect(b).await?,
-                StreamOutput::Watermark(wm) => {
-                    ctx.broadcast(StreamEvent::Watermark(wm)).await?;
-                }
-            }
+    async fn on_close(&mut self, ctx: &mut TaskContext) -> Result<(), RunError> {
+        let close_outs = self.operator.on_close(ctx).await?;
+        self.dispatch_outputs(close_outs, ctx).await?;
+        if let Some(next) = &mut self.next {
+            next.on_close(ctx).await?;
         }
         Ok(())
     }
+}
 
-    async fn dispatch_stream_event(
-        st: &mut EventDispatchState<'_>,
-        input_idx: usize,
-        tracked: TrackedEvent,
-    ) -> Result<bool, RunError> {
-        let event = tracked.event;
-        match event {
-            StreamEvent::Data(batch) => {
-                let outputs = st
-                    .operator
-                    .process_data(input_idx, batch, st.ctx)
-                    .await?;
-                Self::dispatch_stream_outputs(st.ctx, outputs).await?;
-            }
-            StreamEvent::Watermark(wm) => {
-                if let Some(aligned_wm) = st.wm_tracker.update(input_idx, wm) {
-                    if let Watermark::EventTime(t) = aligned_wm {
-                        st.ctx.advance_watermark(t);
-                    }
-                    let outputs = st
-                        .operator
-                        .process_watermark(aligned_wm.clone(), st.ctx)
-                        .await?;
-                    Self::dispatch_stream_outputs(st.ctx, outputs).await?;
-                    st.ctx
-                        .broadcast(StreamEvent::Watermark(aligned_wm))
-                        .await?;
-                }
+// ==========================================
+// Part 2: physical execution layer - pipeline (Physical Driver)
+// ==========================================
+
+pub struct Pipeline { // physical driver: feeds input streams and control commands into an operator chain
+    chain_head: Box<dyn OperatorDrive>, // head of the operator chain built from the operator list
+    ctx: TaskContext,
+    inboxes: Vec<BoxedEventStream>, // input streams; moved into a `StreamMap` when `run` starts
+    control_rx: Receiver<ControlCommand>, // control commands; polled first in `run`
+    // Alignment state, sized by the number of inputs in `new`.
+    wm_tracker: WatermarkTracker, // merges per-input watermarks; also counts end-of-stream inputs
+    barrier_aligner: BarrierAligner, // reports whether a barrier is still pending or complete across inputs
+    /// Input streams removed from the polling set while barriers are not yet aligned (backpressure).
+    paused_streams: Vec<Option<BoxedEventStream>>,
+}
+
+impl Pipeline {
+    pub fn new(
+        operators: Vec<Box<dyn MessageOperator>>,
+        ctx: TaskContext,
+        inboxes: Vec<BoxedEventStream>,
+        control_rx: Receiver<ControlCommand>,
+    ) -> Result<Self, RunError> {
+        let input_count = inboxes.len();
+        let chain_head = ChainedDriver::build_chain(operators)
+            .ok_or_else(|| RunError::internal("Cannot build pipeline with empty operators"))?;
+
+        let paused_streams = (0..input_count).map(|_| None).collect();
+
+        Ok(Self {
+            chain_head,
+            ctx,
+            inboxes,
+            control_rx,
+            wm_tracker: WatermarkTracker::new(input_count),
+            barrier_aligner: BarrierAligner::new(input_count),
+            paused_streams,
+        })
+    }
+
+    pub async fn run(mut self) -> Result<(), RunError> {
+        let span = info_span!(
+            "pipeline_run",
+            job_id = %self.ctx.job_id,
+            vertex = self.ctx.vertex_id
+        );
+
+        async move {
+            info!("Pipeline initializing...");
+            self.chain_head.on_start(&mut self.ctx).await?;
+
+            let mut active_streams = StreamMap::new();
+            for (i, stream) in std::mem::take(&mut self.inboxes).into_iter().enumerate() {
+                active_streams.insert(i, stream);
             }
-            StreamEvent::Barrier(barrier) => {
-                match st.barrier_aligner.mark(input_idx, &barrier) {
-                    AlignmentStatus::Pending => {}
-                    AlignmentStatus::Complete(buffered) => {
-                        if let Err(e) = st.operator.snapshot_state(barrier, st.ctx).await {
-                            error!("Operator snapshot failed: {}", e);
+
+            loop {
+                tokio::select! {
+                    biased;
+
+                    Some(cmd) = self.control_rx.recv() => {
+                        if self.chain_head.handle_control(cmd, &mut self.ctx).await? {
+                            break;
                         }
-                        st.ctx.broadcast(StreamEvent::Barrier(barrier)).await?;
-                        for pair in buffered {
-                            st.work.push_back(pair);
+                    }
+
+                    Some((idx, tracked_event)) = active_streams.next() => {
+                        match tracked_event.event {
+                            StreamEvent::Data(batch) => {
+                                self.chain_head
+                                    .process_event(
+                                        idx,
+                                        TrackedEvent::control(StreamEvent::Data(batch)),
+                                        &mut self.ctx,
+                                    )
+                                    .await?;
+                            }
+
+                            StreamEvent::Barrier(barrier) => {
+                                match self.barrier_aligner.mark(idx, &barrier) {
+                                    AlignmentStatus::Pending => {
+                                        if let Some(stream) = active_streams.remove(&idx) {
+                                            self.paused_streams[idx] = Some(stream);
+                                        }
+                                    }
+                                    AlignmentStatus::Complete => {
+                                        self.chain_head
+                                            .process_event(
+                                                idx,
+                                                TrackedEvent::control(StreamEvent::Barrier(barrier)),
+                                                &mut self.ctx,
+                                            )
+                                            .await?;
+
+                                        for i in 0..self.paused_streams.len() {
+                                            if let Some(stream) = self.paused_streams[i].take() {
+                                                active_streams.insert(i, stream);
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+
+                            StreamEvent::Watermark(wm) => {
+                                if let Some(aligned_wm) = self.wm_tracker.update(idx, wm) {
+                                    if let Watermark::EventTime(t) = aligned_wm {
+                                        self.ctx.advance_watermark(t);
+                                    }
+                                    self.chain_head
+                                        .process_event(
+                                            idx,
+                                            TrackedEvent::control(StreamEvent::Watermark(aligned_wm)),
+                                            &mut self.ctx,
+                                        )
+                                        .await?;
+                                }
+                            }
+
+                            StreamEvent::EndOfStream => {
+                                if self.wm_tracker.increment_eof() == self.wm_tracker.input_count() {
+                                    self.chain_head
+                                        .process_event(
+                                            idx,
+                                            TrackedEvent::control(StreamEvent::EndOfStream),
+                                            &mut self.ctx,
+                                        )
+                                        .await?;
+                                    break;
+                                }
+                            }
                         }
                     }
+
+                    else => break,
                 }
             }
-            StreamEvent::EndOfStream => {
-                *st.eof_count += 1;
-                if *st.eof_count == st.input_count {
-                    let close_outs = st.operator.on_close(st.ctx).await?;
-                    Self::dispatch_stream_outputs(st.ctx, close_outs).await?;
-                    *st.closed_on_full_eof = true;
-                    st.ctx.broadcast(StreamEvent::EndOfStream).await?;
-                    return Ok(true);
-                }
-            }
+
+            self.teardown().await
         }
-        Ok(false)
+        .instrument(span)
+        .await
     }
-}
 
-struct EventDispatchState<'a> {
-    operator: &'a mut Box<dyn MessageOperator>,
-    ctx: &'a mut TaskContext,
-    work: &'a mut VecDeque<(usize, TrackedEvent)>,
-    wm_tracker: &'a mut WatermarkTracker,
-    barrier_aligner: &'a mut BarrierAligner,
-    eof_count: &'a mut usize,
-    closed_on_full_eof: &'a mut bool,
-    input_count: usize,
+    async fn teardown(mut self) -> Result<(), RunError> { // consumes the pipeline once `run`'s loop has ended
+        info!("Pipeline tearing down...");
+        self.chain_head.on_close(&mut self.ctx).await?; // NOTE(review): on the EndOfStream path `run` has already forwarded EndOfStream, so outputs returned by `on_close` follow it — confirm downstream accepts that
+        Ok(())
+    }
 }
+
+/// Alias that keeps the name in line with the execution engine's terminology.
+pub type SubtaskRunner = Pipeline;
diff --git a/src/runtime/streaming/execution/source.rs b/src/runtime/streaming/execution/source.rs
index 9fe1983e..d51132ac 100644
--- a/src/runtime/streaming/execution/source.rs
+++ b/src/runtime/streaming/execution/source.rs
@@ -1,18 +1,25 @@
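+//! Physical driver for source tasks: control plane first, a non-blocking `fetch_next` contract, and optional push-down into a fused operator chain.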
+
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::source::{SourceEvent, SourceOperator};
 use crate::runtime::streaming::error::RunError;
+use crate::runtime::streaming::execution::runner::OperatorDrive;
 use crate::runtime::streaming::protocol::control::ControlCommand;
 use crate::runtime::streaming::protocol::event::StreamEvent;
+use crate::runtime::streaming::protocol::tracked::TrackedEvent;
+use crate::sql::common::CheckpointBarrier;
 use std::time::Duration;
 use tokio::sync::mpsc::Receiver;
-use tokio::time::sleep;
-use tracing::{debug, info, warn};
-use crate::sql::common::CheckpointBarrier;
+use tokio::time::{interval, MissedTickBehavior};
+use tracing::{info, info_span, warn, Instrument};
 
 pub const SOURCE_IDLE_SLEEP: Duration = Duration::from_millis(50);
+pub const WATERMARK_EMIT_INTERVAL: Duration = Duration::from_millis(200); // period of the timer on which `SourceRunner::run` calls `poll_watermark`
 
 pub struct SourceRunner {
     operator: Box<dyn SourceOperator>,
+    /// With a chain, data and signals reach `collect` / `broadcast` only via the chain tail; without one they go straight to `TaskContext`.
+    chain_head: Option<Box<dyn OperatorDrive>>,
     ctx: TaskContext,
     control_rx: Receiver<ControlCommand>,
 }
@@ -20,101 +27,144 @@ pub struct SourceRunner {
 impl SourceRunner {
     pub fn new(
         operator: Box<dyn SourceOperator>,
+        chain_head: Option<Box<dyn OperatorDrive>>,
         ctx: TaskContext,
         control_rx: Receiver<ControlCommand>,
     ) -> Self {
         Self {
             operator,
+            chain_head,
             ctx,
             control_rx,
         }
     }
 
     pub async fn run(mut self) -> Result<(), RunError> {
-        info!(
-            job_id = %self.ctx.job_id,
+        let span = info_span!(
+            "source_run",
             vertex = self.ctx.vertex_id,
-            subtask = self.ctx.subtask_idx,
-            operator = %self.operator.name(),
-            "source subtask starting"
+            op = self.operator.name()
         );
 
-        self.operator.on_start(&mut self.ctx).await?;
-
-        let mut is_running = true;
-        let mut idle_pending = false;
-
-        while is_running {
-            tokio::select! {
-                biased;
-                cmd_opt = self.control_rx.recv() => {
-                    match cmd_opt {
-                        None => {
-                            debug!(
-                                vertex = self.ctx.vertex_id,
-                                subtask = self.ctx.subtask_idx,
-                                "source control channel closed"
-                            );
-                            is_running = false;
-                        }
-                        Some(cmd) => {
-                            match cmd {
-                                ControlCommand::Stop { .. } => {
+        async move {
+            info!("Source subtask starting");
+            self.operator.on_start(&mut self.ctx).await?;
+            if let Some(chain) = &mut self.chain_head {
+                chain.on_start(&mut self.ctx).await?;
+            }
+
+            let mut idle_timer = interval(SOURCE_IDLE_SLEEP);
+            idle_timer.set_missed_tick_behavior(MissedTickBehavior::Skip);
+
+            let mut wm_timer = interval(WATERMARK_EMIT_INTERVAL);
+            wm_timer.set_missed_tick_behavior(MissedTickBehavior::Skip);
+
+            let mut is_idle = false;
+            let mut is_running = true;
+
+            while is_running {
+                tokio::select! {
+                    biased;
+
+                    cmd_opt = self.control_rx.recv() => {
+                        match cmd_opt {
+                            None => is_running = false,
+                            Some(cmd) => {
+                                if self.handle_control(cmd).await? {
                                     is_running = false;
                                 }
-                                ControlCommand::TriggerCheckpoint { barrier } => {
-                                    let barrier: CheckpointBarrier = barrier.into();
-                                    self.operator
-                                        .snapshot_state(barrier, &mut self.ctx)
-                                        .await?;
-                                    self.ctx
-                                        .broadcast(StreamEvent::Barrier(barrier))
-                                        .await?;
-                                }
-                                ControlCommand::Start
-                                | ControlCommand::DropState
-                                | ControlCommand::Commit { .. }
-                                | ControlCommand::UpdateConfig { .. } => {
-                                    debug!(?cmd, "source: ignored control command");
-                                }
                             }
                         }
                     }
-                }
-                _ = sleep(SOURCE_IDLE_SLEEP), if is_running && idle_pending => {
-                    idle_pending = false;
-                }
-                fetch_res = self.operator.fetch_next(&mut self.ctx), if is_running && !idle_pending => {
-                    match fetch_res {
-                        Ok(SourceEvent::Data(batch)) => {
-                            self.ctx.collect(batch).await?;
-                        }
-                        Ok(SourceEvent::Watermark(wm)) => {
-                            self.ctx.broadcast(StreamEvent::Watermark(wm)).await?;
-                        }
-                        Ok(SourceEvent::Idle) => {
-                            idle_pending = true;
+
+                    _ = wm_timer.tick() => {
+                        if let Some(wm) = self.operator.poll_watermark() {
+                            self.dispatch_event(StreamEvent::Watermark(wm)).await?;
                         }
-                        Err(e) => {
-                            warn!(
-                                vertex = self.ctx.vertex_id,
-                                error = %e,
-                                "fetch_next error"
-                            );
-                            return Err(RunError::Operator(e));
+                    }
+
+                    _ = idle_timer.tick(), if is_idle => {
+                        is_idle = false;
+                    }
+
+                    fetch_res = self.operator.fetch_next(&mut self.ctx), if !is_idle => {
+                        match fetch_res {
+                            Ok(SourceEvent::Data(batch)) => {
+                                self.dispatch_event(StreamEvent::Data(batch)).await?;
+                            }
+                            Ok(SourceEvent::Watermark(wm)) => {
+                                self.dispatch_event(StreamEvent::Watermark(wm)).await?;
+                            }
+                            Ok(SourceEvent::Idle) => {
+                                is_idle = true;
+                                idle_timer.reset();
+                            }
+                            Ok(SourceEvent::EndOfStream) => {
+                                self.dispatch_event(StreamEvent::EndOfStream).await?;
+                                is_running = false;
+                            }
+                            Err(e) => {
+                                warn!("fetch_next error: {}", e);
+                                return Err(RunError::Operator(e));
+                            }
                         }
                     }
                 }
             }
+
+            self.teardown().await
         }
+        .instrument(span)
+        .await
+    }
 
-        self.operator.on_close(&mut self.ctx).await?;
+    async fn dispatch_event(&mut self, event: StreamEvent) -> Result<(), RunError> { // routes one event: into the chain if present, else via TaskContext
+        if let Some(chain) = &mut self.chain_head {
+            let _stop = chain // NOTE(review): the value returned by process_event is discarded — confirm a stop request from the chain may be ignored
+                .process_event(0, TrackedEvent::control(event), &mut self.ctx) // NOTE(review): Data events are wrapped with `control` too; 0 is presumably the input index — confirm
+                .await?;
+        } else {
+            match event { // no chain: data is collected, every other event is broadcast unchanged
+                StreamEvent::Data(b) => self.ctx.collect(b).await?,
+                StreamEvent::Watermark(w) => {
+                    self.ctx.broadcast(StreamEvent::Watermark(w)).await?;
+                }
+                StreamEvent::Barrier(b) => {
+                    self.ctx.broadcast(StreamEvent::Barrier(b)).await?;
+                }
+                StreamEvent::EndOfStream => {
+                    self.ctx.broadcast(StreamEvent::EndOfStream).await?;
+                }
+            }
+        }
+        Ok(())
+    }
 
-        info!(
-            vertex = self.ctx.vertex_id,
-            subtask = self.ctx.subtask_idx,
-            "source subtask shutdown"
-        );
+    async fn handle_control(&mut self, cmd: ControlCommand) -> Result<bool, RunError> { // Ok(true) tells the run loop to stop
+        match cmd {
+            ControlCommand::TriggerCheckpoint { barrier } => {
+                let b: CheckpointBarrier = barrier.into();
+                self.operator.snapshot_state(b.clone(), &mut self.ctx).await?; // snapshot the source first ...
+                self.dispatch_event(StreamEvent::Barrier(b)).await?; // ... then send the barrier downstream
+            }
+            ControlCommand::Stop { .. } => return Ok(true),
+            other => { // every remaining command goes to the chain only; with no chain it is dropped
+                if let Some(chain) = &mut self.chain_head {
+                    if chain.handle_control(other, &mut self.ctx).await? {
+                        return Ok(true);
+                    }
+                }
+            }
+        }
+        Ok(false)
+    }
+
+    async fn teardown(mut self) -> Result<(), RunError> { // consumes the runner: closes the source operator, then the chain (if any)
+        self.operator.on_close(&mut self.ctx).await?; // NOTE(review): on error this returns early and the chain below is never closed
+        if let Some(chain) = &mut self.chain_head {
+            chain.on_close(&mut self.ctx).await?;
+        }
+        info!("Source subtask shutdown");
         Ok(())
     }
 }
diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/runtime/streaming/execution/tracker/barrier_aligner.rs
index e284922b..05f2cc90 100644
--- a/src/runtime/streaming/execution/tracker/barrier_aligner.rs
+++ b/src/runtime/streaming/execution/tracker/barrier_aligner.rs
@@ -1,13 +1,15 @@
-//! Chandy–Lamport 风格屏障对齐。
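+//! Chandy–Lamport-style barrier alignment (zero in-memory buffering: while unaligned, input streams are removed from the polling pool, relying on the underlying backpressure).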
 
 use std::collections::HashSet;
-use crate::runtime::streaming::protocol::TrackedEvent;
+
 use crate::sql::common::CheckpointBarrier;
 
 #[derive(Debug)]
 pub enum AlignmentStatus {
+    /// Not yet aligned: the caller should suspend (pause) the current channel in the `StreamMap`.
     Pending,
-    Complete(Vec<(usize, TrackedEvent)>),
+    /// Aligned: the caller triggers the snapshot and wakes (resumes) all suspended channels.
+    Complete,
 }
 
 #[derive(Debug)]
@@ -15,7 +17,6 @@ pub struct BarrierAligner {
     input_count: usize,
     current_epoch: Option<u32>,
     reached_inputs: HashSet<usize>,
-    buffered_events: Vec<(usize, TrackedEvent)>,
 }
 
 impl BarrierAligner {
@@ -24,34 +25,23 @@ impl BarrierAligner {
             input_count,
             current_epoch: None,
             reached_inputs: HashSet::new(),
-            buffered_events: Vec::new(),
         }
     }
 
-    pub fn is_blocked(&self, input_idx: usize) -> bool {
-        self.current_epoch.is_some() && self.reached_inputs.contains(&input_idx)
-    }
-
-    pub fn buffer_event(&mut self, input_idx: usize, event: TrackedEvent) {
-        self.buffered_events.push((input_idx, event));
-    }
-
     pub fn mark(&mut self, input_idx: usize, barrier: &CheckpointBarrier) -> AlignmentStatus {
         if self.current_epoch != Some(barrier.epoch) {
             self.current_epoch = Some(barrier.epoch);
             self.reached_inputs.clear();
-            self.buffered_events.clear();
         }
 
         self.reached_inputs.insert(input_idx);
 
         if self.reached_inputs.len() == self.input_count {
-            let released = std::mem::take(&mut self.buffered_events);
             self.current_epoch = None;
             self.reached_inputs.clear();
-            AlignmentStatus::Complete(released)
+            AlignmentStatus::Complete
         } else {
             AlignmentStatus::Pending
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/runtime/streaming/execution/tracker/watermark_tracker.rs
index 29233fc3..ca2f082f 100644
--- a/src/runtime/streaming/execution/tracker/watermark_tracker.rs
+++ b/src/runtime/streaming/execution/tracker/watermark_tracker.rs
@@ -5,6 +5,7 @@ use crate::sql::common::Watermark;
 pub struct WatermarkTracker {
     watermarks: Vec<Option<Watermark>>,
     current_min_watermark: Option<Watermark>,
+    eof_count: usize,
 }
 
 impl WatermarkTracker {
@@ -12,6 +13,7 @@ impl WatermarkTracker {
         Self {
             watermarks: vec![None; input_count],
             current_min_watermark: None,
+            eof_count: 0,
         }
     }
 
@@ -31,6 +33,15 @@ impl WatermarkTracker {
         self.current_min_watermark = Some(new_min);
         Some(new_min)
     }
+
+    pub fn increment_eof(&mut self) -> usize { // records one more EOF and returns the updated total
+        self.eof_count += 1;
+        self.eof_count
+    }
+
+    pub fn input_count(&self) -> usize { // one watermark slot is allocated per input, so the slot count is the input count
+        self.watermarks.len()
+    }
 }
 
 #[cfg(test)]
diff --git a/src/runtime/streaming/factory/registry/kafka_factory.rs b/src/runtime/streaming/factory/registry/kafka_factory.rs
new file mode 100644
index 00000000..6a451166
--- /dev/null
+++ b/src/runtime/streaming/factory/registry/kafka_factory.rs
@@ -0,0 +1,328 @@
+//! Kafka source/sink: builds physical operators from [`ConnectorOp`] + [`OperatorConfig`] (merging authentication and client configuration).
+
+use anyhow::{anyhow, bail, Context, Result};
+use prost::Message;
+use std::collections::HashMap;
+use std::num::NonZeroU32;
+use std::sync::Arc;
+
+use protocol::grpc::api::ConnectorOp;
+use tracing::{info, warn};
+
+use super::OperatorConstructor;
+use crate::runtime::streaming::api::operator::{ConstructedOperator, Registry};
+use crate::runtime::streaming::api::source::SourceOffset;
+use crate::runtime::streaming::format::{
+    BadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding, Format as RuntimeFormat,
+    JsonFormat as RuntimeJsonFormat, TimestampFormat as RtTimestampFormat,
+};
+use crate::runtime::streaming::operators::sink::kafka::{ConsistencyMode, KafkaSinkOperator};
+use crate::runtime::streaming::operators::source::kafka::{BufferedDeserializer, KafkaSourceOperator};
+use crate::sql::common::formats::{
+    BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, JsonFormat as SqlJsonFormat,
+    TimestampFormat as SqlTimestampFormat,
+};
+use crate::sql::common::kafka_catalog::{
+    KafkaConfig, KafkaConfigAuthentication, KafkaTable, ReadMode, SinkCommitMode, TableType,
+};
+use crate::sql::common::{FsSchema, OperatorConfig};
+
+const DEFAULT_SOURCE_BATCH_SIZE: usize = 1024; // passed as the last argument of `BufferedDeserializer::new` for Kafka sources
+
+/// Merges connection-level authentication, global `connection_properties`, and table-level `client_configs` (table-level values override same-named keys).
+pub fn build_client_configs(config: &KafkaConfig, table: &KafkaTable) -> Result<HashMap<String, String>> {
+    let mut client_configs = HashMap::new();
+
+    match &config.authentication {
+        KafkaConfigAuthentication::None => {}
+        KafkaConfigAuthentication::Sasl {
+            protocol,
+            mechanism,
+            username,
+            password,
+        } => {
+            client_configs.insert("security.protocol".to_string(), protocol.clone());
+            client_configs.insert("sasl.mechanism".to_string(), mechanism.clone());
+            client_configs.insert("sasl.username".to_string(), username.clone());
+            client_configs.insert("sasl.password".to_string(), password.clone());
+        }
+        KafkaConfigAuthentication::AwsMskIam { region } => {
+            client_configs.insert("security.protocol".to_string(), "SASL_SSL".to_string());
+            client_configs.insert("sasl.mechanism".to_string(), "OAUTHBEARER".to_string());
+            client_configs.insert(
+                "sasl.oauthbearer.extensions".to_string(),
+                format!("logicalCluster=aws_msk;aws_region={region}"),
+            );
+        }
+    }
+
+    for (k, v) in &config.connection_properties { // replaces auth-derived keys of the same name, without a warning
+        client_configs.insert(k.clone(), v.clone());
+    }
+
+    for (k, v) in &table.client_configs {
+        if client_configs.contains_key(k) { // key already set by authentication or connection_properties
+            warn!(
+                "Kafka config key '{}' is defined in both connection and table; using table value",
+                k
+            );
+        }
+        client_configs.insert(k.clone(), v.clone());
+    }
+
+    Ok(client_configs) // nothing above can fail; the Result is always Ok
+}
+
+fn bad_data_policy(b: Option<BadData>) -> BadDataPolicy { // converts the optional `BadData` setting into a `BadDataPolicy`
+    match b.unwrap_or_default() { // `None` falls back to `BadData::default()`
+        BadData::Fail {} => BadDataPolicy::Fail,
+        BadData::Drop {} => BadDataPolicy::Drop,
+    }
+}
+
+fn sql_timestamp_format(t: SqlTimestampFormat) -> RtTimestampFormat { // variant-for-variant conversion between the two TimestampFormat enums
+    match t {
+        SqlTimestampFormat::RFC3339 => RtTimestampFormat::RFC3339,
+        SqlTimestampFormat::UnixMillis => RtTimestampFormat::UnixMillis,
+    }
+}
+
+fn sql_decimal_encoding(d: SqlDecimalEncoding) -> RtDecimalEncoding { // variant-for-variant conversion between the two DecimalEncoding enums
+    match d {
+        SqlDecimalEncoding::Number => RtDecimalEncoding::Number,
+        SqlDecimalEncoding::String => RtDecimalEncoding::String,
+        SqlDecimalEncoding::Bytes => RtDecimalEncoding::Bytes,
+    }
+}
+
+fn sql_json_format_to_runtime(j: &SqlJsonFormat) -> RuntimeJsonFormat { // builds the runtime JSON format from three fields of `j`
+    RuntimeJsonFormat {
+        timestamp_format: sql_timestamp_format(j.timestamp_format),
+        decimal_encoding: sql_decimal_encoding(j.decimal_encoding),
+        include_schema: j.include_schema, // copied through as-is
+    }
+}
+
+fn sql_format_to_runtime(f: SqlFormat) -> Result<RuntimeFormat> { // errors for any format other than Json / RawString / RawBytes
+    match f {
+        SqlFormat::Json(j) => Ok(RuntimeFormat::Json(sql_json_format_to_runtime(&j))),
+        SqlFormat::RawString(_) => Ok(RuntimeFormat::RawString), // the variant's payload is not carried over
+        SqlFormat::RawBytes(_) => Ok(RuntimeFormat::RawBytes), // the variant's payload is not carried over
+        other => bail!(
+            "Kafka connector: format '{}' is not supported for runtime deserializer/serializer yet",
+            other.name()
+        ),
+    }
+}
+
+fn kafka_table_offset_to_runtime(o: crate::sql::common::KafkaTableSourceOffset) -> SourceOffset { // variant-for-variant conversion to `SourceOffset`
+    use crate::sql::common::KafkaTableSourceOffset as KOff; // local alias to keep the match arms short
+    match o {
+        KOff::Latest => SourceOffset::Latest,
+        KOff::Earliest => SourceOffset::Earliest,
+        KOff::Group => SourceOffset::Group,
+    }
+}
+
+fn non_zero_rate_per_second(op: &OperatorConfig) -> NonZeroU32 { // messages-per-second limit derived from `op.rate_limit`, never zero
+    op.rate_limit
+        .as_ref()
+        .and_then(|r| NonZeroU32::new(r.messages_per_second.max(1))) // a configured 0 is clamped up to 1
+        .unwrap_or_else(|| NonZeroU32::new(1_000_000).expect("nonzero")) // no rate_limit configured: default to 1_000_000
+}
+
+fn sink_fs_schema_adjusted(
+    fs: FsSchema,
+    key_field: &Option<String>,
+    timestamp_field: &Option<String>,
+) -> Result<FsSchema> { // rebuilds `fs` with the optional key / timestamp column overrides applied
+    if key_field.is_none() && timestamp_field.is_none() {
+        return Ok(fs); // no overrides: the schema is returned untouched
+    }
+    let schema = fs.schema.clone();
+    let ts = if let Some(name) = timestamp_field { // timestamp index: looked up by column name, else kept from `fs`
+        schema
+            .column_with_name(name)
+            .ok_or_else(|| anyhow!("timestamp column '{name}' not found in schema"))?
+            .0
+    } else {
+        fs.timestamp_index
+    };
+    let keys = fs.clone_storage_key_indices(); // storage key indices are carried over unchanged
+    let routing = if let Some(name) = key_field {
+        let k = schema
+            .column_with_name(name)
+            .ok_or_else(|| anyhow!("key column '{name}' not found in schema"))?
+            .0;
+        Some(vec![k]) // the named key column becomes the only routing key
+    } else {
+        fs.clone_routing_key_indices()
+    };
+    Ok(FsSchema::new(schema, ts, keys, routing))
+}
+
+fn decode_operator_config(op: &ConnectorOp) -> Result<OperatorConfig> { // parses the JSON string in `op.config`
+    serde_json::from_str(&op.config).with_context(|| { // on failure the error names the connector
+        format!(
+            "Invalid OperatorConfig JSON for connector '{}'",
+            op.connector
+        )
+    })
+}
+
+/// Builds a Kafka source from a [`ConnectorOp`] (its `connector` must be `kafka`).
+pub struct KafkaSourceDispatcher;
+
+impl OperatorConstructor for KafkaSourceDispatcher {
+    fn with_config(&self, payload: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let op = ConnectorOp::decode(payload)
+            .context("Failed to decode ConnectorOp protobuf for Kafka Source")?;
+
+        if op.connector != "kafka" {
+            bail!(
+                "KafkaSourceDispatcher: expected connector 'kafka', got '{}'",
+                op.connector
+            );
+        }
+
+        let op_config = decode_operator_config(&op)?;
+
+        let kafka_config: KafkaConfig = serde_json::from_value(op_config.connection.clone())
+            .context("Failed to parse Kafka connection configuration")?;
+
+        let kafka_table: KafkaTable = serde_json::from_value(op_config.table.clone())
+            .context("Failed to parse Kafka table configuration")?;
+
+        let TableType::Source { // any other table kind is rejected in the `else` branch below
+            offset,
+            read_mode,
+            group_id,
+            group_id_prefix,
+        } = &kafka_table.kind
+        else {
+            bail!(
+                "Expected Kafka Source, got Sink configuration for topic '{}'",
+                kafka_table.topic
+            );
+        };
+
+        info!("Constructing Kafka Source for topic: {}", kafka_table.topic);
+
+        let mut client_configs = build_client_configs(&kafka_config, &kafka_table)?;
+        if let Some(ReadMode::ReadCommitted) = read_mode { // inserted after the merge, so it replaces any configured isolation.level
+            client_configs.insert("isolation.level".to_string(), "read_committed".to_string());
+        }
+
+        let sql_format = op_config
+            .format
+            .clone()
+            .context("Format must be specified for Kafka Source")?;
+        let runtime_format = sql_format_to_runtime(sql_format)?;
+        let fs = op_config
+            .input_schema
+            .clone()
+            .context("input_schema is required for Kafka Source")?;
+        let bad = bad_data_policy(op_config.bad_data.clone());
+
+        let deserializer: std::boxed::Box<
+            dyn crate::runtime::streaming::operators::source::kafka::BatchDeserializer,
+        > = Box::new(BufferedDeserializer::new(
+            runtime_format,
+            fs.schema.clone(),
+            bad,
+            DEFAULT_SOURCE_BATCH_SIZE,
+        ));
+
+        let source_op = KafkaSourceOperator::new(
+            kafka_table.topic.clone(),
+            kafka_config.bootstrap_servers.clone(),
+            group_id.clone(),
+            group_id_prefix.clone(),
+            kafka_table_offset_to_runtime(*offset),
+            client_configs,
+            non_zero_rate_per_second(&op_config),
+            op_config.metadata_fields, // moved out of `op_config`; the borrow on the previous line has already ended
+            deserializer,
+        );
+
+        Ok(ConstructedOperator::Source(Box::new(source_op)))
+    }
+}
+
+/// Builds a Kafka sink from a [`ConnectorOp`] (its `connector` must be `kafka`).
+pub struct KafkaSinkDispatcher;
+
+impl OperatorConstructor for KafkaSinkDispatcher {
+    fn with_config(&self, payload: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let op = ConnectorOp::decode(payload)
+            .context("Failed to decode ConnectorOp protobuf for Kafka Sink")?;
+
+        if op.connector != "kafka" {
+            bail!(
+                "KafkaSinkDispatcher: expected connector 'kafka', got '{}'",
+                op.connector
+            );
+        }
+
+        let op_config = decode_operator_config(&op)?;
+
+        let kafka_config: KafkaConfig = serde_json::from_value(op_config.connection.clone())
+            .context("Failed to parse Kafka connection configuration")?;
+
+        let kafka_table: KafkaTable = serde_json::from_value(op_config.table.clone())
+            .context("Failed to parse Kafka table configuration")?;
+
+        let TableType::Sink { // any other table kind is rejected in the `else` branch below
+            commit_mode,
+            key_field,
+            timestamp_field,
+        } = &kafka_table.kind
+        else {
+            bail!(
+                "Expected Kafka Sink, got Source configuration for topic '{}'",
+                kafka_table.topic
+            );
+        };
+
+        info!("Constructing Kafka Sink for topic: {}", kafka_table.topic);
+
+        let client_configs = build_client_configs(&kafka_config, &kafka_table)?;
+
+        let consistency = match commit_mode {
+            SinkCommitMode::ExactlyOnce => ConsistencyMode::ExactlyOnce,
+            SinkCommitMode::AtLeastOnce => ConsistencyMode::AtLeastOnce,
+        };
+
+        let sql_format = op_config
+            .format
+            .clone()
+            .context("Format must be specified for Kafka Sink")?;
+        let runtime_format = sql_format_to_runtime(sql_format)?;
+
+        let fs_in = op_config
+            .input_schema
+            .clone()
+            .context("input_schema is required for Kafka Sink")?;
+        let fs = sink_fs_schema_adjusted(fs_in, key_field, timestamp_field)?; // applies the key_field / timestamp_field overrides
+
+        let serializer = DataSerializer::new(runtime_format, fs.schema.clone());
+
+        let sink_op = KafkaSinkOperator::new(
+            kafka_table.topic.clone(),
+            kafka_config.bootstrap_servers.clone(),
+            consistency,
+            client_configs,
+            fs,
+            serializer,
+        );
+
+        Ok(ConstructedOperator::Operator(Box::new(sink_op))) // the sink is returned through the `Operator` variant, not `Source`
+    }
+}
+
+/// Registers the `KafkaSource` / `KafkaSink` constructors (called from [`super::OperatorFactory::register_builtins`]).
+pub fn register_kafka_plugins(factory: &mut super::OperatorFactory) {
+    factory.register("KafkaSource", Box::new(KafkaSourceDispatcher));
+    factory.register("KafkaSink", Box::new(KafkaSinkDispatcher));
+    info!("Registered Kafka connector plugins (KafkaSource, KafkaSink)");
+}
diff --git a/src/runtime/streaming/factory/registry.rs b/src/runtime/streaming/factory/registry/mod.rs
similarity index 88%
rename from src/runtime/streaming/factory/registry.rs
rename to src/runtime/streaming/factory/registry/mod.rs
index b8b45fff..9bb1148d 100644
--- a/src/runtime/streaming/factory/registry.rs
+++ b/src/runtime/streaming/factory/registry/mod.rs
@@ -9,7 +9,7 @@ use crate::runtime::streaming::api::operator::ConstructedOperator;
 use crate::runtime::streaming::operators::PassthroughOperator;
 use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor;
 use crate::runtime::streaming::operators::joins::{
-    InstantJoinConstructor, JoinWithExpirationConstructor, LookupJoinConstructor,
+    InstantJoinConstructor, JoinWithExpirationConstructor,
 };
 use crate::runtime::streaming::operators::key_by::KeyByConstructor;
 use crate::runtime::streaming::operators::watermark::WatermarkGeneratorConstructor;
@@ -18,11 +18,14 @@ use crate::runtime::streaming::operators::windows::{
     TumblingAggregateWindowConstructor, WindowFunctionConstructor,
 };
 
+pub mod kafka_factory;
+
+use kafka_factory::{register_kafka_plugins, KafkaSinkDispatcher, KafkaSourceDispatcher};
+
 use protocol::grpc::api::{
     ConnectorOp, ExpressionWatermarkConfig,
     JoinOperator as JoinOperatorProto,
     KeyPlanOperator as KeyByProto,
-    LookupJoinOperator as LookupJoinProto,
     SessionWindowAggregateOperator, SlidingWindowAggregateOperator,
     TumblingWindowAggregateOperator, UpdatingAggregateOperator,
     WindowFunctionOperator as WindowFunctionProto,
@@ -45,7 +48,7 @@ pub trait OperatorConstructor: Send + Sync {
 
 /// 持有 `name → OperatorConstructor` 映射与共享 [`Registry`]。
 ///
-/// [`TaskManager`] 在部署 TDD 时调用 [`create_operator`]，完成从字节流到运行时算子的
+/// `JobManager` 在部署任务时调用 [`create_operator`]，完成从字节流到运行时算子的
 /// 反射式实例化。
 pub struct OperatorFactory {
     constructors: HashMap<String, Box<dyn OperatorConstructor>>,
@@ -118,6 +121,8 @@ impl OperatorFactory {
         self.register("Projection", Box::new(PassthroughConstructor("Projection")));
         self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue")));
         self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey")));
+
+        register_kafka_plugins(self);
     }
 }
 
@@ -197,11 +202,8 @@ impl OperatorConstructor for InstantJoinBridge {
 
 struct LookupJoinBridge;
 impl OperatorConstructor for LookupJoinBridge {
-    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
-        let proto = LookupJoinProto::decode(config)
-            .map_err(|e| anyhow!("Decode LookupJoinOperator failed: {e}"))?;
-        let op = LookupJoinConstructor.with_config(proto, registry)?;
-        Ok(ConstructedOperator::Operator(Box::new(op)))
+    fn with_config(&self, _config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        Err(anyhow!("LookupJoin is not supported in the current runtime"))
     }
 }
 
@@ -232,24 +234,16 @@ impl OperatorConstructor for KeyByBridge {
 pub struct ConnectorSourceDispatcher;
 
 impl OperatorConstructor for ConnectorSourceDispatcher {
-    fn with_config(&self, config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
         let op = ConnectorOp::decode(config)
             .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?;
 
         match op.connector.as_str() {
-            "kafka" => {
-                // TODO: 委托给 crate::connectors::kafka::build_kafka_source(&op.config)
-                Err(anyhow!(
-                    "ConnectorSource '{}' factory wiring not yet implemented",
-                    op.connector
-                ))
-            }
-            "redis" => {
-                Err(anyhow!(
-                    "ConnectorSource '{}' factory wiring not yet implemented",
-                    op.connector
-                ))
-            }
+            "kafka" => KafkaSourceDispatcher.with_config(config, registry),
+            "redis" => Err(anyhow!(
+                "ConnectorSource '{}' factory wiring not yet implemented",
+                op.connector
+            )),
             other => Err(anyhow!("Unsupported source connector type: {}", other)),
         }
     }
@@ -258,18 +252,12 @@ impl OperatorConstructor for ConnectorSourceDispatcher {
 pub struct ConnectorSinkDispatcher;
 
 impl OperatorConstructor for ConnectorSinkDispatcher {
-    fn with_config(&self, config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
         let op = ConnectorOp::decode(config)
             .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?;
 
         match op.connector.as_str() {
-            "kafka" => {
-                // TODO: 委托给 crate::connectors::kafka::build_kafka_sink(&op.config)
-                Err(anyhow!(
-                    "ConnectorSink '{}' factory wiring not yet implemented",
-                    op.connector
-                ))
-            }
+            "kafka" => KafkaSinkDispatcher.with_config(config, registry),
             other => Err(anyhow!("Unsupported sink connector type: {}", other)),
         }
     }
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
index 82b02b3d..6413eba6 100644
--- a/src/runtime/streaming/job/job_manager.rs
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -1,100 +1,92 @@
 use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
+use std::sync::{Arc, OnceLock, RwLock};
 
-use protocol::grpc::api::{ChainedOperator, FsProgram};
+use anyhow::anyhow;
 use tokio::sync::mpsc;
-use tracing::error;
+use tokio_stream::wrappers::ReceiverStream;
+use tracing::{error, info, warn};
+
+use protocol::grpc::api::{ChainedOperator, FsProgram};
 
-use crate::runtime::streaming::api::operator::ConstructedOperator;
+use crate::runtime::streaming::api::context::TaskContext;
+use crate::runtime::streaming::api::operator::{ConstructedOperator, MessageOperator};
+use crate::runtime::streaming::execution::runner::Pipeline;
 use crate::runtime::streaming::factory::OperatorFactory;
 use crate::runtime::streaming::job::edge_manager::EdgeManager;
 use crate::runtime::streaming::job::models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
-use crate::runtime::streaming::job::pipeline_runner::{FusionOperatorChain, PipelineRunner};
 use crate::runtime::streaming::memory::MemoryPool;
+use crate::runtime::streaming::network::endpoint::{BoxedEventStream, PhysicalSender};
 use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
-use crate::runtime::streaming::storage::manager::TableManager;
+
+static GLOBAL_JOB_MANAGER: OnceLock<Arc<JobManager>> = OnceLock::new();
 
 pub struct JobManager {
     active_jobs: Arc<RwLock<HashMap<String, PhysicalExecutionGraph>>>,
     operator_factory: Arc<OperatorFactory>,
     memory_pool: Arc<MemoryPool>,
-    table_manager: Option<Arc<tokio::sync::Mutex<TableManager>>>,
 }
 
 impl JobManager {
-    pub fn new(
-        operator_factory: Arc<OperatorFactory>,
-        max_memory_bytes: usize,
-        table_manager: Option<Arc<tokio::sync::Mutex<TableManager>>>,
-    ) -> Self {
+    pub fn new(operator_factory: Arc<OperatorFactory>, max_memory_bytes: usize) -> Self {
         Self {
             active_jobs: Arc::new(RwLock::new(HashMap::new())),
             operator_factory,
             memory_pool: MemoryPool::new(max_memory_bytes),
-            table_manager,
         }
     }
 
-    /// 从逻辑计划点火物理线程
+    pub fn init(operator_factory: Arc<OperatorFactory>, max_memory_bytes: usize) -> anyhow::Result<()> {
+        let manager = Arc::new(Self::new(operator_factory, max_memory_bytes));
+        GLOBAL_JOB_MANAGER
+            .set(manager)
+            .map_err(|_| anyhow!("JobManager singleton already initialized"))
+    }
+
+    pub fn global() -> anyhow::Result<Arc<Self>> {
+        GLOBAL_JOB_MANAGER
+            .get()
+            .cloned()
+            .ok_or_else(|| anyhow!("JobManager not initialized. Call init() first."))
+    }
+
+    /// 核心主干：从逻辑计划点火物理流水线
     pub async fn submit_job(&self, program: FsProgram) -> anyhow::Result<String> {
         let job_id = format!("job-{}", chrono::Utc::now().timestamp_millis());
-
         let mut edge_manager = EdgeManager::build(&program.nodes, &program.edges);
-        let mut physical_pipelines = HashMap::new();
+        let mut pipelines = HashMap::new();
 
         for node in &program.nodes {
-            let pipe_id = node.node_index as u32;
-            let (inbox, outboxes) = edge_manager.take_endpoints(pipe_id);
-            let chain = self.create_chain(&node.operators)?;
-            let (ctrl_tx, ctrl_rx) = mpsc::channel(64);
+            let pipeline_id = node.node_index as u32;
+
+            let (raw_inboxes, raw_outboxes) = edge_manager.take_endpoints(pipeline_id);
+            let physical_outboxes = raw_outboxes.into_iter().map(PhysicalSender::Local).collect();
+            let physical_inboxes: Vec<BoxedEventStream> = raw_inboxes
+                .into_iter()
+                .map(|rx| Box::pin(ReceiverStream::new(rx)) as _)
+                .collect();
+
+            let operators = self.build_operator_chain(&node.operators)?;
+
+            let (control_tx, control_rx) = mpsc::channel(64);
             let status = Arc::new(RwLock::new(PipelineStatus::Initializing));
 
-            let thread_status = status.clone();
-            let job_id_for_thread = job_id.clone();
-            let exit_job_id = job_id_for_thread.clone();
-            let registry_ptr = self.active_jobs.clone();
-            let memory_pool = self.memory_pool.clone();
-            let table_manager = self.table_manager.clone();
-
-            let handle = std::thread::Builder::new()
-                .name(format!("Job-{}-Pipe-{}", job_id, pipe_id))
-                .spawn(move || {
-                    {
-                        let mut st = thread_status.write().unwrap();
-                        *st = PipelineStatus::Running;
-                    }
-
-                    let rt = tokio::runtime::Builder::new_current_thread()
-                        .enable_all()
-                        .build()
-                        .expect("build current thread runtime");
-
-                    let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
-                        rt.block_on(async move {
-                            let mut runner = PipelineRunner::new(
-                                pipe_id,
-                                chain,
-                                inbox,
-                                outboxes,
-                                ctrl_rx,
-                                job_id_for_thread.clone(),
-                                memory_pool,
-                                table_manager,
-                            );
-                            runner.run().await
-                        })
-                    }));
-
-                    Self::on_pipeline_exit(exit_job_id, pipe_id, result, thread_status, registry_ptr);
-                })?;
-
-            physical_pipelines.insert(
-                pipe_id,
+            let handle = self.spawn_pipeline_thread(
+                job_id.clone(),
+                pipeline_id,
+                operators,
+                physical_inboxes,
+                physical_outboxes,
+                control_rx,
+                Arc::clone(&status),
+            )?;
+
+            pipelines.insert(
+                pipeline_id,
                 PhysicalPipeline {
-                    pipeline_id: pipe_id,
+                    pipeline_id,
                     handle: Some(handle),
                     status,
-                    control_tx: ctrl_tx,
+                    control_tx,
                 },
             );
         }
@@ -102,97 +94,157 @@ impl JobManager {
         let graph = PhysicalExecutionGraph {
             job_id: job_id.clone(),
             program,
-            pipelines: physical_pipelines,
+            pipelines,
             start_time: std::time::Instant::now(),
         };
 
         self.active_jobs.write().unwrap().insert(job_id.clone(), graph);
+        info!(job_id = %job_id, "Job submitted successfully.");
+
         Ok(job_id)
     }
 
     pub async fn stop_job(&self, job_id: &str, mode: StopMode) -> anyhow::Result<()> {
-        let controllers = {
-            let jobs = self.active_jobs.read().unwrap();
-            let graph = jobs
+        let control_senders: Vec<_> = {
+            let jobs_guard = self.active_jobs.read().unwrap();
+            let graph = jobs_guard
                 .get(job_id)
-                .ok_or_else(|| anyhow::anyhow!("job not found: {job_id}"))?;
-            graph
-                .pipelines
-                .values()
-                .map(|p| p.control_tx.clone())
-                .collect::<Vec<_>>()
+                .ok_or_else(|| anyhow::anyhow!("Job not found: {job_id}"))?;
+
+            graph.pipelines.values().map(|p| p.control_tx.clone()).collect()
         };
 
-        for tx in controllers {
-            tx.send(ControlCommand::Stop { mode: mode.clone() }).await?;
+        for tx in control_senders {
+            let _ = tx.send(ControlCommand::Stop { mode: mode.clone() }).await;
         }
+
+        info!(job_id = %job_id, mode = ?mode, "Job stop signal dispatched.");
         Ok(())
     }
 
     pub fn get_pipeline_statuses(&self, job_id: &str) -> Option<HashMap<u32, PipelineStatus>> {
-        let jobs = self.active_jobs.read().unwrap();
-        let graph = jobs.get(job_id)?;
+        let jobs_guard = self.active_jobs.read().unwrap();
+        let graph = jobs_guard.get(job_id)?;
+
         Some(
-            graph
-                .pipelines
+            graph.pipelines
                 .iter()
-                .map(|(id, pipeline)| (*id, pipeline.status.read().unwrap().clone()))
+                .map(|(id, pipeline)| {
+                    (*id, pipeline.status.read().unwrap().clone())
+                })
                 .collect(),
         )
     }
 
-    fn create_chain(&self, operators: &[ChainedOperator]) -> anyhow::Result<FusionOperatorChain> {
-        let mut chain = Vec::with_capacity(operators.len());
-        for op in operators {
-            match self
-                .operator_factory
-                .create_operator(&op.operator_name, &op.operator_config)?
-            {
+    // ========================================================================
+    // 内部私有方法
+    // ========================================================================
+
+    fn build_operator_chain(
+        &self,
+        operator_configs: &[ChainedOperator],
+    ) -> anyhow::Result<Vec<Box<dyn MessageOperator>>> {
+        let mut chain = Vec::with_capacity(operator_configs.len());
+
+        for op_config in operator_configs {
+            let constructed = self.operator_factory
+                .create_operator(&op_config.operator_name, &op_config.operator_config)?;
+
+            match constructed {
                 ConstructedOperator::Operator(msg_op) => chain.push(msg_op),
                 ConstructedOperator::Source(_) => {
-                    return Err(anyhow::anyhow!(
-                        "source operator '{}' cannot be used inside a physical pipeline chain",
-                        op.operator_name
-                    ));
+                    anyhow::bail!(
+                        "Topology Error: Source operator '{}' cannot be scheduled inside a MessageOperator physical chain.",
+                        op_config.operator_name
+                    );
                 }
             }
         }
-        Ok(FusionOperatorChain::new(chain))
+        Ok(chain)
     }
 
-    fn on_pipeline_exit(
+    fn spawn_pipeline_thread(
+        &self,
         job_id: String,
-        pipe_id: u32,
-        result: std::thread::Result<anyhow::Result<()>>,
+        pipeline_id: u32,
+        operators: Vec<Box<dyn MessageOperator>>,
+        inboxes: Vec<BoxedEventStream>,
+        outboxes: Vec<PhysicalSender>,
+        control_rx: mpsc::Receiver<ControlCommand>,
         status: Arc<RwLock<PipelineStatus>>,
-        _registry: Arc<RwLock<HashMap<String, PhysicalExecutionGraph>>>,
+    ) -> anyhow::Result<std::thread::JoinHandle<()>> {
+        let memory_pool = Arc::clone(&self.memory_pool);
+        let thread_name = format!("Task-{job_id}-{pipeline_id}");
+
+        let handle = std::thread::Builder::new()
+            .name(thread_name)
+            .spawn(move || {
+                *status.write().unwrap() = PipelineStatus::Running;
+
+                let rt = tokio::runtime::Builder::new_current_thread()
+                    .enable_all()
+                    .build()
+                    .expect("Failed to build current-thread Tokio runtime for pipeline");
+
+                let job_id_inner = job_id.clone();
+                let execution_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+                    rt.block_on(async move {
+                        let ctx = TaskContext::new(
+                            job_id_inner,
+                            pipeline_id,
+                            0,
+                            1,
+                            outboxes,
+                            memory_pool,
+                        );
+
+                        let pipeline = Pipeline::new(operators, ctx, inboxes, control_rx)
+                            .map_err(|e| anyhow::anyhow!("Pipeline init failed: {e}"))?;
+
+                        pipeline.run().await.map_err(|e| anyhow::anyhow!("Pipeline execution failed: {e}"))
+                    })
+                }));
+
+                Self::handle_pipeline_exit(&job_id, pipeline_id, execution_result, &status);
+            })?;
+
+        Ok(handle)
+    }
+
+    fn handle_pipeline_exit(
+        job_id: &str,
+        pipeline_id: u32,
+        thread_result: std::thread::Result<anyhow::Result<()>>,
+        status: &RwLock<PipelineStatus>,
     ) {
-        let mut needs_abort = false;
-        match result {
+        let mut is_fatal = false;
+        let final_status = match thread_result {
+            Ok(Ok(_)) => {
+                info!(job_id = %job_id, pipeline_id = pipeline_id, "Pipeline finished gracefully.");
+                PipelineStatus::Finished
+            }
             Ok(Err(e)) => {
-                *status.write().unwrap() = PipelineStatus::Failed {
+                error!(job_id = %job_id, pipeline_id = pipeline_id, error = %e, "Pipeline failed.");
+                is_fatal = true;
+                PipelineStatus::Failed {
                     error: e.to_string(),
                     is_panic: false,
-                };
-                needs_abort = true;
+                }
             }
             Err(_) => {
-                *status.write().unwrap() = PipelineStatus::Failed {
-                    error: "panic".into(),
+                error!(job_id = %job_id, pipeline_id = pipeline_id, "Pipeline thread panicked!");
+                is_fatal = true;
+                PipelineStatus::Failed {
+                    error: "Task thread encountered an unexpected panic".into(),
                     is_panic: true,
-                };
-                needs_abort = true;
-            }
-            Ok(Ok(_)) => {
-                *status.write().unwrap() = PipelineStatus::Finished;
+                }
             }
-        }
+        };
 
-        if needs_abort {
-            error!(
-                "Pipeline {}-{} failed. Initiating Job Abort.",
-                job_id, pipe_id
-            );
+        *status.write().unwrap() = final_status;
+
+        if is_fatal {
+            warn!(job_id = %job_id, pipeline_id = pipeline_id, "Pipeline failure detected, Job should be aborted or recovered.");
         }
     }
 }
diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs
index 9490e84e..448c26cd 100644
--- a/src/runtime/streaming/job/mod.rs
+++ b/src/runtime/streaming/job/mod.rs
@@ -1,7 +1,6 @@
 pub mod edge_manager;
 pub mod job_manager;
 pub mod models;
-pub mod pipeline_runner;
 
 pub use job_manager::JobManager;
 pub use models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
diff --git a/src/runtime/streaming/job/pipeline_runner.rs b/src/runtime/streaming/job/pipeline_runner.rs
deleted file mode 100644
index 57c0fec8..00000000
--- a/src/runtime/streaming/job/pipeline_runner.rs
+++ /dev/null
@@ -1,242 +0,0 @@
-use std::future::pending;
-use std::sync::Arc;
-
-use tokio::sync::mpsc;
-
-use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
-use crate::runtime::streaming::memory::MemoryPool;
-use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
-use crate::runtime::streaming::protocol::event::StreamEvent;
-use crate::runtime::streaming::protocol::tracked::TrackedEvent;
-use crate::runtime::streaming::storage::manager::TableManager;
-use crate::sql::common::CheckpointBarrier;
-
-pub struct PipelineRunner {
-    chain: FusionOperatorChain,
-    inbox: Option<mpsc::Receiver<TrackedEvent>>,
-    outboxes: Vec<mpsc::Sender<TrackedEvent>>,
-    control_rx: mpsc::Receiver<ControlCommand>,
-    ctx: TaskContext,
-}
-
-impl PipelineRunner {
-    pub fn new(
-        pipeline_id: u32,
-        chain: FusionOperatorChain,
-        inbox: Option<mpsc::Receiver<TrackedEvent>>,
-        outboxes: Vec<mpsc::Sender<TrackedEvent>>,
-        control_rx: mpsc::Receiver<ControlCommand>,
-        job_id: String,
-        memory_pool: Arc<MemoryPool>,
-        table_manager: Option<Arc<tokio::sync::Mutex<TableManager>>>,
-    ) -> Self {
-        Self {
-            chain,
-            inbox,
-            outboxes,
-            control_rx,
-            ctx: TaskContext::new(job_id, pipeline_id, 0, 1, vec![], memory_pool, table_manager),
-        }
-    }
-
-    pub async fn run(&mut self) -> anyhow::Result<()> {
-        self.chain.on_start(&mut self.ctx).await?;
-
-        'main: loop {
-            tokio::select! {
-                biased;
-                Some(cmd) = self.control_rx.recv() => {
-                    if self.handle_control(cmd).await? {
-                        break 'main;
-                    }
-                }
-                Some(event) = async {
-                    if let Some(ref mut rx) = self.inbox { rx.recv().await }
-                    else { pending().await }
-                } => {
-                    self.process_event(event).await?;
-                }
-            }
-        }
-
-        self.chain.on_close(&mut self.ctx).await?;
-        Ok(())
-    }
-
-    async fn handle_control(&mut self, cmd: ControlCommand) -> anyhow::Result<bool> {
-        match &cmd {
-            ControlCommand::TriggerCheckpoint { barrier } => {
-                let barrier: CheckpointBarrier = barrier.clone().into();
-                self.chain.snapshot_state(barrier.clone(), &mut self.ctx).await?;
-                self.broadcast(StreamEvent::Barrier(barrier)).await?;
-            }
-            ControlCommand::Commit { epoch } => {
-                self.chain.commit_checkpoint(*epoch, &mut self.ctx).await?;
-            }
-            ControlCommand::Stop { mode } if *mode == StopMode::Immediate => {
-                return Ok(true);
-            }
-            _ => {}
-        }
-
-        self.chain.handle_control(cmd, &mut self.ctx).await
-    }
-
-    async fn process_event(&mut self, tracked: TrackedEvent) -> anyhow::Result<()> {
-        match tracked.event {
-            StreamEvent::Data(batch) => {
-                let outputs = self.chain.process_data(0, batch, &mut self.ctx).await?;
-                self.emit_outputs(outputs).await?;
-            }
-            StreamEvent::Watermark(wm) => {
-                let outputs = self.chain.process_watermark(wm.clone(), &mut self.ctx).await?;
-                self.emit_outputs(outputs).await?;
-                self.broadcast(StreamEvent::Watermark(wm)).await?;
-            }
-            StreamEvent::Barrier(barrier) => {
-                self.chain.snapshot_state(barrier.clone(), &mut self.ctx).await?;
-                self.broadcast(StreamEvent::Barrier(barrier)).await?;
-            }
-            StreamEvent::EndOfStream => {
-                self.broadcast(StreamEvent::EndOfStream).await?;
-            }
-        }
-        Ok(())
-    }
-
-    async fn emit_outputs(
-        &mut self,
-        outputs: Vec<crate::runtime::streaming::protocol::stream_out::StreamOutput>,
-    ) -> anyhow::Result<()> {
-        for out in outputs {
-            match out {
-                crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward(batch)
-                | crate::runtime::streaming::protocol::stream_out::StreamOutput::Broadcast(batch)
-                | crate::runtime::streaming::protocol::stream_out::StreamOutput::Keyed(_, batch) => {
-                    self.broadcast(StreamEvent::Data(batch)).await?;
-                }
-                crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(wm) => {
-                    self.broadcast(StreamEvent::Watermark(wm)).await?;
-                }
-            }
-        }
-        Ok(())
-    }
-
-    async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> {
-        let tracked = TrackedEvent::control(event);
-        for tx in &self.outboxes {
-            tx.send(tracked.clone()).await?;
-        }
-        Ok(())
-    }
-}
-
-pub struct FusionOperatorChain {
-    operators: Vec<Box<dyn MessageOperator>>,
-}
-
-impl FusionOperatorChain {
-    pub fn new(operators: Vec<Box<dyn MessageOperator>>) -> Self {
-        Self { operators }
-    }
-
-    pub async fn on_start(&mut self, ctx: &mut TaskContext) -> anyhow::Result<()> {
-        for op in &mut self.operators {
-            op.on_start(ctx).await?;
-        }
-        Ok(())
-    }
-
-    pub async fn process_data(
-        &mut self,
-        input_idx: usize,
-        batch: arrow_array::RecordBatch,
-        ctx: &mut TaskContext,
-    ) -> anyhow::Result<Vec<crate::runtime::streaming::protocol::stream_out::StreamOutput>> {
-        let mut data_batches = vec![batch];
-        for (idx, op) in self.operators.iter_mut().enumerate() {
-            let mut next_batches = Vec::new();
-            for b in data_batches {
-                let outputs = op
-                    .process_data(if idx == 0 { input_idx } else { 0 }, b, ctx)
-                    .await?;
-                for out in outputs {
-                    match out {
-                        crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward(b)
-                        | crate::runtime::streaming::protocol::stream_out::StreamOutput::Broadcast(b)
-                        | crate::runtime::streaming::protocol::stream_out::StreamOutput::Keyed(_, b) => {
-                            next_batches.push(b);
-                        }
-                        crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(_) => {}
-                    }
-                }
-            }
-            data_batches = next_batches;
-        }
-        Ok(data_batches
-            .into_iter()
-            .map(crate::runtime::streaming::protocol::stream_out::StreamOutput::Forward)
-            .collect())
-    }
-
-    pub async fn process_watermark(
-        &mut self,
-        watermark: crate::sql::common::Watermark,
-        ctx: &mut TaskContext,
-    ) -> anyhow::Result<Vec<crate::runtime::streaming::protocol::stream_out::StreamOutput>> {
-        let mut outs = vec![crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(watermark)];
-        for op in &mut self.operators {
-            let mut next = Vec::new();
-            for out in outs {
-                match out {
-                    crate::runtime::streaming::protocol::stream_out::StreamOutput::Watermark(wm) => {
-                        let mut produced = op.process_watermark(wm, ctx).await?;
-                        next.append(&mut produced);
-                    }
-                    other => next.push(other),
-                }
-            }
-            outs = next;
-        }
-        Ok(outs)
-    }
-
-    pub async fn snapshot_state(
-        &mut self,
-        barrier: CheckpointBarrier,
-        ctx: &mut TaskContext,
-    ) -> anyhow::Result<()> {
-        for op in &mut self.operators {
-            op.snapshot_state(barrier.clone(), ctx).await?;
-        }
-        Ok(())
-    }
-
-    pub async fn commit_checkpoint(&mut self, epoch: u32, ctx: &mut TaskContext) -> anyhow::Result<()> {
-        for op in &mut self.operators {
-            op.commit_checkpoint(epoch, ctx).await?;
-        }
-        Ok(())
-    }
-
-    pub async fn handle_control(
-        &mut self,
-        cmd: ControlCommand,
-        ctx: &mut TaskContext,
-    ) -> anyhow::Result<bool> {
-        let mut should_stop = false;
-        for op in &mut self.operators {
-            should_stop = should_stop || op.handle_control(cmd.clone(), ctx).await?;
-        }
-        Ok(should_stop)
-    }
-
-    pub async fn on_close(&mut self, ctx: &mut TaskContext) -> anyhow::Result<()> {
-        for op in &mut self.operators {
-            let _ = op.on_close(ctx).await?;
-        }
-        Ok(())
-    }
-}
diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs
index 6b145cd7..06cab2ee 100644
--- a/src/runtime/streaming/lib.rs
+++ b/src/runtime/streaming/lib.rs
@@ -15,7 +15,6 @@
 
 pub mod api;
 pub mod arrow;
-pub mod cluster;
 pub mod error;
 pub mod execution;
 pub mod factory;
@@ -24,23 +23,19 @@ pub mod memory;
 pub mod network;
 pub mod operators;
 pub mod protocol;
-pub mod state;
 
 pub use api::{
     ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
 };
-pub use cluster::{
-    CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy,
-    PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, TaskManager,
-    VertexId,
-};
 pub use error::RunError;
-pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
+pub use execution::{
+    OperatorDrive, SourceRunner, SubtaskRunner, SOURCE_IDLE_SLEEP, WATERMARK_EMIT_INTERVAL,
+};
 pub use factory::{OperatorConstructor, OperatorFactory};
 pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
 pub use memory::{MemoryPool, MemoryTicket};
 pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
 pub use protocol::{
-    CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput, Watermark,
+    CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput,
     control_channel, merge_watermarks, watermark_strictly_advances,
 };
diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs
index 237f3c06..4a761460 100644
--- a/src/runtime/streaming/mod.rs
+++ b/src/runtime/streaming/mod.rs
@@ -15,8 +15,6 @@
 
 pub mod api;
 pub mod arrow;
-pub mod cluster;
-pub mod connectors;
 pub mod error;
 pub mod execution;
 pub mod factory;
@@ -26,21 +24,13 @@ pub mod memory;
 pub mod network;
 pub mod operators;
 pub mod protocol;
-pub mod storage;
 
 pub use api::{
-    ConstructedOperator, MessageOperator, Registry, SourceEvent, SourceOffset, SourceOperator,
-    TaskContext,
-};
-pub use cluster::{
-    CompileError, ExchangeMode, ExecutionGraph, JobCompiler, JobId, PartitioningStrategy,
-    PhysicalEdgeDescriptor, ResourceProfile, SubtaskIndex, TaskDeploymentDescriptor, TaskManager,
-    VertexId,
+    ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
 };
 pub use error::RunError;
 pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
 pub use factory::{OperatorConstructor, OperatorFactory};
-pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
 pub use memory::{MemoryPool, MemoryTicket};
 pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
 pub use protocol::{
diff --git a/src/runtime/streaming/network/environment.rs b/src/runtime/streaming/network/environment.rs
index 789af2a8..19aedec7 100644
--- a/src/runtime/streaming/network/environment.rs
+++ b/src/runtime/streaming/network/environment.rs
@@ -1,12 +1,8 @@
-use crate::runtime::streaming::cluster::graph::{
-    ExchangeMode, ExecutionGraph, SubtaskIndex, VertexId,
-};
-use crate::runtime::streaming::protocol::tracked::TrackedEvent;
-use super::endpoint::{BoxedEventStream, PhysicalSender, RemoteSenderStub};
+use super::endpoint::{BoxedEventStream, PhysicalSender};
 use std::collections::HashMap;
-use tokio::sync::mpsc;
-use tokio_stream::wrappers::ReceiverStream;
-use tracing::info;
+
+pub type VertexId = u32;
+pub type SubtaskIndex = u32;
 
 /// 物理网络路由注册表
 pub struct NetworkEnvironment {
@@ -22,44 +18,6 @@ impl NetworkEnvironment {
         }
     }
 
-    pub fn build_from_graph(graph: &ExecutionGraph, local_queue_size: usize) -> Self {
-        let mut env = Self::new();
-
-        for edge in &graph.edges {
-            let src_key = (edge.src_vertex, edge.src_subtask);
-            let dst_key = (edge.dst_vertex, edge.dst_subtask);
-
-            match &edge.exchange_mode {
-                ExchangeMode::LocalThread => {
-                    let (tx, rx) = mpsc::channel::<TrackedEvent>(local_queue_size);
-
-                    let sender = PhysicalSender::Local(tx);
-                    let receiver_stream =
-                        Box::pin(ReceiverStream::new(rx)) as BoxedEventStream;
-
-                    env.outboxes.entry(src_key).or_default().push(sender);
-                    env.inboxes.entry(dst_key).or_default().push(receiver_stream);
-                }
-                ExchangeMode::RemoteNetwork { target_addr } => {
-                    let remote_stub = RemoteSenderStub {
-                        target_addr: target_addr.clone(),
-                    };
-                    env.outboxes
-                        .entry(src_key)
-                        .or_default()
-                        .push(PhysicalSender::Remote(remote_stub));
-                }
-            }
-        }
-
-        info!(
-            "Network Environment built. Wired {} connections.",
-            graph.edges.len()
-        );
-
-        env
-    }
-
     pub fn take_outboxes(
         &mut self,
         vertex_id: VertexId,
diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
index ac2cd585..42eda177 100644
--- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -473,8 +473,7 @@ impl IncrementalAggregatingFunc {
         Ok(())
     }
 
-    async fn initialize(&mut self, ctx: &mut TaskContext) -> Result<()> {
-        let mut tm = ctx.table_manager_guard().await?;
+    async fn initialize(&mut self, _ctx: &mut TaskContext) -> Result<()> {
         // let table = tm
         //     .get_uncached_key_value_view("a")
         //     .await
diff --git a/src/runtime/streaming/operators/joins/lookup_join.rs b/src/runtime/streaming/operators/joins/lookup_join.rs
deleted file mode 100644
index c6458174..00000000
--- a/src/runtime/streaming/operators/joins/lookup_join.rs
+++ /dev/null
@@ -1,365 +0,0 @@
-//! 维表 Lookup Join（Enrichment）：与 worker `arrow/lookup_join` 逻辑对齐，实现 [`MessageOperator`]。
-
-use anyhow::{anyhow, Result};
-use arrow::compute::filter_record_batch;
-use arrow::row::{OwnedRow, RowConverter, SortField};
-use arrow_array::cast::AsArray;
-use arrow_array::types::UInt64Type;
-use arrow_array::{Array, BooleanArray, RecordBatch};
-use arrow_schema::{DataType, Field, FieldRef, Schema};
-use async_trait::async_trait;
-use datafusion::physical_expr::PhysicalExpr;
-use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
-use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
-use datafusion_proto::protobuf::PhysicalExprNode;
-use mini_moka::sync::Cache;
-use prost::Message;
-use protocol::grpc::api::{JoinType, LookupJoinOperator as LookupJoinProto};
-use std::collections::HashMap;
-use std::sync::Arc;
-use std::time::Duration;
-
-use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
-use crate::runtime::streaming::connectors::{LookupConnector, connectors};
-use crate::runtime::streaming::StreamOutput;
-use crate::sql::common::{CheckpointBarrier, FsSchema, MetadataField, OperatorConfig, Watermark, LOOKUP_KEY_INDEX_FIELD};
-
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub enum LookupJoinType {
-    Left,
-    Inner,
-}
-
-/// 维表查询连接算子：外部系统打宽 + 可选 LRU 缓存。
-pub struct LookupJoinOperator {
-    name: String,
-    connector: Box<dyn LookupConnector + Send>,
-    key_exprs: Vec<Arc<dyn PhysicalExpr>>,
-    cache: Option<Cache<OwnedRow, OwnedRow>>,
-    key_row_converter: RowConverter,
-    result_row_converter: RowConverter,
-    join_type: LookupJoinType,
-    lookup_schema: Arc<Schema>,
-    metadata_fields: Vec<MetadataField>,
-    input_schema: Arc<FsSchema>,
-    /// 与 worker 侧 `ctx.out_schema` 对齐：由 input 去 key + lookup 列 + 时间列拼成。
-    output_schema: Arc<Schema>,
-}
-
-fn build_lookup_output_schema(
-    input: &FsSchema,
-    lookup_columns: &[FieldRef],
-) -> anyhow::Result<Arc<Schema>> {
-    let key_indices = input.routing_keys().cloned().unwrap_or_default();
-    let ts = input.timestamp_index;
-    let mut out: Vec<FieldRef> = Vec::new();
-    for i in 0..input.schema.fields().len() {
-        if key_indices.contains(&i) || i == ts {
-            continue;
-        }
-        out.push(input.schema.fields()[i].clone());
-    }
-    out.extend(lookup_columns.iter().cloned());
-    out.push(input.schema.fields()[ts].clone());
-    Ok(Arc::new(Schema::new(out)))
-}
-
-impl LookupJoinOperator {
-    async fn process_lookup_batch(&mut self, batch: RecordBatch) -> Result<Vec<StreamOutput>> {
-        let num_rows = batch.num_rows();
-        if num_rows == 0 {
-            return Ok(vec![]);
-        }
-
-        let key_arrays: Vec<_> = self
-            .key_exprs
-            .iter()
-            .map(|expr| {
-                expr.evaluate(&batch)
-                    .map_err(|e| anyhow!("key expr evaluate: {e}"))?
-                    .into_array(num_rows)
-                    .map_err(|e| anyhow!("key expr into_array: {e}"))
-            })
-            .collect::<Result<_>>()?;
-
-        let rows = self
-            .key_row_converter
-            .convert_columns(&key_arrays)
-            .map_err(|e| anyhow!("key_row_converter: {e}"))?;
-
-        let mut key_map: HashMap<OwnedRow, Vec<usize>> = HashMap::new();
-        for (i, row) in rows.iter().enumerate() {
-            key_map.entry(row.owned()).or_default().push(i);
-        }
-
-        let uncached_keys: Vec<&OwnedRow> = if let Some(cache) = &mut self.cache {
-            key_map
-                .keys()
-                .filter(|k| !cache.contains_key(*k))
-                .collect()
-        } else {
-            key_map.keys().collect()
-        };
-
-        // 按 key 字节存 OwnedRow，避免借用 `convert_columns` 返回的临时行缓冲。
-        let mut results: HashMap<Vec<u8>, OwnedRow> = HashMap::new();
-
-        if !uncached_keys.is_empty() {
-            let cols = self
-                .key_row_converter
-                .convert_rows(uncached_keys.iter().map(|r| r.row()))
-                .map_err(|e| anyhow!("convert_rows for lookup: {e}"))?;
-
-            if let Some(result_batch) = self.connector.lookup(&cols).await {
-                let mut result_batch = result_batch.map_err(|e| anyhow!("connector lookup: {e}"))?;
-
-                let key_idx_col = result_batch
-                    .schema()
-                    .index_of(LOOKUP_KEY_INDEX_FIELD)
-                    .map_err(|e| anyhow!("{e}"))?;
-                let keys = result_batch.remove_column(key_idx_col);
-                let keys = keys.as_primitive::<UInt64Type>();
-
-                let result_rows = self
-                    .result_row_converter
-                    .convert_columns(result_batch.columns())
-                    .map_err(|e| anyhow!("result_row_converter: {e}"))?;
-
-                for (i, v) in result_rows.iter().enumerate() {
-                    if keys.is_null(i) {
-                        return Err(anyhow!("lookup key index is null at row {i}"));
-                    }
-                    let req_idx = keys.value(i) as usize;
-                    if req_idx >= uncached_keys.len() {
-                        return Err(anyhow!(
-                            "lookup key index {req_idx} out of range ({} keys)",
-                            uncached_keys.len()
-                        ));
-                    }
-                    let key_bytes = uncached_keys[req_idx].as_ref().to_vec();
-                    let owned = v.owned();
-                    results.insert(key_bytes.clone(), owned.clone());
-                    if let Some(cache) = &mut self.cache {
-                        cache.insert(uncached_keys[req_idx].clone(), owned);
-                    }
-                }
-            }
-        }
-
-        let mut output_rows = self
-            .result_row_converter
-            .empty_rows(batch.num_rows(), batch.num_rows().saturating_mul(10));
-
-        for row in rows.iter() {
-            let row_owned = self
-                .cache
-                .as_mut()
-                .and_then(|c| c.get(&row.owned()))
-                .unwrap_or_else(|| {
-                    results
-                        .get(row.as_ref())
-                        .expect("missing lookup result for key (cache miss without connector row)")
-                        .clone()
-                });
-            output_rows.push(row_owned.row());
-        }
-
-        let right_side = self
-            .result_row_converter
-            .convert_rows(output_rows.iter())
-            .map_err(|e| anyhow!("convert_rows output: {e}"))?;
-
-        let nonnull = (self.join_type == LookupJoinType::Inner).then(|| {
-            let mut nonnull = vec![false; batch.num_rows()];
-            for (_, a) in self
-                .lookup_schema
-                .fields()
-                .iter()
-                .zip(right_side.iter())
-                .filter(|(f, _)| {
-                    !self
-                        .metadata_fields
-                        .iter()
-                        .any(|m| &m.field_name == f.name())
-                })
-            {
-                if let Some(nulls) = a.logical_nulls() {
-                    for (valid, b) in nulls.iter().zip(nonnull.iter_mut()) {
-                        *b |= valid;
-                    }
-                } else {
-                    nonnull.fill(true);
-                    break;
-                }
-            }
-            BooleanArray::from(nonnull)
-        });
-
-        let key_indices = self
-            .input_schema
-            .routing_keys()
-            .cloned()
-            .unwrap_or_default();
-        let non_keys: Vec<_> = (0..batch.num_columns())
-            .filter(|i| !key_indices.contains(i) && *i != self.input_schema.timestamp_index)
-            .collect();
-
-        let mut result_cols = batch
-            .project(&non_keys)
-            .map_err(|e| anyhow!("project non_keys: {e}"))?
-            .columns()
-            .to_vec();
-        result_cols.extend(right_side);
-        result_cols.push(batch.column(self.input_schema.timestamp_index).clone());
-
-        let mut out_batch = RecordBatch::try_new(self.output_schema.clone(), result_cols)
-            .map_err(|e| anyhow!("try_new output batch: {e}"))?;
-
-        if let Some(mask) = nonnull {
-            out_batch = filter_record_batch(&out_batch, &mask).map_err(|e| anyhow!("{e}"))?;
-        }
-
-        if out_batch.num_rows() == 0 {
-            return Ok(vec![]);
-        }
-
-        Ok(vec![StreamOutput::Forward(out_batch)])
-    }
-}
-
-#[async_trait]
-impl MessageOperator for LookupJoinOperator {
-    fn name(&self) -> &str {
-        &self.name
-    }
-
-    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
-        Ok(())
-    }
-
-    async fn process_data(
-        &mut self,
-        _input_idx: usize,
-        batch: RecordBatch,
-        _ctx: &mut TaskContext,
-    ) -> Result<Vec<StreamOutput>> {
-        self.process_lookup_batch(batch).await
-    }
-
-    async fn process_watermark(
-        &mut self,
-        _watermark: Watermark,
-        _ctx: &mut TaskContext,
-    ) -> Result<Vec<StreamOutput>> {
-        Ok(vec![])
-    }
-
-    async fn snapshot_state(
-        &mut self,
-        _barrier: CheckpointBarrier,
-        _ctx: &mut TaskContext,
-    ) -> Result<()> {
-        Ok(())
-    }
-
-    async fn on_close(&mut self, _ctx: &mut TaskContext) -> Result<Vec<StreamOutput>> {
-        Ok(vec![])
-    }
-}
-
-/// 从配置构造 [`LookupJoinOperator`]（非 `ConstructedOperator` / `ArrowOperator`）。
-pub struct LookupJoinConstructor;
-
-impl LookupJoinConstructor {
-    pub fn with_config(
-        &self,
-        config: LookupJoinProto,
-        registry: Arc<Registry>,
-    ) -> anyhow::Result<LookupJoinOperator> {
-        let join_type = config.join_type();
-        let input_schema: FsSchema = config.input_schema.unwrap().try_into()?;
-        let lookup_schema: FsSchema = config.lookup_schema.unwrap().try_into()?;
-
-        let exprs = config
-            .key_exprs
-            .iter()
-            .map(|e| {
-                let expr = PhysicalExprNode::decode(&mut e.left_expr.as_slice())?;
-                Ok(parse_physical_expr(
-                    &expr,
-                    registry.as_ref(),
-                    &input_schema.schema,
-                    &DefaultPhysicalExtensionCodec {},
-                )?)
-            })
-            .collect::<anyhow::Result<Vec<_>>>()?;
-
-        let op = config.connector.unwrap();
-        let operator_config: OperatorConfig = serde_json::from_str(&op.config)?;
-
-        let result_row_converter = RowConverter::new(
-            lookup_schema
-                .schema_without_timestamp()
-                .fields
-                .iter()
-                .map(|f| SortField::new(f.data_type().clone()))
-                .collect(),
-        )?;
-
-        let lookup_schema_arc = Arc::new(
-            lookup_schema
-                .with_additional_fields(
-                    [Field::new(LOOKUP_KEY_INDEX_FIELD, DataType::UInt64, false)].into_iter(),
-                )?
-                .schema_without_timestamp(),
-        );
-
-        let output_schema = build_lookup_output_schema(&input_schema, lookup_schema_arc.fields())?;
-
-        let connector = connectors()
-            .get(op.connector.as_str())
-            .unwrap_or_else(|| panic!("No connector with name '{}'", op.connector))
-            .make_lookup(operator_config.clone(), lookup_schema_arc.clone())?;
-
-        let name = format!("LookupJoin({})", connector.name());
-
-        let max_capacity_bytes = config.max_capacity_bytes.unwrap_or(8 * 1024 * 1024);
-        let cache = (max_capacity_bytes > 0).then(|| {
-            let mut c = Cache::builder()
-                .weigher(|k: &OwnedRow, v: &OwnedRow| (k.as_ref().len() + v.as_ref().len()) as u32)
-                .max_capacity(max_capacity_bytes);
-
-            if let Some(ttl) = config.ttl_micros {
-                c = c.time_to_live(Duration::from_micros(ttl));
-            }
-            c.build()
-        });
-
-        let key_row_converter = RowConverter::new(
-            exprs
-                .iter()
-                .map(|e| Ok(SortField::new(e.data_type(&input_schema.schema)?)))
-                .collect::<anyhow::Result<_>>()?,
-        )?;
-
-        Ok(LookupJoinOperator {
-            name,
-            connector,
-            key_exprs: exprs,
-            cache,
-            key_row_converter,
-            result_row_converter,
-            join_type: match join_type {
-                JoinType::Inner => LookupJoinType::Inner,
-                JoinType::Left => LookupJoinType::Left,
-                jt => panic!("invalid lookup join type {:?}", jt),
-            },
-            lookup_schema: lookup_schema_arc,
-            metadata_fields: operator_config.metadata_fields,
-            input_schema: Arc::new(input_schema),
-            output_schema,
-        })
-    }
-}
-
diff --git a/src/runtime/streaming/operators/joins/mod.rs b/src/runtime/streaming/operators/joins/mod.rs
index d53e4b91..ccfff792 100644
--- a/src/runtime/streaming/operators/joins/mod.rs
+++ b/src/runtime/streaming/operators/joins/mod.rs
@@ -1,7 +1,5 @@
 pub mod join_instance;
 pub mod join_with_expiration;
-pub mod lookup_join;
 
 pub use join_instance::{InstantJoinConstructor, InstantJoinOperator};
 pub use join_with_expiration::{JoinWithExpirationConstructor, JoinWithExpirationOperator};
-pub use lookup_join::{LookupJoinConstructor, LookupJoinOperator, LookupJoinType};
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
index e3c0f566..958b5320 100644
--- a/src/runtime/streaming/operators/mod.rs
+++ b/src/runtime/streaming/operators/mod.rs
@@ -15,9 +15,7 @@ mod value_execution;
 pub use stateless_physical_executor::StatelessPhysicalExecutor;
 
 pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache};
-pub use joins::{
-    InstantJoinOperator, JoinWithExpirationOperator, LookupJoinOperator, LookupJoinType,
-};
+pub use joins::{InstantJoinOperator, JoinWithExpirationOperator};
 pub use key_by::KeyByOperator;
 pub use sink::{ConsistencyMode, KafkaSinkOperator};
 pub use source::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState};
diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs
index 0b68b88b..1ce01673 100644
--- a/src/runtime/streaming/operators/sink/kafka/mod.rs
+++ b/src/runtime/streaming/operators/sink/kafka/mod.rs
@@ -209,14 +209,7 @@ impl MessageOperator for KafkaSinkOperator {
                 self.at_least_once_producer = Some(self.create_producer(ctx, None)?);
             }
             ConsistencyMode::ExactlyOnce => {
-                let mut next_idx = {
-                    let mut tm = ctx.table_manager_guard().await?;
-                    let index_table = tm
-                        .get_global_keyed_state::<u32, usize>("tx_idx")
-                        .await
-                        .map_err(|e| anyhow!(e))?;
-                    index_table.get(&ctx.subtask_idx).copied().unwrap_or(0)
-                };
+                let mut next_idx = 0usize;
 
                 let active_producer = self.create_producer(ctx, Some(next_idx))?;
                 next_idx += 1;
@@ -301,17 +294,6 @@ impl MessageOperator for KafkaSinkOperator {
             let old_producer = std::mem::replace(&mut state.active_producer, new_producer);
             state.producer_awaiting_commit = Some(old_producer);
 
-            {
-                let mut tm = ctx.table_manager_guard().await?;
-                let index_table = tm
-                    .get_global_keyed_state::<u32, usize>("tx_idx")
-                    .await
-                    .map_err(|e| anyhow!(e))?;
-                index_table
-                    .insert(ctx.subtask_idx, state.next_transaction_index)
-                    .await;
-            }
-
             state.next_transaction_index += 1;
         }
 
diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs
index 595fbcc3..b17a504b 100644
--- a/src/runtime/streaming/operators/source/kafka/mod.rs
+++ b/src/runtime/streaming/operators/source/kafka/mod.rs
@@ -10,7 +10,7 @@ use rdkafka::consumer::{CommitMode, Consumer, StreamConsumer};
 use rdkafka::{ClientConfig, Message as KMessage, Offset, TopicPartitionList};
 use std::collections::HashMap;
 use std::num::NonZeroU32;
-use std::time::Duration;
+use std::time::{Duration, Instant};
 use tracing::{debug, error, info, warn};
 
 use crate::runtime::streaming::api::context::TaskContext;
@@ -28,7 +28,7 @@ pub struct KafkaState {
     offset: i64,
 }
 
-/// 增量反序列化缓冲 trait：Source 逐条 `deserialize_slice`，攒满后 `flush_buffer` 输出 [`RecordBatch`]。
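+/// Incremental deserialization buffer trait: the source feeds records one at a time via `deserialize_slice`, and `flush_buffer` emits a [`RecordBatch`] once the buffer is full or the linger time has elapsed.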
 pub trait BatchDeserializer: Send + 'static {
     fn deserialize_slice(
         &mut self,
@@ -40,6 +40,9 @@ pub trait BatchDeserializer: Send + 'static {
     fn should_flush(&self) -> bool;
 
     fn flush_buffer(&mut self) -> Result<Option<RecordBatch>>;
+
+    /// Whether the buffer holds no data awaiting deserialization.
+    fn is_empty(&self) -> bool;
 }
 
 // ---------------------------------------------------------------------------
@@ -88,6 +91,10 @@ impl BatchDeserializer for BufferedDeserializer {
         self.buffer.clear();
         Ok(Some(batch))
     }
+
+    fn is_empty(&self) -> bool {
+        self.buffer.is_empty()
+    }
 }
 
 impl SourceOffset {
@@ -104,6 +111,9 @@ impl SourceOffset {
 // 2. 核心算子外壳
 // ============================================================================
 
+const KAFKA_POLL_TIMEOUT: Duration = Duration::from_millis(100);
+const MAX_BATCH_LINGER_TIME: Duration = Duration::from_millis(500);
+
 pub struct KafkaSourceOperator {
     pub topic: String,
     pub bootstrap_servers: String,
@@ -121,6 +131,9 @@ pub struct KafkaSourceOperator {
 
     current_offsets: HashMap<i32, i64>,
     is_empty_assignment: bool,
+
+    /// Time of the last successful batch flush; used to force a flush by linger time under low traffic.
+    last_flush_time: Instant,
 }
 
 impl KafkaSourceOperator {
@@ -149,6 +162,7 @@ impl KafkaSourceOperator {
             deserializer,
             current_offsets: HashMap::new(),
             is_empty_assignment: false,
+            last_flush_time: Instant::now(),
         }
     }
 
@@ -175,18 +189,8 @@ impl KafkaSourceOperator {
             .set("group.id", &group_id)
             .create()?;
 
-        let (has_state, state_map) = {
-            let mut tm = ctx.table_manager_guard().await?;
-            let global_state = tm
-                .get_global_keyed_state::<i32, KafkaState>("k")
-                .await
-                .map_err(|e| anyhow!(e))?;
-            let restored_states: Vec<_> = global_state.get_all().values().copied().collect();
-            let has_state = !restored_states.is_empty();
-            let state_map: HashMap<i32, KafkaState> =
-                restored_states.into_iter().map(|s| (s.partition, s)).collect();
-            (has_state, state_map)
-        };
+        let has_state = false;
+        let state_map: HashMap<i32, KafkaState> = HashMap::new();
 
         let metadata = consumer
             .fetch_metadata(Some(&self.topic), Duration::from_secs(30))
@@ -266,12 +270,16 @@ impl SourceOperator for KafkaSourceOperator {
             .as_ref()
             .ok_or_else(|| anyhow!("rate limiter not initialized"))?;
 
-        let recv_result = tokio::time::timeout(Duration::from_millis(50), consumer.recv()).await;
-
-        match recv_result {
+        match tokio::time::timeout(KAFKA_POLL_TIMEOUT, consumer.recv()).await {
             Ok(Ok(msg)) => {
+                let partition = msg.partition();
+                let offset = msg.offset();
+                let timestamp = msg.timestamp().to_millis().unwrap_or(0);
+
+                // Always advance the offset, whether or not there is a payload (tombstones included); otherwise we would be stuck on a tombstone message forever.
+                self.current_offsets.insert(partition, offset);
+
                 if let Some(payload) = msg.payload() {
-                    let timestamp = msg.timestamp().to_millis().unwrap_or(0);
                     let topic = msg.topic();
 
                     let connector_metadata = if !self.metadata_fields.is_empty() {
@@ -299,17 +307,25 @@ impl SourceOperator for KafkaSourceOperator {
                         timestamp.max(0) as u64,
                         connector_metadata,
                     )?;
+                } else {
+                    debug!(
+                        "Received tombstone message at partition {} offset {}",
+                        partition, offset
+                    );
+                }
 
-                    self.current_offsets.insert(msg.partition(), msg.offset());
+                rate_limiter.until_ready().await;
 
-                    rate_limiter.until_ready().await;
+                let should_flush_by_size = self.deserializer.should_flush();
+                let should_flush_by_time = self.last_flush_time.elapsed() > MAX_BATCH_LINGER_TIME;
 
-                    if self.deserializer.should_flush() {
-                        if let Some(batch) = self.deserializer.flush_buffer()? {
-                            return Ok(SourceEvent::Data(batch));
-                        }
+                if !self.deserializer.is_empty() && (should_flush_by_size || should_flush_by_time) {
+                    if let Some(batch) = self.deserializer.flush_buffer()? {
+                        self.last_flush_time = Instant::now();
+                        return Ok(SourceEvent::Data(batch));
                     }
                 }
+
                 Ok(SourceEvent::Idle)
             }
             Ok(Err(e)) => {
@@ -317,8 +333,10 @@ impl SourceOperator for KafkaSourceOperator {
                 Err(anyhow!("Kafka error: {}", e))
             }
             Err(_) => {
-                if self.deserializer.should_flush() {
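+                // No new message within the timeout: if the buffer still holds pending data, force a flush so records do not linger for long under low traffic.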
+                if !self.deserializer.is_empty() {
                     if let Some(batch) = self.deserializer.flush_buffer()? {
+                        self.last_flush_time = Instant::now();
                         return Ok(SourceEvent::Data(batch));
                     }
                 }
@@ -334,25 +352,8 @@ impl SourceOperator for KafkaSourceOperator {
     ) -> Result<()> {
         debug!("Source [{}] executing checkpoint", ctx.subtask_idx);
 
-        let mut tm = ctx.table_manager_guard().await?;
-        let global_state = tm
-            .get_global_keyed_state::<i32, KafkaState>("k")
-            .await
-            .map_err(|e| anyhow!(e))?;
-
         let mut topic_partitions = TopicPartitionList::new();
-
         for (&partition, &offset) in &self.current_offsets {
-            global_state
-                .insert(
-                    partition,
-                    KafkaState {
-                        partition,
-                        offset: offset + 1,
-                    },
-                )
-                .await;
-
             topic_partitions
                 .add_partition_offset(&self.topic, partition, Offset::Offset(offset))
                 .map_err(|e| anyhow!("add_partition_offset: {e}"))?;
diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs
index f210c95a..3af64bf7 100644
--- a/src/runtime/streaming/operators/watermark/watermark_generator.rs
+++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs
@@ -73,6 +73,8 @@ impl WatermarkGeneratorOperator {
         Some(from_nanos(max_ts as u128))
     }
 
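+    /// The watermark must be the **max** of the evaluated array, not the min: when a batch has multiple rows,
+    /// the min underestimates the safe baseline of "event time seen so far" (e.g. for ts-5s over two rows, the min is too early).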
     fn evaluate_watermark(&self, batch: &RecordBatch) -> Result<SystemTime> {
         let watermark_array = self
             .expression
@@ -84,10 +86,10 @@ impl WatermarkGeneratorOperator {
             .downcast_ref::<TimestampNanosecondArray>()
             .ok_or_else(|| anyhow!("watermark expression must return TimestampNanosecondArray"))?;
 
-        let min_watermark_nanos = aggregate::min(typed_array)
-            .ok_or_else(|| anyhow!("failed to extract min watermark from batch"))?;
+        let max_watermark_nanos = aggregate::max(typed_array)
+            .ok_or_else(|| anyhow!("failed to extract max watermark from batch"))?;
 
-        Ok(from_nanos(min_watermark_nanos as u128))
+        Ok(from_nanos(max_watermark_nanos as u128))
     }
 }
 
@@ -101,19 +103,8 @@ impl MessageOperator for WatermarkGeneratorOperator {
         Some(Duration::from_secs(1))
     }
 
-    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
         self.last_event_wall = SystemTime::now();
-
-        let mut tm = ctx.table_manager_guard().await?;
-        let gs = tm
-            .get_global_keyed_state::<u32, WatermarkGeneratorState>("s")
-            .await
-            .map_err(|e| anyhow!("global keyed state s: {e}"))?;
-
-        if let Some(recovered) = gs.get(&ctx.subtask_idx) {
-            self.state = *recovered;
-        }
-
         Ok(())
     }
 
@@ -132,12 +123,15 @@ impl MessageOperator for WatermarkGeneratorOperator {
         };
 
         let new_watermark = self.evaluate_watermark(&batch)?;
+
+        // Keep the watermark monotonically non-decreasing; it must never move backwards.
         self.state.max_watermark = self.state.max_watermark.max(new_watermark);
 
         let time_since_last_emit = max_batch_ts
             .duration_since(self.state.last_watermark_emitted_at)
             .unwrap_or(Duration::ZERO);
 
+        // Emit a watermark when waking from idle or once the emit interval has elapsed.
         if self.is_idle || time_since_last_emit > self.interval {
             debug!(
                 "[{}] emitting expression watermark {}",
@@ -174,6 +168,7 @@ impl MessageOperator for WatermarkGeneratorOperator {
                 .last_event_wall
                 .elapsed()
                 .unwrap_or(Duration::ZERO);
+            // Wall-clock idle timeout reached: emit an Idle watermark so downstream is not left waiting for progress forever.
             if !self.is_idle && elapsed > idle_timeout {
                 info!(
                     "task [{}] entering Idle after {:?}",
@@ -186,13 +181,7 @@ impl MessageOperator for WatermarkGeneratorOperator {
         Ok(vec![])
     }
 
-    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> {
-        let mut tm = ctx.table_manager_guard().await?;
-        tm.get_global_keyed_state::<u32, WatermarkGeneratorState>("s")
-            .await
-            .map_err(|e| anyhow!("global keyed state s: {e}"))?
-            .insert(ctx.subtask_idx, self.state)
-            .await;
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
index c23da40a..f835bac2 100644
--- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
@@ -139,21 +139,7 @@ impl MessageOperator for TumblingWindowOperator {
         "TumblingWindow"
     }
 
-    async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-        let mut tm = ctx.table_manager_guard().await?;
-        let table = tm
-            .get_expiring_time_key_table("t", watermark)
-            .await
-            .map_err(|e| anyhow!("expiring time key table t: {e}"))?;
-
-        for (timestamp, batches) in table.all_batches_for_watermark(watermark) {
-            let bin_start = self.bin_start(*timestamp);
-            let slot = self.active_bins.entry(bin_start).or_default();
-            for batch in batches {
-                slot.finished_batches.push(batch.clone());
-            }
-        }
+    async fn on_start(&mut self, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
@@ -287,28 +273,7 @@ impl MessageOperator for TumblingWindowOperator {
         Ok(final_outputs)
     }
 
-    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, ctx: &mut TaskContext) -> Result<()> {
-        let watermark = ctx.last_present_watermark();
-        let mut tm = ctx.table_manager_guard().await?;
-        let table = tm
-            .get_expiring_time_key_table("t", watermark)
-            .await
-            .map_err(|e| anyhow!("expiring time key table t: {e}"))?;
-
-        for (bin_start, active_bin) in self.active_bins.iter_mut() {
-            active_bin.close_and_drain().await?;
-
-            for batch in &active_bin.finished_batches {
-                let state_batch = Self::add_bin_start_as_timestamp(
-                    batch,
-                    *bin_start,
-                    self.partial_schema.schema.clone(),
-                )?;
-                table.insert(*bin_start, state_batch);
-            }
-        }
-
-        table.flush(watermark).await?;
+    async fn snapshot_state(&mut self, _barrier: CheckpointBarrier, _ctx: &mut TaskContext) -> Result<()> {
         Ok(())
     }
 
diff --git a/src/runtime/streaming/storage/backend.rs b/src/runtime/streaming/storage/backend.rs
deleted file mode 100644
index 265b99ca..00000000
--- a/src/runtime/streaming/storage/backend.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-use anyhow::Result;
-use async_trait::async_trait;
-
-#[derive(Default, Debug, Clone)]
-pub struct CheckpointMetadata {
-    pub job_id: String,
-    pub epoch: u32,
-    pub min_epoch: u32,
-    pub operator_ids: Vec<String>,
-}
-
-#[derive(Default, Debug, Clone)]
-pub struct OperatorCheckpointMetadata {
-    pub operator_id: String,
-    pub epoch: u32,
-}
-
-#[async_trait]
-pub trait BackingStore: Send + Sync + 'static {
-    fn name() -> &'static str;
-    async fn load_checkpoint_metadata(job_id: &str, epoch: u32) -> Result<CheckpointMetadata>;
-    async fn load_operator_metadata(
-        job_id: &str,
-        operator_id: &str,
-        epoch: u32,
-    ) -> Result<Option<OperatorCheckpointMetadata>>;
-    async fn write_operator_checkpoint_metadata(
-        metadata: OperatorCheckpointMetadata,
-    ) -> Result<()>;
-    async fn write_checkpoint_metadata(metadata: CheckpointMetadata) -> Result<()>;
-    async fn cleanup_checkpoint(
-        metadata: CheckpointMetadata,
-        old_min_epoch: u32,
-        new_min_epoch: u32,
-    ) -> Result<()>;
-}
-
-pub struct ParquetStateBackend;
-
-#[async_trait]
-impl BackingStore for ParquetStateBackend {
-    fn name() -> &'static str {
-        "parquet"
-    }
-
-    async fn load_checkpoint_metadata(
-        _job_id: &str,
-        _epoch: u32,
-    ) -> Result<CheckpointMetadata> {
-        Ok(CheckpointMetadata::default())
-    }
-
-    async fn load_operator_metadata(
-        _job_id: &str,
-        _operator_id: &str,
-        _epoch: u32,
-    ) -> Result<Option<OperatorCheckpointMetadata>> {
-        Ok(None)
-    }
-
-    async fn write_operator_checkpoint_metadata(
-        _metadata: OperatorCheckpointMetadata,
-    ) -> Result<()> {
-        Ok(())
-    }
-
-    async fn write_checkpoint_metadata(_metadata: CheckpointMetadata) -> Result<()> {
-        Ok(())
-    }
-
-    async fn cleanup_checkpoint(
-        _metadata: CheckpointMetadata,
-        _old_min_epoch: u32,
-        _new_min_epoch: u32,
-    ) -> Result<()> {
-        Ok(())
-    }
-}
diff --git a/src/runtime/streaming/storage/manager.rs b/src/runtime/streaming/storage/manager.rs
deleted file mode 100644
index 2aa79e6b..00000000
--- a/src/runtime/streaming/storage/manager.rs
+++ /dev/null
@@ -1,156 +0,0 @@
-use anyhow::{Result, anyhow};
-use std::collections::HashMap;
-use std::sync::Arc;
-use std::time::SystemTime;
-
-use super::table::TaskInfo;
-use super::{DummyStorageProvider, StorageProviderRef};
-
-#[derive(Default)]
-pub struct GlobalKeyedView<K, V> {
-    data: HashMap<K, V>,
-}
-
-impl<K: Eq + std::hash::Hash, V> GlobalKeyedView<K, V> {
-    pub async fn insert(&mut self, key: K, value: V) {
-        self.data.insert(key, value);
-    }
-
-    pub fn get(&self, key: &K) -> Option<&V> {
-        self.data.get(key)
-    }
-
-    pub fn get_all(&self) -> &HashMap<K, V> {
-        &self.data
-    }
-}
-
-#[derive(Default)]
-pub struct ExpiringTimeKeyView;
-
-impl ExpiringTimeKeyView {
-    pub fn insert(&mut self, _timestamp: SystemTime, _batch: arrow_array::RecordBatch) {}
-
-    pub fn all_batches_for_watermark(
-        &self,
-        _watermark: Option<SystemTime>,
-    ) -> std::iter::Empty<(&SystemTime, &Vec<arrow_array::RecordBatch>)> {
-        std::iter::empty()
-    }
-
-    pub async fn flush(&mut self, _watermark: Option<SystemTime>) -> Result<()> {
-        Ok(())
-    }
-}
-
-#[derive(Default)]
-pub struct KeyTimeView;
-
-impl KeyTimeView {
-    pub async fn insert(
-        &mut self,
-        _batch: arrow_array::RecordBatch,
-    ) -> Result<Vec<arrow_array::types::UInt64Type>> {
-        Ok(vec![])
-    }
-
-    pub fn get_batch(&self, _key: &[u8]) -> Result<Option<arrow_array::RecordBatch>> {
-        Ok(None)
-    }
-}
-
-pub struct BackendWriter {}
-
-pub struct TableManager {
-    epoch: u32,
-    min_epoch: u32,
-    writer: BackendWriter,
-    task_info: Arc<TaskInfo>,
-    storage: StorageProviderRef,
-    caches: HashMap<String, Box<dyn std::any::Any + Send>>,
-}
-
-impl TableManager {
-    /// 加载状态后端（返回默认的空 Manager）
-    pub async fn load(task_info: Arc<TaskInfo>) -> Result<(Self, Option<SystemTime>)> {
-        let manager = Self {
-            epoch: 1,
-            min_epoch: 1,
-            writer: BackendWriter {},
-            task_info,
-            storage: Arc::new(DummyStorageProvider),
-            caches: HashMap::new(),
-        };
-        Ok((manager, None))
-    }
-
-    /// 接收到 CheckpointBarrier 时（空操作）
-    pub async fn checkpoint(
-        &mut self,
-        _epoch: u32,
-        _watermark: Option<SystemTime>,
-        _then_stop: bool,
-    ) {
-    }
-
-    /// 面向算子的 API：获取全局 Key-Value 表
-    pub async fn get_global_keyed_state<
-        K: Eq + std::hash::Hash + Send + 'static,
-        V: Send + 'static,
-    >(
-        &mut self,
-        table_name: &str,
-    ) -> Result<&mut GlobalKeyedView<K, V>> {
-        if !self.caches.contains_key(table_name) {
-            let view: Box<dyn std::any::Any + Send> =
-                Box::new(GlobalKeyedView::<K, V> { data: HashMap::new() });
-            self.caches.insert(table_name.to_string(), view);
-        }
-
-        let cache = self.caches.get_mut(table_name).unwrap();
-
-        let view = cache
-            .downcast_mut::<GlobalKeyedView<K, V>>()
-            .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?;
-
-        Ok(view)
-    }
-
-    /// 面向算子的 API：获取带 TTL 的时间键值表
-    pub async fn get_expiring_time_key_table(
-        &mut self,
-        table_name: &str,
-        _watermark: Option<SystemTime>,
-    ) -> Result<&mut ExpiringTimeKeyView> {
-        if !self.caches.contains_key(table_name) {
-            let view: Box<dyn std::any::Any + Send> = Box::new(ExpiringTimeKeyView::default());
-            self.caches.insert(table_name.to_string(), view);
-        }
-
-        let cache = self.caches.get_mut(table_name).unwrap();
-        let view = cache
-            .downcast_mut::<ExpiringTimeKeyView>()
-            .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?;
-
-        Ok(view)
-    }
-
-    /// 面向算子的 API：获取标准的 Key-Time 双重映射表
-    pub async fn get_key_time_table(
-        &mut self,
-        table_name: &str,
-        _watermark: Option<SystemTime>,
-    ) -> Result<&mut KeyTimeView> {
-        if !self.caches.contains_key(table_name) {
-            let view: Box<dyn std::any::Any + Send> = Box::new(KeyTimeView::default());
-            self.caches.insert(table_name.to_string(), view);
-        }
-
-        let cache = self.caches.get_mut(table_name).unwrap();
-        let view = cache
-            .downcast_mut::<KeyTimeView>()
-            .ok_or_else(|| anyhow!("Table type mismatch for {}", table_name))?;
-
-        Ok(view)
-    }
-}
diff --git a/src/runtime/streaming/storage/mod.rs b/src/runtime/streaming/storage/mod.rs
deleted file mode 100644
index c411b5ee..00000000
--- a/src/runtime/streaming/storage/mod.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-use anyhow::Result;
-use async_trait::async_trait;
-use std::sync::Arc;
-
-pub mod backend;
-pub mod manager;
-pub mod table;
-
-#[async_trait]
-pub trait StorageProvider: Send + Sync + 'static {
-    async fn get(&self, _path: &str) -> Result<Vec<u8>>;
-    async fn put(&self, _path: &str, _data: Vec<u8>) -> Result<()>;
-    async fn delete_if_present(&self, _path: &str) -> Result<()>;
-}
-
-pub type StorageProviderRef = Arc<dyn StorageProvider>;
-
-/// 空的存储实现，供测试和占位使用
-pub struct DummyStorageProvider;
-
-#[async_trait]
-impl StorageProvider for DummyStorageProvider {
-    async fn get(&self, _path: &str) -> Result<Vec<u8>> {
-        Ok(vec![])
-    }
-    async fn put(&self, _path: &str, _data: Vec<u8>) -> Result<()> {
-        Ok(())
-    }
-    async fn delete_if_present(&self, _path: &str) -> Result<()> {
-        Ok(())
-    }
-}
diff --git a/src/runtime/streaming/storage/table.rs b/src/runtime/streaming/storage/table.rs
deleted file mode 100644
index 4b37ec4a..00000000
--- a/src/runtime/streaming/storage/table.rs
+++ /dev/null
@@ -1,91 +0,0 @@
-use anyhow::Result;
-use arrow_array::RecordBatch;
-use async_trait::async_trait;
-use std::collections::{HashMap, HashSet};
-use std::sync::Arc;
-
-#[derive(Default)]
-pub struct TaskInfo {
-    pub job_id: String,
-    pub operator_id: String,
-    pub task_index: u32,
-}
-
-#[derive(Debug)]
-pub enum TableData {
-    RecordBatch(RecordBatch),
-    CommitData { data: Vec<u8> },
-    KeyedData { key: Vec<u8>, value: Vec<u8> },
-}
-
-pub struct CheckpointMessage {
-    pub epoch: u32,
-    pub time: std::time::SystemTime,
-    pub watermark: Option<std::time::SystemTime>,
-    pub then_stop: bool,
-}
-
-#[async_trait]
-pub trait TableEpochCheckpointer: Send + 'static {
-    type SubTableCheckpointMessage: prost::Message + Default;
-
-    async fn insert_data(&mut self, _data: TableData) -> Result<()> {
-        Ok(())
-    }
-
-    async fn finish(
-        self: Box<Self>,
-        _checkpoint: &CheckpointMessage,
-    ) -> Result<Option<(Self::SubTableCheckpointMessage, usize)>> {
-        Ok(None)
-    }
-
-    fn subtask_index(&self) -> u32;
-}
-
-#[async_trait]
-pub trait Table: Send + Sync + 'static + Clone {
-    type Checkpointer: TableEpochCheckpointer<
-        SubTableCheckpointMessage = Self::TableSubtaskCheckpointMetadata,
-    >;
-    type ConfigMessage: prost::Message + Default;
-    type TableCheckpointMessage: prost::Message + Default + Clone;
-    type TableSubtaskCheckpointMetadata: prost::Message + Default + Clone;
-
-    fn from_config(
-        _config: Self::ConfigMessage,
-        _task_info: Arc<TaskInfo>,
-        _storage_provider: super::StorageProviderRef,
-        _checkpoint_message: Option<Self::TableCheckpointMessage>,
-        _state_version: u32,
-    ) -> Result<Self>
-    where
-        Self: Sized;
-
-    fn epoch_checkpointer(
-        &self,
-        _epoch: u32,
-        _previous_metadata: Option<Self::TableSubtaskCheckpointMetadata>,
-    ) -> Result<Self::Checkpointer>;
-
-    fn merge_checkpoint_metadata(
-        _config: Self::ConfigMessage,
-        _subtask_metadata: HashMap<u32, Self::TableSubtaskCheckpointMetadata>,
-    ) -> Result<Option<Self::TableCheckpointMessage>> {
-        Ok(None)
-    }
-
-    fn subtask_metadata_from_table(
-        &self,
-        _table_metadata: Self::TableCheckpointMessage,
-    ) -> Result<Option<Self::TableSubtaskCheckpointMetadata>> {
-        Ok(None)
-    }
-
-    fn files_to_keep(
-        _config: Self::ConfigMessage,
-        _checkpoint: Self::TableCheckpointMessage,
-    ) -> Result<HashSet<String>> {
-        Ok(HashSet::new())
-    }
-}
diff --git a/src/server/initializer.rs b/src/server/initializer.rs
index 46eca375..7786169a 100644
--- a/src/server/initializer.rs
+++ b/src/server/initializer.rs
@@ -92,7 +92,8 @@ pub fn build_core_registry() -> ComponentRegistry {
     let builder = {
         let b = ComponentRegistryBuilder::new()
             .register("WasmCache", initialize_wasm_cache)
-            .register("TaskManager", initialize_task_manager);
+            .register("TaskManager", initialize_task_manager)
+            .register("JobManager", initialize_job_manager);
         #[cfg(feature = "python")]
         let b = b.register("PythonService", initialize_python_service);
         b
@@ -150,6 +151,22 @@ fn initialize_python_service(config: &GlobalConfig) -> Result<()> {
     Ok(())
 }
 
+fn initialize_job_manager(config: &GlobalConfig) -> Result<()> {
+    use crate::runtime::streaming::api::operator::Registry;
+    use crate::runtime::streaming::factory::OperatorFactory;
+    use crate::runtime::streaming::job::JobManager;
+    use std::sync::Arc;
+
+    let registry = Arc::new(Registry::new());
+    let factory = Arc::new(OperatorFactory::new(registry));
+    let max_memory_bytes = config.streaming.max_memory_bytes.unwrap_or(256 * 1024 * 1024);
+
+    JobManager::init(factory, max_memory_bytes)
+        .context("JobManager service failed to start")?;
+
+    Ok(())
+}
+
 fn initialize_coordinator(_config: &GlobalConfig) -> Result<()> {
     crate::runtime::taskexecutor::TaskManager::get()
         .context("Dependency violation: Coordinator requires TaskManager")?;
@@ -157,5 +174,8 @@ fn initialize_coordinator(_config: &GlobalConfig) -> Result<()> {
     crate::storage::stream_catalog::CatalogManager::global()
         .context("Dependency violation: Coordinator requires StreamCatalog")?;
 
+    crate::runtime::streaming::job::JobManager::global()
+        .context("Dependency violation: Coordinator requires JobManager")?;
+
     Ok(())
 }
diff --git a/src/sql/common/fs_schema.rs b/src/sql/common/fs_schema.rs
index c99af1e5..f7fd5328 100644
--- a/src/sql/common/fs_schema.rs
+++ b/src/sql/common/fs_schema.rs
@@ -220,6 +220,14 @@ impl FsSchema {
         self.key_indices.as_ref()
     }
 
+    pub fn clone_storage_key_indices(&self) -> Option<Vec<usize>> {
+        self.key_indices.clone()
+    }
+
+    pub fn clone_routing_key_indices(&self) -> Option<Vec<usize>> {
+        self.routing_key_indices.clone()
+    }
+
     pub fn filter_by_time(
         &self,
         batch: RecordBatch,
diff --git a/src/sql/common/kafka_catalog.rs b/src/sql/common/kafka_catalog.rs
new file mode 100644
index 00000000..99c8983e
--- /dev/null
+++ b/src/sql/common/kafka_catalog.rs
@@ -0,0 +1,122 @@
+//! Kafka 表级与连接级配置（与 JSON Schema / Catalog 对齐）。
+//!
+//! 放在 [`crate::sql::common`] 而非 `runtime::streaming`，以便 **SQL 规划、Coordinator、连接配置存储**
+//! 与 **运行时工厂**（如 `ConnectorSourceDispatcher`）共用同一套类型，避免循环依赖。
+//!
+//! 与 [`crate::runtime::streaming::api::source::SourceOffset`] 语义相同但独立定义，运行时可用 `From`/`match` 做映射。
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+// ── KafkaTable: single-table Source/Sink ─────────────────────────────────
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct KafkaTable {
+    pub topic: String,
+    /// Source / Sink discriminator plus the fields of each variant; flattened into the top-level JSON and selected by the `type` tag.
+    #[serde(flatten)]
+    pub kind: TableType,
+    #[serde(default)]
+    pub client_configs: HashMap<String, String>,
+    pub value_subject: Option<String>,
+}
+
+impl KafkaTable {
+    /// Schema Registry subject; when `value_subject` is not configured, falls back to the common convention `{topic}-value`.
+    pub fn subject(&self) -> String {
+        self.value_subject
+            .clone()
+            .unwrap_or_else(|| format!("{}-value", self.topic))
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum TableType {
+    Source {
+        offset: KafkaTableSourceOffset,
+        read_mode: Option<ReadMode>,
+        group_id: Option<String>,
+        group_id_prefix: Option<String>,
+    },
+    Sink {
+        commit_mode: SinkCommitMode,
+        key_field: Option<String>,
+        timestamp_field: Option<String>,
+    },
+}
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum KafkaTableSourceOffset {
+    Latest,
+    Earliest,
+    #[default]
+    Group,
+}
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum ReadMode {
+    ReadUncommitted,
+    ReadCommitted,
+}
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum SinkCommitMode {
+    #[default]
+    AtLeastOnce,
+    ExactlyOnce,
+}
+
+// ── KafkaConfig: cluster / authentication / Schema Registry ──────────────
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "camelCase")]
+pub struct KafkaConfig {
+    pub bootstrap_servers: String,
+    #[serde(default)]
+    pub authentication: KafkaConfigAuthentication,
+    #[serde(default)]
+    pub schema_registry_enum: Option<SchemaRegistryConfig>,
+    #[serde(default)]
+    pub connection_properties: HashMap<String, String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(tag = "type")]
+pub enum KafkaConfigAuthentication {
+    #[serde(rename = "None")]
+    None,
+    #[serde(rename = "AWS_MSK_IAM")]
+    AwsMskIam { region: String },
+    #[serde(rename = "SASL")]
+    Sasl {
+        protocol: String,
+        mechanism: String,
+        username: String,
+        password: String,
+    },
+}
+
+impl Default for KafkaConfigAuthentication {
+    fn default() -> Self {
+        Self::None
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(tag = "type")]
+pub enum SchemaRegistryConfig {
+    #[serde(rename = "None")]
+    None,
+    #[serde(rename = "Confluent Schema Registry")]
+    ConfluentSchemaRegistry {
+        endpoint: String,
+        #[serde(rename = "apiKey")]
+        api_key: Option<String>,
+        #[serde(rename = "apiSecret")]
+        api_secret: Option<String>,
+    },
+}
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index cb833c8e..722b2e58 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -25,6 +25,7 @@ pub mod errors;
 pub mod format_from_opts;
 pub mod formats;
 pub mod hash;
+pub mod kafka_catalog;
 pub mod message;
 pub mod operator_config;
 pub mod task_info;
@@ -49,6 +50,10 @@ pub use control::{
 };
 pub use fs_schema::{FsSchema, FsSchemaRef};
 pub use connector_options::{ConnectorOptions, FromOpts};
+pub use kafka_catalog::{
+    KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode,
+    SchemaRegistryConfig, SinkCommitMode, TableType,
+};
 pub use errors::{DataflowError, DataflowResult};
 pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat};
 pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
diff --git a/src/sql/common/operator_config.rs b/src/sql/common/operator_config.rs
index 744dbd85..a1f703f5 100644
--- a/src/sql/common/operator_config.rs
+++ b/src/sql/common/operator_config.rs
@@ -2,6 +2,7 @@ use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
 use super::formats::{BadData, Format, Framing};
+use super::fs_schema::FsSchema;
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct RateLimit {
@@ -27,4 +28,7 @@ pub struct OperatorConfig {
     pub rate_limit: Option<RateLimit>,
     #[serde(default)]
     pub metadata_fields: Vec<MetadataField>,
+    /// Arrow row schema (required for Kafka Source/Sink deserialization and serialization).
+    #[serde(default)]
+    pub input_schema: Option<FsSchema>,
 }

From 157e13d28db99a8a1ca123590a807dc6d8f87fb4 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 00:41:17 +0800
Subject: [PATCH 22/44] update

---
 Cargo.lock                     |   1 -
 cli/cli/Cargo.toml             |   1 -
 src/sql/api/connections.rs     |   6 +-
 src/sql/schema/source_table.rs | 190 ++++++++++++++++++++++++++++++---
 4 files changed, 179 insertions(+), 19 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index fc3a898a..e9ce4109 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2326,7 +2326,6 @@ dependencies = [
  "arrow-schema 52.2.0",
  "clap",
  "comfy-table",
- "function-stream",
  "protocol",
  "rustyline",
  "thiserror 2.0.17",
diff --git a/cli/cli/Cargo.toml b/cli/cli/Cargo.toml
index 72352995..e3c1c591 100644
--- a/cli/cli/Cargo.toml
+++ b/cli/cli/Cargo.toml
@@ -12,7 +12,6 @@ arrow-array = "52"
 arrow-ipc = "52"
 arrow-schema = "52"
 comfy-table = "7"
-function-stream = { path = "../../" }
 protocol = { path = "../../protocol" }
 clap = { version = "4.5", features = ["derive"] }
 thiserror = "2"
diff --git a/src/sql/api/connections.rs b/src/sql/api/connections.rs
index d88dee75..7873ceb2 100644
--- a/src/sql/api/connections.rs
+++ b/src/sql/api/connections.rs
@@ -366,7 +366,7 @@ impl TryFrom<Field> for SourceField {
                 precision: *p,
                 scale: *s,
             }),
-            (DataType::Binary, None) | (DataType::LargeBinary, None) => FieldType::Bytes,
+            (DataType::Binary | DataType::LargeBinary | DataType::BinaryView, None) => FieldType::Bytes,
             (DataType::Timestamp(TimeUnit::Second, _), None) => {
                 FieldType::Timestamp(TimestampField {
                     unit: TimestampUnit::Second,
@@ -387,8 +387,8 @@ impl TryFrom<Field> for SourceField {
                     unit: TimestampUnit::Nanosecond,
                 })
             }
-            (DataType::Utf8, None) => FieldType::String,
-            (DataType::Utf8, Some(FsExtensionType::JSON)) => FieldType::Json,
+            (DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View, None) => FieldType::String,
+            (DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View, Some(FsExtensionType::JSON)) => FieldType::Json,
             (DataType::Struct(fields), None) => {
                 let fields: Result<_, String> = fields
                     .into_iter()
diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs
index dd962e34..85041f4b 100644
--- a/src/sql/schema/source_table.rs
+++ b/src/sql/schema/source_table.rs
@@ -38,7 +38,13 @@ use super::StreamSchemaProvider;
 use crate::multifield_partial_ord;
 use crate::sql::api::{ConnectionProfile, ConnectionSchema, SourceField};
 use crate::sql::common::connector_options::ConnectorOptions;
-use crate::sql::common::{BadData, Format, Framing, JsonCompression, JsonFormat};
+use crate::sql::common::kafka_catalog::{
+    KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode,
+    SinkCommitMode, TableType as KafkaTableType,
+};
+use crate::sql::common::{
+    BadData, Format, Framing, FsSchema, JsonCompression, JsonFormat, OperatorConfig, RateLimit,
+};
 use crate::sql::schema::ConnectionType;
 use crate::sql::schema::table::SqlSource;
 use crate::sql::types::ProcessingMode;
@@ -303,8 +309,8 @@ impl SourceTable {
 
         let connection_schema = ConnectionSchema::try_new(
             format.clone(),
-            Some(bad_data),
-            framing,
+            Some(bad_data.clone()),
+            framing.clone(),
             schema_fields,
             None,
             Some(inferred_empty),
@@ -421,19 +427,34 @@ impl SourceTable {
 
         table.lookup_cache_ttl = options.pull_opt_duration("lookup.cache.ttl")?;
 
-        let extra_opts = options.drain_remaining_string_values()?;
-        let mut config_root = serde_json::json!({
-            "connector": connector_name,
-            "connection_schema": connection_schema,
-        });
-        if let serde_json::Value::Object(ref mut map) = config_root {
-            for (k, v) in extra_opts {
-                map.insert(k, serde_json::Value::String(v));
+        if connector_name.eq_ignore_ascii_case("kafka") {
+            let physical = table.produce_physical_schema();
+            let op_cfg = wire_kafka_operator_config(
+                options,
+                role,
+                &physical,
+                &format,
+                bad_data,
+                framing,
+            )?;
+            table.opaque_config = serde_json::to_string(&op_cfg).map_err(|e| {
+                DataFusionError::Plan(format!("failed to serialize Kafka OperatorConfig: {e}"))
+            })?;
+        } else {
+            let extra_opts = options.drain_remaining_string_values()?;
+            let mut config_root = serde_json::json!({
+                "connector": connector_name,
+                "connection_schema": connection_schema,
+            });
+            if let serde_json::Value::Object(ref mut map) = config_root {
+                for (k, v) in extra_opts {
+                    map.insert(k, serde_json::Value::String(v));
+                }
             }
+            table.opaque_config = serde_json::to_string(&config_root).map_err(|e| {
+                DataFusionError::Plan(format!("failed to serialize connector config: {e}"))
+            })?;
         }
-        table.opaque_config = serde_json::to_string(&config_root).map_err(|e| {
-            DataFusionError::Plan(format!("failed to serialize connector config: {e}"))
-        })?;
 
         if role == TableRole::Ingestion && encoding.supports_delta_updates() && primary_keys.is_empty()
         {
@@ -544,6 +565,147 @@ impl SourceTable {
     }
 }
 
+/// Kafka: runtime [`KafkaSourceDispatcher`] / [`KafkaSinkDispatcher`] expect [`OperatorConfig`] JSON,
+/// not the legacy `{ connector, connection_schema, ... }` blob used by other adapters.
+fn wire_kafka_operator_config(
+    options: &mut ConnectorOptions,
+    role: TableRole,
+    physical_schema: &Schema,
+    format: &Option<Format>,
+    bad_data: BadData,
+    framing: Option<Framing>,
+) -> Result<OperatorConfig> {
+    let bootstrap_servers = match options.pull_opt_str("bootstrap.servers")? {
+        Some(s) => s,
+        None => options
+            .pull_opt_str("bootstrap_servers")?
+            .ok_or_else(|| {
+                plan_datafusion_err!(
+                    "Kafka connector requires 'bootstrap.servers' in the WITH clause"
+                )
+            })?,
+    };
+
+    let topic = options
+        .pull_opt_str("topic")?
+        .ok_or_else(|| plan_datafusion_err!("Kafka connector requires 'topic' in the WITH clause"))?;
+
+    let sql_format = format.clone().ok_or_else(|| {
+        plan_datafusion_err!(
+            "Kafka connector requires 'format' in the WITH clause (e.g. format = 'json')"
+        )
+    })?;
+
+    let rate_limit = options
+        .pull_opt_u64("rate_limit.messages_per_second")?
+        .map(|v| RateLimit {
+            messages_per_second: v.clamp(1, u32::MAX as u64) as u32,
+        });
+
+    let value_subject = options.pull_opt_str("value.subject")?;
+
+    let kind = match role {
+        TableRole::Ingestion => {
+            let offset = match options.pull_opt_str("scan.startup.mode")?.as_deref() {
+                Some("latest") => KafkaTableSourceOffset::Latest,
+                Some("earliest") => KafkaTableSourceOffset::Earliest,
+                None | Some("group-offsets") | Some("group") => KafkaTableSourceOffset::Group,
+                Some(other) => {
+                    return plan_err!(
+                        "invalid scan.startup.mode '{other}'; expected latest, earliest, or group-offsets"
+                    );
+                }
+            };
+            let read_mode = match options.pull_opt_str("isolation.level")?.as_deref() {
+                Some("read_committed") => Some(ReadMode::ReadCommitted),
+                Some("read_uncommitted") => Some(ReadMode::ReadUncommitted),
+                None => None,
+                Some(other) => {
+                    return plan_err!("invalid isolation.level '{other}'");
+                }
+            };
+            let group_id = match options.pull_opt_str("group.id")? {
+                Some(s) => Some(s),
+                None => options.pull_opt_str("group_id")?,
+            };
+            let group_id_prefix = options.pull_opt_str("group.id.prefix")?;
+            KafkaTableType::Source {
+                offset,
+                read_mode,
+                group_id,
+                group_id_prefix,
+            }
+        }
+        TableRole::Egress => {
+            let commit_mode = match options.pull_opt_str("sink.commit.mode")?.as_deref() {
+                Some("exactly-once") | Some("exactly_once") => SinkCommitMode::ExactlyOnce,
+                None | Some("at-least-once") | Some("at_least_once") => SinkCommitMode::AtLeastOnce,
+                Some(other) => {
+                    return plan_err!("invalid sink.commit.mode '{other}'");
+                }
+            };
+            let key_field = match options.pull_opt_str("sink.key.field")? {
+                Some(s) => Some(s),
+                None => options.pull_opt_str("key.field")?,
+            };
+            let timestamp_field = match options.pull_opt_str("sink.timestamp.field")? {
+                Some(s) => Some(s),
+                None => options.pull_opt_str("timestamp.field")?,
+            };
+            KafkaTableType::Sink {
+                commit_mode,
+                key_field,
+                timestamp_field,
+            }
+        }
+        TableRole::Reference => {
+            return plan_err!("Kafka connector cannot be used as a lookup table in this path");
+        }
+    };
+
+    // Role already decided; keep these out of librdkafka `connection_properties`.
+    let _ = options.pull_opt_str("type")?;
+    let _ = options.pull_opt_str("connector")?;
+
+    let connection_properties = options.drain_remaining_string_values()?;
+
+    let kafka_connection = KafkaConfig {
+        bootstrap_servers,
+        authentication: KafkaConfigAuthentication::None,
+        schema_registry_enum: None,
+        connection_properties,
+    };
+
+    let kafka_table = KafkaTable {
+        topic,
+        kind,
+        client_configs: HashMap::new(),
+        value_subject,
+    };
+
+    let fields: Vec<Field> = physical_schema
+        .fields()
+        .iter()
+        .map(|f| f.as_ref().clone())
+        .collect();
+    let input_schema = FsSchema::from_fields(fields);
+
+    Ok(OperatorConfig {
+        connection: serde_json::to_value(&kafka_connection).map_err(|e| {
+            DataFusionError::Plan(format!("Kafka connection serialization failed: {e}"))
+        })?,
+        table: serde_json::to_value(&kafka_table).map_err(|e| {
+            DataFusionError::Plan(format!("Kafka table serialization failed: {e}"))
+        })?,
+        format: Some(sql_format),
+        bad_data: Some(bad_data),
+        framing,
+        rate_limit,
+        metadata_fields: vec![],
+        input_schema: Some(input_schema),
+    })
+}
+
 /// Plan a SQL scalar expression against a table-qualified schema (e.g. watermark `AS` clause).
 fn plan_generating_expr(
     ast: &ast::Expr,

From de79169e0c4ffa016ab19c7ce872649d248fafb7 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 11:27:08 +0800
Subject: [PATCH 23/44] update

---
 src/common/fs_schema.rs                       | 12 +++
 src/coordinator/plan/logical_plan_visitor.rs  | 16 ++-
 src/coordinator/tool/mod.rs                   | 12 +++
 src/runtime/streaming/api/context.rs          | 12 +++
 src/runtime/streaming/api/mod.rs              | 12 +++
 src/runtime/streaming/api/operator.rs         | 12 +++
 src/runtime/streaming/api/source.rs           | 12 +++
 src/runtime/streaming/arrow/mod.rs            | 12 +++
 src/runtime/streaming/driver.rs               | 12 +++
 src/runtime/streaming/error.rs                | 12 +++
 src/runtime/streaming/execution/mod.rs        | 12 +++
 src/runtime/streaming/execution/runner.rs     | 12 +++
 src/runtime/streaming/execution/source.rs     | 12 +++
 .../execution/tracker/barrier_aligner.rs      | 12 +++
 .../streaming/execution/tracker/mod.rs        | 12 +++
 .../execution/tracker/watermark_tracker.rs    | 12 +++
 src/runtime/streaming/factory/mod.rs          | 12 +++
 .../factory/registry/kafka_factory.rs         | 17 +++-
 src/runtime/streaming/factory/registry/mod.rs | 19 +++-
 src/runtime/streaming/format/config.rs        | 12 +++
 src/runtime/streaming/format/deserializer.rs  | 12 +++
 src/runtime/streaming/format/json_encoder.rs  | 12 +++
 src/runtime/streaming/format/mod.rs           | 12 +++
 src/runtime/streaming/format/serializer.rs    | 12 +++
 src/runtime/streaming/job/edge_manager.rs     | 12 +++
 src/runtime/streaming/job/job_manager.rs      | 12 +++
 src/runtime/streaming/job/mod.rs              | 12 +++
 src/runtime/streaming/job/models.rs           | 12 +++
 src/runtime/streaming/memory/mod.rs           | 12 +++
 src/runtime/streaming/memory/pool.rs          | 12 +++
 src/runtime/streaming/memory/ticket.rs        | 12 +++
 src/runtime/streaming/network/endpoint.rs     | 12 +++
 src/runtime/streaming/network/environment.rs  | 12 +++
 src/runtime/streaming/network/mod.rs          | 12 +++
 .../grouping/incremental_aggregate.rs         | 15 ++-
 .../streaming/operators/grouping/mod.rs       | 12 +++
 .../operators/grouping/updating_cache.rs      | 12 +++
 .../operators/joins/join_instance.rs          | 12 +++
 .../operators/joins/join_with_expiration.rs   | 12 +++
 src/runtime/streaming/operators/joins/mod.rs  | 12 +++
 src/runtime/streaming/operators/key_by.rs     | 12 +++
 .../streaming/operators/key_operator.rs       | 12 +++
 src/runtime/streaming/operators/mod.rs        | 12 +++
 src/runtime/streaming/operators/projection.rs | 12 +++
 .../streaming/operators/sink/kafka/mod.rs     | 12 +++
 src/runtime/streaming/operators/sink/mod.rs   | 12 +++
 .../streaming/operators/source/kafka/mod.rs   | 12 +++
 src/runtime/streaming/operators/source/mod.rs | 12 +++
 .../operators/stateless_physical_executor.rs  | 12 +++
 .../streaming/operators/value_execution.rs    | 12 +++
 .../streaming/operators/watermark/mod.rs      | 12 +++
 .../watermark/watermark_generator.rs          | 12 +++
 .../streaming/operators/windows/mod.rs        | 12 +++
 .../windows/session_aggregating_window.rs     | 12 +++
 .../windows/sliding_aggregating_window.rs     | 12 +++
 .../windows/tumbling_aggregating_window.rs    | 12 +++
 .../operators/windows/window_function.rs      | 12 +++
 src/runtime/streaming/protocol/control.rs     | 12 +++
 src/runtime/streaming/protocol/event.rs       | 12 +++
 src/runtime/streaming/protocol/mod.rs         | 12 +++
 src/runtime/streaming/protocol/stream_out.rs  | 12 +++
 src/runtime/streaming/protocol/tracked.rs     | 12 +++
 src/runtime/streaming/protocol/watermark.rs   | 12 +++
 src/runtime/wasm/processor/function_error.rs  | 12 +++
 src/sql/analysis/aggregate_rewriter.rs        | 12 +++
 src/sql/analysis/async_udf_rewriter.rs        | 12 +++
 src/sql/analysis/join_rewriter.rs             | 12 +++
 src/sql/analysis/mod.rs                       | 12 +++
 src/sql/analysis/row_time_rewriter.rs         | 12 +++
 src/sql/analysis/sink_input_rewriter.rs       | 12 +++
 src/sql/analysis/source_metadata_visitor.rs   | 12 +++
 src/sql/analysis/stream_rewriter.rs           | 12 +++
 src/sql/analysis/streaming_window_analzer.rs  | 12 +++
 src/sql/analysis/udafs.rs                     | 12 +++
 src/sql/analysis/window_function_rewriter.rs  | 12 +++
 src/sql/api/checkpoints.rs                    | 12 +++
 src/sql/api/connections.rs                    | 12 +++
 src/sql/api/metrics.rs                        | 12 +++
 src/sql/api/mod.rs                            | 12 +++
 src/sql/api/pipelines.rs                      | 12 +++
 src/sql/api/public_ids.rs                     | 12 +++
 src/sql/api/schema_resolver.rs                | 12 +++
 src/sql/api/udfs.rs                           | 12 +++
 src/sql/api/var_str.rs                        | 12 +++
 src/sql/common/arrow_ext.rs                   | 12 +++
 src/sql/common/connector_options.rs           | 12 +++
 src/sql/common/control.rs                     | 12 +++
 src/sql/common/converter.rs                   | 12 +++
 src/sql/common/date.rs                        | 12 +++
 src/sql/common/debezium.rs                    | 12 +++
 src/sql/common/errors.rs                      | 12 +++
 src/sql/common/format_from_opts.rs            | 57 ++++++-----
 src/sql/common/formats.rs                     | 12 +++
 src/sql/common/fs_schema.rs                   | 12 +++
 src/sql/common/hash.rs                        | 12 +++
 src/sql/common/kafka_catalog.rs               | 12 +++
 src/sql/common/message.rs                     | 12 +++
 src/sql/common/mod.rs                         |  2 +
 src/sql/common/operator_config.rs             | 12 +++
 src/sql/common/task_info.rs                   | 12 +++
 src/sql/common/time_utils.rs                  | 12 +++
 src/sql/common/with_option_keys.rs            | 97 +++++++++++++++++++
 src/sql/common/worker.rs                      | 12 +++
 src/sql/datastream/logical.rs                 | 39 +++-----
 src/sql/datastream/mod.rs                     | 12 +++
 src/sql/extensions/aggregate.rs               | 14 +--
 src/sql/extensions/async_udf.rs               |  6 +-
 src/sql/extensions/constants.rs               |  3 +-
 src/sql/extensions/debezium.rs                | 23 ++---
 src/sql/extensions/join.rs                    |  5 +-
 src/sql/extensions/key_calculation.rs         | 11 ++-
 src/sql/extensions/lookup.rs                  |  5 +-
 src/sql/extensions/projection.rs              |  5 +-
 src/sql/extensions/remote_table.rs            |  3 +-
 src/sql/extensions/sink.rs                    |  3 +-
 src/sql/extensions/table_source.rs            |  3 +-
 src/sql/extensions/timestamp_append.rs        |  3 +-
 src/sql/extensions/updating_aggregate.rs      | 11 ++-
 src/sql/extensions/watermark_node.rs          | 16 +--
 src/sql/extensions/windows_function.rs        |  7 +-
 src/sql/functions/mod.rs                      | 26 +++--
 src/sql/logical_node/logical/operator_name.rs | 23 +++++
 src/sql/logical_node/mod.rs                   | 12 +++
 src/sql/logical_planner/mod.rs                | 25 +++--
 src/sql/logical_planner/planner.rs            | 12 +++
 src/sql/physical/physical_planner.rs          | 12 +++
 src/sql/schema/data_encoding_format.rs        | 10 +-
 src/sql/schema/source_table.rs                | 73 +++++++-------
 src/sql/schema/table_role.rs                  |  5 +-
 src/sql/types/data_type.rs                    | 12 +++
 src/sql/types/df_field.rs                     | 12 +++
 src/sql/types/mod.rs                          | 12 +++
 src/sql/types/placeholder_udf.rs              | 12 +++
 src/sql/types/stream_schema.rs                | 12 +++
 src/sql/types/window.rs                       | 20 +++-
 135 files changed, 1653 insertions(+), 169 deletions(-)
 create mode 100644 src/sql/common/with_option_keys.rs

diff --git a/src/common/fs_schema.rs b/src/common/fs_schema.rs
index e9ce6586..4229b957 100644
--- a/src/common/fs_schema.rs
+++ b/src/common/fs_schema.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! FunctionStream table/stream schema: Arrow [`Schema`] plus timestamp index and optional key columns.
 //!
 //! [`Schema`]: datafusion::arrow::datatypes::Schema
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 14ed01b8..9e95c5bd 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -38,6 +38,7 @@ use crate::coordinator::tool::ConnectorOptions;
 use crate::sql::analysis::{
     maybe_add_key_extension_to_sink, rewrite_sinks, StreamSchemaProvider,
 };
+use crate::sql::common::with_option_keys as opt;
 use crate::sql::extensions::sink::StreamEgressNode;
 use crate::sql::functions::{is_json_union, serialize_outgoing_json};
 use crate::sql::logical_node::logical::{LogicalProgram, ProgramConfig};
@@ -47,9 +48,6 @@ use crate::sql::rewrite_plan;
 use crate::sql::schema::source_table::SourceTable;
 use crate::sql::schema::{ColumnDescriptor, ConnectionType, Table};
 
-const OPT_CONNECTOR: &str = "connector";
-const OPT_PARTITION_BY: &str = "partition_by";
-
 #[derive(Clone)]
 pub struct LogicalPlanVisitor {
     schema_provider: StreamSchemaProvider,
@@ -95,11 +93,11 @@ impl LogicalPlanVisitor {
         debug!("Initiating streaming sink compilation for identifier: {}", sink_table_name);
 
         let mut sink_properties = ConnectorOptions::new(with_options, &None)?;
-        let connector_type = sink_properties.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| {
+        let connector_type = sink_properties.pull_opt_str(opt::CONNECTOR)?.ok_or_else(|| {
             plan_datafusion_err!(
             "Validation Error: Streaming table '{}' requires the '{}' property",
             sink_table_name,
-            OPT_CONNECTOR
+            opt::CONNECTOR
         )
         })?;
 
@@ -192,7 +190,7 @@ impl LogicalPlanVisitor {
         options: &mut ConnectorOptions,
     ) -> Result<Option<Vec<Expr>>> {
         options
-            .pull_opt_str(OPT_PARTITION_BY)?
+            .pull_opt_str(opt::PARTITION_BY)?
             .map(|raw_cols| raw_cols.split(',').map(|c| col(c.trim())).collect())
             .map(Ok)
             .transpose()
@@ -200,7 +198,7 @@ impl LogicalPlanVisitor {
 
     fn contains_connector_property(options: &[SqlOption]) -> bool {
         options.iter().any(|opt| match opt {
-            SqlOption::KeyValue { key, .. } => key.value.eq_ignore_ascii_case(OPT_CONNECTOR),
+            SqlOption::KeyValue { key, .. } => key.value.eq_ignore_ascii_case(opt::CONNECTOR),
             _ => false,
         })
     }
@@ -279,10 +277,10 @@ impl LogicalPlanVisitor {
             .collect::<Vec<_>>();
 
         let mut connector_options = ConnectorOptions::new(&stmt.with_options, &None)?;
-        let adapter_type = connector_options.pull_opt_str(OPT_CONNECTOR)?.ok_or_else(|| {
+        let adapter_type = connector_options.pull_opt_str(opt::CONNECTOR)?.ok_or_else(|| {
             plan_datafusion_err!(
                 "Configuration Error: Missing required property '{}' in WITH clause",
-                OPT_CONNECTOR
+                opt::CONNECTOR
             )
         })?;
 
diff --git a/src/coordinator/tool/mod.rs b/src/coordinator/tool/mod.rs
index 8ef77230..6b48aa0e 100644
--- a/src/coordinator/tool/mod.rs
+++ b/src/coordinator/tool/mod.rs
@@ -1 +1,13 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub use crate::sql::common::ConnectorOptions;
diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs
index 77038bf3..b70d40df 100644
--- a/src/runtime/streaming/api/context.rs
+++ b/src/runtime/streaming/api/context.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::runtime::streaming::memory::MemoryPool;
 use crate::runtime::streaming::protocol::event::StreamEvent;
 use crate::runtime::streaming::protocol::tracked::TrackedEvent;
diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs
index 8115b0fe..49e45328 100644
--- a/src/runtime/streaming/api/mod.rs
+++ b/src/runtime/streaming/api/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 接口层：算子与源实现需遵循的 trait 与运行时上下文。
 
 pub mod context;
diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs
index 3c088e3c..eabeff85 100644
--- a/src/runtime/streaming/api/operator.rs
+++ b/src/runtime/streaming/api/operator.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::source::SourceOperator;
 use crate::runtime::streaming::protocol::stream_out::StreamOutput;
diff --git a/src/runtime/streaming/api/source.rs b/src/runtime/streaming/api/source.rs
index a4ff46c4..1f79de38 100644
--- a/src/runtime/streaming/api/source.rs
+++ b/src/runtime/streaming/api/source.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 源算子：由 [`crate::runtime::streaming::execution::SourceRunner`] 驱动 `fetch_next`，不得在内部死循环阻塞控制面。
 
 use crate::runtime::streaming::api::context::TaskContext;
diff --git a/src/runtime/streaming/arrow/mod.rs b/src/runtime/streaming/arrow/mod.rs
index fdfa87f7..d706199f 100644
--- a/src/runtime/streaming/arrow/mod.rs
+++ b/src/runtime/streaming/arrow/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! Arrow / DataFusion 辅助：聚合表达式解码等。
 //!
 //! `UpdatingCache` 位于 [`crate::runtime::streaming::operators::updating_cache`]。
diff --git a/src/runtime/streaming/driver.rs b/src/runtime/streaming/driver.rs
index f2abec87..011e49ab 100644
--- a/src/runtime/streaming/driver.rs
+++ b/src/runtime/streaming/driver.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::future::pending;
 use std::sync::Arc;
 
diff --git a/src/runtime/streaming/error.rs b/src/runtime/streaming/error.rs
index 3d8fba19..c8d1944a 100644
--- a/src/runtime/streaming/error.rs
+++ b/src/runtime/streaming/error.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::fmt::Display;
 use thiserror::Error;
 
diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs
index 4d55e361..a4fb6d95 100644
--- a/src/runtime/streaming/execution/mod.rs
+++ b/src/runtime/streaming/execution/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 执行层：Tokio Actor 运行容器。
 
 pub mod runner;
diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs
index fa907088..994d8c04 100644
--- a/src/runtime/streaming/execution/runner.rs
+++ b/src/runtime/streaming/execution/runner.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use async_trait::async_trait;
 use tokio::sync::mpsc::Receiver;
 use tokio_stream::{StreamExt, StreamMap};
diff --git a/src/runtime/streaming/execution/source.rs b/src/runtime/streaming/execution/source.rs
index d51132ac..a9fbd561 100644
--- a/src/runtime/streaming/execution/source.rs
+++ b/src/runtime/streaming/execution/source.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 源任务物理驱动：控制面优先、`fetch_next` 非阻塞契约、可选融合算子链下推。
 
 use crate::runtime::streaming::api::context::TaskContext;
diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/runtime/streaming/execution/tracker/barrier_aligner.rs
index 05f2cc90..34b5380a 100644
--- a/src/runtime/streaming/execution/tracker/barrier_aligner.rs
+++ b/src/runtime/streaming/execution/tracker/barrier_aligner.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! Chandy–Lamport 风格屏障对齐（零内存缓冲：未对齐时从轮询池移除输入流，依赖底层背压）。
 
 use std::collections::HashSet;
diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/runtime/streaming/execution/tracker/mod.rs
index bfa24e8b..81329c27 100644
--- a/src/runtime/streaming/execution/tracker/mod.rs
+++ b/src/runtime/streaming/execution/tracker/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 协调层：屏障对齐与多路水位线追踪。
 
 pub mod barrier_aligner;
diff --git a/src/runtime/streaming/execution/tracker/watermark_tracker.rs b/src/runtime/streaming/execution/tracker/watermark_tracker.rs
index ca2f082f..6304b4c3 100644
--- a/src/runtime/streaming/execution/tracker/watermark_tracker.rs
+++ b/src/runtime/streaming/execution/tracker/watermark_tracker.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::runtime::streaming::protocol::watermark::{merge_watermarks, watermark_strictly_advances};
 use crate::sql::common::Watermark;
 
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
index 2cc0cfba..8c03c298 100644
--- a/src/runtime/streaming/factory/mod.rs
+++ b/src/runtime/streaming/factory/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod registry;
 
 pub use registry::{
diff --git a/src/runtime/streaming/factory/registry/kafka_factory.rs b/src/runtime/streaming/factory/registry/kafka_factory.rs
index 6a451166..8f42acd9 100644
--- a/src/runtime/streaming/factory/registry/kafka_factory.rs
+++ b/src/runtime/streaming/factory/registry/kafka_factory.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! Kafka Source/Sink：从 [`ConnectorOp`] + [`OperatorConfig`] 构造物理算子（鉴权与 client 配置合并）。
 
 use anyhow::{anyhow, bail, Context, Result};
@@ -18,6 +30,7 @@ use crate::runtime::streaming::format::{
 };
 use crate::runtime::streaming::operators::sink::kafka::{ConsistencyMode, KafkaSinkOperator};
 use crate::runtime::streaming::operators::source::kafka::{BufferedDeserializer, KafkaSourceOperator};
+use crate::sql::common::constants::connector_type;
 use crate::sql::common::formats::{
     BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, JsonFormat as SqlJsonFormat,
     TimestampFormat as SqlTimestampFormat,
@@ -178,7 +191,7 @@ impl OperatorConstructor for KafkaSourceDispatcher {
         let op = ConnectorOp::decode(payload)
             .context("Failed to decode ConnectorOp protobuf for Kafka Source")?;
 
-        if op.connector != "kafka" {
+        if op.connector != connector_type::KAFKA {
             bail!(
                 "KafkaSourceDispatcher: expected connector 'kafka', got '{}'",
                 op.connector
@@ -257,7 +270,7 @@ impl OperatorConstructor for KafkaSinkDispatcher {
         let op = ConnectorOp::decode(payload)
             .context("Failed to decode ConnectorOp protobuf for Kafka Sink")?;
 
-        if op.connector != "kafka" {
+        if op.connector != connector_type::KAFKA {
             bail!(
                 "KafkaSinkDispatcher: expected connector 'kafka', got '{}'",
                 op.connector
diff --git a/src/runtime/streaming/factory/registry/mod.rs b/src/runtime/streaming/factory/registry/mod.rs
index 9bb1148d..d129f644 100644
--- a/src/runtime/streaming/factory/registry/mod.rs
+++ b/src/runtime/streaming/factory/registry/mod.rs
@@ -1,8 +1,21 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use anyhow::{anyhow, Result};
 use prost::Message;
 use std::collections::HashMap;
 use std::sync::Arc;
 
+use crate::sql::common::constants::connector_type;
 use crate::runtime::streaming::api::operator::Registry;
 
 use crate::runtime::streaming::api::operator::ConstructedOperator;
@@ -239,8 +252,8 @@ impl OperatorConstructor for ConnectorSourceDispatcher {
             .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?;
 
         match op.connector.as_str() {
-            "kafka" => KafkaSourceDispatcher.with_config(config, registry),
-            "redis" => Err(anyhow!(
+            ct if ct == connector_type::KAFKA => KafkaSourceDispatcher.with_config(config, registry),
+            ct if ct == connector_type::REDIS => Err(anyhow!(
                 "ConnectorSource '{}' factory wiring not yet implemented",
                 op.connector
             )),
@@ -257,7 +270,7 @@ impl OperatorConstructor for ConnectorSinkDispatcher {
             .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?;
 
         match op.connector.as_str() {
-            "kafka" => KafkaSinkDispatcher.with_config(config, registry),
+            ct if ct == connector_type::KAFKA => KafkaSinkDispatcher.with_config(config, registry),
             other => Err(anyhow!("Unsupported sink connector type: {}", other)),
         }
     }
diff --git a/src/runtime/streaming/format/config.rs b/src/runtime/streaming/format/config.rs
index e0ac61bb..235e1d82 100644
--- a/src/runtime/streaming/format/config.rs
+++ b/src/runtime/streaming/format/config.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use serde::{Deserialize, Serialize};
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
diff --git a/src/runtime/streaming/format/deserializer.rs b/src/runtime/streaming/format/deserializer.rs
index 83360bd8..1c32d48a 100644
--- a/src/runtime/streaming/format/deserializer.rs
+++ b/src/runtime/streaming/format/deserializer.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 数据反序列化器：将外界收到的字节流转化为结构化 [`RecordBatch`]。
 
 use anyhow::{anyhow, Result};
diff --git a/src/runtime/streaming/format/json_encoder.rs b/src/runtime/streaming/format/json_encoder.rs
index 7721d9bc..8d34e9ef 100644
--- a/src/runtime/streaming/format/json_encoder.rs
+++ b/src/runtime/streaming/format/json_encoder.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 极致优化的 Arrow JSON 编码器。
 //!
 //! 解决 Arrow 原生 JSON 导出时不兼容 Kafka / 时间戳 / Decimal 的痛点。
diff --git a/src/runtime/streaming/format/mod.rs b/src/runtime/streaming/format/mod.rs
index c4dbbeda..b27935ba 100644
--- a/src/runtime/streaming/format/mod.rs
+++ b/src/runtime/streaming/format/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod config;
 pub mod deserializer;
 pub mod json_encoder;
diff --git a/src/runtime/streaming/format/serializer.rs b/src/runtime/streaming/format/serializer.rs
index 80969ec6..7b61d055 100644
--- a/src/runtime/streaming/format/serializer.rs
+++ b/src/runtime/streaming/format/serializer.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 数据序列化器：将内存 [`RecordBatch`] 转换为二进制消息流，供 Sink 连接器发送。
 
 use anyhow::{anyhow, Result};
diff --git a/src/runtime/streaming/job/edge_manager.rs b/src/runtime/streaming/job/edge_manager.rs
index 10ca97f1..53f82cb9 100644
--- a/src/runtime/streaming/job/edge_manager.rs
+++ b/src/runtime/streaming/job/edge_manager.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 
 use protocol::grpc::api::{FsEdge, FsNode};
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
index 6413eba6..844131a0 100644
--- a/src/runtime/streaming/job/job_manager.rs
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::sync::{Arc, OnceLock, RwLock};
 
diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs
index 448c26cd..72f98d69 100644
--- a/src/runtime/streaming/job/mod.rs
+++ b/src/runtime/streaming/job/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod edge_manager;
 pub mod job_manager;
 pub mod models;
diff --git a/src/runtime/streaming/job/models.rs b/src/runtime/streaming/job/models.rs
index 3e843ea4..35b48da7 100644
--- a/src/runtime/streaming/job/models.rs
+++ b/src/runtime/streaming/job/models.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::sync::{Arc, RwLock};
 use std::thread::JoinHandle;
diff --git a/src/runtime/streaming/memory/mod.rs b/src/runtime/streaming/memory/mod.rs
index 93101fa2..45fc3194 100644
--- a/src/runtime/streaming/memory/mod.rs
+++ b/src/runtime/streaming/memory/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod pool;
 pub mod ticket;
 
diff --git a/src/runtime/streaming/memory/pool.rs b/src/runtime/streaming/memory/pool.rs
index 98ba4cf3..54276088 100644
--- a/src/runtime/streaming/memory/pool.rs
+++ b/src/runtime/streaming/memory/pool.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use parking_lot::Mutex;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
diff --git a/src/runtime/streaming/memory/ticket.rs b/src/runtime/streaming/memory/ticket.rs
index ca1759b9..1c9d2798 100644
--- a/src/runtime/streaming/memory/ticket.rs
+++ b/src/runtime/streaming/memory/ticket.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::sync::Arc;
 
 use super::pool::MemoryPool;
diff --git a/src/runtime/streaming/network/endpoint.rs b/src/runtime/streaming/network/endpoint.rs
index 3fc1fc57..a8525e1e 100644
--- a/src/runtime/streaming/network/endpoint.rs
+++ b/src/runtime/streaming/network/endpoint.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::runtime::streaming::protocol::event::StreamEvent;
 use crate::runtime::streaming::protocol::tracked::TrackedEvent;
 use anyhow::{anyhow, Result};
diff --git a/src/runtime/streaming/network/environment.rs b/src/runtime/streaming/network/environment.rs
index 19aedec7..07ea0cab 100644
--- a/src/runtime/streaming/network/environment.rs
+++ b/src/runtime/streaming/network/environment.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use super::endpoint::{BoxedEventStream, PhysicalSender};
 use std::collections::HashMap;
 
diff --git a/src/runtime/streaming/network/mod.rs b/src/runtime/streaming/network/mod.rs
index 259e0f12..4b120781 100644
--- a/src/runtime/streaming/network/mod.rs
+++ b/src/runtime/streaming/network/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod endpoint;
 pub mod environment;
 
diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
index 42eda177..104d24a1 100644
--- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use anyhow::{anyhow, bail, Result};
 use arrow::compute::max_array;
 use arrow::row::{RowConverter, SortField};
@@ -13,6 +25,7 @@ use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit};
 use datafusion::common::{Result as DFResult, ScalarValue};
 use datafusion::physical_expr::aggregate::AggregateFunctionExpr;
 use datafusion::physical_plan::{Accumulator, PhysicalExpr};
+use crate::sql::common::constants::updating_state_field;
 use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
 use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
 use datafusion_proto::protobuf::PhysicalExprNode;
@@ -239,7 +252,7 @@ impl IncrementalAggregatingFunc {
                 .expect("_updating_meta must be StructArray");
 
             let is_retract_array = meta_struct
-                .column_by_name("is_retract")
+                .column_by_name(updating_state_field::IS_RETRACT)
                 .expect("meta struct must have is_retract");
             
             Some(is_retract_array.as_any().downcast_ref::<BooleanArray>().expect("is_retract must be BooleanArray"))
diff --git a/src/runtime/streaming/operators/grouping/mod.rs b/src/runtime/streaming/operators/grouping/mod.rs
index fb2ae7b1..ef672351 100644
--- a/src/runtime/streaming/operators/grouping/mod.rs
+++ b/src/runtime/streaming/operators/grouping/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod incremental_aggregate;
 pub mod updating_cache;
 
diff --git a/src/runtime/streaming/operators/grouping/updating_cache.rs b/src/runtime/streaming/operators/grouping/updating_cache.rs
index b6fbcc99..bdba9fa7 100644
--- a/src/runtime/streaming/operators/grouping/updating_cache.rs
+++ b/src/runtime/streaming/operators/grouping/updating_cache.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 按 key 的增量状态缓存：LRU + TTL（idle），供 [`super::incremental_aggregate`] 等使用。
 
 use std::borrow::Borrow;
diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs
index 639876bf..278bc8fe 100644
--- a/src/runtime/streaming/operators/joins/join_instance.rs
+++ b/src/runtime/streaming/operators/joins/join_instance.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 瞬时 JOIN：双通道喂入 DataFusion 物理计划，水位线推进时闭合实例并抽干结果（纯内存版）。
 
 use anyhow::{anyhow, Result};
diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs
index c2bb6259..1a31b253 100644
--- a/src/runtime/streaming/operators/joins/join_with_expiration.rs
+++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 带 TTL 的 Key-Time Join：纯内存状态版 + DataFusion 物理计划成对计算。
 //! 完全移除了底层 TableManager 和持久化状态依赖。
 
diff --git a/src/runtime/streaming/operators/joins/mod.rs b/src/runtime/streaming/operators/joins/mod.rs
index ccfff792..bc81f328 100644
--- a/src/runtime/streaming/operators/joins/mod.rs
+++ b/src/runtime/streaming/operators/joins/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod join_instance;
 pub mod join_with_expiration;
 
diff --git a/src/runtime/streaming/operators/key_by.rs b/src/runtime/streaming/operators/key_by.rs
index 2c183577..a432011d 100644
--- a/src/runtime/streaming/operators/key_by.rs
+++ b/src/runtime/streaming/operators/key_by.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 物理网络路由算子：利用 DataFusion 物理表达式提取 Key，基于 Hash 排序执行零拷贝切片路由。
 
 use anyhow::{anyhow, Result};
diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/runtime/streaming/operators/key_operator.rs
index 93a50db5..5dfd66f6 100644
--- a/src/runtime/streaming/operators/key_operator.rs
+++ b/src/runtime/streaming/operators/key_operator.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 物理网络路由算子：利用 DataFusion 物理表达式提取 Key，基于 Hash 排序执行零拷贝切片路由。
 //!
 //! 提供两种算子：
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
index 958b5320..dc8b39b7 100644
--- a/src/runtime/streaming/operators/mod.rs
+++ b/src/runtime/streaming/operators/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 内置算子。
 
 pub mod grouping;
diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs
index 5e62afc6..33fe0d51 100644
--- a/src/runtime/streaming/operators/projection.rs
+++ b/src/runtime/streaming/operators/projection.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 高性能投影算子：直接操作 Arrow Array 执行列映射与标量运算，
 //! 避开 DataFusion 执行树开销，适用于 SELECT 字段筛选和简单标量计算。
 
diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs
index 1ce01673..dccc561d 100644
--- a/src/runtime/streaming/operators/sink/kafka/mod.rs
+++ b/src/runtime/streaming/operators/sink/kafka/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! Kafka Sink：实现 [`crate::runtime::streaming::api::operator::MessageOperator`]，支持 At-Least-Once 与 Exactly-Once（事务 + 二阶段提交）。
 
 use anyhow::{anyhow, bail, Result};
diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/runtime/streaming/operators/sink/mod.rs
index 3b88f563..93b3b0ee 100644
--- a/src/runtime/streaming/operators/sink/mod.rs
+++ b/src/runtime/streaming/operators/sink/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 与外部系统对接的 Sink 实现（Kafka 等）。
 
 pub mod kafka;
diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs
index b17a504b..59507c2e 100644
--- a/src/runtime/streaming/operators/source/kafka/mod.rs
+++ b/src/runtime/streaming/operators/source/kafka/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! Kafka 源算子：实现 [`crate::runtime::streaming::api::source::SourceOperator`]，由 [`crate::runtime::streaming::execution::SourceRunner`] 轮询 `fetch_next`。
 
 use anyhow::{anyhow, Context as _, Result};
diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs
index 59b3ff7c..687e2289 100644
--- a/src/runtime/streaming/operators/source/mod.rs
+++ b/src/runtime/streaming/operators/source/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 与外部系统对接的源实现（Kafka 等）。
 
 pub mod kafka;
diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs
index 9e801188..45619dc6 100644
--- a/src/runtime/streaming/operators/stateless_physical_executor.rs
+++ b/src/runtime/streaming/operators/stateless_physical_executor.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 无状态物理计划执行器：将单批次写入 `SingleLockedBatch` 并让 DataFusion 计划消费。
 
 use std::sync::{Arc, RwLock};
diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/runtime/streaming/operators/value_execution.rs
index b9fb0cd8..c3b3d525 100644
--- a/src/runtime/streaming/operators/value_execution.rs
+++ b/src/runtime/streaming/operators/value_execution.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 通用无状态执行算子：驱动 DataFusion 物理计划（Filter, Case When, Scalar UDF 等），
 //! 不改变分区状态，适用于 Map / Filter 阶段。
 
diff --git a/src/runtime/streaming/operators/watermark/mod.rs b/src/runtime/streaming/operators/watermark/mod.rs
index becc0b8f..4486a0fd 100644
--- a/src/runtime/streaming/operators/watermark/mod.rs
+++ b/src/runtime/streaming/operators/watermark/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod watermark_generator;
 
 pub use watermark_generator::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState};
diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs
index 3af64bf7..2b255f9b 100644
--- a/src/runtime/streaming/operators/watermark/watermark_generator.rs
+++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 表达式水位生成器：与 worker `arrow/watermark_generator` 对齐，通过 [`StreamOutput::Watermark`] 向下游广播。
 
 use anyhow::{anyhow, Result};
diff --git a/src/runtime/streaming/operators/windows/mod.rs b/src/runtime/streaming/operators/windows/mod.rs
index ba594016..02c9eccb 100644
--- a/src/runtime/streaming/operators/windows/mod.rs
+++ b/src/runtime/streaming/operators/windows/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod session_aggregating_window;
 pub mod sliding_aggregating_window;
 pub mod tumbling_aggregating_window;
diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
index cae0935c..8fa3f2f7 100644
--- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 会话窗口聚合：纯内存版，完全脱离持久化状态存储。
 //! 利用 BTreeMap 充当优先队列，数据天然在内存中进行 Gap 合并与触发。
 
diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
index aa2e2474..e5af57f3 100644
--- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 滑动窗口聚合：纯内存版。
 //! 完全依赖内部的 TieredRecordBatchHolder 和 ActiveBin 在内存中进行计算，
 //! 摆脱 TableManager 依赖，遇到 Barrier 自动透传。
diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
index f835bac2..40c757dc 100644
--- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 滚动（tumbling）窗口聚合：与 worker `arrow/tumbling_aggregating_window` 对齐，实现 [`MessageOperator`]。
 
 use anyhow::{anyhow, Result};
diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs
index 03f02a19..4e9c83ce 100644
--- a/src/runtime/streaming/operators/windows/window_function.rs
+++ b/src/runtime/streaming/operators/windows/window_function.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 窗口函数（按事件时间分桶的瞬时执行）：纯内存版。
 //! 完全依赖内部的 ActiveWindowExec 通道在内存中缓冲数据，
 //! 摆脱持久化状态存储的依赖，遇到 Barrier 自动透传。
diff --git a/src/runtime/streaming/protocol/control.rs b/src/runtime/streaming/protocol/control.rs
index a7a9da57..d225e2e8 100644
--- a/src/runtime/streaming/protocol/control.rs
+++ b/src/runtime/streaming/protocol/control.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 控制平面：与 [`super::event::StreamEvent`] 队列分离的高优先级指令。
 
 use serde::{Deserialize, Serialize};
diff --git a/src/runtime/streaming/protocol/event.rs b/src/runtime/streaming/protocol/event.rs
index ee974e7e..efd43952 100644
--- a/src/runtime/streaming/protocol/event.rs
+++ b/src/runtime/streaming/protocol/event.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use arrow_array::RecordBatch;
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
diff --git a/src/runtime/streaming/protocol/mod.rs b/src/runtime/streaming/protocol/mod.rs
index 852562de..f859df28 100644
--- a/src/runtime/streaming/protocol/mod.rs
+++ b/src/runtime/streaming/protocol/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 协议层：数据事件、控制命令、水位线合并与比较语义。
 
 pub mod control;
diff --git a/src/runtime/streaming/protocol/stream_out.rs b/src/runtime/streaming/protocol/stream_out.rs
index 49d963df..0f6619f9 100644
--- a/src/runtime/streaming/protocol/stream_out.rs
+++ b/src/runtime/streaming/protocol/stream_out.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use arrow_array::RecordBatch;
 use crate::sql::common::Watermark;
 
diff --git a/src/runtime/streaming/protocol/tracked.rs b/src/runtime/streaming/protocol/tracked.rs
index c675b5bd..5034abd2 100644
--- a/src/runtime/streaming/protocol/tracked.rs
+++ b/src/runtime/streaming/protocol/tracked.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::sync::Arc;
 
 use crate::runtime::streaming::memory::MemoryTicket;
diff --git a/src/runtime/streaming/protocol/watermark.rs b/src/runtime/streaming/protocol/watermark.rs
index 43baeabb..9c039aa5 100644
--- a/src/runtime/streaming/protocol/watermark.rs
+++ b/src/runtime/streaming/protocol/watermark.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! 水位线类型来自 `arroyo_types::Watermark`；此处提供 **多路对齐合并** 与 **单调推进** 判断。
 
 use crate::sql::common::Watermark;
diff --git a/src/runtime/wasm/processor/function_error.rs b/src/runtime/wasm/processor/function_error.rs
index b38f8dd9..f9b8fe8e 100644
--- a/src/runtime/wasm/processor/function_error.rs
+++ b/src/runtime/wasm/processor/function_error.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #[derive(Debug, Clone)]
 pub enum FunctionErrorStage {
     Input,
diff --git a/src/sql/analysis/aggregate_rewriter.rs b/src/sql/analysis/aggregate_rewriter.rs
index f11b53d0..36024ab0 100644
--- a/src/sql/analysis/aggregate_rewriter.rs
+++ b/src/sql/analysis/aggregate_rewriter.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err};
 use datafusion::functions_aggregate::expr_fn::max;
diff --git a/src/sql/analysis/async_udf_rewriter.rs b/src/sql/analysis/async_udf_rewriter.rs
index 0ad4dfc2..31a92057 100644
--- a/src/sql/analysis/async_udf_rewriter.rs
+++ b/src/sql/analysis/async_udf_rewriter.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
 use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncFunctionExecutionNode};
 use crate::sql::schema::StreamSchemaProvider;
diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs
index e9efe96b..77131595 100644
--- a/src/sql/analysis/join_rewriter.rs
+++ b/src/sql/analysis/join_rewriter.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::sql::schema::StreamSchemaProvider;
 use crate::sql::extensions::join::StreamingJoinNode;
 use crate::sql::extensions::key_calculation::KeyExtractionNode;
diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs
index 697d8c97..d417ebd1 100644
--- a/src/sql/analysis/mod.rs
+++ b/src/sql/analysis/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #![allow(clippy::new_without_default)]
 
 pub(crate) mod aggregate_rewriter;
diff --git a/src/sql/analysis/row_time_rewriter.rs b/src/sql/analysis/row_time_rewriter.rs
index 0a31d9f8..f0c4e435 100644
--- a/src/sql/analysis/row_time_rewriter.rs
+++ b/src/sql/analysis/row_time_rewriter.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{Column, Result as DFResult};
 use datafusion::logical_expr::Expr;
diff --git a/src/sql/analysis/sink_input_rewriter.rs b/src/sql/analysis/sink_input_rewriter.rs
index e491a75a..6b8b2de1 100644
--- a/src/sql/analysis/sink_input_rewriter.rs
+++ b/src/sql/analysis/sink_input_rewriter.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::sql::extensions::sink::StreamEgressNode;
 use crate::sql::extensions::StreamingOperatorBlueprint;
 use datafusion::common::Result as DFResult;
diff --git a/src/sql/analysis/source_metadata_visitor.rs b/src/sql/analysis/source_metadata_visitor.rs
index 0d2e1455..81b9b179 100644
--- a/src/sql/analysis/source_metadata_visitor.rs
+++ b/src/sql/analysis/source_metadata_visitor.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::sql::extensions::sink::{StreamEgressNode, STREAM_EGRESS_NODE_NAME};
 use crate::sql::extensions::table_source::{StreamIngestionNode, STREAM_INGESTION_NODE_NAME};
 use crate::sql::schema::StreamSchemaProvider;
diff --git a/src/sql/analysis/stream_rewriter.rs b/src/sql/analysis/stream_rewriter.rs
index 22ed3c83..a62a7bd1 100644
--- a/src/sql/analysis/stream_rewriter.rs
+++ b/src/sql/analysis/stream_rewriter.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::sync::Arc;
 
 use super::StreamSchemaProvider;
diff --git a/src/sql/analysis/streaming_window_analzer.rs b/src/sql/analysis/streaming_window_analzer.rs
index 5eed3d2b..609bd2ee 100644
--- a/src/sql/analysis/streaming_window_analzer.rs
+++ b/src/sql/analysis/streaming_window_analzer.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashSet;
 use std::sync::Arc;
 
diff --git a/src/sql/analysis/udafs.rs b/src/sql/analysis/udafs.rs
index 9685c2d4..73fc062c 100644
--- a/src/sql/analysis/udafs.rs
+++ b/src/sql/analysis/udafs.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use datafusion::arrow::array::ArrayRef;
 use datafusion::error::Result;
 use datafusion::physical_plan::Accumulator;
diff --git a/src/sql/analysis/window_function_rewriter.rs b/src/sql/analysis/window_function_rewriter.rs
index 8f195325..7b94b841 100644
--- a/src/sql/analysis/window_function_rewriter.rs
+++ b/src/sql/analysis/window_function_rewriter.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use datafusion::common::tree_node::Transformed;
 use datafusion::common::{Column, Result as DFResult, plan_err, tree_node::TreeNodeRewriter};
 use datafusion::logical_expr::{
diff --git a/src/sql/api/checkpoints.rs b/src/sql/api/checkpoints.rs
index 243cae40..d9bdc139 100644
--- a/src/sql/api/checkpoints.rs
+++ b/src/sql/api/checkpoints.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::sql::common::to_micros;
 use serde::{Deserialize, Serialize};
 use std::time::SystemTime;
diff --git a/src/sql/api/connections.rs b/src/sql/api/connections.rs
index 7873ceb2..148df69d 100644
--- a/src/sql/api/connections.rs
+++ b/src/sql/api/connections.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::sql::common::formats::{BadData, Format, Framing};
 use crate::sql::common::{FsExtensionType, FsSchema};
 use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit};
diff --git a/src/sql/api/metrics.rs b/src/sql/api/metrics.rs
index 25d129e5..671b52f6 100644
--- a/src/sql/api/metrics.rs
+++ b/src/sql/api/metrics.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use serde::{Deserialize, Serialize};
 
 #[derive(Serialize, Deserialize, Copy, Clone, Debug, Hash, PartialEq, Eq)]
diff --git a/src/sql/api/mod.rs b/src/sql/api/mod.rs
index 85cbcaaa..3969296a 100644
--- a/src/sql/api/mod.rs
+++ b/src/sql/api/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! REST/RPC API types for the FunctionStream system.
 //!
 //! Adapted from Arroyo's `arroyo-rpc/src/api_types` and utility modules.
diff --git a/src/sql/api/pipelines.rs b/src/sql/api/pipelines.rs
index 8b42036c..d6cc5253 100644
--- a/src/sql/api/pipelines.rs
+++ b/src/sql/api/pipelines.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use super::udfs::Udf;
 use crate::sql::common::control::ErrorDomain;
 use serde::{Deserialize, Serialize};
diff --git a/src/sql/api/public_ids.rs b/src/sql/api/public_ids.rs
index 15a9f72e..33aa6427 100644
--- a/src/sql/api/public_ids.rs
+++ b/src/sql/api/public_ids.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::time::{SystemTime, UNIX_EPOCH};
 
 const ID_LENGTH: usize = 10;
diff --git a/src/sql/api/schema_resolver.rs b/src/sql/api/schema_resolver.rs
index a9124900..57d3d702 100644
--- a/src/sql/api/schema_resolver.rs
+++ b/src/sql/api/schema_resolver.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use async_trait::async_trait;
 
 /// Trait for resolving schemas by ID (e.g., from a schema registry).
diff --git a/src/sql/api/udfs.rs b/src/sql/api/udfs.rs
index 41085168..781d5b07 100644
--- a/src/sql/api/udfs.rs
+++ b/src/sql/api/udfs.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use serde::{Deserialize, Serialize};
 
 #[derive(Serialize, Deserialize, Clone, Debug)]
diff --git a/src/sql/api/var_str.rs b/src/sql/api/var_str.rs
index c4256e38..2638cd06 100644
--- a/src/sql/api/var_str.rs
+++ b/src/sql/api/var_str.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use serde::{Deserialize, Serialize};
 use std::env;
 
diff --git a/src/sql/common/arrow_ext.rs b/src/sql/common/arrow_ext.rs
index 701bf8e4..782f4358 100644
--- a/src/sql/common/arrow_ext.rs
+++ b/src/sql/common/arrow_ext.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::fmt::{Display, Formatter};
 use std::time::SystemTime;
diff --git a/src/sql/common/connector_options.rs b/src/sql/common/connector_options.rs
index 308d5197..6bd6dfa6 100644
--- a/src/sql/common/connector_options.rs
+++ b/src/sql/common/connector_options.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::num::{NonZero, NonZeroU64};
 use std::str::FromStr;
diff --git a/src/sql/common/control.rs b/src/sql/common/control.rs
index efdc754e..4ea9a12f 100644
--- a/src/sql/common/control.rs
+++ b/src/sql/common/control.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::time::SystemTime;
 
diff --git a/src/sql/common/converter.rs b/src/sql/common/converter.rs
index 8f6a2ba8..ec4687f8 100644
--- a/src/sql/common/converter.rs
+++ b/src/sql/common/converter.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::sync::Arc;
 use arrow::row::{OwnedRow, RowConverter, RowParser, Rows, SortField};
 use arrow_array::{Array, ArrayRef, BooleanArray};
diff --git a/src/sql/common/date.rs b/src/sql/common/date.rs
index c18e31a7..718d5f56 100644
--- a/src/sql/common/date.rs
+++ b/src/sql/common/date.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use serde::Serialize;
 use std::convert::TryFrom;
 
diff --git a/src/sql/common/debezium.rs b/src/sql/common/debezium.rs
index 3c9f4747..9dbc401f 100644
--- a/src/sql/common/debezium.rs
+++ b/src/sql/common/debezium.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use bincode::{Decode, Encode};
 use serde::{Deserialize, Serialize};
 use std::convert::TryFrom;
diff --git a/src/sql/common/errors.rs b/src/sql/common/errors.rs
index 507851bd..fa4a722e 100644
--- a/src/sql/common/errors.rs
+++ b/src/sql/common/errors.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::fmt;
 
 /// Result type for streaming operators and collectors.
diff --git a/src/sql/common/format_from_opts.rs b/src/sql/common/format_from_opts.rs
index dc9a43da..2469fb08 100644
--- a/src/sql/common/format_from_opts.rs
+++ b/src/sql/common/format_from_opts.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! Parse `WITH` clause format / framing / bad-data options (Arroyo-compatible keys).
 
 use std::str::FromStr;
@@ -5,6 +17,7 @@ use std::str::FromStr;
 use datafusion::common::{Result as DFResult, plan_datafusion_err, plan_err};
 
 use super::connector_options::ConnectorOptions;
+use super::with_option_keys as opt;
 use super::formats::{
     AvroFormat, BadData, DecimalEncoding, Format, Framing, JsonCompression, JsonFormat,
     NewlineDelimitedFraming, ParquetCompression, ParquetFormat, ProtobufFormat, RawBytesFormat,
@@ -14,32 +27,32 @@ use super::formats::{
 impl JsonFormat {
     pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self> {
         let mut j = JsonFormat::default();
-        if let Some(v) = opts.pull_opt_bool("json.confluent_schema_registry")? {
+        if let Some(v) = opts.pull_opt_bool(opt::JSON_CONFLUENT_SCHEMA_REGISTRY)? {
             j.confluent_schema_registry = v;
         }
-        if let Some(v) = opts.pull_opt_u64("json.confluent_schema_version")? {
+        if let Some(v) = opts.pull_opt_u64(opt::JSON_CONFLUENT_SCHEMA_VERSION)? {
             j.schema_id = Some(v as u32);
         }
-        if let Some(v) = opts.pull_opt_bool("json.include_schema")? {
+        if let Some(v) = opts.pull_opt_bool(opt::JSON_INCLUDE_SCHEMA)? {
             j.include_schema = v;
         }
-        if let Some(v) = opts.pull_opt_bool("json.debezium")? {
+        if let Some(v) = opts.pull_opt_bool(opt::JSON_DEBEZIUM)? {
             j.debezium = v;
         }
-        if let Some(v) = opts.pull_opt_bool("json.unstructured")? {
+        if let Some(v) = opts.pull_opt_bool(opt::JSON_UNSTRUCTURED)? {
             j.unstructured = v;
         }
-        if let Some(s) = opts.pull_opt_str("json.timestamp_format")? {
+        if let Some(s) = opts.pull_opt_str(opt::JSON_TIMESTAMP_FORMAT)? {
             j.timestamp_format = TimestampFormat::try_from(s.as_str()).map_err(|_| {
                 plan_datafusion_err!("invalid json.timestamp_format '{}'", s)
             })?;
         }
-        if let Some(s) = opts.pull_opt_str("json.decimal_encoding")? {
+        if let Some(s) = opts.pull_opt_str(opt::JSON_DECIMAL_ENCODING)? {
             j.decimal_encoding = DecimalEncoding::try_from(s.as_str()).map_err(|_| {
                 plan_datafusion_err!("invalid json.decimal_encoding '{s}'")
             })?;
         }
-        if let Some(s) = opts.pull_opt_str("json.compression")? {
+        if let Some(s) = opts.pull_opt_str(opt::JSON_COMPRESSION)? {
             j.compression = JsonCompression::from_str(&s)
                 .map_err(|e| plan_datafusion_err!("invalid json.compression: {e}"))?;
         }
@@ -49,7 +62,7 @@ impl JsonFormat {
 
 impl Format {
     pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Option<Self>> {
-        let Some(name) = opts.pull_opt_str("format")? else {
+        let Some(name) = opts.pull_opt_str(opt::FORMAT)? else {
             return Ok(None);
         };
         match name.to_lowercase().as_str() {
@@ -77,16 +90,16 @@ impl AvroFormat {
             into_unstructured_json: false,
             schema_id: None,
         };
-        if let Some(v) = opts.pull_opt_bool("avro.confluent_schema_registry")? {
+        if let Some(v) = opts.pull_opt_bool(opt::AVRO_CONFLUENT_SCHEMA_REGISTRY)? {
             a.confluent_schema_registry = v;
         }
-        if let Some(v) = opts.pull_opt_bool("avro.raw_datums")? {
+        if let Some(v) = opts.pull_opt_bool(opt::AVRO_RAW_DATUMS)? {
             a.raw_datums = v;
         }
-        if let Some(v) = opts.pull_opt_bool("avro.into_unstructured_json")? {
+        if let Some(v) = opts.pull_opt_bool(opt::AVRO_INTO_UNSTRUCTURED_JSON)? {
             a.into_unstructured_json = v;
         }
-        if let Some(v) = opts.pull_opt_u64("avro.schema_id")? {
+        if let Some(v) = opts.pull_opt_u64(opt::AVRO_SCHEMA_ID)? {
             a.schema_id = Some(v as u32);
         }
         Ok(a)
@@ -96,11 +109,11 @@ impl AvroFormat {
 impl ParquetFormat {
     fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self> {
         let mut p = ParquetFormat::default();
-        if let Some(s) = opts.pull_opt_str("parquet.compression")? {
+        if let Some(s) = opts.pull_opt_str(opt::PARQUET_COMPRESSION)? {
             p.compression = ParquetCompression::from_str(&s)
                 .map_err(|e| plan_datafusion_err!("invalid parquet.compression: {e}"))?;
         }
-        if let Some(v) = opts.pull_opt_u64("parquet.row_group_bytes")? {
+        if let Some(v) = opts.pull_opt_u64(opt::PARQUET_ROW_GROUP_BYTES)? {
             p.row_group_bytes = Some(v);
         }
         Ok(p)
@@ -116,16 +129,16 @@ impl ProtobufFormat {
             confluent_schema_registry: false,
             length_delimited: false,
         };
-        if let Some(v) = opts.pull_opt_bool("protobuf.into_unstructured_json")? {
+        if let Some(v) = opts.pull_opt_bool(opt::PROTOBUF_INTO_UNSTRUCTURED_JSON)? {
             p.into_unstructured_json = v;
         }
-        if let Some(s) = opts.pull_opt_str("protobuf.message_name")? {
+        if let Some(s) = opts.pull_opt_str(opt::PROTOBUF_MESSAGE_NAME)? {
             p.message_name = Some(s);
         }
-        if let Some(v) = opts.pull_opt_bool("protobuf.confluent_schema_registry")? {
+        if let Some(v) = opts.pull_opt_bool(opt::PROTOBUF_CONFLUENT_SCHEMA_REGISTRY)? {
             p.confluent_schema_registry = v;
         }
-        if let Some(v) = opts.pull_opt_bool("protobuf.length_delimited")? {
+        if let Some(v) = opts.pull_opt_bool(opt::PROTOBUF_LENGTH_DELIMITED)? {
             p.length_delimited = v;
         }
         Ok(p)
@@ -134,11 +147,11 @@ impl ProtobufFormat {
 
 impl Framing {
     pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Option<Self>> {
-        let method = opts.pull_opt_str("framing.method")?;
+        let method = opts.pull_opt_str(opt::FRAMING_METHOD)?;
         match method.as_deref() {
             None => Ok(None),
             Some("newline") | Some("newline_delimited") => {
-                let max = opts.pull_opt_u64("framing.max_line_length")?;
+                let max = opts.pull_opt_u64(opt::FRAMING_MAX_LINE_LENGTH)?;
                 Ok(Some(Framing::Newline(NewlineDelimitedFraming {
                     max_line_length: max,
                 })))
@@ -150,7 +163,7 @@ impl Framing {
 
 impl BadData {
     pub fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self> {
-        let Some(s) = opts.pull_opt_str("bad_data")? else {
+        let Some(s) = opts.pull_opt_str(opt::BAD_DATA)? else {
             return Ok(BadData::Fail {});
         };
         match s.to_lowercase().as_str() {
diff --git a/src/sql/common/formats.rs b/src/sql/common/formats.rs
index 25d09a74..e37be020 100644
--- a/src/sql/common/formats.rs
+++ b/src/sql/common/formats.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use serde::{Deserialize, Serialize};
 use std::fmt::{Display, Formatter};
 use std::str::FromStr;
diff --git a/src/sql/common/fs_schema.rs b/src/sql/common/fs_schema.rs
index f7fd5328..eb92d4ac 100644
--- a/src/sql/common/fs_schema.rs
+++ b/src/sql/common/fs_schema.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! FunctionStream table/stream schema: Arrow [`Schema`] plus timestamp index and optional key columns.
 //!
 //! [`Schema`]: datafusion::arrow::datatypes::Schema
diff --git a/src/sql/common/hash.rs b/src/sql/common/hash.rs
index 8f47a8fa..6dce5b9a 100644
--- a/src/sql/common/hash.rs
+++ b/src/sql/common/hash.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::ops::RangeInclusive;
 
 /// Randomly generated seeds for consistent hashing. Changing these breaks existing state.
diff --git a/src/sql/common/kafka_catalog.rs b/src/sql/common/kafka_catalog.rs
index 99c8983e..e54e6901 100644
--- a/src/sql/common/kafka_catalog.rs
+++ b/src/sql/common/kafka_catalog.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 //! Kafka 表级与连接级配置（与 JSON Schema / Catalog 对齐）。
 //!
 //! 放在 [`crate::sql::common`] 而非 `runtime::streaming`，以便 **SQL 规划、Coordinator、连接配置存储**
diff --git a/src/sql/common/message.rs b/src/sql/common/message.rs
index 29b7f3a5..4dcde95b 100644
--- a/src/sql/common/message.rs
+++ b/src/sql/common/message.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use bincode::{Decode, Encode};
 use datafusion::arrow::array::RecordBatch;
 use serde::{Deserialize, Serialize};
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index 722b2e58..7a4b4ee4 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -17,6 +17,8 @@
 
 pub mod arrow_ext;
 pub mod connector_options;
+pub mod with_option_keys;
+pub mod constants;
 pub mod control;
 pub mod date;
 pub mod debezium;
diff --git a/src/sql/common/operator_config.rs b/src/sql/common/operator_config.rs
index a1f703f5..b9e40391 100644
--- a/src/sql/common/operator_config.rs
+++ b/src/sql/common/operator_config.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
diff --git a/src/sql/common/task_info.rs b/src/sql/common/task_info.rs
index 5a31511b..479ab082 100644
--- a/src/sql/common/task_info.rs
+++ b/src/sql/common/task_info.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use bincode::{Decode, Encode};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
diff --git a/src/sql/common/time_utils.rs b/src/sql/common/time_utils.rs
index 2ee5a126..323445cd 100644
--- a/src/sql/common/time_utils.rs
+++ b/src/sql/common/time_utils.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::hash::Hash;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
diff --git a/src/sql/common/with_option_keys.rs b/src/sql/common/with_option_keys.rs
new file mode 100644
index 00000000..e48d9b7a
--- /dev/null
+++ b/src/sql/common/with_option_keys.rs
@@ -0,0 +1,97 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! SQL `WITH` clause option names, plus field names of some connectors' serialized JSON (single source).
+
+// ── General / table-level ───────────────────────────────────────────────────
+
+pub const CONNECTOR: &str = "connector";
+pub const TYPE: &str = "type";
+pub const FORMAT: &str = "format";
+/// Default format name used when the `format` option is not specified (a value, not a key).
+pub const DEFAULT_FORMAT_VALUE: &str = "json";
+pub const BAD_DATA: &str = "bad_data";
+pub const PARTITION_BY: &str = "partition_by";
+
+pub const EVENT_TIME_FIELD: &str = "event_time_field";
+pub const WATERMARK_FIELD: &str = "watermark_field";
+
+pub const IDLE_MICROS: &str = "idle_micros";
+pub const IDLE_TIME: &str = "idle_time";
+
+pub const LOOKUP_CACHE_MAX_BYTES: &str = "lookup.cache.max_bytes";
+pub const LOOKUP_CACHE_TTL: &str = "lookup.cache.ttl";
+
+// ── Opaque JSON for non-Kafka connectors (`CONNECTOR` shares its name with the WITH option) ──
+
+pub const CONNECTION_SCHEMA: &str = "connection_schema";
+
+// ── Backend parameter serialization (e.g. lookup) ───────────────────────────
+
+pub const ADAPTER: &str = "adapter";
+
+// ── Kafka ─────────────────────────────────────────────────────────────────
+
+pub const KAFKA_BOOTSTRAP_SERVERS: &str = "bootstrap.servers";
+pub const KAFKA_BOOTSTRAP_SERVERS_LEGACY: &str = "bootstrap_servers";
+pub const KAFKA_TOPIC: &str = "topic";
+pub const KAFKA_RATE_LIMIT_MESSAGES_PER_SECOND: &str = "rate_limit.messages_per_second";
+pub const KAFKA_VALUE_SUBJECT: &str = "value.subject";
+pub const KAFKA_SCAN_STARTUP_MODE: &str = "scan.startup.mode";
+pub const KAFKA_ISOLATION_LEVEL: &str = "isolation.level";
+pub const KAFKA_GROUP_ID: &str = "group.id";
+pub const KAFKA_GROUP_ID_LEGACY: &str = "group_id";
+pub const KAFKA_GROUP_ID_PREFIX: &str = "group.id.prefix";
+pub const KAFKA_SINK_COMMIT_MODE: &str = "sink.commit.mode";
+pub const KAFKA_SINK_KEY_FIELD: &str = "sink.key.field";
+pub const KAFKA_KEY_FIELD_LEGACY: &str = "key.field";
+pub const KAFKA_SINK_TIMESTAMP_FIELD: &str = "sink.timestamp.field";
+pub const KAFKA_TIMESTAMP_FIELD_LEGACY: &str = "timestamp.field";
+
+// ── JSON format ───────────────────────────────────────────────────────────
+
+pub const JSON_CONFLUENT_SCHEMA_REGISTRY: &str = "json.confluent_schema_registry";
+pub const JSON_CONFLUENT_SCHEMA_VERSION: &str = "json.confluent_schema_version";
+pub const JSON_INCLUDE_SCHEMA: &str = "json.include_schema";
+pub const JSON_DEBEZIUM: &str = "json.debezium";
+pub const JSON_UNSTRUCTURED: &str = "json.unstructured";
+pub const JSON_TIMESTAMP_FORMAT: &str = "json.timestamp_format";
+pub const JSON_DECIMAL_ENCODING: &str = "json.decimal_encoding";
+pub const JSON_COMPRESSION: &str = "json.compression";
+
+// ── Avro ──────────────────────────────────────────────────────────────────
+
+pub const AVRO_CONFLUENT_SCHEMA_REGISTRY: &str = "avro.confluent_schema_registry";
+pub const AVRO_RAW_DATUMS: &str = "avro.raw_datums";
+pub const AVRO_INTO_UNSTRUCTURED_JSON: &str = "avro.into_unstructured_json";
+pub const AVRO_SCHEMA_ID: &str = "avro.schema_id";
+
+// ── Parquet ───────────────────────────────────────────────────────────────
+
+pub const PARQUET_COMPRESSION: &str = "parquet.compression";
+pub const PARQUET_ROW_GROUP_BYTES: &str = "parquet.row_group_bytes";
+
+// ── Protobuf ────────────────────────────────────────────────────────────────
+
+pub const PROTOBUF_INTO_UNSTRUCTURED_JSON: &str = "protobuf.into_unstructured_json";
+pub const PROTOBUF_MESSAGE_NAME: &str = "protobuf.message_name";
+pub const PROTOBUF_CONFLUENT_SCHEMA_REGISTRY: &str = "protobuf.confluent_schema_registry";
+pub const PROTOBUF_LENGTH_DELIMITED: &str = "protobuf.length_delimited";
+
+// ── Framing ─────────────────────────────────────────────────────────────────
+
+pub const FRAMING_METHOD: &str = "framing.method";
+pub const FRAMING_MAX_LINE_LENGTH: &str = "framing.max_line_length";
+
+// ── Inferring encoding from a string map (catalog, etc.) ────────────────────
+
+pub const FORMAT_DEBEZIUM_FLAG: &str = "format.debezium";
diff --git a/src/sql/common/worker.rs b/src/sql/common/worker.rs
index c12163ba..48c218fb 100644
--- a/src/sql/common/worker.rs
+++ b/src/sql/common/worker.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::fmt::{Display, Formatter};
 use std::sync::Arc;
 
diff --git a/src/sql/datastream/logical.rs b/src/sql/datastream/logical.rs
index c0e5465e..e26be9f3 100644
--- a/src/sql/datastream/logical.rs
+++ b/src/sql/datastream/logical.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use itertools::Itertools;
 
 use datafusion::arrow::datatypes::DataType;
@@ -301,31 +313,10 @@ impl LogicalProgram {
         let mut s = HashSet::new();
         for n in self.graph.node_weights() {
             for t in &n.operator_chain.operators {
-                let feature = match &t.operator_name {
-                    OperatorName::AsyncUdf => "async-udf".to_string(),
-                    OperatorName::ExpressionWatermark
-                    | OperatorName::ArrowValue
-                    | OperatorName::ArrowKey
-                    | OperatorName::Projection => continue,
-                    OperatorName::Join => "join-with-expiration".to_string(),
-                    OperatorName::InstantJoin => "windowed-join".to_string(),
-                    OperatorName::WindowFunction => "sql-window-function".to_string(),
-                    OperatorName::LookupJoin => "lookup-join".to_string(),
-                    OperatorName::TumblingWindowAggregate => {
-                        "sql-tumbling-window-aggregate".to_string()
-                    }
-                    OperatorName::SlidingWindowAggregate => {
-                        "sql-sliding-window-aggregate".to_string()
-                    }
-                    OperatorName::SessionWindowAggregate => {
-                        "sql-session-window-aggregate".to_string()
-                    }
-                    OperatorName::UpdatingAggregate => "sql-updating-aggregate".to_string(),
-                    OperatorName::KeyBy => "key-by-routing".to_string(),
-                    OperatorName::ConnectorSource => "connector-source".to_string(),
-                    OperatorName::ConnectorSink => "connector-sink".to_string(),
+                let Some(tag) = t.operator_name.feature_tag() else {
+                    continue;
                 };
-                s.insert(feature);
+                s.insert(tag.to_string());
             }
         }
         s
diff --git a/src/sql/datastream/mod.rs b/src/sql/datastream/mod.rs
index 82d25f24..922801f6 100644
--- a/src/sql/datastream/mod.rs
+++ b/src/sql/datastream/mod.rs
@@ -1 +1,13 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod logical;
diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs
index 7ba16f7a..e05129c8 100644
--- a/src/sql/extensions/aggregate.rs
+++ b/src/sql/extensions/aggregate.rs
@@ -31,6 +31,7 @@ use protocol::grpc::api::{
 };
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::{extension_node, proto_operator_name};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{
     CompiledTopologyNode, StreamingOperatorBlueprint, SystemTimestampInjectorNode,
@@ -43,8 +44,7 @@ use crate::sql::types::{
     schema_from_df_fields, schema_from_df_fields_with_metadata,
 };
 
-pub(crate) const STREAM_AGG_EXTENSION_NAME: &str = "StreamWindowAggregateNode";
-const INTERNAL_TIMESTAMP_COL: &str = "_timestamp";
+pub(crate) const STREAM_AGG_EXTENSION_NAME: &str = extension_node::STREAM_WINDOW_AGGREGATE;
 
 /// Represents a streaming windowed aggregation node in the logical plan.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -104,7 +104,7 @@ impl StreamWindowAggregateNode {
         )?;
 
         let operator_config = TumblingWindowAggregateOperator {
-            name: "TumblingWindow".to_string(),
+            name: proto_operator_name::TUMBLING_WINDOW.to_string(),
             width_micros: duration.as_micros() as u64,
             binning_function: binning_expr.encode_to_vec(),
             input_schema: Some(
@@ -175,7 +175,7 @@ impl StreamWindowAggregateNode {
             format!("sliding_window_{node_id}"),
             OperatorName::SlidingWindowAggregate,
             operator_config.encode_to_vec(),
-            "sliding window".to_string(),
+            proto_operator_name::SLIDING_WINDOW_LABEL.to_string(),
             1,
         ))
     }
@@ -255,7 +255,7 @@ impl StreamWindowAggregateNode {
         apply_final_projection: bool,
     ) -> Result<LogicalNode> {
         let ts_column_expr =
-            Expr::Column(Column::new_unqualified(INTERNAL_TIMESTAMP_COL.to_string()));
+            Expr::Column(Column::new_unqualified(TIMESTAMP_FIELD.to_string()));
         let binning_expr = planner.create_physical_expr(&ts_column_expr, &input_schema)?;
         let binning_proto = serialize_physical_expr(&binning_expr, &DefaultPhysicalExtensionCodec {})?;
 
@@ -277,7 +277,7 @@ impl StreamWindowAggregateNode {
         } = planner.split_physical_plan(self.partition_keys.clone(), &self.base_agg_plan, true)?;
 
         let operator_config = TumblingWindowAggregateOperator {
-            name: "InstantWindow".to_string(),
+            name: proto_operator_name::INSTANT_WINDOW.to_string(),
             width_micros: 0,
             binning_function: binning_proto.encode_to_vec(),
             input_schema: Some(
@@ -298,7 +298,7 @@ impl StreamWindowAggregateNode {
             format!("instant_window_{node_id}"),
             OperatorName::TumblingWindowAggregate,
             operator_config.encode_to_vec(),
-            "instant window".to_string(),
+            proto_operator_name::INSTANT_WINDOW_LABEL.to_string(),
             1,
         ))
     }
diff --git a/src/sql/extensions/async_udf.rs b/src/sql/extensions/async_udf.rs
index 147e0f90..8add0625 100644
--- a/src/sql/extensions/async_udf.rs
+++ b/src/sql/extensions/async_udf.rs
@@ -25,6 +25,7 @@ use prost::Message;
 use protocol::grpc::api::{AsyncUdfOperator, AsyncUdfOrdering};
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::extension_node;
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{
@@ -33,8 +34,9 @@ use crate::sql::logical_node::logical::{
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields};
 
-pub(crate) const NODE_TYPE_NAME: &str = "AsyncFunctionExecutionNode";
-pub const ASYNC_RESULT_FIELD: &str = "__async_result";
+use super::ASYNC_RESULT_FIELD;
+
+pub(crate) const NODE_TYPE_NAME: &str = extension_node::ASYNC_FUNCTION_EXECUTION;
 
 /// Represents a logical node that executes an external asynchronous function (UDF)
 /// and projects the final results into the streaming pipeline.
diff --git a/src/sql/extensions/constants.rs b/src/sql/extensions/constants.rs
index 489af179..245dacec 100644
--- a/src/sql/extensions/constants.rs
+++ b/src/sql/extensions/constants.rs
@@ -10,5 +10,4 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-/// Column name substituted for an async UDF call after rewrite.
-pub const ASYNC_RESULT_FIELD: &str = "__async_result";
+pub use crate::sql::common::constants::sql_field::ASYNC_RESULT as ASYNC_RESULT_FIELD;
diff --git a/src/sql/extensions/debezium.rs b/src/sql/extensions/debezium.rs
index 612c0d79..a1042194 100644
--- a/src/sql/extensions/debezium.rs
+++ b/src/sql/extensions/debezium.rs
@@ -20,6 +20,7 @@ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 use datafusion::physical_plan::DisplayAs;
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::{cdc, extension_node};
 use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::logical_planner::updating_meta_field;
@@ -31,12 +32,8 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint};
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub(crate) const UNROLL_NODE_NAME: &str = "UnrollDebeziumPayloadNode";
-pub(crate) const PACK_NODE_NAME: &str = "PackDebeziumEnvelopeNode";
-
-const CDC_FIELD_BEFORE: &str = "before";
-const CDC_FIELD_AFTER: &str = "after";
-const CDC_FIELD_OP: &str = "op";
+pub(crate) const UNROLL_NODE_NAME: &str = extension_node::UNROLL_DEBEZIUM_PAYLOAD;
+pub(crate) const PACK_NODE_NAME: &str = extension_node::PACK_DEBEZIUM_ENVELOPE;
 
 // -----------------------------------------------------------------------------
 // Core Schema Codec
@@ -68,12 +65,12 @@ impl DebeziumSchemaCodec {
 
         let mut envelope_fields = vec![
             Arc::new(Field::new(
-                CDC_FIELD_BEFORE,
+                cdc::BEFORE,
                 payload_struct_type.clone(),
                 true,
             )),
-            Arc::new(Field::new(CDC_FIELD_AFTER, payload_struct_type, true)),
-            Arc::new(Field::new(CDC_FIELD_OP, DataType::Utf8, true)),
+            Arc::new(Field::new(cdc::AFTER, payload_struct_type, true)),
+            Arc::new(Field::new(cdc::OP, DataType::Utf8, true)),
         ];
 
         if let Some(ts) = ts_field {
@@ -134,15 +131,15 @@ impl UnrollDebeziumPayloadNode {
     }
 
     fn validate_envelope_structure(schema: &DFSchemaRef) -> Result<(usize, usize)> {
-        let before_idx = schema.index_of_column_by_name(None, CDC_FIELD_BEFORE).ok_or_else(
+        let before_idx = schema.index_of_column_by_name(None, cdc::BEFORE).ok_or_else(
             || DataFusionError::Plan("Missing 'before' state column in CDC stream".into()),
         )?;
 
-        let after_idx = schema.index_of_column_by_name(None, CDC_FIELD_AFTER).ok_or_else(
+        let after_idx = schema.index_of_column_by_name(None, cdc::AFTER).ok_or_else(
             || DataFusionError::Plan("Missing 'after' state column in CDC stream".into()),
         )?;
 
-        let op_idx = schema.index_of_column_by_name(None, CDC_FIELD_OP).ok_or_else(|| {
+        let op_idx = schema.index_of_column_by_name(None, cdc::OP).ok_or_else(|| {
             DataFusionError::Plan("Missing 'op' operation column in CDC stream".into())
         })?;
 
@@ -158,7 +155,7 @@ impl UnrollDebeziumPayloadNode {
         if *schema.field(op_idx).data_type() != DataType::Utf8 {
             return plan_err!(
                 "The '{}' column must be of type Utf8",
-                CDC_FIELD_OP
+                cdc::OP
             );
         }
 
diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs
index 70fbf3a3..9789a216 100644
--- a/src/sql/extensions/join.rs
+++ b/src/sql/extensions/join.rs
@@ -22,6 +22,7 @@ use datafusion_proto::protobuf::PhysicalPlanNode;
 use prost::Message;
 use protocol::grpc::api::JoinOperator;
 
+use crate::sql::common::constants::{extension_node, runtime_operator_kind};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{
@@ -34,7 +35,7 @@ use crate::sql::logical_planner::FsPhysicalExtensionCodec;
 // Constants
 // -----------------------------------------------------------------------------
 
-pub(crate) const STREAM_JOIN_NODE_TYPE: &str = "StreamingJoinNode";
+pub(crate) const STREAM_JOIN_NODE_TYPE: &str = extension_node::STREAMING_JOIN;
 
 // -----------------------------------------------------------------------------
 // Logical Node Definition
@@ -187,7 +188,7 @@ impl StreamingOperatorBlueprint for StreamingJoinNode {
             node_identifier.clone(),
             self.determine_operator_type(),
             operator_config.encode_to_vec(),
-            "streaming_join".to_string(),
+            runtime_operator_kind::STREAMING_JOIN.to_string(),
             1,
         );
 
diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs
index 484d464c..6a9e924b 100644
--- a/src/sql/extensions/key_calculation.rs
+++ b/src/sql/extensions/key_calculation.rs
@@ -27,6 +27,7 @@ use prost::Message;
 use protocol::grpc::api::{KeyPlanOperator, ProjectionOperator};
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::{extension_node, sql_field};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
@@ -34,7 +35,7 @@ use crate::sql::logical_planner::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::{fields_with_qualifiers, schema_from_df_fields_with_metadata};
 
-pub(crate) const EXTENSION_NODE_IDENTIFIER: &str = "KeyExtractionNode";
+pub(crate) const EXTENSION_NODE_IDENTIFIER: &str = extension_node::KEY_EXTRACTION;
 
 /// Routing strategy for shuffling data across the stream topology.
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)]
@@ -101,7 +102,7 @@ impl KeyExtractionNode {
         indices: &[usize],
     ) -> (Vec<u8>, OperatorName) {
         let operator_config = KeyPlanOperator {
-            name: "key".into(),
+            name: sql_field::DEFAULT_KEY_LABEL.into(),
             physical_plan: physical_plan_proto.encode_to_vec(),
             key_fields: indices.iter().map(|&idx| idx as u64).collect(),
         };
@@ -153,7 +154,11 @@ impl KeyExtractionNode {
         }
 
         let operator_config = ProjectionOperator {
-            name: self.operator_label.as_deref().unwrap_or("key").to_string(),
+            name: self
+                .operator_label
+                .as_deref()
+                .unwrap_or(sql_field::DEFAULT_KEY_LABEL)
+                .to_string(),
             input_schema: Some(input_schema_ref.as_ref().clone().into()),
             output_schema: Some(output_fs_schema.into()),
             exprs: physical_expr_payloads,
diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs
index c34c5b10..684a8f97 100644
--- a/src/sql/extensions/lookup.rs
+++ b/src/sql/extensions/lookup.rs
@@ -24,6 +24,7 @@ use protocol::grpc::api;
 use protocol::grpc::api::{ConnectorOp, LookupJoinCondition, LookupJoinOperator};
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::extension_node;
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
@@ -35,8 +36,8 @@ use crate::sql::schema::utils::add_timestamp_field_arrow;
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub const DICTIONARY_SOURCE_NODE_NAME: &str = "ReferenceTableSource";
-pub const STREAM_DICTIONARY_JOIN_NODE_NAME: &str = "StreamReferenceJoin";
+pub const DICTIONARY_SOURCE_NODE_NAME: &str = extension_node::REFERENCE_TABLE_SOURCE;
+pub const STREAM_DICTIONARY_JOIN_NODE_NAME: &str = extension_node::STREAM_REFERENCE_JOIN;
 
 // -----------------------------------------------------------------------------
 // Logical Node: Reference Table Source
diff --git a/src/sql/extensions/projection.rs b/src/sql/extensions/projection.rs
index 2175bddf..d1b9e755 100644
--- a/src/sql/extensions/projection.rs
+++ b/src/sql/extensions/projection.rs
@@ -22,6 +22,7 @@ use prost::Message;
 use protocol::grpc::api::ProjectionOperator;
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::{extension_node, sql_field};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
@@ -32,8 +33,8 @@ use crate::sql::types::{DFField, schema_from_df_fields};
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub(crate) const STREAM_PROJECTION_NODE_NAME: &str = "StreamProjectionNode";
-const DEFAULT_PROJECTION_LABEL: &str = "projection";
+pub(crate) const STREAM_PROJECTION_NODE_NAME: &str = extension_node::STREAM_PROJECTION;
+const DEFAULT_PROJECTION_LABEL: &str = sql_field::DEFAULT_PROJECTION_LABEL;
 
 // -----------------------------------------------------------------------------
 // Logical Node Definition
diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs
index 5011bb4c..a9a65c51 100644
--- a/src/sql/extensions/remote_table.rs
+++ b/src/sql/extensions/remote_table.rs
@@ -22,6 +22,7 @@ use prost::Message;
 use protocol::grpc::api::ValuePlanOperator;
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::extension_node;
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
@@ -32,7 +33,7 @@ use crate::sql::logical_planner::planner::{NamedNode, Planner};
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub(crate) const REMOTE_TABLE_NODE_NAME: &str = "RemoteTableBoundaryNode";
+pub(crate) const REMOTE_TABLE_NODE_NAME: &str = extension_node::REMOTE_TABLE_BOUNDARY;
 
 // -----------------------------------------------------------------------------
 // Logical Node Definition
diff --git a/src/sql/extensions/sink.rs b/src/sql/extensions/sink.rs
index 8fc31aac..d2916486 100644
--- a/src/sql/extensions/sink.rs
+++ b/src/sql/extensions/sink.rs
@@ -18,6 +18,7 @@ use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalN
 use prost::Message;
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::extension_node;
 use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
@@ -31,7 +32,7 @@ use super::remote_table::RemoteTableBoundaryNode;
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub(crate) const STREAM_EGRESS_NODE_NAME: &str = "StreamEgressNode";
+pub(crate) const STREAM_EGRESS_NODE_NAME: &str = extension_node::STREAM_EGRESS;
 
 // -----------------------------------------------------------------------------
 // Logical Node Definition
diff --git a/src/sql/extensions/table_source.rs b/src/sql/extensions/table_source.rs
index 292284ba..3f998c5a 100644
--- a/src/sql/extensions/table_source.rs
+++ b/src/sql/extensions/table_source.rs
@@ -18,6 +18,7 @@ use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 use prost::Message;
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::extension_node;
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::debezium::DebeziumSchemaCodec;
 use crate::sql::logical_node::logical::{LogicalNode, OperatorName};
@@ -32,7 +33,7 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint};
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub(crate) const STREAM_INGESTION_NODE_NAME: &str = "StreamIngestionNode";
+pub(crate) const STREAM_INGESTION_NODE_NAME: &str = extension_node::STREAM_INGESTION;
 
 // -----------------------------------------------------------------------------
 // Logical Node Definition
diff --git a/src/sql/extensions/timestamp_append.rs b/src/sql/extensions/timestamp_append.rs
index 7a3a07e9..2d8b985b 100644
--- a/src/sql/extensions/timestamp_append.rs
+++ b/src/sql/extensions/timestamp_append.rs
@@ -16,13 +16,14 @@ use datafusion::common::{DFSchemaRef, Result, TableReference, internal_err};
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::extension_node;
 use crate::sql::schema::utils::{add_timestamp_field, has_timestamp_field};
 
 // -----------------------------------------------------------------------------
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub(crate) const TIMESTAMP_INJECTOR_NODE_NAME: &str = "SystemTimestampInjectorNode";
+pub(crate) const TIMESTAMP_INJECTOR_NODE_NAME: &str = extension_node::SYSTEM_TIMESTAMP_INJECTOR;
 
 // -----------------------------------------------------------------------------
 // Logical Node Definition
diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs
index 1671fb13..9d12806f 100644
--- a/src/sql/extensions/updating_aggregate.rs
+++ b/src/sql/extensions/updating_aggregate.rs
@@ -25,6 +25,7 @@ use datafusion_proto::protobuf::PhysicalPlanNode;
 use prost::Message;
 use protocol::grpc::api::UpdatingAggregateOperator;
 
+use crate::sql::common::constants::{extension_node, proto_operator_name, updating_state_field};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, IsRetractExtension, StreamingOperatorBlueprint};
 use crate::sql::functions::multi_hash;
@@ -36,7 +37,7 @@ use crate::sql::logical_planner::planner::{NamedNode, Planner};
 // Constants & Configuration
 // -----------------------------------------------------------------------------
 
-pub(crate) const CONTINUOUS_AGGREGATE_NODE_NAME: &str = "ContinuousAggregateNode";
+pub(crate) const CONTINUOUS_AGGREGATE_NODE_NAME: &str = extension_node::CONTINUOUS_AGGREGATE;
 
 const DEFAULT_FLUSH_INTERVAL_MICROS: u64 = 10_000_000;
 
@@ -102,9 +103,9 @@ impl ContinuousAggregateNode {
         };
 
         named_struct(vec![
-            lit("is_retract"),
+            lit(updating_state_field::IS_RETRACT),
             lit(false),
-            lit("id"),
+            lit(updating_state_field::ID),
             state_id_hash,
         ])
     }
@@ -128,7 +129,7 @@ impl ContinuousAggregateNode {
             planner.serialize_as_physical_expr(&meta_expr, &upstream_df_schema)?;
 
         Ok(UpdatingAggregateOperator {
-            name: "UpdatingAggregate".to_string(),
+            name: proto_operator_name::UPDATING_AGGREGATE.to_string(),
             input_schema: Some((**upstream_schema).clone().into()),
             final_schema: Some(self.yielded_schema().into()),
             aggregate_exec: compiled_agg_payload,
@@ -220,7 +221,7 @@ impl StreamingOperatorBlueprint for ContinuousAggregateNode {
             format!("updating_aggregate_{node_index}"),
             OperatorName::UpdatingAggregate,
             operator_config.encode_to_vec(),
-            "UpdatingAggregate".to_string(),
+            proto_operator_name::UPDATING_AGGREGATE.to_string(),
             1,
         );
 
diff --git a/src/sql/extensions/watermark_node.rs b/src/sql/extensions/watermark_node.rs
index 7cdb9b67..231e1951 100644
--- a/src/sql/extensions/watermark_node.rs
+++ b/src/sql/extensions/watermark_node.rs
@@ -22,19 +22,19 @@ use prost::Message;
 use protocol::grpc::api::ExpressionWatermarkConfig;
 
 use crate::multifield_partial_ord;
+use crate::sql::common::constants::{extension_node, runtime_operator_kind};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::schema::utils::add_timestamp_field;
+use crate::sql::types::TIMESTAMP_FIELD;
 
 // -----------------------------------------------------------------------------
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub(crate) const EVENT_TIME_WATERMARK_NODE_NAME: &str = "EventTimeWatermarkNode";
-
-const INTERNAL_TIMESTAMP_COLUMN: &str = "_timestamp";
+pub(crate) const EVENT_TIME_WATERMARK_NODE_NAME: &str = extension_node::EVENT_TIME_WATERMARK;
 
 const DEFAULT_WATERMARK_EMISSION_PERIOD_MICROS: u64 = 1_000_000;
 
@@ -72,11 +72,11 @@ impl EventTimeWatermarkNode {
         )?;
 
         let internal_timestamp_offset = resolved_schema
-            .index_of_column_by_name(None, INTERNAL_TIMESTAMP_COLUMN)
+            .index_of_column_by_name(None, TIMESTAMP_FIELD)
             .ok_or_else(|| {
                 DataFusionError::Plan(format!(
                     "Fatal: Failed to resolve mandatory temporal column '{}'",
-                    INTERNAL_TIMESTAMP_COLUMN
+                    TIMESTAMP_FIELD
                 ))
             })?;
 
@@ -163,11 +163,11 @@ impl UserDefinedLogicalNodeCore for EventTimeWatermarkNode {
 
         let internal_timestamp_offset = self
             .resolved_schema
-            .index_of_column_by_name(Some(&self.namespace_qualifier), INTERNAL_TIMESTAMP_COLUMN)
+            .index_of_column_by_name(Some(&self.namespace_qualifier), TIMESTAMP_FIELD)
             .ok_or_else(|| {
                 DataFusionError::Plan(format!(
                     "Optimizer Error: Lost tracking of temporal column '{}'",
-                    INTERNAL_TIMESTAMP_COLUMN
+                    TIMESTAMP_FIELD
                 ))
             })?;
 
@@ -210,7 +210,7 @@ impl StreamingOperatorBlueprint for EventTimeWatermarkNode {
             format!("watermark_{node_index}"),
             OperatorName::ExpressionWatermark,
             operator_config.encode_to_vec(),
-            "watermark_generator".to_string(),
+            runtime_operator_kind::WATERMARK_GENERATOR.to_string(),
             1,
         );
 
diff --git a/src/sql/extensions/windows_function.rs b/src/sql/extensions/windows_function.rs
index e53e2ee9..09945378 100644
--- a/src/sql/extensions/windows_function.rs
+++ b/src/sql/extensions/windows_function.rs
@@ -21,6 +21,7 @@ use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNod
 use prost::Message;
 use protocol::grpc::api::WindowFunctionOperator;
 
+use crate::sql::common::constants::{extension_node, proto_operator_name, runtime_operator_kind};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::sql::logical_planner::FsPhysicalExtensionCodec;
@@ -33,7 +34,7 @@ use super::{CompiledTopologyNode, StreamingOperatorBlueprint};
 // Constants & Identifiers
 // -----------------------------------------------------------------------------
 
-pub(crate) const STREAMING_WINDOW_NODE_NAME: &str = "StreamingWindowFunctionNode";
+pub(crate) const STREAMING_WINDOW_NODE_NAME: &str = extension_node::STREAMING_WINDOW_FUNCTION;
 
 // -----------------------------------------------------------------------------
 // Logical Node Definition
@@ -163,7 +164,7 @@ impl StreamingOperatorBlueprint for StreamingWindowFunctionNode {
         let evaluation_plan_payload = self.compile_physical_evaluation_plan(planner)?;
 
         let operator_config = WindowFunctionOperator {
-            name: "WindowFunction".to_string(),
+            name: proto_operator_name::WINDOW_FUNCTION.to_string(),
             input_schema: Some(input_schema.as_ref().clone().into()),
             binning_function: binning_payload,
             window_function_plan: evaluation_plan_payload,
@@ -174,7 +175,7 @@ impl StreamingOperatorBlueprint for StreamingWindowFunctionNode {
             format!("window_function_{node_index}"),
             OperatorName::WindowFunction,
             operator_config.encode_to_vec(),
-            "streaming_window_evaluator".to_string(),
+            runtime_operator_kind::STREAMING_WINDOW_EVALUATOR.to_string(),
             1,
         );
 
diff --git a/src/sql/functions/mod.rs b/src/sql/functions/mod.rs
index bfd59654..b78f5d2a 100644
--- a/src/sql/functions/mod.rs
+++ b/src/sql/functions/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use crate::sql::schema::StreamSchemaProvider;
 use datafusion::arrow::array::{
     Array, ArrayRef, StringArray, UnionArray,
@@ -22,7 +34,7 @@ use std::collections::HashMap;
 use std::fmt::{Debug, Write};
 use std::sync::{Arc, OnceLock};
 
-const SERIALIZE_JSON_UNION: &str = "serialize_json_union";
+use crate::sql::common::constants::scalar_fn;
 
 /// Borrowed from DataFusion
 ///
@@ -57,7 +69,7 @@ make_udf_function!(MultiHashFunction, MULTI_HASH, multi_hash);
 pub fn register_all(registry: &mut dyn FunctionRegistry) {
     registry
         .register_udf(Arc::new(create_udf(
-            "get_first_json_object",
+            scalar_fn::GET_FIRST_JSON_OBJECT,
             vec![DataType::Utf8, DataType::Utf8],
             DataType::Utf8,
             Volatility::Immutable,
@@ -67,7 +79,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) {
 
     registry
         .register_udf(Arc::new(create_udf(
-            "extract_json",
+            scalar_fn::EXTRACT_JSON,
             vec![DataType::Utf8, DataType::Utf8],
             DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))),
             Volatility::Immutable,
@@ -77,7 +89,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) {
 
     registry
         .register_udf(Arc::new(create_udf(
-            "extract_json_string",
+            scalar_fn::EXTRACT_JSON_STRING,
             vec![DataType::Utf8, DataType::Utf8],
             DataType::Utf8,
             Volatility::Immutable,
@@ -87,7 +99,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) {
 
     registry
         .register_udf(Arc::new(create_udf(
-            SERIALIZE_JSON_UNION,
+            scalar_fn::SERIALIZE_JSON_UNION,
             vec![DataType::Union(union_fields(), UnionMode::Sparse)],
             DataType::Utf8,
             Volatility::Immutable,
@@ -190,7 +202,7 @@ impl ScalarUDFImpl for MultiHashFunction {
     }
 
     fn name(&self) -> &str {
-        "multi_hash"
+        scalar_fn::MULTI_HASH
     }
 
     fn signature(&self) -> &Signature {
@@ -456,7 +468,7 @@ pub(crate) fn serialize_outgoing_json(
             if is_json_union(f.data_type()) {
                 Expr::Alias(Alias::new(
                     Expr::ScalarFunction(ScalarFunction::new_udf(
-                        registry.udf(SERIALIZE_JSON_UNION).unwrap(),
+                        registry.udf(scalar_fn::SERIALIZE_JSON_UNION).unwrap(),
                         vec![col(f.name())],
                     )),
                     Option::<TableReference>::None,
diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs
index 2fd9ad82..6cb00914 100644
--- a/src/sql/logical_node/logical/operator_name.rs
+++ b/src/sql/logical_node/logical/operator_name.rs
@@ -15,6 +15,8 @@ use std::str::FromStr;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use strum::{Display, EnumString};
 
+use crate::sql::common::constants::operator_feature;
+
 #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
 pub enum OperatorName {
     ExpressionWatermark,
@@ -35,6 +37,27 @@ pub enum OperatorName {
     ConnectorSink,
 }
 
+impl OperatorName {
+    /// Kebab-case tag used for feature / metrics aggregation (kept consistent with [`crate::sql::common::constants::operator_feature`]); returns `None` for operators that carry no tag.
+    pub fn feature_tag(self) -> Option<&'static str> {
+        match self {
+            Self::ExpressionWatermark | Self::ArrowValue | Self::ArrowKey | Self::Projection => None,
+            Self::AsyncUdf => Some(operator_feature::ASYNC_UDF),
+            Self::Join => Some(operator_feature::JOIN_WITH_EXPIRATION),
+            Self::InstantJoin => Some(operator_feature::WINDOWED_JOIN),
+            Self::WindowFunction => Some(operator_feature::SQL_WINDOW_FUNCTION),
+            Self::LookupJoin => Some(operator_feature::LOOKUP_JOIN),
+            Self::TumblingWindowAggregate => Some(operator_feature::SQL_TUMBLING_WINDOW_AGGREGATE),
+            Self::SlidingWindowAggregate => Some(operator_feature::SQL_SLIDING_WINDOW_AGGREGATE),
+            Self::SessionWindowAggregate => Some(operator_feature::SQL_SESSION_WINDOW_AGGREGATE),
+            Self::UpdatingAggregate => Some(operator_feature::SQL_UPDATING_AGGREGATE),
+            Self::KeyBy => Some(operator_feature::KEY_BY_ROUTING),
+            Self::ConnectorSource => Some(operator_feature::CONNECTOR_SOURCE),
+            Self::ConnectorSink => Some(operator_feature::CONNECTOR_SINK),
+        }
+    }
+}
+
 impl Serialize for OperatorName {
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where
diff --git a/src/sql/logical_node/mod.rs b/src/sql/logical_node/mod.rs
index 82d25f24..922801f6 100644
--- a/src/sql/logical_node/mod.rs
+++ b/src/sql/logical_node/mod.rs
@@ -1 +1,13 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 pub mod logical;
diff --git a/src/sql/logical_planner/mod.rs b/src/sql/logical_planner/mod.rs
index 85046c0d..54634651 100644
--- a/src/sql/logical_planner/mod.rs
+++ b/src/sql/logical_planner/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use datafusion::arrow::{
     array::{
         Array, AsArray, BooleanBuilder, PrimitiveArray, RecordBatch, StringArray, StructArray,
@@ -31,6 +43,7 @@ use crate::make_udf_function;
 use crate::sql::functions::MultiHashFunction;
 use crate::sql::analysis::UNNESTED_COL;
 use crate::sql::schema::utils::window_arrow_struct;
+use crate::sql::common::constants::cdc;
 use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
 use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type};
 use datafusion::catalog::memory::MemorySourceConfig;
@@ -763,9 +776,9 @@ pub struct DebeziumUnrollingExec {
 impl DebeziumUnrollingExec {
     pub fn try_new(input: Arc<dyn ExecutionPlan>, primary_keys: Vec<usize>) -> Result<Self> {
         let input_schema = input.schema();
-        let before_index = input_schema.index_of("before")?;
-        let after_index = input_schema.index_of("after")?;
-        let op_index = input_schema.index_of("op")?;
+        let before_index = input_schema.index_of(cdc::BEFORE)?;
+        let after_index = input_schema.index_of(cdc::AFTER)?;
+        let op_index = input_schema.index_of(cdc::OP)?;
         let _timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
         let before_type = input_schema.field(before_index).data_type();
         let after_type = input_schema.field(after_index).data_type();
@@ -888,9 +901,9 @@ impl DebeziumUnrollingStream {
             return plan_err!("there must be at least one primary key for a Debezium source");
         }
         let input_schema = input.schema();
-        let before_index = input_schema.index_of("before")?;
-        let after_index = input_schema.index_of("after")?;
-        let op_index = input_schema.index_of("op")?;
+        let before_index = input_schema.index_of(cdc::BEFORE)?;
+        let after_index = input_schema.index_of(cdc::AFTER)?;
+        let op_index = input_schema.index_of(cdc::OP)?;
         let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
 
         Ok(Self {
diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs
index bd25423c..be388ad4 100644
--- a/src/sql/logical_planner/planner.rs
+++ b/src/sql/logical_planner/planner.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::sync::Arc;
 use std::thread;
diff --git a/src/sql/physical/physical_planner.rs b/src/sql/physical/physical_planner.rs
index 963fa76f..fc66b3b0 100644
--- a/src/sql/physical/physical_planner.rs
+++ b/src/sql/physical/physical_planner.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::sync::Arc;
 use std::thread;
diff --git a/src/sql/schema/data_encoding_format.rs b/src/sql/schema/data_encoding_format.rs
index 5b93c90a..67e6d7e3 100644
--- a/src/sql/schema/data_encoding_format.rs
+++ b/src/sql/schema/data_encoding_format.rs
@@ -16,6 +16,7 @@ use datafusion::arrow::datatypes::{DataType, Field};
 use datafusion::common::{Result, plan_err};
 
 use super::column_descriptor::ColumnDescriptor;
+use crate::sql::common::with_option_keys as opt;
 use crate::sql::common::Format;
 
 /// High-level payload encoding (orthogonal to `Format` wire details in `ConnectionSchema`).
@@ -30,10 +31,13 @@ pub enum DataEncodingFormat {
 
 impl DataEncodingFormat {
     pub fn extract_from_map(opts: &HashMap<String, String>) -> Result<Self> {
-        let format_str = opts.get("format").map(|s| s.as_str()).unwrap_or("json");
+        let format_str = opts
+            .get(opt::FORMAT)
+            .map(|s| s.as_str())
+            .unwrap_or(opt::DEFAULT_FORMAT_VALUE);
         let is_debezium = opts
-            .get("format.debezium")
-            .or_else(|| opts.get("json.debezium"))
+            .get(opt::FORMAT_DEBEZIUM_FLAG)
+            .or_else(|| opts.get(opt::JSON_DEBEZIUM))
             .map(|s| s == "true")
             .unwrap_or(false);
 
diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs
index 85041f4b..9e0caddf 100644
--- a/src/sql/schema/source_table.rs
+++ b/src/sql/schema/source_table.rs
@@ -42,6 +42,7 @@ use crate::sql::common::kafka_catalog::{
     KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode,
     SinkCommitMode, TableType as KafkaTableType,
 };
+use crate::sql::common::with_option_keys as opt;
 use crate::sql::common::{
     BadData, Format, Framing, FsSchema, JsonCompression, JsonFormat, OperatorConfig, RateLimit,
 };
@@ -246,7 +247,7 @@ impl SourceTable {
     ) -> Result<Self> {
         let _ = connection_profile;
 
-        if let Some(c) = options.pull_opt_str("connector")? {
+        if let Some(c) = options.pull_opt_str(opt::CONNECTOR)? {
             if c != connector_name {
                 return plan_err!(
                     "WITH option `connector` is '{c}' but table uses connector '{connector_name}'"
@@ -274,7 +275,7 @@ impl SourceTable {
             .map_err(|e| DataFusionError::Plan(format!("invalid framing: '{e}'")))?;
 
         if temporary
-            && let Some(t) = options.insert_str("type", "lookup")?
+            && let Some(t) = options.insert_str(opt::TYPE, "lookup")?
             && t != "lookup"
         {
             return plan_err!(
@@ -321,7 +322,7 @@ impl SourceTable {
         let role = if let Some(t) = connection_type_override {
             t.into()
         } else {
-            match options.pull_opt_str("type")?.as_deref() {
+            match options.pull_opt_str(opt::TYPE)?.as_deref() {
                 None | Some("source") => TableRole::Ingestion,
                 Some("sink") => TableRole::Egress,
                 Some("lookup") => TableRole::Reference,
@@ -349,12 +350,12 @@ impl SourceTable {
             inferred_fields: None,
         };
 
-        if let Some(event_time_field) = options.pull_opt_field("event_time_field")? {
+        if let Some(event_time_field) = options.pull_opt_field(opt::EVENT_TIME_FIELD)? {
             warn!("`event_time_field` WITH option is deprecated; use WATERMARK FOR syntax");
             table.temporal_config.event_column = Some(event_time_field);
         }
 
-        if let Some(watermark_field) = options.pull_opt_field("watermark_field")? {
+        if let Some(watermark_field) = options.pull_opt_field(opt::WATERMARK_FIELD)? {
             warn!("`watermark_field` WITH option is deprecated; use WATERMARK FOR syntax");
             table.temporal_config.watermark_strategy_column = Some(watermark_field);
         }
@@ -417,15 +418,15 @@ impl SourceTable {
         }
 
         let idle_from_micros = options
-            .pull_opt_i64("idle_micros")?
+            .pull_opt_i64(opt::IDLE_MICROS)?
             .filter(|t| *t > 0)
             .map(|t| Duration::from_micros(t as u64));
-        let idle_from_duration = options.pull_opt_duration("idle_time")?;
+        let idle_from_duration = options.pull_opt_duration(opt::IDLE_TIME)?;
         table.temporal_config.liveness_timeout = idle_from_micros.or(idle_from_duration);
 
-        table.lookup_cache_max_bytes = options.pull_opt_u64("lookup.cache.max_bytes")?;
+        table.lookup_cache_max_bytes = options.pull_opt_u64(opt::LOOKUP_CACHE_MAX_BYTES)?;
 
-        table.lookup_cache_ttl = options.pull_opt_duration("lookup.cache.ttl")?;
+        table.lookup_cache_ttl = options.pull_opt_duration(opt::LOOKUP_CACHE_TTL)?;
 
         if connector_name.eq_ignore_ascii_case("kafka") {
             let physical = table.produce_physical_schema();
@@ -442,15 +443,19 @@ impl SourceTable {
             })?;
         } else {
             let extra_opts = options.drain_remaining_string_values()?;
-            let mut config_root = serde_json::json!({
-                "connector": connector_name,
-                "connection_schema": connection_schema,
-            });
-            if let serde_json::Value::Object(ref mut map) = config_root {
-                for (k, v) in extra_opts {
-                    map.insert(k, serde_json::Value::String(v));
-                }
+            let mut map = serde_json::Map::new();
+            map.insert(
+                opt::CONNECTOR.to_string(),
+                serde_json::Value::String(connector_name.to_string()),
+            );
+            let schema_val = serde_json::to_value(&connection_schema).map_err(|e| {
+                DataFusionError::Plan(format!("failed to serialize connection schema: {e}"))
+            })?;
+            map.insert(opt::CONNECTION_SCHEMA.to_string(), schema_val);
+            for (k, v) in extra_opts {
+                map.insert(k, serde_json::Value::String(v));
             }
+            let config_root = serde_json::Value::Object(map);
             table.opaque_config = serde_json::to_string(&config_root).map_err(|e| {
                 DataFusionError::Plan(format!("failed to serialize connector config: {e}"))
             })?;
@@ -575,10 +580,10 @@ fn wire_kafka_operator_config(
     bad_data: BadData,
     framing: Option<Framing>,
 ) -> Result<OperatorConfig> {
-    let bootstrap_servers = match options.pull_opt_str("bootstrap.servers")? {
+    let bootstrap_servers = match options.pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS)? {
         Some(s) => s,
         None => options
-            .pull_opt_str("bootstrap_servers")?
+            .pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS_LEGACY)?
             .ok_or_else(|| {
                 plan_datafusion_err!(
                     "Kafka connector requires 'bootstrap.servers' in the WITH clause"
@@ -587,7 +592,7 @@ fn wire_kafka_operator_config(
     };
 
     let topic = options
-        .pull_opt_str("topic")?
+        .pull_opt_str(opt::KAFKA_TOPIC)?
         .ok_or_else(|| plan_datafusion_err!("Kafka connector requires 'topic' in the WITH clause"))?;
 
     let sql_format = format.clone().ok_or_else(|| {
@@ -597,16 +602,16 @@ fn wire_kafka_operator_config(
     })?;
 
     let rate_limit = options
-        .pull_opt_u64("rate_limit.messages_per_second")?
+        .pull_opt_u64(opt::KAFKA_RATE_LIMIT_MESSAGES_PER_SECOND)?
         .map(|v| RateLimit {
             messages_per_second: v.clamp(1, u32::MAX as u64) as u32,
         });
 
-    let value_subject = options.pull_opt_str("value.subject")?;
+    let value_subject = options.pull_opt_str(opt::KAFKA_VALUE_SUBJECT)?;
 
     let kind = match role {
         TableRole::Ingestion => {
-            let offset = match options.pull_opt_str("scan.startup.mode")?.as_deref() {
+            let offset = match options.pull_opt_str(opt::KAFKA_SCAN_STARTUP_MODE)?.as_deref() {
                 Some("latest") => KafkaTableSourceOffset::Latest,
                 Some("earliest") => KafkaTableSourceOffset::Earliest,
                 None | Some("group-offsets") | Some("group") => KafkaTableSourceOffset::Group,
@@ -616,7 +621,7 @@ fn wire_kafka_operator_config(
                     );
                 }
             };
-            let read_mode = match options.pull_opt_str("isolation.level")?.as_deref() {
+            let read_mode = match options.pull_opt_str(opt::KAFKA_ISOLATION_LEVEL)?.as_deref() {
                 Some("read_committed") => Some(ReadMode::ReadCommitted),
                 Some("read_uncommitted") => Some(ReadMode::ReadUncommitted),
                 None => None,
@@ -624,11 +629,11 @@ fn wire_kafka_operator_config(
                     return plan_err!("invalid isolation.level '{other}'");
                 }
             };
-            let group_id = match options.pull_opt_str("group.id")? {
+            let group_id = match options.pull_opt_str(opt::KAFKA_GROUP_ID)? {
                 Some(s) => Some(s),
-                None => options.pull_opt_str("group_id")?,
+                None => options.pull_opt_str(opt::KAFKA_GROUP_ID_LEGACY)?,
             };
-            let group_id_prefix = options.pull_opt_str("group.id.prefix")?;
+            let group_id_prefix = options.pull_opt_str(opt::KAFKA_GROUP_ID_PREFIX)?;
             KafkaTableType::Source {
                 offset,
                 read_mode,
@@ -637,20 +642,20 @@ fn wire_kafka_operator_config(
             }
         }
         TableRole::Egress => {
-            let commit_mode = match options.pull_opt_str("sink.commit.mode")?.as_deref() {
+            let commit_mode = match options.pull_opt_str(opt::KAFKA_SINK_COMMIT_MODE)?.as_deref() {
                 Some("exactly-once") | Some("exactly_once") => SinkCommitMode::ExactlyOnce,
                 None | Some("at-least-once") | Some("at_least_once") => SinkCommitMode::AtLeastOnce,
                 Some(other) => {
                     return plan_err!("invalid sink.commit.mode '{other}'");
                 }
             };
-            let key_field = match options.pull_opt_str("sink.key.field")? {
+            let key_field = match options.pull_opt_str(opt::KAFKA_SINK_KEY_FIELD)? {
                 Some(s) => Some(s),
-                None => options.pull_opt_str("key.field")?,
+                None => options.pull_opt_str(opt::KAFKA_KEY_FIELD_LEGACY)?,
             };
-            let timestamp_field = match options.pull_opt_str("sink.timestamp.field")? {
+            let timestamp_field = match options.pull_opt_str(opt::KAFKA_SINK_TIMESTAMP_FIELD)? {
                 Some(s) => Some(s),
-                None => options.pull_opt_str("timestamp.field")?,
+                None => options.pull_opt_str(opt::KAFKA_TIMESTAMP_FIELD_LEGACY)?,
             };
             KafkaTableType::Sink {
                 commit_mode,
@@ -664,8 +669,8 @@ fn wire_kafka_operator_config(
     };
 
     // Role already decided; keep these out of librdkafka `connection_properties`.
-    let _ = options.pull_opt_str("type")?;
-    let _ = options.pull_opt_str("connector")?;
+    let _ = options.pull_opt_str(opt::TYPE)?;
+    let _ = options.pull_opt_str(opt::CONNECTOR)?;
 
     let connection_properties = options.drain_remaining_string_values()?;
 
diff --git a/src/sql/schema/table_role.rs b/src/sql/schema/table_role.rs
index 31629ad8..12bd8068 100644
--- a/src/sql/schema/table_role.rs
+++ b/src/sql/schema/table_role.rs
@@ -18,6 +18,7 @@ use datafusion::error::DataFusionError;
 
 use super::column_descriptor::ColumnDescriptor;
 use super::connection_type::ConnectionType;
+use crate::sql::common::with_option_keys as opt;
 
 /// Role of a connector-backed table in the pipeline (ingest / egress / lookup).
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -87,7 +88,7 @@ pub fn apply_adapter_specific_rules(adapter: &str, mut cols: Vec<ColumnDescripto
 }
 
 pub fn deduce_role(options: &HashMap<String, String>) -> Result<TableRole> {
-    match options.get("type").map(|s| s.as_str()) {
+    match options.get(opt::TYPE).map(|s| s.as_str()) {
         None | Some("source") => Ok(TableRole::Ingestion),
         Some("sink") => Ok(TableRole::Egress),
         Some("lookup") => Ok(TableRole::Reference),
@@ -98,7 +99,7 @@ pub fn deduce_role(options: &HashMap<String, String>) -> Result<TableRole> {
 pub fn serialize_backend_params(adapter: &str, options: &HashMap<String, String>) -> Result<String> {
     let mut payload = serde_json::Map::new();
     payload.insert(
-        "adapter".to_string(),
+        opt::ADAPTER.to_string(),
         serde_json::Value::String(adapter.to_string()),
     );
 
diff --git a/src/sql/types/data_type.rs b/src/sql/types/data_type.rs
index 66076da3..1fc55ecc 100644
--- a/src/sql/types/data_type.rs
+++ b/src/sql/types/data_type.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::sync::Arc;
 
 use datafusion::arrow::datatypes::{
diff --git a/src/sql/types/df_field.rs b/src/sql/types/df_field.rs
index 3797adb2..435ae30a 100644
--- a/src/sql/types/df_field.rs
+++ b/src/sql/types/df_field.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::collections::HashMap;
 use std::sync::Arc;
 
diff --git a/src/sql/types/mod.rs b/src/sql/types/mod.rs
index 25c67574..16d7033b 100644
--- a/src/sql/types/mod.rs
+++ b/src/sql/types/mod.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 mod data_type;
 mod df_field;
 pub(crate) mod placeholder_udf;
diff --git a/src/sql/types/placeholder_udf.rs b/src/sql/types/placeholder_udf.rs
index 5cf96d28..0bdf17e6 100644
--- a/src/sql/types/placeholder_udf.rs
+++ b/src/sql/types/placeholder_udf.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
 use std::sync::Arc;
diff --git a/src/sql/types/stream_schema.rs b/src/sql/types/stream_schema.rs
index e981111b..4b63182d 100644
--- a/src/sql/types/stream_schema.rs
+++ b/src/sql/types/stream_schema.rs
@@ -1,3 +1,15 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::sync::Arc;
 
 use datafusion::arrow::datatypes::{Field, Schema, SchemaRef};
diff --git a/src/sql/types/window.rs b/src/sql/types/window.rs
index 9687974a..7934bc1d 100644
--- a/src/sql/types/window.rs
+++ b/src/sql/types/window.rs
@@ -1,8 +1,22 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 use std::time::Duration;
 
 use datafusion::common::{Result, plan_err};
 use datafusion::logical_expr::Expr;
 
+use crate::sql::common::constants::window_fn;
+
 use super::DFField;
 
 #[derive(Clone, Debug, PartialEq, Eq, Hash)]
@@ -54,7 +68,7 @@ pub fn find_window(expression: &Expr) -> Result<Option<WindowType>> {
 
     match expression {
         Expr::ScalarFunction(ScalarFunction { func: fun, args }) => match fun.name() {
-            "hop" => {
+            name if name == window_fn::HOP => {
                 if args.len() != 2 {
                     unreachable!();
                 }
@@ -73,14 +87,14 @@ pub fn find_window(expression: &Expr) -> Result<Option<WindowType>> {
                     Ok(Some(WindowType::Sliding { width, slide }))
                 }
             }
-            "tumble" => {
+            name if name == window_fn::TUMBLE => {
                 if args.len() != 1 {
                     unreachable!("wrong number of arguments for tumble(), expect one");
                 }
                 let width = get_duration(&args[0])?;
                 Ok(Some(WindowType::Tumbling { width }))
             }
-            "session" => {
+            name if name == window_fn::SESSION => {
                 if args.len() != 1 {
                     unreachable!("wrong number of arguments for session(), expected one");
                 }

From 58a9e5c10e59960f9053a4ac19b697674fcc655b Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 11:27:15 +0800
Subject: [PATCH 24/44] update

---
 src/sql/common/constants.rs | 121 ++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 src/sql/common/constants.rs

diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs
new file mode 100644
index 00000000..8f791222
--- /dev/null
+++ b/src/sql/common/constants.rs
@@ -0,0 +1,121 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
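+//! **Name and identifier constants** for SQL / streaming operators (scalar function names, window TVFs, logical
+//! extension node names, CDC fields, runtime blueprint strings, `OperatorName` feature tags, etc.); the WITH option keys live separately in [`super::with_option_keys`].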
+
+// ── Built-in scalar UDFs (`register_all` / `ScalarUDFImpl::name`) ─────────────
+
+pub mod scalar_fn {
+    pub const GET_FIRST_JSON_OBJECT: &str = "get_first_json_object";
+    pub const EXTRACT_JSON: &str = "extract_json";
+    pub const EXTRACT_JSON_STRING: &str = "extract_json_string";
+    pub const SERIALIZE_JSON_UNION: &str = "serialize_json_union";
+    pub const MULTI_HASH: &str = "multi_hash";
+}
+
+// ── Window TVFs (`hop` / `tumble` / `session`, etc.; consistent with DataFusion parsing) ──
+
+pub mod window_fn {
+    pub const HOP: &str = "hop";
+    pub const TUMBLE: &str = "tumble";
+    pub const SESSION: &str = "session";
+}
+
+// ── `OperatorName` 在指标 / 特性集合中使用的 kebab-case 标签 ─────────────────
+
+pub mod operator_feature {
+    pub const ASYNC_UDF: &str = "async-udf";
+    pub const JOIN_WITH_EXPIRATION: &str = "join-with-expiration";
+    pub const WINDOWED_JOIN: &str = "windowed-join";
+    pub const SQL_WINDOW_FUNCTION: &str = "sql-window-function";
+    pub const LOOKUP_JOIN: &str = "lookup-join";
+    pub const SQL_TUMBLING_WINDOW_AGGREGATE: &str = "sql-tumbling-window-aggregate";
+    pub const SQL_SLIDING_WINDOW_AGGREGATE: &str = "sql-sliding-window-aggregate";
+    pub const SQL_SESSION_WINDOW_AGGREGATE: &str = "sql-session-window-aggregate";
+    pub const SQL_UPDATING_AGGREGATE: &str = "sql-updating-aggregate";
+    pub const KEY_BY_ROUTING: &str = "key-by-routing";
+    pub const CONNECTOR_SOURCE: &str = "connector-source";
+    pub const CONNECTOR_SINK: &str = "connector-sink";
+}
+
+// ── Logical-plan extension nodes: `UserDefinedLogicalNodeCore::name` / type strings ──
+
+pub mod extension_node {
+    pub const STREAM_WINDOW_AGGREGATE: &str = "StreamWindowAggregateNode";
+    pub const STREAMING_WINDOW_FUNCTION: &str = "StreamingWindowFunctionNode";
+    pub const EVENT_TIME_WATERMARK: &str = "EventTimeWatermarkNode";
+    pub const CONTINUOUS_AGGREGATE: &str = "ContinuousAggregateNode";
+    pub const SYSTEM_TIMESTAMP_INJECTOR: &str = "SystemTimestampInjectorNode";
+    pub const STREAM_INGESTION: &str = "StreamIngestionNode";
+    pub const STREAM_EGRESS: &str = "StreamEgressNode";
+    pub const STREAM_PROJECTION: &str = "StreamProjectionNode";
+    pub const REMOTE_TABLE_BOUNDARY: &str = "RemoteTableBoundaryNode";
+    pub const REFERENCE_TABLE_SOURCE: &str = "ReferenceTableSource";
+    pub const STREAM_REFERENCE_JOIN: &str = "StreamReferenceJoin";
+    pub const KEY_EXTRACTION: &str = "KeyExtractionNode";
+    pub const STREAMING_JOIN: &str = "StreamingJoinNode";
+    pub const ASYNC_FUNCTION_EXECUTION: &str = "AsyncFunctionExecutionNode";
+    pub const UNROLL_DEBEZIUM_PAYLOAD: &str = "UnrollDebeziumPayloadNode";
+    pub const PACK_DEBEZIUM_ENVELOPE: &str = "PackDebeziumEnvelopeNode";
+}
+
+// ── `name` field in gRPC / proto operator configs (related to `OperatorName` display) ──
+
+pub mod proto_operator_name {
+    pub const TUMBLING_WINDOW: &str = "TumblingWindow";
+    pub const UPDATING_AGGREGATE: &str = "UpdatingAggregate";
+    pub const WINDOW_FUNCTION: &str = "WindowFunction";
+    /// Human-readable description fragment for sliding windows (not a fixed id).
+    pub const SLIDING_WINDOW_LABEL: &str = "sliding window";
+    pub const INSTANT_WINDOW: &str = "InstantWindow";
+    pub const INSTANT_WINDOW_LABEL: &str = "instant window";
+}
+
+// ── Blueprint / operator-kind strings sent down to the runtime ─────────────────
+
+pub mod runtime_operator_kind {
+    pub const STREAMING_JOIN: &str = "streaming_join";
+    pub const WATERMARK_GENERATOR: &str = "watermark_generator";
+    pub const STREAMING_WINDOW_EVALUATOR: &str = "streaming_window_evaluator";
+}
+
+// ── Debezium CDC envelope fields ────────────────────────────────────────────
+
+pub mod cdc {
+    pub const BEFORE: &str = "before";
+    pub const AFTER: &str = "after";
+    pub const OP: &str = "op";
+}
+
+// ── Struct fields of the updating-aggregate state metadata ───────────────────
+
+pub mod updating_state_field {
+    pub const IS_RETRACT: &str = "is_retract";
+    pub const ID: &str = "id";
+}
+
+// ── 计划里常用的列名 / 别名 ───────────────────────────────────────────────────
+
+pub mod sql_field {
+    /// 异步 UDF 重写后的结果列（与历史 `extensions::constants` 对齐）。
+    pub const ASYNC_RESULT: &str = "__async_result";
+    pub const DEFAULT_KEY_LABEL: &str = "key";
+    pub const DEFAULT_PROJECTION_LABEL: &str = "projection";
+}
+
+// ── 连接器类型短名（工厂注册等）──────────────────────────────────────────────
+
+pub mod connector_type {
+    pub const KAFKA: &str = "kafka";
+    pub const REDIS: &str = "redis";
+}

From 0321c502aa270b8bc46053eeef4d07c557993764 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 13:53:39 +0800
Subject: [PATCH 25/44] update

---
 .../grouping/incremental_aggregate.rs         |    2 +-
 .../operators/joins/join_instance.rs          |    7 +-
 .../operators/joins/join_with_expiration.rs   |    2 +-
 .../operators/stateless_physical_executor.rs  |    2 +-
 .../windows/session_aggregating_window.rs     |    2 +-
 .../windows/sliding_aggregating_window.rs     |    2 +-
 .../windows/tumbling_aggregating_window.rs    |    2 +-
 .../operators/windows/window_function.rs      |    2 +-
 src/sql/analysis/async_udf_rewriter.rs        |    5 +-
 src/sql/analysis/join_rewriter.rs             |    5 +-
 src/sql/analysis/row_time_rewriter.rs         |    5 +-
 src/sql/analysis/unnest_rewriter.rs           |    3 +-
 src/sql/common/connector_options.rs           |   26 +-
 src/sql/common/constants.rs                   |  200 ++-
 src/sql/common/date.rs                        |   48 +-
 src/sql/common/format_from_opts.rs            |   33 +-
 src/sql/common/formats.rs                     |   48 +-
 src/sql/extensions/aggregate.rs               |    2 +-
 src/sql/extensions/async_udf.rs               |    5 +-
 src/sql/extensions/debezium.rs                |    2 +-
 src/sql/extensions/is_retract.rs              |    2 +-
 src/sql/extensions/join.rs                    |    2 +-
 src/sql/extensions/key_calculation.rs         |    2 +-
 src/sql/extensions/mod.rs                     |    3 -
 src/sql/extensions/remote_table.rs            |    2 +-
 src/sql/extensions/updating_aggregate.rs      |    2 +-
 src/sql/extensions/windows_function.rs        |    2 +-
 src/sql/logical_planner/mod.rs                | 1267 -----------------
 src/sql/logical_planner/planner.rs            |    2 +-
 src/sql/mod.rs                                |    1 +
 src/sql/physical/cdc/encode.rs                |  331 +++++
 .../constants.rs => physical/cdc/mod.rs}      |    8 +-
 src/sql/physical/cdc/unroll.rs                |  300 ++++
 src/sql/physical/codec.rs                     |  263 ++++
 src/sql/physical/meta.rs                      |   53 +
 src/sql/physical/mod.rs                       |   25 +
 src/sql/physical/physical_planner.rs          |  418 ------
 src/sql/physical/readers.rs                   |  372 +++++
 src/sql/physical/udfs.rs                      |  132 ++
 src/sql/schema/data_encoding_format.rs        |   18 +-
 src/sql/schema/schema_provider.rs             |   19 +-
 src/sql/schema/source_table.rs                |   57 +-
 src/sql/schema/table_role.rs                  |   31 +-
 src/sql/schema/temporal_pipeline_config.rs    |    3 +-
 src/sql/schema/utils.rs                       |    5 +-
 src/sql/types/data_type.rs                    |    3 +-
 46 files changed, 1889 insertions(+), 1837 deletions(-)
 create mode 100644 src/sql/physical/cdc/encode.rs
 rename src/sql/{extensions/constants.rs => physical/cdc/mod.rs} (78%)
 create mode 100644 src/sql/physical/cdc/unroll.rs
 create mode 100644 src/sql/physical/codec.rs
 create mode 100644 src/sql/physical/meta.rs
 create mode 100644 src/sql/physical/mod.rs
 delete mode 100644 src/sql/physical/physical_planner.rs
 create mode 100644 src/sql/physical/readers.rs
 create mode 100644 src/sql/physical/udfs.rs

diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
index 104d24a1..2d2abf18 100644
--- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -49,7 +49,7 @@ use crate::runtime::streaming::arrow::decode_aggregate;
 use crate::runtime::streaming::operators::{Key, UpdatingCache};
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{to_nanos, CheckpointBarrier, FsSchema, Watermark, TIMESTAMP_FIELD, UPDATING_META_FIELD};
-use crate::sql::logical_planner::updating_meta_fields;
+use crate::sql::physical::updating_meta_fields;
 
 #[derive(Debug, Copy, Clone)]
 struct BatchData {
diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs
index 278bc8fe..cd5b3764 100644
--- a/src/runtime/streaming/operators/joins/join_instance.rs
+++ b/src/runtime/streaming/operators/joins/join_instance.rs
@@ -34,8 +34,9 @@ use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
 use async_trait::async_trait;
 use protocol::grpc::api::JoinOperator;
 use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::constants::mem_exec_join_side;
 use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
-use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 enum JoinSide {
@@ -47,8 +48,8 @@ impl JoinSide {
     #[allow(dead_code)]
     fn name(&self) -> &'static str {
         match self {
-            JoinSide::Left => "left",
-            JoinSide::Right => "right",
+            JoinSide::Left => mem_exec_join_side::LEFT,
+            JoinSide::Right => mem_exec_join_side::RIGHT,
         }
     }
 }
diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs
index 1a31b253..34d15932 100644
--- a/src/runtime/streaming/operators/joins/join_with_expiration.rs
+++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs
@@ -33,7 +33,7 @@ use async_trait::async_trait;
 use protocol::grpc::api::JoinOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark};
-use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 enum JoinSide {
diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs
index 45619dc6..dee92eb3 100644
--- a/src/runtime/streaming/operators/stateless_physical_executor.rs
+++ b/src/runtime/streaming/operators/stateless_physical_executor.rs
@@ -27,7 +27,7 @@ use futures::StreamExt;
 use prost::Message;
 
 use crate::runtime::streaming::api::operator::Registry;
-use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 
 pub struct StatelessPhysicalExecutor {
     batch: Arc<RwLock<Option<RecordBatch>>>,
diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
index 8fa3f2f7..73642992 100644
--- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
@@ -44,7 +44,7 @@ use protocol::grpc::api::SessionWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
 use crate::sql::common::converter::Converter;
-use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 use crate::sql::schema::utils::window_arrow_struct;
 // ============================================================================
 // 领域模型与纯内存状态
diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
index e5af57f3..7d801fd6 100644
--- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
@@ -43,7 +43,7 @@ use crate::runtime::streaming::api::operator::Registry;
 use protocol::grpc::api::SlidingWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
-use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 // ============================================================================
 // 纯内存状态：阶梯式时间面板 (Tiered panes)
 // 这部分本身就是极佳的内存数据结构，原样保留！
diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
index 40c757dc..004cc205 100644
--- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
@@ -44,7 +44,7 @@ use protocol::grpc::api::TumblingWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
 use crate::sql::common::time_utils::print_time;
-use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 use crate::sql::schema::utils::add_timestamp_field_arrow;
 
 struct ActiveBin {
diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs
index 4e9c83ce..641b0ef6 100644
--- a/src/runtime/streaming/operators/windows/window_function.rs
+++ b/src/runtime/streaming/operators/windows/window_function.rs
@@ -37,7 +37,7 @@ use async_trait::async_trait;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
 use crate::sql::common::time_utils::print_time;
-use crate::sql::logical_planner::{DecodingContext, FsPhysicalExtensionCodec};
+use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 
 // ============================================================================
 // 纯内存执行缓冲区
diff --git a/src/sql/analysis/async_udf_rewriter.rs b/src/sql/analysis/async_udf_rewriter.rs
index 31a92057..073a1f42 100644
--- a/src/sql/analysis/async_udf_rewriter.rs
+++ b/src/sql/analysis/async_udf_rewriter.rs
@@ -11,7 +11,8 @@
 // limitations under the License.
 
 use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
-use crate::sql::extensions::{ASYNC_RESULT_FIELD, AsyncFunctionExecutionNode};
+use crate::sql::common::constants::sql_field;
+use crate::sql::extensions::AsyncFunctionExecutionNode;
 use crate::sql::schema::StreamSchemaProvider;
 use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
 use datafusion::common::{Column, Result as DFResult, TableReference, plan_err};
@@ -55,7 +56,7 @@ impl<'a> AsyncUdfRewriter<'a> {
                         );
                     }
                     return Ok(Transformed::yes(Expr::Column(Column::new_unqualified(
-                        ASYNC_RESULT_FIELD,
+                        sql_field::ASYNC_RESULT,
                     ))));
                 }
             }
diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs
index 77131595..4421aa99 100644
--- a/src/sql/analysis/join_rewriter.rs
+++ b/src/sql/analysis/join_rewriter.rs
@@ -15,6 +15,7 @@ use crate::sql::extensions::join::StreamingJoinNode;
 use crate::sql::extensions::key_calculation::KeyExtractionNode;
 use crate::sql::analysis::streaming_window_analzer::StreamingWindowAnalzer;
 use crate::sql::types::{WindowType, fields_with_qualifiers, schema_from_df_fields_with_metadata};
+use crate::sql::common::constants::mem_exec_join_side;
 use crate::sql::common::TIMESTAMP_FIELD;
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{
@@ -198,8 +199,8 @@ impl TreeNodeRewriter for JoinRewriter<'_> {
 
         // 2. Prepare Keyed Inputs for Shuffle
         let (left_on, right_on): (Vec<_>, Vec<_>) = join.on.clone().into_iter().unzip();
-        let keyed_left = self.build_keyed_side(join.left, left_on, "left")?;
-        let keyed_right = self.build_keyed_side(join.right, right_on, "right")?;
+        let keyed_left = self.build_keyed_side(join.left, left_on, mem_exec_join_side::LEFT)?;
+        let keyed_right = self.build_keyed_side(join.right, right_on, mem_exec_join_side::RIGHT)?;
 
         // 3. Assemble Rewritten Join Node
         let join_schema = Arc::new(build_join_schema(
diff --git a/src/sql/analysis/row_time_rewriter.rs b/src/sql/analysis/row_time_rewriter.rs
index f0c4e435..13e2a048 100644
--- a/src/sql/analysis/row_time_rewriter.rs
+++ b/src/sql/analysis/row_time_rewriter.rs
@@ -14,6 +14,7 @@ use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{Column, Result as DFResult};
 use datafusion::logical_expr::Expr;
 
+use crate::sql::common::constants::planning_placeholder_udf;
 use crate::sql::types::TIMESTAMP_FIELD;
 
 /// Replaces the virtual `row_time()` scalar function with a physical reference to `_timestamp`.
@@ -26,9 +27,9 @@ impl TreeNodeRewriter for RowTimeRewriter {
     type Node = Expr;
 
     fn f_down(&mut self, node: Self::Node) -> DFResult<Transformed<Self::Node>> {
-        // Use pattern matching to identify the 'row_time' scalar function.
+        // Use pattern matching to identify the `row_time` scalar function.
         if let Expr::ScalarFunction(func) = &node
-            && func.name() == "row_time"
+            && func.name() == planning_placeholder_udf::ROW_TIME
         {
             // Map the virtual function to the physical internal timestamp column.
             // We use .alias() to preserve the original name "row_time()" in the output schema,
diff --git a/src/sql/analysis/unnest_rewriter.rs b/src/sql/analysis/unnest_rewriter.rs
index 2a9eabda..535590c8 100644
--- a/src/sql/analysis/unnest_rewriter.rs
+++ b/src/sql/analysis/unnest_rewriter.rs
@@ -18,6 +18,7 @@ use datafusion::common::{Column, Result as DFResult, plan_err};
 use datafusion::logical_expr::expr::ScalarFunction;
 use datafusion::logical_expr::{ColumnUnnestList, Expr, LogicalPlan, Projection, Unnest};
 
+use crate::sql::common::constants::planning_placeholder_udf;
 use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields};
 
 pub const UNNESTED_COL: &str = "__unnested";
@@ -31,7 +32,7 @@ impl UnnestRewriter {
 
         let expr = expr.transform_up(|e| {
             if let Expr::ScalarFunction(ScalarFunction { func: udf, args }) = &e
-                && udf.name() == "unnest"
+                && udf.name() == planning_placeholder_udf::UNNEST
             {
                 match args.len() {
                     1 => {
diff --git a/src/sql/common/connector_options.rs b/src/sql/common/connector_options.rs
index 6bd6dfa6..bffa766a 100644
--- a/src/sql/common/connector_options.rs
+++ b/src/sql/common/connector_options.rs
@@ -20,6 +20,8 @@ use datafusion::error::DataFusionError;
 use datafusion::sql::sqlparser::ast::{Expr, Ident, SqlOption, Value as SqlValue, ValueWithSpan};
 use tracing::warn;
 
+use super::constants::{interval_duration_unit, with_opt_bool_str};
+
 pub trait FromOpts: Sized {
     fn from_opts(opts: &mut ConnectorOptions) -> DFResult<Self>;
 }
@@ -88,8 +90,8 @@ impl ConnectorOptions {
                 value: SqlValue::SingleQuotedString(s),
                 span: _,
             })) => match s.as_str() {
-                "true" | "yes" => Ok(Some(true)),
-                "false" | "no" => Ok(Some(false)),
+                with_opt_bool_str::TRUE | with_opt_bool_str::YES => Ok(Some(true)),
+                with_opt_bool_str::FALSE | with_opt_bool_str::NO => Ok(Some(false)),
                 _ => Err(plan_datafusion_err!(
                     "expected with option '{}' to be a boolean, but it was `'{}'`",
                     name,
@@ -367,11 +369,21 @@ fn parse_interval_to_duration(s: &str) -> Result<Duration, DataFusionError> {
     let value: u64 = parts[0]
         .parse()
         .map_err(|_| DataFusionError::Plan(format!("invalid interval number: {}", parts[0])))?;
-    let duration = match parts[1].to_lowercase().as_str() {
-        "second" | "seconds" | "s" => Duration::from_secs(value),
-        "minute" | "minutes" | "min" => Duration::from_secs(value * 60),
-        "hour" | "hours" | "h" => Duration::from_secs(value * 3600),
-        "day" | "days" | "d" => Duration::from_secs(value * 86400),
+    let unit_lc = parts[1].to_lowercase();
+    let unit = unit_lc.as_str();
+    let duration = match unit {
+        interval_duration_unit::SECOND
+        | interval_duration_unit::SECONDS
+        | interval_duration_unit::S => Duration::from_secs(value),
+        interval_duration_unit::MINUTE
+        | interval_duration_unit::MINUTES
+        | interval_duration_unit::MIN => Duration::from_secs(value * 60),
+        interval_duration_unit::HOUR
+        | interval_duration_unit::HOURS
+        | interval_duration_unit::H => Duration::from_secs(value * 3600),
+        interval_duration_unit::DAY
+        | interval_duration_unit::DAYS
+        | interval_duration_unit::D => Duration::from_secs(value * 86400),
         unit => {
             return Err(DataFusionError::Plan(format!(
                 "unsupported interval unit '{unit}'"
diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs
index 8f791222..f5dd56ef 100644
--- a/src/sql/common/constants.rs
+++ b/src/sql/common/constants.rs
@@ -31,6 +31,15 @@ pub mod window_fn {
     pub const SESSION: &str = "session";
 }
 
+// ── Placeholder scalar UDFs used during stream planning (`StreamPlanningContextBuilder::with_streaming_extensions`) ──
+
+pub mod planning_placeholder_udf {
+    pub const UNNEST: &str = "unnest";
+    pub const ROW_TIME: &str = "row_time";
+    /// Field name of the element inside a `List`; used only for the Arrow shape of the placeholder signature.
+    pub const LIST_ELEMENT_FIELD: &str = "field";
+}
+
 // ── `OperatorName` 在指标 / 特性集合中使用的 kebab-case 标签 ─────────────────
 
 pub mod operator_feature {
@@ -107,15 +116,204 @@ pub mod updating_state_field {
 // ── 计划里常用的列名 / 别名 ───────────────────────────────────────────────────
 
 pub mod sql_field {
-    /// 异步 UDF 重写后的结果列（与历史 `extensions::constants` 对齐）。
+    /// Name of the result column produced by the async UDF rewrite.
     pub const ASYNC_RESULT: &str = "__async_result";
     pub const DEFAULT_KEY_LABEL: &str = "key";
     pub const DEFAULT_PROJECTION_LABEL: &str = "projection";
+    /// Name of the computed column generated by `WATERMARK FOR … AS expr` (consistent with `TemporalPipelineConfig`).
+    pub const COMPUTED_WATERMARK: &str = "__watermark";
+}
+
+// ── Literals used when parsing `ConnectorOptions` / WITH ────────────────────────────────
+
+/// Boolean values written as single-quoted strings (see [`super::connector_options::ConnectorOptions::pull_opt_bool`]).
+pub mod with_opt_bool_str {
+    pub const TRUE: &str = "true";
+    pub const YES: &str = "yes";
+    pub const FALSE: &str = "false";
+    pub const NO: &str = "no";
+}
+
+/// Unit tokens used when parsing `INTERVAL '…'` / interval strings (lowercase; the unit is passed through `to_lowercase` before matching).
+pub mod interval_duration_unit {
+    pub const SECOND: &str = "second";
+    pub const SECONDS: &str = "seconds";
+    pub const S: &str = "s";
+    pub const MINUTE: &str = "minute";
+    pub const MINUTES: &str = "minutes";
+    pub const MIN: &str = "min";
+    pub const HOUR: &str = "hour";
+    pub const HOURS: &str = "hours";
+    pub const H: &str = "h";
+    pub const DAY: &str = "day";
+    pub const DAYS: &str = "days";
+    pub const D: &str = "d";
+}
+
+// ── WITH values for `format` / `framing.method` / `bad_data` (see `format_from_opts`) ──────
+
+/// Names accepted by `format = '…'` (lowercase; `Format::from_opts` applies `to_lowercase` to the value).
+pub mod connection_format_value {
+    pub const JSON: &str = "json";
+    pub const DEBEZIUM_JSON: &str = "debezium_json";
+    pub const AVRO: &str = "avro";
+    pub const PARQUET: &str = "parquet";
+    pub const PROTOBUF: &str = "protobuf";
+    pub const RAW_STRING: &str = "raw_string";
+    pub const RAW_BYTES: &str = "raw_bytes";
+}
+
+/// Valid values of `framing.method` (consistent with `Framing::from_opts`; currently no case folding is applied).
+pub mod framing_method_value {
+    pub const NEWLINE: &str = "newline";
+    pub const NEWLINE_DELIMITED: &str = "newline_delimited";
+}
+
+/// Values of `bad_data = '…'` (lowercase; the input is passed through `to_lowercase` before matching).
+pub mod bad_data_value {
+    pub const FAIL: &str = "fail";
+    pub const DROP: &str = "drop";
+}
+
+// ── Wire names of the enums in `formats.rs` (consistent with serde `snake_case` / `TryFrom` / `FromStr`) ────
+
+pub mod timestamp_format_value {
+    pub const RFC3339_SNAKE: &str = "rfc3339";
+    pub const RFC3339_UPPER: &str = "RFC3339";
+    pub const UNIX_MILLIS_SNAKE: &str = "unix_millis";
+    pub const UNIX_MILLIS_PASCAL: &str = "UnixMillis";
+}
+
+pub mod decimal_encoding_value {
+    pub const NUMBER: &str = "number";
+    pub const STRING: &str = "string";
+    pub const BYTES: &str = "bytes";
+}
+
+pub mod json_compression_value {
+    pub const UNCOMPRESSED: &str = "uncompressed";
+    pub const GZIP: &str = "gzip";
+}
+
+pub mod parquet_compression_value {
+    pub const UNCOMPRESSED: &str = "uncompressed";
+    pub const SNAPPY: &str = "snappy";
+    pub const GZIP: &str = "gzip";
+    pub const ZSTD: &str = "zstd";
+    pub const LZ4: &str = "lz4";
+    pub const LZ4_RAW: &str = "lz4_raw";
+}
+
+// ── `date_part` / `date_trunc` SQL keywords (lowercase; the input is passed through `to_lowercase` before matching) ────────
+
+pub mod date_part_keyword {
+    pub const YEAR: &str = "year";
+    pub const MONTH: &str = "month";
+    pub const WEEK: &str = "week";
+    pub const DAY: &str = "day";
+    pub const HOUR: &str = "hour";
+    pub const MINUTE: &str = "minute";
+    pub const SECOND: &str = "second";
+    pub const MILLISECOND: &str = "millisecond";
+    pub const MICROSECOND: &str = "microsecond";
+    pub const NANOSECOND: &str = "nanosecond";
+    pub const DOW: &str = "dow";
+    pub const DOY: &str = "doy";
+}
+
+pub mod date_trunc_keyword {
+    pub const YEAR: &str = "year";
+    pub const QUARTER: &str = "quarter";
+    pub const MONTH: &str = "month";
+    pub const WEEK: &str = "week";
+    pub const DAY: &str = "day";
+    pub const HOUR: &str = "hour";
+    pub const MINUTE: &str = "minute";
+    pub const SECOND: &str = "second";
+}
+
+// ── `logical_planner/mod.rs`: physical plan and Debezium pipeline ───────────────────────
+
+/// `table_name` values denoting the left/right join inputs in `FsMemExec` / the codec.
+pub mod mem_exec_join_side {
+    pub const LEFT: &str = "left";
+    pub const RIGHT: &str = "right";
+}
+
+/// Names for custom `ExecutionPlan::name()` implementations (consistent with DataFusion explain / debugging output).
+pub mod physical_plan_node_name {
+    pub const RW_LOCK_READER: &str = "rw_lock_reader";
+    pub const UNBOUNDED_READER: &str = "unbounded_reader";
+    pub const VEC_READER: &str = "vec_reader";
+    pub const MEM_EXEC: &str = "mem_exec";
+    pub const DEBEZIUM_UNROLLING_EXEC: &str = "debezium_unrolling_exec";
+    pub const TO_DEBEZIUM_EXEC: &str = "to_debezium_exec";
+}
+
+/// Registered name of the streaming `window(start, end)` scalar UDF.
+pub mod window_function_udf {
+    pub const NAME: &str = "window";
+}
+
+/// Field names of the struct returned by the `window()` UDF (consistent with `window_arrow_struct`).
+pub mod window_interval_field {
+    pub const START: &str = "start";
+    pub const END: &str = "end";
+}
+
+/// Single-letter values of the Debezium `op` column (unroll / pack paths).
+pub mod debezium_op_short {
+    pub const CREATE: &str = "c";
+    pub const READ: &str = "r";
+    pub const UPDATE: &str = "u";
+    pub const DELETE: &str = "d";
 }
 
 // ── 连接器类型短名（工厂注册等）──────────────────────────────────────────────
 
 pub mod connector_type {
     pub const KAFKA: &str = "kafka";
+    pub const KINESIS: &str = "kinesis";
+    pub const FILESYSTEM: &str = "filesystem";
+    pub const DELTA: &str = "delta";
+    pub const ICEBERG: &str = "iceberg";
+    pub const PULSAR: &str = "pulsar";
+    pub const NATS: &str = "nats";
     pub const REDIS: &str = "redis";
+    pub const MQTT: &str = "mqtt";
+    pub const WEBSOCKET: &str = "websocket";
+    pub const SSE: &str = "sse";
+    pub const NEXMARK: &str = "nexmark";
+    pub const BLACKHOLE: &str = "blackhole";
+    pub const MEMORY: &str = "memory";
+    pub const POSTGRES: &str = "postgres";
+}
+
+// ── Connection table `WITH type = 'source'|'sink'|'lookup'` (`SourceTable::from_options` / `deduce_role`) ──
+
+pub mod connection_table_role {
+    pub const SOURCE: &str = "source";
+    pub const SINK: &str = "sink";
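+    /// Same short name as the virtual `lookup` connector. NOTE(review): the original note says it is also listed in [`super::SUPPORTED_CONNECTOR_ADAPTERS`], but that list currently contains only `connector_type::KAFKA` — confirm.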
+    pub const LOOKUP: &str = "lookup";
+}
+
+/// Allowlist for [`crate::sql::schema::table_role::validate_adapter_availability`] (matches the short names used in SQL `connector = '…'`).
+pub const SUPPORTED_CONNECTOR_ADAPTERS: &[&str] = &[
+    connector_type::KAFKA,
+];
+
+// ── Kafka connector WITH option values (`wire_kafka_operator_config`) ────────────────
+
+pub mod kafka_with_value {
+    pub const SCAN_LATEST: &str = "latest";
+    pub const SCAN_EARLIEST: &str = "earliest";
+    pub const SCAN_GROUP_OFFSETS: &str = "group-offsets";
+    pub const SCAN_GROUP: &str = "group";
+    pub const ISOLATION_READ_COMMITTED: &str = "read_committed";
+    pub const ISOLATION_READ_UNCOMMITTED: &str = "read_uncommitted";
+    pub const SINK_COMMIT_EXACTLY_ONCE_HYPHEN: &str = "exactly-once";
+    pub const SINK_COMMIT_EXACTLY_ONCE_UNDERSCORE: &str = "exactly_once";
+    pub const SINK_COMMIT_AT_LEAST_ONCE_HYPHEN: &str = "at-least-once";
+    pub const SINK_COMMIT_AT_LEAST_ONCE_UNDERSCORE: &str = "at_least_once";
 }
diff --git a/src/sql/common/date.rs b/src/sql/common/date.rs
index 718d5f56..ec310326 100644
--- a/src/sql/common/date.rs
+++ b/src/sql/common/date.rs
@@ -13,6 +13,8 @@
 use serde::Serialize;
 use std::convert::TryFrom;
 
+use super::constants::{date_part_keyword, date_trunc_keyword};
+
 #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Hash, Serialize)]
 pub enum DatePart {
     Year,
@@ -33,19 +35,20 @@ impl TryFrom<&str> for DatePart {
     type Error = String;
 
     fn try_from(value: &str) -> Result<Self, Self::Error> {
-        match value.to_lowercase().as_str() {
-            "year" => Ok(DatePart::Year),
-            "month" => Ok(DatePart::Month),
-            "week" => Ok(DatePart::Week),
-            "day" => Ok(DatePart::Day),
-            "hour" => Ok(DatePart::Hour),
-            "minute" => Ok(DatePart::Minute),
-            "second" => Ok(DatePart::Second),
-            "millisecond" => Ok(DatePart::Millisecond),
-            "microsecond" => Ok(DatePart::Microsecond),
-            "nanosecond" => Ok(DatePart::Nanosecond),
-            "dow" => Ok(DatePart::DayOfWeek),
-            "doy" => Ok(DatePart::DayOfYear),
+        let v = value.to_lowercase();
+        match v.as_str() {
+            date_part_keyword::YEAR => Ok(DatePart::Year),
+            date_part_keyword::MONTH => Ok(DatePart::Month),
+            date_part_keyword::WEEK => Ok(DatePart::Week),
+            date_part_keyword::DAY => Ok(DatePart::Day),
+            date_part_keyword::HOUR => Ok(DatePart::Hour),
+            date_part_keyword::MINUTE => Ok(DatePart::Minute),
+            date_part_keyword::SECOND => Ok(DatePart::Second),
+            date_part_keyword::MILLISECOND => Ok(DatePart::Millisecond),
+            date_part_keyword::MICROSECOND => Ok(DatePart::Microsecond),
+            date_part_keyword::NANOSECOND => Ok(DatePart::Nanosecond),
+            date_part_keyword::DOW => Ok(DatePart::DayOfWeek),
+            date_part_keyword::DOY => Ok(DatePart::DayOfYear),
             _ => Err(format!("'{value}' is not a valid DatePart")),
         }
     }
@@ -67,15 +70,16 @@ impl TryFrom<&str> for DateTruncPrecision {
     type Error = String;
 
     fn try_from(value: &str) -> Result<Self, Self::Error> {
-        match value.to_lowercase().as_str() {
-            "year" => Ok(DateTruncPrecision::Year),
-            "quarter" => Ok(DateTruncPrecision::Quarter),
-            "month" => Ok(DateTruncPrecision::Month),
-            "week" => Ok(DateTruncPrecision::Week),
-            "day" => Ok(DateTruncPrecision::Day),
-            "hour" => Ok(DateTruncPrecision::Hour),
-            "minute" => Ok(DateTruncPrecision::Minute),
-            "second" => Ok(DateTruncPrecision::Second),
+        let v = value.to_lowercase();
+        match v.as_str() {
+            date_trunc_keyword::YEAR => Ok(DateTruncPrecision::Year),
+            date_trunc_keyword::QUARTER => Ok(DateTruncPrecision::Quarter),
+            date_trunc_keyword::MONTH => Ok(DateTruncPrecision::Month),
+            date_trunc_keyword::WEEK => Ok(DateTruncPrecision::Week),
+            date_trunc_keyword::DAY => Ok(DateTruncPrecision::Day),
+            date_trunc_keyword::HOUR => Ok(DateTruncPrecision::Hour),
+            date_trunc_keyword::MINUTE => Ok(DateTruncPrecision::Minute),
+            date_trunc_keyword::SECOND => Ok(DateTruncPrecision::Second),
             _ => Err(format!("'{value}' is not a valid DateTruncPrecision")),
         }
     }
diff --git a/src/sql/common/format_from_opts.rs b/src/sql/common/format_from_opts.rs
index 2469fb08..34b6a586 100644
--- a/src/sql/common/format_from_opts.rs
+++ b/src/sql/common/format_from_opts.rs
@@ -17,6 +17,7 @@ use std::str::FromStr;
 use datafusion::common::{Result as DFResult, plan_datafusion_err, plan_err};
 
 use super::connector_options::ConnectorOptions;
+use super::constants::{bad_data_value, connection_format_value, framing_method_value};
 use super::with_option_keys as opt;
 use super::formats::{
     AvroFormat, BadData, DecimalEncoding, Format, Framing, JsonCompression, JsonFormat,
@@ -65,18 +66,25 @@ impl Format {
         let Some(name) = opts.pull_opt_str(opt::FORMAT)? else {
             return Ok(None);
         };
-        match name.to_lowercase().as_str() {
-            "json" => Ok(Some(Format::Json(JsonFormat::from_opts(opts)?))),
-            "debezium_json" => {
+        let n = name.to_lowercase();
+        match n.as_str() {
+            connection_format_value::JSON => Ok(Some(Format::Json(JsonFormat::from_opts(opts)?))),
+            connection_format_value::DEBEZIUM_JSON => {
                 let mut j = JsonFormat::from_opts(opts)?;
                 j.debezium = true;
                 Ok(Some(Format::Json(j)))
             }
-            "avro" => Ok(Some(Format::Avro(AvroFormat::from_opts(opts)?))),
-            "parquet" => Ok(Some(Format::Parquet(ParquetFormat::from_opts(opts)?))),
-            "protobuf" => Ok(Some(Format::Protobuf(ProtobufFormat::from_opts(opts)?))),
-            "raw_string" => Ok(Some(Format::RawString(RawStringFormat {}))),
-            "raw_bytes" => Ok(Some(Format::RawBytes(RawBytesFormat {}))),
+            connection_format_value::AVRO => Ok(Some(Format::Avro(AvroFormat::from_opts(opts)?))),
+            connection_format_value::PARQUET => {
+                Ok(Some(Format::Parquet(ParquetFormat::from_opts(opts)?)))
+            }
+            connection_format_value::PROTOBUF => {
+                Ok(Some(Format::Protobuf(ProtobufFormat::from_opts(opts)?)))
+            }
+            connection_format_value::RAW_STRING => {
+                Ok(Some(Format::RawString(RawStringFormat {})))
+            }
+            connection_format_value::RAW_BYTES => Ok(Some(Format::RawBytes(RawBytesFormat {}))),
             _ => plan_err!("unknown format '{name}'"),
         }
     }
@@ -150,7 +158,7 @@ impl Framing {
         let method = opts.pull_opt_str(opt::FRAMING_METHOD)?;
         match method.as_deref() {
             None => Ok(None),
-            Some("newline") | Some("newline_delimited") => {
+            Some(framing_method_value::NEWLINE) | Some(framing_method_value::NEWLINE_DELIMITED) => {
                 let max = opts.pull_opt_u64(opt::FRAMING_MAX_LINE_LENGTH)?;
                 Ok(Some(Framing::Newline(NewlineDelimitedFraming {
                     max_line_length: max,
@@ -166,9 +174,10 @@ impl BadData {
         let Some(s) = opts.pull_opt_str(opt::BAD_DATA)? else {
             return Ok(BadData::Fail {});
         };
-        match s.to_lowercase().as_str() {
-            "fail" => Ok(BadData::Fail {}),
-            "drop" => Ok(BadData::Drop {}),
+        let v = s.to_lowercase();
+        match v.as_str() {
+            bad_data_value::FAIL => Ok(BadData::Fail {}),
+            bad_data_value::DROP => Ok(BadData::Drop {}),
             _ => plan_err!("invalid bad_data '{s}'"),
         }
     }
diff --git a/src/sql/common/formats.rs b/src/sql/common/formats.rs
index e37be020..b2885797 100644
--- a/src/sql/common/formats.rs
+++ b/src/sql/common/formats.rs
@@ -11,9 +11,15 @@
 // limitations under the License.
 
 use serde::{Deserialize, Serialize};
+use std::convert::TryFrom;
 use std::fmt::{Display, Formatter};
 use std::str::FromStr;
 
+use super::constants::{
+    connection_format_value, decimal_encoding_value, json_compression_value,
+    parquet_compression_value, timestamp_format_value,
+};
+
 #[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default, Hash, PartialOrd)]
 #[serde(rename_all = "snake_case")]
 pub enum TimestampFormat {
@@ -28,8 +34,12 @@ impl TryFrom<&str> for TimestampFormat {
 
     fn try_from(value: &str) -> Result<Self, Self::Error> {
         match value {
-            "RFC3339" | "rfc3339" => Ok(TimestampFormat::RFC3339),
-            "UnixMillis" | "unix_millis" => Ok(TimestampFormat::UnixMillis),
+            timestamp_format_value::RFC3339_UPPER | timestamp_format_value::RFC3339_SNAKE => {
+                Ok(TimestampFormat::RFC3339)
+            }
+            timestamp_format_value::UNIX_MILLIS_PASCAL | timestamp_format_value::UNIX_MILLIS_SNAKE => {
+                Ok(TimestampFormat::UnixMillis)
+            }
             _ => Err(()),
         }
     }
@@ -49,9 +59,9 @@ impl TryFrom<&str> for DecimalEncoding {
 
     fn try_from(s: &str) -> Result<Self, Self::Error> {
         match s {
-            "number" => Ok(Self::Number),
-            "string" => Ok(Self::String),
-            "bytes" => Ok(Self::Bytes),
+            decimal_encoding_value::NUMBER => Ok(Self::Number),
+            decimal_encoding_value::STRING => Ok(Self::String),
+            decimal_encoding_value::BYTES => Ok(Self::Bytes),
             _ => Err(()),
         }
     }
@@ -70,8 +80,8 @@ impl FromStr for JsonCompression {
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         match s {
-            "uncompressed" => Ok(JsonCompression::Uncompressed),
-            "gzip" => Ok(JsonCompression::Gzip),
+            json_compression_value::UNCOMPRESSED => Ok(JsonCompression::Uncompressed),
+            json_compression_value::GZIP => Ok(JsonCompression::Gzip),
             _ => Err(format!("invalid json compression '{s}'")),
         }
     }
@@ -151,12 +161,12 @@ impl FromStr for ParquetCompression {
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         match s {
-            "uncompressed" => Ok(ParquetCompression::Uncompressed),
-            "snappy" => Ok(ParquetCompression::Snappy),
-            "gzip" => Ok(ParquetCompression::Gzip),
-            "zstd" => Ok(ParquetCompression::Zstd),
-            "lz4" => Ok(ParquetCompression::Lz4),
-            "lz4_raw" => Ok(ParquetCompression::Lz4Raw),
+            parquet_compression_value::UNCOMPRESSED => Ok(ParquetCompression::Uncompressed),
+            parquet_compression_value::SNAPPY => Ok(ParquetCompression::Snappy),
+            parquet_compression_value::GZIP => Ok(ParquetCompression::Gzip),
+            parquet_compression_value::ZSTD => Ok(ParquetCompression::Zstd),
+            parquet_compression_value::LZ4 => Ok(ParquetCompression::Lz4),
+            parquet_compression_value::LZ4_RAW => Ok(ParquetCompression::Lz4Raw),
             _ => Err(format!("invalid parquet compression '{s}'")),
         }
     }
@@ -206,12 +216,12 @@ impl Display for Format {
 impl Format {
     pub fn name(&self) -> &'static str {
         match self {
-            Format::Json(_) => "json",
-            Format::Avro(_) => "avro",
-            Format::Protobuf(_) => "protobuf",
-            Format::Parquet(_) => "parquet",
-            Format::RawString(_) => "raw_string",
-            Format::RawBytes(_) => "raw_bytes",
+            Format::Json(_) => connection_format_value::JSON,
+            Format::Avro(_) => connection_format_value::AVRO,
+            Format::Protobuf(_) => connection_format_value::PROTOBUF,
+            Format::Parquet(_) => connection_format_value::PARQUET,
+            Format::RawString(_) => connection_format_value::RAW_STRING,
+            Format::RawBytes(_) => connection_format_value::RAW_BYTES,
         }
     }
 
diff --git a/src/sql/extensions/aggregate.rs b/src/sql/extensions/aggregate.rs
index e05129c8..645315af 100644
--- a/src/sql/extensions/aggregate.rs
+++ b/src/sql/extensions/aggregate.rs
@@ -38,7 +38,7 @@ use crate::sql::extensions::{
 };
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
 use crate::sql::logical_planner::planner::{NamedNode, Planner, SplitPlanOutput};
-use crate::sql::logical_planner::{window, FsPhysicalExtensionCodec};
+use crate::sql::physical::{window, FsPhysicalExtensionCodec};
 use crate::sql::types::{
     DFField, TIMESTAMP_FIELD, WindowBehavior, WindowType, fields_with_qualifiers,
     schema_from_df_fields, schema_from_df_fields_with_metadata,
diff --git a/src/sql/extensions/async_udf.rs b/src/sql/extensions/async_udf.rs
index 8add0625..ee2ce60a 100644
--- a/src/sql/extensions/async_udf.rs
+++ b/src/sql/extensions/async_udf.rs
@@ -31,11 +31,10 @@ use crate::sql::extensions::streaming_operator_blueprint::{CompiledTopologyNode,
 use crate::sql::logical_node::logical::{
     DylibUdfConfig, LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName,
 };
+use crate::sql::common::constants::sql_field;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::{DFField, fields_with_qualifiers, schema_from_df_fields};
 
-use super::ASYNC_RESULT_FIELD;
-
 pub(crate) const NODE_TYPE_NAME: &str = extension_node::ASYNC_FUNCTION_EXECUTION;
 
 /// Represents a logical node that executes an external asynchronous function (UDF)
@@ -91,7 +90,7 @@ impl AsyncFunctionExecutionNode {
 
         let raw_result_field = DFField::new(
             None,
-            ASYNC_RESULT_FIELD,
+            sql_field::ASYNC_RESULT,
             self.function_config.return_type.clone(),
             true,
         );
diff --git a/src/sql/extensions/debezium.rs b/src/sql/extensions/debezium.rs
index a1042194..2afda2b4 100644
--- a/src/sql/extensions/debezium.rs
+++ b/src/sql/extensions/debezium.rs
@@ -23,7 +23,7 @@ use crate::multifield_partial_ord;
 use crate::sql::common::constants::{cdc, extension_node};
 use crate::sql::common::{FsSchema, FsSchemaRef, UPDATING_META_FIELD};
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::logical_planner::updating_meta_field;
+use crate::sql::physical::updating_meta_field;
 use crate::sql::types::TIMESTAMP_FIELD;
 
 use super::{CompiledTopologyNode, StreamingOperatorBlueprint};
diff --git a/src/sql/extensions/is_retract.rs b/src/sql/extensions/is_retract.rs
index 4375b716..96493781 100644
--- a/src/sql/extensions/is_retract.rs
+++ b/src/sql/extensions/is_retract.rs
@@ -17,7 +17,7 @@ use datafusion::common::{DFSchemaRef, Result, TableReference};
 use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
 
 use crate::multifield_partial_ord;
-use crate::sql::logical_planner::updating_meta_field;
+use crate::sql::physical::updating_meta_field;
 use crate::sql::types::{DFField, TIMESTAMP_FIELD, fields_with_qualifiers, schema_from_df_fields};
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
diff --git a/src/sql/extensions/join.rs b/src/sql/extensions/join.rs
index 9789a216..829247ae 100644
--- a/src/sql/extensions/join.rs
+++ b/src/sql/extensions/join.rs
@@ -29,7 +29,7 @@ use crate::sql::logical_node::logical::{
     LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName,
 };
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
-use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::physical::FsPhysicalExtensionCodec;
 
 // -----------------------------------------------------------------------------
 // Constants
diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs
index 6a9e924b..1d271698 100644
--- a/src/sql/extensions/key_calculation.rs
+++ b/src/sql/extensions/key_calculation.rs
@@ -31,7 +31,7 @@ use crate::sql::common::constants::{extension_node, sql_field};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
-use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::physical::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::{fields_with_qualifiers, schema_from_df_fields_with_metadata};
 
diff --git a/src/sql/extensions/mod.rs b/src/sql/extensions/mod.rs
index eab2443b..6c0ca08a 100644
--- a/src/sql/extensions/mod.rs
+++ b/src/sql/extensions/mod.rs
@@ -12,9 +12,6 @@
 
 mod macros;
 
-pub(crate) mod constants;
-pub(crate) use constants::ASYNC_RESULT_FIELD;
-
 pub(crate) mod streaming_operator_blueprint;
 pub(crate) use streaming_operator_blueprint::{CompiledTopologyNode, StreamingOperatorBlueprint};
 
diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs
index a9a65c51..7025e254 100644
--- a/src/sql/extensions/remote_table.rs
+++ b/src/sql/extensions/remote_table.rs
@@ -26,7 +26,7 @@ use crate::sql::common::constants::extension_node;
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, StreamingOperatorBlueprint};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
-use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::physical::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 
 // -----------------------------------------------------------------------------
diff --git a/src/sql/extensions/updating_aggregate.rs b/src/sql/extensions/updating_aggregate.rs
index 9d12806f..a76d15d4 100644
--- a/src/sql/extensions/updating_aggregate.rs
+++ b/src/sql/extensions/updating_aggregate.rs
@@ -30,7 +30,7 @@ use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::extensions::{CompiledTopologyNode, IsRetractExtension, StreamingOperatorBlueprint};
 use crate::sql::functions::multi_hash;
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
-use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::physical::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 
 // -----------------------------------------------------------------------------
diff --git a/src/sql/extensions/windows_function.rs b/src/sql/extensions/windows_function.rs
index 09945378..ccb0ff89 100644
--- a/src/sql/extensions/windows_function.rs
+++ b/src/sql/extensions/windows_function.rs
@@ -24,7 +24,7 @@ use protocol::grpc::api::WindowFunctionOperator;
 use crate::sql::common::constants::{extension_node, proto_operator_name, runtime_operator_kind};
 use crate::sql::common::{FsSchema, FsSchemaRef};
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalEdgeType, LogicalNode, OperatorName};
-use crate::sql::logical_planner::FsPhysicalExtensionCodec;
+use crate::sql::physical::FsPhysicalExtensionCodec;
 use crate::sql::logical_planner::planner::{NamedNode, Planner};
 use crate::sql::types::TIMESTAMP_FIELD;
 
diff --git a/src/sql/logical_planner/mod.rs b/src/sql/logical_planner/mod.rs
index 54634651..f29cba18 100644
--- a/src/sql/logical_planner/mod.rs
+++ b/src/sql/logical_planner/mod.rs
@@ -10,1272 +10,5 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use datafusion::arrow::{
-    array::{
-        Array, AsArray, BooleanBuilder, PrimitiveArray, RecordBatch, StringArray, StructArray,
-        TimestampNanosecondArray, TimestampNanosecondBuilder, UInt32Builder,
-    },
-    buffer::NullBuffer,
-    compute::{concat, take},
-    datatypes::{DataType, Field, Fields, Schema, SchemaRef, TimeUnit},
-};
-use datafusion::common::{
-    DataFusionError, Result, ScalarValue, Statistics, UnnestOptions, not_impl_err, plan_err,
-};
-use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream};
-use datafusion::{
-    execution::TaskContext,
-    physical_plan::{
-        DisplayAs, ExecutionPlan, Partitioning, memory::MemoryStream,
-        stream::RecordBatchStreamAdapter,
-    },
-};
-use std::collections::HashMap;
-use std::{
-    any::Any,
-    mem,
-    pin::Pin,
-    sync::{Arc, OnceLock, RwLock},
-    task::{Context, Poll},
-};
-
-use crate::make_udf_function;
-use crate::sql::functions::MultiHashFunction;
-use crate::sql::analysis::UNNESTED_COL;
-use crate::sql::schema::utils::window_arrow_struct;
-use crate::sql::common::constants::cdc;
-use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
-use datafusion::arrow::datatypes::{TimestampNanosecondType, UInt64Type};
-use datafusion::catalog::memory::MemorySourceConfig;
-use datafusion::datasource::memory::DataSourceExec;
-use datafusion::logical_expr::{
-    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
-};
-use datafusion::physical_expr::EquivalenceProperties;
-use datafusion::physical_plan::PlanProperties;
-use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
-use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec};
-use datafusion_proto::physical_plan::PhysicalExtensionCodec;
-use futures::{
-    ready,
-    stream::{Stream, StreamExt},
-};
-use prost::Message;
-use protocol::grpc::api::{
-    DebeziumDecodeNode, DebeziumEncodeNode, FsExecNode, MemExecNode, UnnestExecNode,
-    fs_exec_node::Node,
-};
-use std::fmt::Debug;
-use tokio::sync::mpsc::UnboundedReceiver;
-use tokio_stream::wrappers::UnboundedReceiverStream;
-
 pub(crate) mod planner;
 pub mod optimizers;
-
-// ─────────────────── Updating Meta Helpers ───────────────────
-
-pub fn updating_meta_fields() -> Fields {
-    static FIELDS: OnceLock<Fields> = OnceLock::new();
-    FIELDS
-        .get_or_init(|| {
-            Fields::from(vec![
-                Field::new("is_retract", DataType::Boolean, true),
-                Field::new("id", DataType::FixedSizeBinary(16), true),
-            ])
-        })
-        .clone()
-}
-
-pub fn updating_meta_field() -> Arc<Field> {
-    static FIELD: OnceLock<Arc<Field>> = OnceLock::new();
-    FIELD
-        .get_or_init(|| {
-            Arc::new(Field::new(
-                UPDATING_META_FIELD,
-                DataType::Struct(updating_meta_fields()),
-                false,
-            ))
-        })
-        .clone()
-}
-
-// ─────────────────── WindowFunctionUdf ───────────────────
-
-#[derive(Debug)]
-pub struct WindowFunctionUdf {
-    signature: Signature,
-}
-
-impl Default for WindowFunctionUdf {
-    fn default() -> Self {
-        Self {
-            signature: Signature::new(
-                TypeSignature::Exact(vec![
-                    DataType::Timestamp(TimeUnit::Nanosecond, None),
-                    DataType::Timestamp(TimeUnit::Nanosecond, None),
-                ]),
-                Volatility::Immutable,
-            ),
-        }
-    }
-}
-
-impl ScalarUDFImpl for WindowFunctionUdf {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn name(&self) -> &str {
-        "window"
-    }
-
-    fn signature(&self) -> &Signature {
-        &self.signature
-    }
-
-    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
-        Ok(window_arrow_struct())
-    }
-
-    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
-        let columns = args.args;
-        if columns.len() != 2 {
-            return plan_err!(
-                "window function expected 2 arguments, got {}",
-                columns.len()
-            );
-        }
-        if columns[0].data_type() != DataType::Timestamp(TimeUnit::Nanosecond, None) {
-            return plan_err!(
-                "window function expected first argument to be a timestamp, got {:?}",
-                columns[0].data_type()
-            );
-        }
-        if columns[1].data_type() != DataType::Timestamp(TimeUnit::Nanosecond, None) {
-            return plan_err!(
-                "window function expected second argument to be a timestamp, got {:?}",
-                columns[1].data_type()
-            );
-        }
-        let fields = vec![
-            Arc::new(Field::new(
-                "start",
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            )),
-            Arc::new(Field::new(
-                "end",
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            )),
-        ]
-        .into();
-
-        match (&columns[0], &columns[1]) {
-            (ColumnarValue::Array(start), ColumnarValue::Array(end)) => {
-                Ok(ColumnarValue::Array(Arc::new(StructArray::new(
-                    fields,
-                    vec![start.clone(), end.clone()],
-                    None,
-                ))))
-            }
-            (ColumnarValue::Array(start), ColumnarValue::Scalar(end)) => {
-                let end = end.to_array_of_size(start.len())?;
-                Ok(ColumnarValue::Array(Arc::new(StructArray::new(
-                    fields,
-                    vec![start.clone(), end],
-                    None,
-                ))))
-            }
-            (ColumnarValue::Scalar(start), ColumnarValue::Array(end)) => {
-                let start = start.to_array_of_size(end.len())?;
-                Ok(ColumnarValue::Array(Arc::new(StructArray::new(
-                    fields,
-                    vec![start, end.clone()],
-                    None,
-                ))))
-            }
-            (ColumnarValue::Scalar(start), ColumnarValue::Scalar(end)) => {
-                Ok(ColumnarValue::Scalar(ScalarValue::Struct(
-                    StructArray::new(fields, vec![start.to_array()?, end.to_array()?], None).into(),
-                )))
-            }
-        }
-    }
-}
-
-make_udf_function!(WindowFunctionUdf, WINDOW_FUNCTION, window);
-
-// ─────────────────── Physical Extension Codec ───────────────────
-
-#[derive(Debug)]
-pub struct FsPhysicalExtensionCodec {
-    pub context: DecodingContext,
-}
-
-impl Default for FsPhysicalExtensionCodec {
-    fn default() -> Self {
-        Self {
-            context: DecodingContext::None,
-        }
-    }
-}
-
-#[derive(Debug)]
-pub enum DecodingContext {
-    None,
-    Planning,
-    SingleLockedBatch(Arc<RwLock<Option<RecordBatch>>>),
-    UnboundedBatchStream(Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>),
-    LockedBatchVec(Arc<RwLock<Vec<RecordBatch>>>),
-    LockedJoinPair {
-        left: Arc<RwLock<Option<RecordBatch>>>,
-        right: Arc<RwLock<Option<RecordBatch>>>,
-    },
-    LockedJoinStream {
-        left: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
-        right: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
-    },
-}
-
-fn make_properties(schema: SchemaRef) -> PlanProperties {
-    PlanProperties::new(
-        EquivalenceProperties::new(schema),
-        Partitioning::UnknownPartitioning(1),
-        EmissionType::Incremental,
-        Boundedness::Unbounded {
-            requires_infinite_memory: false,
-        },
-    )
-}
-
-impl PhysicalExtensionCodec for FsPhysicalExtensionCodec {
-    fn try_decode(
-        &self,
-        buf: &[u8],
-        inputs: &[Arc<dyn ExecutionPlan>],
-        _registry: &dyn datafusion::execution::FunctionRegistry,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let exec: FsExecNode = Message::decode(buf)
-            .map_err(|err| DataFusionError::Internal(format!("couldn't deserialize: {err}")))?;
-
-        match exec
-            .node
-            .ok_or_else(|| DataFusionError::Internal("exec node is empty".to_string()))?
-        {
-            Node::MemExec(mem_exec) => {
-                let schema: Schema = serde_json::from_str(&mem_exec.schema).map_err(|e| {
-                    DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}"))
-                })?;
-                let schema = Arc::new(schema);
-                match &self.context {
-                    DecodingContext::SingleLockedBatch(single_batch) => Ok(Arc::new(
-                        RwLockRecordBatchReader::new(schema, single_batch.clone()),
-                    )),
-                    DecodingContext::UnboundedBatchStream(unbounded_stream) => Ok(Arc::new(
-                        UnboundedRecordBatchReader::new(schema, unbounded_stream.clone()),
-                    )),
-                    DecodingContext::LockedBatchVec(locked_batches) => Ok(Arc::new(
-                        RecordBatchVecReader::new(schema, locked_batches.clone()),
-                    )),
-                    DecodingContext::Planning => {
-                        Ok(Arc::new(FsMemExec::new(mem_exec.table_name, schema)))
-                    }
-                    DecodingContext::None => Err(DataFusionError::Internal(
-                        "Need an internal context to decode".into(),
-                    )),
-                    DecodingContext::LockedJoinPair { left, right } => {
-                        match mem_exec.table_name.as_str() {
-                            "left" => {
-                                Ok(Arc::new(RwLockRecordBatchReader::new(schema, left.clone())))
-                            }
-                            "right" => Ok(Arc::new(RwLockRecordBatchReader::new(
-                                schema,
-                                right.clone(),
-                            ))),
-                            _ => Err(DataFusionError::Internal(format!(
-                                "unknown table name {}",
-                                mem_exec.table_name
-                            ))),
-                        }
-                    }
-                    DecodingContext::LockedJoinStream { left, right } => {
-                        match mem_exec.table_name.as_str() {
-                            "left" => Ok(Arc::new(UnboundedRecordBatchReader::new(
-                                schema,
-                                left.clone(),
-                            ))),
-                            "right" => Ok(Arc::new(UnboundedRecordBatchReader::new(
-                                schema,
-                                right.clone(),
-                            ))),
-                            _ => Err(DataFusionError::Internal(format!(
-                                "unknown table name {}",
-                                mem_exec.table_name
-                            ))),
-                        }
-                    }
-                }
-            }
-            Node::UnnestExec(unnest) => {
-                let schema: Schema = serde_json::from_str(&unnest.schema).map_err(|e| {
-                    DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}"))
-                })?;
-
-                let column = schema.index_of(UNNESTED_COL).map_err(|_| {
-                    DataFusionError::Internal(format!(
-                        "unnest node schema does not contain {UNNESTED_COL} col"
-                    ))
-                })?;
-
-                Ok(Arc::new(UnnestExec::new(
-                    inputs
-                        .first()
-                        .ok_or_else(|| {
-                            DataFusionError::Internal("no input for unnest node".to_string())
-                        })?
-                        .clone(),
-                    vec![ListUnnest {
-                        index_in_input_schema: column,
-                        depth: 1,
-                    }],
-                    vec![],
-                    Arc::new(schema),
-                    UnnestOptions::default(),
-                )))
-            }
-            Node::DebeziumDecode(debezium) => {
-                let schema = Arc::new(serde_json::from_str::<Schema>(&debezium.schema).map_err(
-                    |e| DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")),
-                )?);
-                Ok(Arc::new(DebeziumUnrollingExec {
-                    input: inputs
-                        .first()
-                        .ok_or_else(|| {
-                            DataFusionError::Internal("no input for debezium node".to_string())
-                        })?
-                        .clone(),
-                    schema: schema.clone(),
-                    properties: make_properties(schema),
-                    primary_keys: debezium
-                        .primary_keys
-                        .into_iter()
-                        .map(|c| c as usize)
-                        .collect(),
-                }))
-            }
-            Node::DebeziumEncode(debezium) => {
-                let schema = Arc::new(serde_json::from_str::<Schema>(&debezium.schema).map_err(
-                    |e| DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}")),
-                )?);
-                Ok(Arc::new(ToDebeziumExec {
-                    input: inputs
-                        .first()
-                        .ok_or_else(|| {
-                            DataFusionError::Internal("no input for debezium node".to_string())
-                        })?
-                        .clone(),
-                    schema: schema.clone(),
-                    properties: make_properties(schema),
-                }))
-            }
-        }
-    }
-
-    fn try_encode(&self, node: Arc<dyn ExecutionPlan>, buf: &mut Vec<u8>) -> Result<()> {
-        let mut proto = None;
-
-        let mem_table: Option<&FsMemExec> = node.as_any().downcast_ref();
-        if let Some(table) = mem_table {
-            proto = Some(FsExecNode {
-                node: Some(Node::MemExec(MemExecNode {
-                    table_name: table.table_name.clone(),
-                    schema: serde_json::to_string(&table.schema).unwrap(),
-                })),
-            });
-        }
-
-        let unnest: Option<&UnnestExec> = node.as_any().downcast_ref();
-        if let Some(unnest) = unnest {
-            proto = Some(FsExecNode {
-                node: Some(Node::UnnestExec(UnnestExecNode {
-                    schema: serde_json::to_string(&unnest.schema()).unwrap(),
-                })),
-            });
-        }
-
-        let debezium_decode: Option<&DebeziumUnrollingExec> = node.as_any().downcast_ref();
-        if let Some(decode) = debezium_decode {
-            proto = Some(FsExecNode {
-                node: Some(Node::DebeziumDecode(DebeziumDecodeNode {
-                    schema: serde_json::to_string(&decode.schema).unwrap(),
-                    primary_keys: (*decode.primary_keys).iter().map(|c| *c as u64).collect(),
-                })),
-            });
-        }
-
-        let debezium_encode: Option<&ToDebeziumExec> = node.as_any().downcast_ref();
-        if let Some(encode) = debezium_encode {
-            proto = Some(FsExecNode {
-                node: Some(Node::DebeziumEncode(DebeziumEncodeNode {
-                    schema: serde_json::to_string(&encode.schema).unwrap(),
-                })),
-            });
-        }
-
-        if let Some(node) = proto {
-            node.encode(buf).map_err(|err| {
-                DataFusionError::Internal(format!("couldn't serialize exec node {err}"))
-            })?;
-            Ok(())
-        } else {
-            Err(DataFusionError::Internal(format!(
-                "cannot serialize {node:?}"
-            )))
-        }
-    }
-}
-
-// ─────────────────── RwLockRecordBatchReader ───────────────────
-
-#[derive(Debug)]
-struct RwLockRecordBatchReader {
-    schema: SchemaRef,
-    locked_batch: Arc<RwLock<Option<RecordBatch>>>,
-    properties: PlanProperties,
-}
-
-impl RwLockRecordBatchReader {
-    fn new(schema: SchemaRef, locked_batch: Arc<RwLock<Option<RecordBatch>>>) -> Self {
-        Self {
-            schema: schema.clone(),
-            locked_batch,
-            properties: make_properties(schema),
-        }
-    }
-}
-
-impl DisplayAs for RwLockRecordBatchReader {
-    fn fmt_as(
-        &self,
-        _t: datafusion::physical_plan::DisplayFormatType,
-        f: &mut std::fmt::Formatter,
-    ) -> std::fmt::Result {
-        write!(f, "RW Lock RecordBatchReader")
-    }
-}
-
-impl ExecutionPlan for RwLockRecordBatchReader {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-
-    fn with_new_children(
-        self: Arc<Self>,
-        _children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Err(DataFusionError::Internal("not supported".into()))
-    }
-
-    fn execute(
-        &self,
-        _partition: usize,
-        _context: Arc<TaskContext>,
-    ) -> Result<SendableRecordBatchStream> {
-        let result = self
-            .locked_batch
-            .write()
-            .unwrap()
-            .take()
-            .expect("should have set a record batch before calling execute()");
-        Ok(Box::pin(MemoryStream::try_new(
-            vec![result],
-            self.schema.clone(),
-            None,
-        )?))
-    }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema))
-    }
-
-    fn reset(&self) -> Result<()> {
-        Ok(())
-    }
-
-    fn properties(&self) -> &PlanProperties {
-        &self.properties
-    }
-
-    fn name(&self) -> &str {
-        "rw_lock_reader"
-    }
-}
-
-// ─────────────────── UnboundedRecordBatchReader ───────────────────
-
-#[derive(Debug)]
-struct UnboundedRecordBatchReader {
-    schema: SchemaRef,
-    receiver: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
-    properties: PlanProperties,
-}
-
-impl UnboundedRecordBatchReader {
-    fn new(
-        schema: SchemaRef,
-        receiver: Arc<RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
-    ) -> Self {
-        Self {
-            schema: schema.clone(),
-            receiver,
-            properties: make_properties(schema),
-        }
-    }
-}
-
-impl DisplayAs for UnboundedRecordBatchReader {
-    fn fmt_as(
-        &self,
-        _t: datafusion::physical_plan::DisplayFormatType,
-        f: &mut std::fmt::Formatter,
-    ) -> std::fmt::Result {
-        write!(f, "unbounded record batch reader")
-    }
-}
-
-impl ExecutionPlan for UnboundedRecordBatchReader {
-    fn name(&self) -> &str {
-        "unbounded_reader"
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn properties(&self) -> &PlanProperties {
-        &self.properties
-    }
-
-    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-
-    fn with_new_children(
-        self: Arc<Self>,
-        _children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Err(DataFusionError::Internal("not supported".into()))
-    }
-
-    fn execute(
-        &self,
-        _partition: usize,
-        _context: Arc<TaskContext>,
-    ) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(RecordBatchStreamAdapter::new(
-            self.schema.clone(),
-            UnboundedReceiverStream::new(
-                self.receiver
-                    .write()
-                    .unwrap()
-                    .take()
-                    .expect("unbounded receiver should be present before calling exec"),
-            )
-            .map(Ok),
-        )))
-    }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema))
-    }
-
-    fn reset(&self) -> Result<()> {
-        Ok(())
-    }
-}
-
-// ─────────────────── RecordBatchVecReader ───────────────────
-
-#[derive(Debug)]
-struct RecordBatchVecReader {
-    schema: SchemaRef,
-    receiver: Arc<RwLock<Vec<RecordBatch>>>,
-    properties: PlanProperties,
-}
-
-impl RecordBatchVecReader {
-    fn new(schema: SchemaRef, receiver: Arc<RwLock<Vec<RecordBatch>>>) -> Self {
-        Self {
-            schema: schema.clone(),
-            receiver,
-            properties: make_properties(schema),
-        }
-    }
-}
-
-impl DisplayAs for RecordBatchVecReader {
-    fn fmt_as(
-        &self,
-        _t: datafusion::physical_plan::DisplayFormatType,
-        f: &mut std::fmt::Formatter,
-    ) -> std::fmt::Result {
-        write!(f, "record batch vec reader")
-    }
-}
-
-impl ExecutionPlan for RecordBatchVecReader {
-    fn name(&self) -> &str {
-        "vec_reader"
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn properties(&self) -> &PlanProperties {
-        &self.properties
-    }
-
-    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-
-    fn with_new_children(
-        self: Arc<Self>,
-        _children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Err(DataFusionError::Internal("not supported".into()))
-    }
-
-    fn execute(
-        &self,
-        partition: usize,
-        context: Arc<TaskContext>,
-    ) -> Result<SendableRecordBatchStream> {
-        let memory = MemorySourceConfig::try_new(
-            &[mem::take(self.receiver.write().unwrap().as_mut())],
-            self.schema.clone(),
-            None,
-        )?;
-
-        DataSourceExec::new(Arc::new(memory)).execute(partition, context)
-    }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema))
-    }
-
-    fn reset(&self) -> Result<()> {
-        Ok(())
-    }
-}
-
-// ─────────────────── FsMemExec ───────────────────
-
-#[derive(Debug, Clone)]
-pub struct FsMemExec {
-    pub table_name: String,
-    pub schema: SchemaRef,
-    properties: PlanProperties,
-}
-
-impl DisplayAs for FsMemExec {
-    fn fmt_as(
-        &self,
-        _t: datafusion::physical_plan::DisplayFormatType,
-        f: &mut std::fmt::Formatter,
-    ) -> std::fmt::Result {
-        write!(f, "EmptyPartitionStream: schema={}", self.schema)
-    }
-}
-
-impl FsMemExec {
-    pub fn new(table_name: String, schema: SchemaRef) -> Self {
-        Self {
-            schema: schema.clone(),
-            table_name,
-            properties: make_properties(schema),
-        }
-    }
-}
-
-impl ExecutionPlan for FsMemExec {
-    fn name(&self) -> &str {
-        "mem_exec"
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn properties(&self) -> &PlanProperties {
-        &self.properties
-    }
-
-    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-
-    fn with_new_children(
-        self: Arc<Self>,
-        _children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        not_impl_err!("with_new_children is not implemented for mem_exec; should not be called")
-    }
-
-    fn execute(
-        &self,
-        _partition: usize,
-        _context: Arc<TaskContext>,
-    ) -> Result<SendableRecordBatchStream> {
-        plan_err!(
-            "EmptyPartitionStream cannot be executed, this is only used for physical planning before serialization"
-        )
-    }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema))
-    }
-
-    fn reset(&self) -> Result<()> {
-        Ok(())
-    }
-}
-
-// ─────────────────── DebeziumUnrollingExec ───────────────────
-
-#[derive(Debug)]
-pub struct DebeziumUnrollingExec {
-    input: Arc<dyn ExecutionPlan>,
-    schema: SchemaRef,
-    properties: PlanProperties,
-    primary_keys: Vec<usize>,
-}
-
-impl DebeziumUnrollingExec {
-    pub fn try_new(input: Arc<dyn ExecutionPlan>, primary_keys: Vec<usize>) -> Result<Self> {
-        let input_schema = input.schema();
-        let before_index = input_schema.index_of(cdc::BEFORE)?;
-        let after_index = input_schema.index_of(cdc::AFTER)?;
-        let op_index = input_schema.index_of(cdc::OP)?;
-        let _timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
-        let before_type = input_schema.field(before_index).data_type();
-        let after_type = input_schema.field(after_index).data_type();
-        if before_type != after_type {
-            return Err(DataFusionError::Internal(
-                "before and after columns must have the same type".to_string(),
-            ));
-        }
-        let op_type = input_schema.field(op_index).data_type();
-        if *op_type != DataType::Utf8 {
-            return Err(DataFusionError::Internal(
-                "op column must be a string".to_string(),
-            ));
-        }
-        let DataType::Struct(fields) = before_type else {
-            return Err(DataFusionError::Internal(
-                "before and after columns must be structs".to_string(),
-            ));
-        };
-        let mut fields = fields.to_vec();
-        fields.push(updating_meta_field());
-        fields.push(Arc::new(Field::new(
-            TIMESTAMP_FIELD,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            false,
-        )));
-
-        let schema = Arc::new(Schema::new(fields));
-        Ok(Self {
-            input,
-            schema: schema.clone(),
-            properties: make_properties(schema),
-            primary_keys,
-        })
-    }
-}
-
-impl DisplayAs for DebeziumUnrollingExec {
-    fn fmt_as(
-        &self,
-        _t: datafusion::physical_plan::DisplayFormatType,
-        f: &mut std::fmt::Formatter,
-    ) -> std::fmt::Result {
-        write!(f, "DebeziumUnrollingExec")
-    }
-}
-
-impl ExecutionPlan for DebeziumUnrollingExec {
-    fn name(&self) -> &str {
-        "debezium_unrolling_exec"
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self as &dyn Any
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn properties(&self) -> &PlanProperties {
-        &self.properties
-    }
-
-    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
-        vec![&self.input]
-    }
-
-    fn with_new_children(
-        self: Arc<Self>,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        if children.len() != 1 {
-            return Err(DataFusionError::Internal(
-                "DebeziumUnrollingExec wrong number of children".to_string(),
-            ));
-        }
-        Ok(Arc::new(DebeziumUnrollingExec {
-            input: children[0].clone(),
-            schema: self.schema.clone(),
-            properties: self.properties.clone(),
-            primary_keys: self.primary_keys.clone(),
-        }))
-    }
-
-    fn execute(
-        &self,
-        partition: usize,
-        context: Arc<TaskContext>,
-    ) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(DebeziumUnrollingStream::try_new(
-            self.input.execute(partition, context)?,
-            self.schema.clone(),
-            self.primary_keys.clone(),
-        )?))
-    }
-
-    fn reset(&self) -> Result<()> {
-        self.input.reset()
-    }
-}
-
-struct DebeziumUnrollingStream {
-    input: SendableRecordBatchStream,
-    schema: SchemaRef,
-    before_index: usize,
-    after_index: usize,
-    op_index: usize,
-    timestamp_index: usize,
-    primary_keys: Vec<usize>,
-}
-
-impl DebeziumUnrollingStream {
-    fn try_new(
-        input: SendableRecordBatchStream,
-        schema: SchemaRef,
-        primary_keys: Vec<usize>,
-    ) -> Result<Self> {
-        if primary_keys.is_empty() {
-            return plan_err!("there must be at least one primary key for a Debezium source");
-        }
-        let input_schema = input.schema();
-        let before_index = input_schema.index_of(cdc::BEFORE)?;
-        let after_index = input_schema.index_of(cdc::AFTER)?;
-        let op_index = input_schema.index_of(cdc::OP)?;
-        let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
-
-        Ok(Self {
-            input,
-            schema,
-            before_index,
-            after_index,
-            op_index,
-            timestamp_index,
-            primary_keys,
-        })
-    }
-
-    fn unroll_batch(&self, batch: &RecordBatch) -> Result<RecordBatch> {
-        let before = batch.column(self.before_index).as_ref();
-        let after = batch.column(self.after_index).as_ref();
-        let op = batch
-            .column(self.op_index)
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .ok_or_else(|| DataFusionError::Internal("op column is not a string".to_string()))?;
-
-        let timestamp = batch
-            .column(self.timestamp_index)
-            .as_any()
-            .downcast_ref::<TimestampNanosecondArray>()
-            .ok_or_else(|| {
-                DataFusionError::Internal("timestamp column is not a timestamp".to_string())
-            })?;
-
-        let num_rows = batch.num_rows();
-        let combined_array = concat(&[before, after])?;
-        let mut take_indices = UInt32Builder::with_capacity(num_rows);
-        let mut is_retract_builder = BooleanBuilder::with_capacity(num_rows);
-
-        let mut timestamp_builder = TimestampNanosecondBuilder::with_capacity(2 * num_rows);
-        for i in 0..num_rows {
-            let op = op.value(i);
-            match op {
-                "c" | "r" => {
-                    take_indices.append_value((i + num_rows) as u32);
-                    is_retract_builder.append_value(false);
-                    timestamp_builder.append_value(timestamp.value(i));
-                }
-                "u" => {
-                    take_indices.append_value(i as u32);
-                    is_retract_builder.append_value(true);
-                    timestamp_builder.append_value(timestamp.value(i));
-                    take_indices.append_value((i + num_rows) as u32);
-                    is_retract_builder.append_value(false);
-                    timestamp_builder.append_value(timestamp.value(i));
-                }
-                "d" => {
-                    take_indices.append_value(i as u32);
-                    is_retract_builder.append_value(true);
-                    timestamp_builder.append_value(timestamp.value(i));
-                }
-                _ => {
-                    return Err(DataFusionError::Internal(format!(
-                        "unexpected op value: {op}"
-                    )));
-                }
-            }
-        }
-        let take_indices = take_indices.finish();
-        let unrolled_array = take(&combined_array, &take_indices, None)?;
-
-        let mut columns = unrolled_array.as_struct().columns().to_vec();
-
-        let hash = MultiHashFunction::default().invoke(
-            &self
-                .primary_keys
-                .iter()
-                .map(|i| ColumnarValue::Array(columns[*i].clone()))
-                .collect::<Vec<_>>(),
-        )?;
-
-        let ids = hash.into_array(num_rows)?;
-
-        let meta = StructArray::try_new(
-            updating_meta_fields(),
-            vec![Arc::new(is_retract_builder.finish()), ids],
-            None,
-        )?;
-        columns.push(Arc::new(meta));
-        columns.push(Arc::new(timestamp_builder.finish()));
-        Ok(RecordBatch::try_new(self.schema.clone(), columns)?)
-    }
-}
-
-impl Stream for DebeziumUnrollingStream {
-    type Item = Result<RecordBatch>;
-
-    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
-        let result =
-            ready!(self.input.poll_next_unpin(cx)).map(|result| self.unroll_batch(&result?));
-        Poll::Ready(result)
-    }
-}
-
-impl RecordBatchStream for DebeziumUnrollingStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-// ─────────────────── ToDebeziumExec ───────────────────
-
-#[derive(Debug)]
-pub struct ToDebeziumExec {
-    input: Arc<dyn ExecutionPlan>,
-    schema: SchemaRef,
-    properties: PlanProperties,
-}
-
-impl ToDebeziumExec {
-    pub fn try_new(input: Arc<dyn ExecutionPlan>) -> Result<Self> {
-        let input_schema = input.schema();
-        let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
-        let struct_fields: Vec<_> = input_schema
-            .fields()
-            .into_iter()
-            .enumerate()
-            .filter_map(|(index, field)| {
-                if field.name() == UPDATING_META_FIELD || index == timestamp_index {
-                    None
-                } else {
-                    Some(field.clone())
-                }
-            })
-            .collect();
-        let struct_data_type = DataType::Struct(struct_fields.into());
-        let before_field = Arc::new(Field::new("before", struct_data_type.clone(), true));
-        let after_field = Arc::new(Field::new("after", struct_data_type, true));
-        let op_field = Arc::new(Field::new("op", DataType::Utf8, false));
-        let timestamp_field = Arc::new(input_schema.field(timestamp_index).clone());
-
-        let output_schema = Arc::new(Schema::new(vec![
-            before_field,
-            after_field,
-            op_field,
-            timestamp_field,
-        ]));
-
-        Ok(Self {
-            input,
-            schema: output_schema.clone(),
-            properties: make_properties(output_schema),
-        })
-    }
-}
-
-impl DisplayAs for ToDebeziumExec {
-    fn fmt_as(
-        &self,
-        _t: datafusion::physical_plan::DisplayFormatType,
-        f: &mut std::fmt::Formatter,
-    ) -> std::fmt::Result {
-        write!(f, "ToDebeziumExec")
-    }
-}
-
-impl ExecutionPlan for ToDebeziumExec {
-    fn name(&self) -> &str {
-        "to_debezium_exec"
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self as &dyn Any
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn properties(&self) -> &PlanProperties {
-        &self.properties
-    }
-
-    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
-        vec![&self.input]
-    }
-
-    fn with_new_children(
-        self: Arc<Self>,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        if children.len() != 1 {
-            return Err(DataFusionError::Internal(
-                "ToDebeziumExec wrong number of children".to_string(),
-            ));
-        }
-        Ok(Arc::new(ToDebeziumExec::try_new(children[0].clone())?))
-    }
-
-    fn execute(
-        &self,
-        partition: usize,
-        context: Arc<TaskContext>,
-    ) -> Result<SendableRecordBatchStream> {
-        let updating_meta_index = self.input.schema().index_of(UPDATING_META_FIELD).ok();
-        let timestamp_index = self.input.schema().index_of(TIMESTAMP_FIELD)?;
-        let struct_projection = (0..self.input.schema().fields().len())
-            .filter(|index| {
-                updating_meta_index
-                    .map(|is_retract_index| *index != is_retract_index)
-                    .unwrap_or(true)
-                    && *index != timestamp_index
-            })
-            .collect();
-
-        Ok(Box::pin(ToDebeziumStream {
-            input: self.input.execute(partition, context)?,
-            schema: self.schema.clone(),
-            updating_meta_index,
-            timestamp_index,
-            struct_projection,
-        }))
-    }
-
-    fn reset(&self) -> Result<()> {
-        self.input.reset()
-    }
-}
-
-struct ToDebeziumStream {
-    input: SendableRecordBatchStream,
-    schema: SchemaRef,
-    updating_meta_index: Option<usize>,
-    timestamp_index: usize,
-    struct_projection: Vec<usize>,
-}
-
-impl ToDebeziumStream {
-    fn as_debezium_batch(&mut self, batch: &RecordBatch) -> Result<RecordBatch> {
-        let value_struct = batch.project(&self.struct_projection)?;
-        let timestamps = batch
-            .column(self.timestamp_index)
-            .as_primitive::<TimestampNanosecondType>();
-
-        let columns: Vec<Arc<dyn Array>> = if let Some(metadata_index) = self.updating_meta_index {
-            let metadata = batch
-                .column(metadata_index)
-                .as_any()
-                .downcast_ref::<StructArray>()
-                .ok_or_else(|| {
-                    DataFusionError::Internal("Invalid type for updating_meta column".to_string())
-                })?;
-
-            let is_retract = metadata.column(0).as_boolean();
-            let id = metadata.column(1).as_fixed_size_binary();
-
-            let mut id_map: HashMap<&[u8], (usize, usize, bool, bool, i64)> = HashMap::new();
-            let mut order = vec![];
-            for i in 0..batch.num_rows() {
-                let row_id = id.value(i);
-                let is_create = !is_retract.value(i);
-                let timestamp = timestamps.value(i);
-
-                id_map
-                    .entry(row_id)
-                    .and_modify(|e| {
-                        e.1 = i;
-                        e.3 = is_create;
-                        e.4 = e.4.max(timestamp);
-                    })
-                    .or_insert_with(|| {
-                        order.push(row_id);
-                        (i, i, is_create, is_create, timestamp)
-                    });
-            }
-
-            let mut before = Vec::with_capacity(id_map.len());
-            let mut after = Vec::with_capacity(id_map.len());
-            let mut op = Vec::with_capacity(id_map.len());
-            let mut ts = TimestampNanosecondBuilder::with_capacity(id_map.len());
-
-            for row_id in order {
-                let (first_idx, last_idx, first_is_create, last_is_create, timestamp) =
-                    id_map.get(row_id).unwrap();
-
-                if *first_is_create && *last_is_create {
-                    before.push(None);
-                    after.push(Some(*last_idx));
-                    op.push("c");
-                } else if !(*first_is_create) && !(*last_is_create) {
-                    before.push(Some(*first_idx));
-                    after.push(None);
-                    op.push("d");
-                } else if !(*first_is_create) && *last_is_create {
-                    before.push(Some(*first_idx));
-                    after.push(Some(*last_idx));
-                    op.push("u");
-                } else {
-                    continue;
-                }
-
-                ts.append_value(*timestamp);
-            }
-
-            let before_array = Self::create_output_array(&value_struct, &before)?;
-            let after_array = Self::create_output_array(&value_struct, &after)?;
-            let op_array = StringArray::from(op);
-
-            vec![
-                Arc::new(before_array),
-                Arc::new(after_array),
-                Arc::new(op_array),
-                Arc::new(ts.finish()),
-            ]
-        } else {
-            let after_array = StructArray::try_new(
-                value_struct.schema().fields().clone(),
-                value_struct.columns().to_vec(),
-                None,
-            )?;
-
-            let before_array = StructArray::new_null(
-                value_struct.schema().fields().clone(),
-                value_struct.num_rows(),
-            );
-
-            vec![
-                Arc::new(before_array),
-                Arc::new(after_array),
-                Arc::new(StringArray::from(vec!["c"; value_struct.num_rows()])),
-                batch.column(self.timestamp_index).clone(),
-            ]
-        };
-
-        Ok(RecordBatch::try_new(self.schema.clone(), columns)?)
-    }
-
-    fn create_output_array(
-        value_struct: &RecordBatch,
-        indices: &[Option<usize>],
-    ) -> Result<StructArray> {
-        let mut arrays: Vec<Arc<dyn Array>> = Vec::with_capacity(value_struct.num_columns());
-        for col in value_struct.columns() {
-            let new_array = take(
-                col.as_ref(),
-                &indices
-                    .iter()
-                    .map(|&idx| idx.map(|i| i as u64))
-                    .collect::<PrimitiveArray<UInt64Type>>(),
-                None,
-            )?;
-            arrays.push(new_array);
-        }
-
-        Ok(StructArray::try_new(
-            value_struct.schema().fields().clone(),
-            arrays,
-            Some(NullBuffer::from(
-                indices.iter().map(|&idx| idx.is_some()).collect::<Vec<_>>(),
-            )),
-        )?)
-    }
-}
-
-impl Stream for ToDebeziumStream {
-    type Item = Result<RecordBatch>;
-
-    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
-        let result =
-            ready!(self.input.poll_next_unpin(cx)).map(|result| self.as_debezium_batch(&result?));
-        Poll::Ready(result)
-    }
-}
-
-impl RecordBatchStream for ToDebeziumStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
diff --git a/src/sql/logical_planner/planner.rs b/src/sql/logical_planner/planner.rs
index be388ad4..b0a712c7 100644
--- a/src/sql/logical_planner/planner.rs
+++ b/src/sql/logical_planner/planner.rs
@@ -43,7 +43,7 @@ use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
 use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
 
 use crate::sql::logical_node::logical::{LogicalEdge, LogicalGraph, LogicalNode};
-use crate::sql::logical_planner::{
+use crate::sql::physical::{
     DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec,
 };
 use crate::sql::extensions::debezium::{PACK_NODE_NAME, UNROLL_NODE_NAME, UnrollDebeziumPayloadNode};
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index 04f6c897..5cb53705 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -18,6 +18,7 @@ pub mod functions;
 pub mod parse;
 pub mod logical_node;
 pub mod logical_planner;
+pub mod physical;
 pub mod analysis;
 pub(crate) mod extensions;
 pub mod types;
diff --git a/src/sql/physical/cdc/encode.rs b/src/sql/physical/cdc/encode.rs
new file mode 100644
index 00000000..b1a1cc2e
--- /dev/null
+++ b/src/sql/physical/cdc/encode.rs
@@ -0,0 +1,331 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Packs the internal retract stream back into Debezium `before` / `after` / `op` envelopes.
+
+use std::any::Any;
+use std::collections::HashMap;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use datafusion::arrow::array::AsArray;
+use datafusion::arrow::array::{
+    Array, BooleanArray, FixedSizeBinaryArray, PrimitiveArray, RecordBatch, StringArray,
+    StructArray, TimestampNanosecondBuilder,
+};
+use datafusion::arrow::buffer::NullBuffer;
+use datafusion::arrow::compute::take;
+use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, UInt64Type};
+use datafusion::arrow::datatypes::TimestampNanosecondType;
+use datafusion::common::{DataFusionError, Result};
+use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
+use datafusion::physical_plan::{DisplayAs, ExecutionPlan, PlanProperties};
+use futures::{ready, stream::Stream, StreamExt};
+
+use crate::sql::common::constants::{cdc, debezium_op_short, physical_plan_node_name};
+use crate::sql::common::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
+use crate::sql::physical::readers::make_stream_properties;
+
+#[derive(Debug)]
+pub struct ToDebeziumExec { // physical plan node that re-encodes its input as Debezium envelopes
+    input: Arc<dyn ExecutionPlan>, // child plan supplying the batches to encode
+    schema: SchemaRef, // output schema: `before`, `after`, `op`, timestamp
+    properties: PlanProperties, // produced by `make_stream_properties` from the output schema
+}
+
+impl ToDebeziumExec {
+    /// Wraps `input` and derives the Debezium envelope schema from its schema.
+    ///
+    /// `before` / `after` are nullable structs of every input column except the
+    /// updating-meta and timestamp columns; `op` is non-null Utf8; the timestamp
+    /// field is copied from the input. Fails if `TIMESTAMP_FIELD` is missing.
+    pub fn try_new(input: Arc<dyn ExecutionPlan>) -> Result<Self> {
+        let input_schema = input.schema();
+        let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
+        let payload_fields: Vec<_> = input_schema
+            .fields()
+            .iter()
+            .enumerate()
+            .filter(|(index, field)| {
+                field.name() != UPDATING_META_FIELD && *index != timestamp_index
+            })
+            .map(|(_, field)| field.clone())
+            .collect();
+        let payload_type = DataType::Struct(payload_fields.into());
+
+        let output_schema = Arc::new(Schema::new(vec![
+            Arc::new(Field::new(cdc::BEFORE, payload_type.clone(), true)),
+            Arc::new(Field::new(cdc::AFTER, payload_type, true)),
+            Arc::new(Field::new(cdc::OP, DataType::Utf8, false)),
+            Arc::new(input_schema.field(timestamp_index).clone()),
+        ]));
+
+        Ok(Self {
+            properties: make_stream_properties(output_schema.clone()),
+            input,
+            schema: output_schema,
+        })
+    }
+
+    /// Rebuilds the node from an already-decoded `schema` instead of deriving it from `input`.
+    pub(crate) fn from_decoded_parts(input: Arc<dyn ExecutionPlan>, schema: SchemaRef) -> Self {
+        let properties = make_stream_properties(schema.clone());
+        Self {
+            input,
+            schema,
+            properties,
+        }
+    }
+}
+
+impl DisplayAs for ToDebeziumExec {
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType, // ignored: every format prints the same text
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        write!(f, "ToDebeziumExec") // node name only; no parameters are rendered
+    }
+}
+
+impl ExecutionPlan for ToDebeziumExec {
+    fn name(&self) -> &str {
+        physical_plan_node_name::TO_DEBEZIUM_EXEC
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self as &dyn Any
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone() // the Debezium envelope schema, not the input schema
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        if children.len() != 1 { // this node has exactly one child
+            return Err(DataFusionError::Internal(
+                "ToDebeziumExec wrong number of children".to_string(),
+            ));
+        }
+        Ok(Arc::new(ToDebeziumExec::try_new(children[0].clone())?)) // schema is re-derived from the new child
+    }
+
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let updating_meta_index = self.input.schema().index_of(UPDATING_META_FIELD).ok(); // `None` when the input has no updating-meta column
+        let timestamp_index = self.input.schema().index_of(TIMESTAMP_FIELD)?; // the timestamp column is mandatory
+        let struct_projection = (0..self.input.schema().fields().len()) // payload columns: everything except meta and timestamp
+            .filter(|index| {
+                updating_meta_index
+                    .map(|is_retract_index| *index != is_retract_index)
+                    .unwrap_or(true)
+                    && *index != timestamp_index
+            })
+            .collect();
+
+        Ok(Box::pin(ToDebeziumStream {
+            input: self.input.execute(partition, context)?,
+            schema: self.schema.clone(),
+            updating_meta_index,
+            timestamp_index,
+            struct_projection,
+        }))
+    }
+
+    fn reset(&self) -> Result<()> {
+        self.input.reset() // delegates to the child plan
+    }
+}
+
+struct ToDebeziumStream { // stream adapter that converts each input batch with `as_debezium_batch`
+    input: SendableRecordBatchStream, // upstream batches to encode
+    schema: SchemaRef, // envelope schema used for every output batch
+    updating_meta_index: Option<usize>, // input column index of the updating-meta struct, if present
+    timestamp_index: usize, // input column index of the timestamp column
+    struct_projection: Vec<usize>, // input column indices that form the `before` / `after` payload
+}
+
+/// Groups one batch's changelog rows by row id; returns the first-seen id order plus per-id summaries.
+fn compact_changelog_by_id<'a>(
+    num_rows: usize,
+    is_retract: &'a BooleanArray,
+    id: &'a FixedSizeBinaryArray,
+    timestamps: &'a PrimitiveArray<TimestampNanosecondType>,
+) -> (
+    Vec<&'a [u8]>, // ids in order of first appearance
+    HashMap<&'a [u8], (usize, usize, bool, bool, i64)>, // id -> (first row, last row, first is create, last is create, max timestamp)
+) {
+    let mut id_map: HashMap<&[u8], (usize, usize, bool, bool, i64)> = HashMap::new();
+    let mut order = vec![];
+    for i in 0..num_rows {
+        let row_id = id.value(i);
+        let is_create = !is_retract.value(i); // a row that is not a retraction counts as a create
+        let timestamp = timestamps.value(i);
+
+        id_map
+            .entry(row_id)
+            .and_modify(|e| { // id already seen: update only the "last" half and the max timestamp
+                e.1 = i;
+                e.3 = is_create;
+                e.4 = e.4.max(timestamp);
+            })
+            .or_insert_with(|| { // first occurrence: first and last coincide
+                order.push(row_id);
+                (i, i, is_create, is_create, timestamp)
+            });
+    }
+    (order, id_map)
+}
+
+impl ToDebeziumStream {
+    /// Re-encodes one input batch as `before` / `after` / `op` / timestamp columns.
+    /// With an updating-meta column the rows are compacted per row id; without one,
+    /// every row is emitted as a create with a null `before`.
+    fn as_debezium_batch(&mut self, batch: &RecordBatch) -> Result<RecordBatch> {
+        let value_struct = batch.project(&self.struct_projection)?;
+        let timestamps = batch
+            .column(self.timestamp_index)
+            .as_primitive::<TimestampNanosecondType>();
+
+        let columns: Vec<Arc<dyn Array>> = if let Some(metadata_index) = self.updating_meta_index {
+            let metadata = batch
+                .column(metadata_index)
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .ok_or_else(|| {
+                    DataFusionError::Internal("Invalid type for updating_meta column".to_string())
+                })?;
+
+            let is_retract = metadata.column(0).as_boolean(); // meta child 0: retract flag
+            let id = metadata.column(1).as_fixed_size_binary(); // meta child 1: row id
+
+            let (order, id_map) =
+                compact_changelog_by_id(batch.num_rows(), is_retract, id, timestamps);
+
+            let mut before = Vec::with_capacity(id_map.len());
+            let mut after = Vec::with_capacity(id_map.len());
+            let mut op = Vec::with_capacity(id_map.len());
+            let mut ts = TimestampNanosecondBuilder::with_capacity(id_map.len());
+
+            for row_id in order {
+                let (first_idx, last_idx, first_is_create, last_is_create, timestamp) =
+                    id_map.get(row_id).unwrap(); // every id in `order` was inserted into `id_map`
+
+                if *first_is_create && *last_is_create {
+                    before.push(None);
+                    after.push(Some(*last_idx));
+                    op.push(debezium_op_short::CREATE);
+                } else if !(*first_is_create) && !(*last_is_create) {
+                    before.push(Some(*first_idx));
+                    after.push(None);
+                    op.push(debezium_op_short::DELETE);
+                } else if !(*first_is_create) && *last_is_create {
+                    before.push(Some(*first_idx));
+                    after.push(Some(*last_idx));
+                    op.push(debezium_op_short::UPDATE);
+                } else {
+                    continue; // first row is a create and last is a retract: emit nothing for this id
+                }
+
+                ts.append_value(*timestamp);
+            }
+
+            let before_array = Self::create_output_array(&value_struct, &before)?;
+            let after_array = Self::create_output_array(&value_struct, &after)?;
+            let op_array = StringArray::from(op);
+
+            vec![
+                Arc::new(before_array),
+                Arc::new(after_array),
+                Arc::new(op_array),
+                Arc::new(ts.finish()),
+            ]
+        } else {
+            let after_array = StructArray::try_new(
+                value_struct.schema().fields().clone(),
+                value_struct.columns().to_vec(),
+                None,
+            )?;
+
+            let before_array = StructArray::new_null(
+                value_struct.schema().fields().clone(),
+                value_struct.num_rows(),
+            );
+
+            vec![
+                Arc::new(before_array),
+                Arc::new(after_array),
+                Arc::new(StringArray::from(vec![
+                    debezium_op_short::CREATE;
+                    value_struct.num_rows()
+                ])),
+                batch.column(self.timestamp_index).clone(),
+            ]
+        };
+
+        Ok(RecordBatch::try_new(self.schema.clone(), columns)?)
+    }
+
+    /// Gathers `indices` from every column of `value_struct` into a struct array;
+    /// a `None` index produces a null struct row.
+    fn create_output_array(
+        value_struct: &RecordBatch,
+        indices: &[Option<usize>],
+    ) -> Result<StructArray> {
+        // The take indices are identical for every column, so build them once.
+        let take_indices: PrimitiveArray<UInt64Type> =
+            indices.iter().map(|&idx| idx.map(|i| i as u64)).collect();
+        let mut arrays: Vec<Arc<dyn Array>> = Vec::with_capacity(value_struct.num_columns());
+        for col in value_struct.columns() {
+            arrays.push(take(col.as_ref(), &take_indices, None)?);
+        }
+
+        Ok(StructArray::try_new(
+            value_struct.schema().fields().clone(),
+            arrays,
+            Some(NullBuffer::from(
+                indices.iter().map(|&idx| idx.is_some()).collect::<Vec<_>>(),
+            )),
+        )?)
+    }
+}
+
+impl Stream for ToDebeziumStream {
+    type Item = Result<RecordBatch>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+        let result = // `ready!` returns `Poll::Pending` early while the input has nothing to yield
+            ready!(self.input.poll_next_unpin(cx)).map(|result| self.as_debezium_batch(&result?));
+        Poll::Ready(result) // `None` from the input passes through; input errors are forwarded by `?`
+    }
+}
+
+impl RecordBatchStream for ToDebeziumStream {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone() // the envelope schema stored on the stream
+    }
+}
diff --git a/src/sql/extensions/constants.rs b/src/sql/physical/cdc/mod.rs
similarity index 78%
rename from src/sql/extensions/constants.rs
rename to src/sql/physical/cdc/mod.rs
index 245dacec..743ca966 100644
--- a/src/sql/extensions/constants.rs
+++ b/src/sql/physical/cdc/mod.rs
@@ -10,4 +10,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-pub use crate::sql::common::constants::sql_field::ASYNC_RESULT as ASYNC_RESULT_FIELD;
+//! Conversions between Debezium envelopes and the internal changelog row format.
+
+mod encode;
+mod unroll;
+
+pub use encode::ToDebeziumExec;
+pub use unroll::DebeziumUnrollingExec;
diff --git a/src/sql/physical/cdc/unroll.rs b/src/sql/physical/cdc/unroll.rs
new file mode 100644
index 00000000..345d2642
--- /dev/null
+++ b/src/sql/physical/cdc/unroll.rs
@@ -0,0 +1,300 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Unrolls Debezium envelopes into the internal row stream with retract semantics.
+
+use std::any::Any;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use datafusion::arrow::array::AsArray;
+use datafusion::arrow::array::{
+    Array, BooleanBuilder, RecordBatch, StringArray, StructArray, TimestampNanosecondArray,
+    TimestampNanosecondBuilder, UInt32Builder,
+};
+use datafusion::arrow::compute::{concat, take};
+use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
+use datafusion::common::{DataFusionError, Result, plan_err};
+use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
+use datafusion::logical_expr::ColumnarValue;
+use datafusion::physical_plan::{DisplayAs, ExecutionPlan, PlanProperties};
+use futures::{ready, stream::Stream, StreamExt};
+
+use crate::sql::common::constants::{cdc, debezium_op_short, physical_plan_node_name};
+use crate::sql::common::TIMESTAMP_FIELD;
+use crate::sql::functions::MultiHashFunction;
+use crate::sql::physical::meta::{updating_meta_field, updating_meta_fields};
+use crate::sql::physical::readers::make_stream_properties;
+
+#[derive(Debug)]
+pub struct DebeziumUnrollingExec { // physical plan node that unrolls Debezium envelopes into changelog rows
+    input: Arc<dyn ExecutionPlan>, // child plan producing `before` / `after` / `op` / timestamp batches
+    schema: SchemaRef, // output schema: payload fields, then updating-meta, then timestamp
+    properties: PlanProperties, // produced by `make_stream_properties` from the output schema
+    primary_keys: Vec<usize>, // column indices into the unrolled payload, hashed into the row id
+}
+
+impl DebeziumUnrollingExec {
+    pub fn try_new(input: Arc<dyn ExecutionPlan>, primary_keys: Vec<usize>) -> Result<Self> { // validates the envelope schema, then builds the node
+        let input_schema = input.schema();
+        let before_index = input_schema.index_of(cdc::BEFORE)?;
+        let after_index = input_schema.index_of(cdc::AFTER)?;
+        let op_index = input_schema.index_of(cdc::OP)?;
+        let _timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?; // looked up only to verify the column exists
+        let before_type = input_schema.field(before_index).data_type();
+        let after_type = input_schema.field(after_index).data_type();
+        if before_type != after_type {
+            return Err(DataFusionError::Internal(
+                "before and after columns must have the same type".to_string(),
+            ));
+        }
+        let op_type = input_schema.field(op_index).data_type();
+        if *op_type != DataType::Utf8 {
+            return Err(DataFusionError::Internal(
+                "op column must be a string".to_string(),
+            ));
+        }
+        let DataType::Struct(fields) = before_type else {
+            return Err(DataFusionError::Internal(
+                "before and after columns must be structs".to_string(),
+            ));
+        };
+        let mut fields = fields.to_vec(); // output = payload fields + updating-meta + timestamp
+        fields.push(updating_meta_field());
+        fields.push(Arc::new(Field::new(
+            TIMESTAMP_FIELD,
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            false,
+        )));
+
+        let schema = Arc::new(Schema::new(fields));
+        Ok(Self {
+            input,
+            schema: schema.clone(),
+            properties: make_stream_properties(schema),
+            primary_keys, // not validated here; emptiness is checked when the stream is created
+        })
+    }
+
+    /// Distributed deserialization path: skips the schema validation done by `try_new` (the proto already fixes the shape).
+    pub(crate) fn from_decoded_parts(
+        input: Arc<dyn ExecutionPlan>,
+        schema: SchemaRef,
+        primary_keys: Vec<usize>,
+    ) -> Self {
+        Self {
+            properties: make_stream_properties(schema.clone()),
+            input,
+            schema,
+            primary_keys,
+        }
+    }
+
+    pub fn primary_key_indices(&self) -> &[usize] { // borrowed view of the configured key column indices
+        &self.primary_keys
+    }
+}
+
+impl DisplayAs for DebeziumUnrollingExec {
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType, // ignored: every format prints the same text
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        write!(f, "DebeziumUnrollingExec") // node name only; primary keys are not rendered
+    }
+}
+
+impl ExecutionPlan for DebeziumUnrollingExec {
+    fn name(&self) -> &str {
+        physical_plan_node_name::DEBEZIUM_UNROLLING_EXEC
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self as &dyn Any
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone() // the unrolled (payload + meta + timestamp) schema
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        if children.len() != 1 { // this node has exactly one child
+            return Err(DataFusionError::Internal(
+                "DebeziumUnrollingExec wrong number of children".to_string(),
+            ));
+        }
+        Ok(Arc::new(DebeziumUnrollingExec { // schema, properties and keys are reused; the new child is not re-validated
+            input: children[0].clone(),
+            schema: self.schema.clone(),
+            properties: self.properties.clone(),
+            primary_keys: self.primary_keys.clone(),
+        }))
+    }
+
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        Ok(Box::pin(DebeziumUnrollingStream::try_new( // fails here if `primary_keys` is empty
+            self.input.execute(partition, context)?,
+            self.schema.clone(),
+            self.primary_keys.clone(),
+        )?))
+    }
+
+    fn reset(&self) -> Result<()> {
+        self.input.reset() // delegates to the child plan
+    }
+}
+
+struct DebeziumUnrollingStream { // stream adapter that converts each input batch with `unroll_batch`
+    input: SendableRecordBatchStream, // upstream envelope batches
+    schema: SchemaRef, // schema used for every unrolled output batch
+    before_index: usize, // input column index of `before`
+    after_index: usize, // input column index of `after`
+    op_index: usize, // input column index of `op`
+    timestamp_index: usize, // input column index of the timestamp column
+    primary_keys: Vec<usize>, // payload column indices hashed into the row id
+}
+
+impl DebeziumUnrollingStream {
+    /// Resolves the envelope column indices on `input`; `primary_keys` must not be empty.
+    fn try_new(
+        input: SendableRecordBatchStream,
+        schema: SchemaRef,
+        primary_keys: Vec<usize>,
+    ) -> Result<Self> {
+        if primary_keys.is_empty() {
+            return plan_err!("there must be at least one primary key for a Debezium source");
+        }
+        let input_schema = input.schema();
+        let before_index = input_schema.index_of(cdc::BEFORE)?;
+        let after_index = input_schema.index_of(cdc::AFTER)?;
+        let op_index = input_schema.index_of(cdc::OP)?;
+        let timestamp_index = input_schema.index_of(TIMESTAMP_FIELD)?;
+        Ok(Self {
+            input,
+            schema,
+            before_index,
+            after_index,
+            op_index,
+            timestamp_index,
+            primary_keys,
+        })
+    }
+
+    /// Expands one batch of envelopes into changelog rows: create/read ops emit
+    /// `after`, delete emits a retraction of `before`, and update emits the
+    /// retraction followed by the insertion. Row ids hash the primary-key columns.
+    ///
+    /// Errors on an unknown `op`, a mistyped column, or an out-of-range key index.
+    fn unroll_batch(&self, batch: &RecordBatch) -> Result<RecordBatch> {
+        let before = batch.column(self.before_index).as_ref();
+        let after = batch.column(self.after_index).as_ref();
+        let op = batch
+            .column(self.op_index)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .ok_or_else(|| DataFusionError::Internal("op column is not a string".to_string()))?;
+        let timestamp = batch
+            .column(self.timestamp_index)
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .ok_or_else(|| {
+                DataFusionError::Internal("timestamp column is not a timestamp".to_string())
+            })?;
+        // Combined layout: `before` rows at [0, n), `after` rows at [n, 2n), n = num_rows.
+        let num_rows = batch.num_rows();
+        let combined_array = concat(&[before, after])?;
+        let mut take_indices = UInt32Builder::with_capacity(num_rows);
+        let mut is_retract_builder = BooleanBuilder::with_capacity(num_rows);
+        let mut timestamp_builder = TimestampNanosecondBuilder::with_capacity(2 * num_rows);
+        for i in 0..num_rows {
+            let op = op.value(i);
+            let (retract, insert) = match op {
+                debezium_op_short::CREATE | debezium_op_short::READ => (false, true),
+                debezium_op_short::UPDATE => (true, true),
+                debezium_op_short::DELETE => (true, false),
+                _ => {
+                    return Err(DataFusionError::Internal(format!(
+                        "unexpected op value: {op}"
+                    )));
+                }
+            };
+            if retract {
+                take_indices.append_value(i as u32);
+                is_retract_builder.append_value(true);
+                timestamp_builder.append_value(timestamp.value(i));
+            }
+            if insert {
+                take_indices.append_value((i + num_rows) as u32);
+                is_retract_builder.append_value(false);
+                timestamp_builder.append_value(timestamp.value(i));
+            }
+        }
+        let take_indices = take_indices.finish();
+        let unrolled_array = take(&combined_array, &take_indices, None)?;
+        let mut columns = unrolled_array.as_struct().columns().to_vec();
+
+        // Report a bad key index as an error instead of panicking on `columns[i]`.
+        let key_columns = self
+            .primary_keys
+            .iter()
+            .map(|&i| {
+                columns.get(i).cloned().map(ColumnarValue::Array).ok_or_else(|| {
+                    DataFusionError::Internal(format!("primary key index {i} out of range"))
+                })
+            })
+            .collect::<Result<Vec<_>>>()?;
+        // One id per unrolled row (an update yields two), not per input row.
+        let hash = MultiHashFunction::default().invoke(&key_columns)?;
+        let ids = hash.into_array(take_indices.len())?;
+        let meta = StructArray::try_new(
+            updating_meta_fields(),
+            vec![Arc::new(is_retract_builder.finish()), ids],
+            None,
+        )?;
+        columns.push(Arc::new(meta));
+        columns.push(Arc::new(timestamp_builder.finish()));
+        Ok(RecordBatch::try_new(self.schema.clone(), columns)?)
+    }
+}
+
+impl Stream for DebeziumUnrollingStream {
+    type Item = Result<RecordBatch>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+        let result = // `ready!` returns `Poll::Pending` early while the input has nothing to yield
+            ready!(self.input.poll_next_unpin(cx)).map(|result| self.unroll_batch(&result?));
+        Poll::Ready(result) // `None` from the input passes through; input errors are forwarded by `?`
+    }
+}
+
+impl RecordBatchStream for DebeziumUnrollingStream {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone() // the unrolled schema stored on the stream
+    }
+}
diff --git a/src/sql/physical/codec.rs b/src/sql/physical/codec.rs
new file mode 100644
index 00000000..e90e4b3a
--- /dev/null
+++ b/src/sql/physical/codec.rs
@@ -0,0 +1,263 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Protobuf encoding/decoding of distributed physical plan nodes (`FsExecNode`).
+
+use std::sync::Arc;
+
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::datatypes::Schema;
+use datafusion::common::{DataFusionError, Result, UnnestOptions};
+use datafusion::execution::FunctionRegistry;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec};
+use datafusion_proto::physical_plan::PhysicalExtensionCodec;
+use prost::Message;
+use protocol::grpc::api::{
+    DebeziumDecodeNode, DebeziumEncodeNode, FsExecNode, MemExecNode, UnnestExecNode,
+    fs_exec_node::Node,
+};
+use tokio::sync::mpsc::UnboundedReceiver;
+
+use crate::sql::analysis::UNNESTED_COL;
+use crate::sql::common::constants::mem_exec_join_side;
+use crate::sql::physical::cdc::{DebeziumUnrollingExec, ToDebeziumExec};
+use crate::sql::physical::readers::{
+    FsMemExec, RecordBatchVecReader, RwLockRecordBatchReader, UnboundedRecordBatchReader,
+};
+
+#[derive(Debug)]
+pub struct FsPhysicalExtensionCodec { // codec for the custom plan nodes carried in `FsExecNode`
+    pub context: DecodingContext, // selects what a decoded mem-exec node reads from
+}
+
+impl Default for FsPhysicalExtensionCodec {
+    fn default() -> Self {
+        Self {
+            context: DecodingContext::None, // with this context, decoding a mem-exec node returns an error
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum DecodingContext { // data source handed to mem-exec nodes by `decode_mem_exec`
+    None, // no source: decoding a mem-exec node fails
+    Planning, // decode to an `FsMemExec` built from the table name and schema
+    SingleLockedBatch(Arc<std::sync::RwLock<Option<RecordBatch>>>), // decoded as `RwLockRecordBatchReader`
+    UnboundedBatchStream(Arc<std::sync::RwLock<Option<UnboundedReceiver<RecordBatch>>>>), // decoded as `UnboundedRecordBatchReader`
+    LockedBatchVec(Arc<std::sync::RwLock<Vec<RecordBatch>>>), // decoded as `RecordBatchVecReader`
+    LockedJoinPair { // two single-batch sources, chosen by the node's table name
+        left: Arc<std::sync::RwLock<Option<RecordBatch>>>,
+        right: Arc<std::sync::RwLock<Option<RecordBatch>>>,
+    },
+    LockedJoinStream { // two unbounded sources, chosen by the node's table name
+        left: Arc<std::sync::RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+        right: Arc<std::sync::RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    },
+}
+
+impl PhysicalExtensionCodec for FsPhysicalExtensionCodec {
+    /// Decodes an `FsExecNode` from `buf` and rebuilds the matching plan node.
+    ///
+    /// Mem-exec nodes are resolved against `self.context`; the other node kinds
+    /// take their child plan from `inputs`.
+    fn try_decode(
+        &self,
+        buf: &[u8],
+        inputs: &[Arc<dyn ExecutionPlan>],
+        _registry: &dyn FunctionRegistry,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        let exec: FsExecNode = Message::decode(buf)
+            .map_err(|err| DataFusionError::Internal(format!("couldn't deserialize: {err}")))?;
+
+        let node = exec
+            .node
+            .ok_or_else(|| DataFusionError::Internal("exec node is empty".to_string()))?;
+
+        match node {
+            Node::MemExec(mem) => self.decode_mem_exec(mem),
+            Node::UnnestExec(unnest) => decode_unnest_exec(unnest, inputs),
+            Node::DebeziumDecode(debezium) => decode_debezium_decode(debezium, inputs),
+            Node::DebeziumEncode(debezium) => decode_debezium_encode(debezium, inputs),
+        }
+    }
+
+    /// Serializes a supported plan node (`FsMemExec`, `UnnestExec`,
+    /// `DebeziumUnrollingExec`, `ToDebeziumExec`) as an `FsExecNode` into `buf`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `DataFusionError::Internal` for any other node type, or when the
+    /// schema or the protobuf message cannot be serialized.
+    fn try_encode(&self, node: Arc<dyn ExecutionPlan>, buf: &mut Vec<u8>) -> Result<()> {
+        // Report schema serialization failures as errors instead of panicking.
+        fn schema_json(schema: &Schema) -> Result<String> {
+            serde_json::to_string(schema).map_err(|err| {
+                DataFusionError::Internal(format!("couldn't serialize schema: {err}"))
+            })
+        }
+
+        let plan = node.as_any();
+        let proto = if let Some(table) = plan.downcast_ref::<FsMemExec>() {
+            Node::MemExec(MemExecNode {
+                table_name: table.table_name.clone(),
+                schema: schema_json(&table.schema)?,
+            })
+        } else if let Some(unnest) = plan.downcast_ref::<UnnestExec>() {
+            Node::UnnestExec(UnnestExecNode {
+                schema: schema_json(unnest.schema().as_ref())?,
+            })
+        } else if let Some(decode) = plan.downcast_ref::<DebeziumUnrollingExec>() {
+            Node::DebeziumDecode(DebeziumDecodeNode {
+                schema: schema_json(decode.schema().as_ref())?,
+                primary_keys: decode
+                    .primary_key_indices()
+                    .iter()
+                    .map(|c| *c as u64)
+                    .collect(),
+            })
+        } else if let Some(encode) = plan.downcast_ref::<ToDebeziumExec>() {
+            Node::DebeziumEncode(DebeziumEncodeNode {
+                schema: schema_json(encode.schema().as_ref())?,
+            })
+        } else {
+            return Err(DataFusionError::Internal(format!(
+                "cannot serialize {node:?}"
+            )));
+        };
+
+        FsExecNode { node: Some(proto) }.encode(buf).map_err(|err| {
+            DataFusionError::Internal(format!("couldn't serialize exec node {err}"))
+        })
+    }
+}
+
+impl FsPhysicalExtensionCodec {
+    fn decode_mem_exec(&self, mem_exec: MemExecNode) -> Result<Arc<dyn ExecutionPlan>> { // picks the reader implied by `self.context`
+        let schema: Schema = serde_json::from_str(&mem_exec.schema).map_err(|e| { // the schema travels as JSON text
+            DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}"))
+        })?;
+        let schema = Arc::new(schema);
+        match &self.context {
+            DecodingContext::SingleLockedBatch(single_batch) => Ok(Arc::new(
+                RwLockRecordBatchReader::new(schema, single_batch.clone()),
+            )),
+            DecodingContext::UnboundedBatchStream(unbounded_stream) => Ok(Arc::new(
+                UnboundedRecordBatchReader::new(schema, unbounded_stream.clone()),
+            )),
+            DecodingContext::LockedBatchVec(locked_batches) => Ok(Arc::new(
+                RecordBatchVecReader::new(schema, locked_batches.clone()),
+            )),
+            DecodingContext::Planning => Ok(Arc::new(FsMemExec::new(mem_exec.table_name, schema))),
+            DecodingContext::None => Err(DataFusionError::Internal(
+                "Need an internal context to decode".into(),
+            )),
+            DecodingContext::LockedJoinPair { left, right } => {
+                match mem_exec.table_name.as_str() { // the table name selects the join side
+                    mem_exec_join_side::LEFT => {
+                        Ok(Arc::new(RwLockRecordBatchReader::new(schema, left.clone())))
+                    }
+                    mem_exec_join_side::RIGHT => Ok(Arc::new(RwLockRecordBatchReader::new(
+                        schema,
+                        right.clone(),
+                    ))),
+                    _ => Err(DataFusionError::Internal(format!(
+                        "unknown table name {}",
+                        mem_exec.table_name
+                    ))),
+                }
+            }
+            DecodingContext::LockedJoinStream { left, right } => {
+                match mem_exec.table_name.as_str() { // same side selection, with unbounded readers
+                    mem_exec_join_side::LEFT => Ok(Arc::new(UnboundedRecordBatchReader::new(
+                        schema,
+                        left.clone(),
+                    ))),
+                    mem_exec_join_side::RIGHT => Ok(Arc::new(UnboundedRecordBatchReader::new(
+                        schema,
+                        right.clone(),
+                    ))),
+                    _ => Err(DataFusionError::Internal(format!(
+                        "unknown table name {}",
+                        mem_exec.table_name
+                    ))),
+                }
+            }
+        }
+    }
+}
+
+fn decode_unnest_exec( // rebuilds an `UnnestExec` over the first input from its serialized schema
+    unnest: UnnestExecNode,
+    inputs: &[Arc<dyn ExecutionPlan>],
+) -> Result<Arc<dyn ExecutionPlan>> {
+    let schema: Schema = serde_json::from_str(&unnest.schema).map_err(|e| { // the schema travels as JSON text
+        DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}"))
+    })?;
+
+    let column = schema.index_of(UNNESTED_COL).map_err(|_| { // the column to unnest is located by name
+        DataFusionError::Internal(format!(
+            "unnest node schema does not contain {UNNESTED_COL} col"
+        ))
+    })?;
+
+    Ok(Arc::new(UnnestExec::new(
+        inputs
+            .first()
+            .ok_or_else(|| DataFusionError::Internal("no input for unnest node".to_string()))?
+            .clone(),
+        vec![ListUnnest {
+            index_in_input_schema: column, // NOTE(review): index comes from the decoded schema; confirm it matches the input schema
+            depth: 1,
+        }],
+        vec![],
+        Arc::new(schema),
+        UnnestOptions::default(),
+    )))
+}
+
+fn decode_debezium_decode( // rebuilds a `DebeziumUnrollingExec` from its proto node and first input
+    debezium: DebeziumDecodeNode,
+    inputs: &[Arc<dyn ExecutionPlan>],
+) -> Result<Arc<dyn ExecutionPlan>> {
+    let schema = Arc::new(serde_json::from_str::<Schema>(&debezium.schema).map_err(|e| { // the schema travels as JSON text
+        DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}"))
+    })?);
+    let input = inputs
+        .first()
+        .ok_or_else(|| DataFusionError::Internal("no input for debezium node".to_string()))?
+        .clone();
+    let primary_keys = debezium
+        .primary_keys
+        .into_iter()
+        .map(|c| c as usize) // proto carries the indices as u64
+        .collect();
+    Ok(Arc::new(DebeziumUnrollingExec::from_decoded_parts(
+        input,
+        schema, // last use of `schema`: moved, not cloned
+        primary_keys,
+    )))
+}
+
+fn decode_debezium_encode( // rebuilds a `ToDebeziumExec` from its proto node and first input
+    debezium: DebeziumEncodeNode,
+    inputs: &[Arc<dyn ExecutionPlan>],
+) -> Result<Arc<dyn ExecutionPlan>> {
+    let schema = Arc::new(serde_json::from_str::<Schema>(&debezium.schema).map_err(|e| { // the schema travels as JSON text
+        DataFusionError::Internal(format!("invalid schema in exec codec: {e:?}"))
+    })?);
+    let input = inputs
+        .first()
+        .ok_or_else(|| DataFusionError::Internal("no input for debezium node".to_string()))?
+        .clone();
+    Ok(Arc::new(ToDebeziumExec::from_decoded_parts(input, schema))) // uses the decoded schema as is
+}
diff --git a/src/sql/physical/meta.rs b/src/sql/physical/meta.rs
new file mode 100644
index 00000000..5828593c
--- /dev/null
+++ b/src/sql/physical/meta.rs
@@ -0,0 +1,47 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Changelog metadata columns: the `is_retract` flag and the row `id` (FixedSizeBinary).
+
+use std::sync::{Arc, OnceLock};
+
+use datafusion::arrow::datatypes::{DataType, Field, Fields};
+
+use crate::sql::common::constants::updating_state_field;
+use crate::sql::common::UPDATING_META_FIELD;
+
+/// Returns the child fields of the updating-metadata struct.
+///
+/// The fields are built on first use and cached; every call returns a clone of the cached
+/// [`Fields`]: a nullable `Boolean` retract flag and a nullable `FixedSizeBinary(16)` id.
+pub fn updating_meta_fields() -> Fields {
+    static FIELDS: OnceLock<Fields> = OnceLock::new();
+    let cached = FIELDS.get_or_init(|| {
+        let is_retract = Field::new(updating_state_field::IS_RETRACT, DataType::Boolean, true);
+        let id = Field::new(updating_state_field::ID, DataType::FixedSizeBinary(16), true);
+        Fields::from(vec![is_retract, id])
+    });
+    cached.clone()
+}
+
+/// Returns the shared, non-nullable struct field that carries the updating metadata.
+///
+/// The field is named [`UPDATING_META_FIELD`] and wraps [`updating_meta_fields`]; it is
+/// built on first use and the same `Arc` is handed out on every call.
+pub fn updating_meta_field() -> Arc<Field> {
+    static FIELD: OnceLock<Arc<Field>> = OnceLock::new();
+    let cached = FIELD.get_or_init(|| {
+        let struct_type = DataType::Struct(updating_meta_fields());
+        Arc::new(Field::new(UPDATING_META_FIELD, struct_type, false))
+    });
+    Arc::clone(cached)
+}
diff --git a/src/sql/physical/mod.rs b/src/sql/physical/mod.rs
new file mode 100644
index 00000000..1ba09eb0
--- /dev/null
+++ b/src/sql/physical/mod.rs
@@ -0,0 +1,25 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Streaming physical extensions: meta columns, UDFs, memory/unbounded readers, CDC, proto codec.
+
+mod cdc;
+mod codec;
+mod meta;
+mod readers;
+mod udfs;
+
+pub use cdc::{DebeziumUnrollingExec, ToDebeziumExec};
+pub use codec::{DecodingContext, FsPhysicalExtensionCodec};
+pub use meta::{updating_meta_field, updating_meta_fields};
+pub use readers::FsMemExec;
+pub use udfs::{WindowFunctionUdf, window};
diff --git a/src/sql/physical/physical_planner.rs b/src/sql/physical/physical_planner.rs
deleted file mode 100644
index fc66b3b0..00000000
--- a/src/sql/physical/physical_planner.rs
+++ /dev/null
@@ -1,418 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::HashMap;
-use std::sync::Arc;
-use std::thread;
-use std::time::Duration;
-
-use datafusion::arrow::datatypes::IntervalMonthDayNanoType;
-use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor};
-use datafusion::common::{
-    DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, Spans, plan_err,
-};
-use datafusion::execution::context::SessionState;
-use datafusion::execution::runtime_env::RuntimeEnvBuilder;
-use datafusion::functions::datetime::date_bin;
-use datafusion::logical_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNode};
-use datafusion::physical_expr::PhysicalExpr;
-use datafusion::physical_plan::ExecutionPlan;
-use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner};
-use datafusion_proto::protobuf::{PhysicalExprNode, PhysicalPlanNode};
-use datafusion_proto::{
-    physical_plan::AsExecutionPlan,
-    protobuf::{AggregateMode, physical_plan_node::PhysicalPlanType},
-};
-use petgraph::graph::{DiGraph, NodeIndex};
-use prost::Message;
-use tokio::runtime::Builder;
-use tokio::sync::oneshot;
-
-use async_trait::async_trait;
-use datafusion_common::TableReference;
-use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
-use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
-
-use crate::sql::datastream::logical::{LogicalEdge, LogicalGraph, LogicalNode};
-use crate::sql::physical::{
-    DebeziumUnrollingExec, DecodingContext, FsMemExec, FsPhysicalExtensionCodec, ToDebeziumExec,
-};
-use crate::sql::logical_node::debezium::{
-    DEBEZIUM_UNROLLING_EXTENSION_NAME, DebeziumUnrollingExtension, TO_DEBEZIUM_EXTENSION_NAME,
-};
-use crate::sql::logical_node::key_calculation::KeyCalculationExtension;
-use crate::sql::logical_node::{NodeWithIncomingEdges, StreamExtension};
-use crate::sql::schema::utils::add_timestamp_field_arrow;
-use crate::sql::schema::StreamSchemaProvider;
-use crate::types::{FsSchema, FsSchemaRef};
-
-#[derive(Eq, Hash, PartialEq)]
-#[derive(Debug)]
-pub(crate) enum NamedNode {
-    Source(TableReference),
-    Watermark(TableReference),
-    RemoteTable(TableReference),
-    Sink(TableReference),
-}
-
-pub(crate) struct PlanToGraphVisitor<'a> {
-    graph: DiGraph<LogicalNode, LogicalEdge>,
-    output_schemas: HashMap<NodeIndex, FsSchemaRef>,
-    named_nodes: HashMap<NamedNode, NodeIndex>,
-    traversal: Vec<Vec<NodeIndex>>,
-    planner: Planner<'a>,
-}
-
-impl<'a> PlanToGraphVisitor<'a> {
-    pub fn new(schema_provider: &'a StreamSchemaProvider, session_state: &'a SessionState) -> Self {
-        Self {
-            graph: Default::default(),
-            output_schemas: Default::default(),
-            named_nodes: Default::default(),
-            traversal: vec![],
-            planner: Planner::new(schema_provider, session_state),
-        }
-    }
-}
-
-pub(crate) struct Planner<'a> {
-    schema_provider: &'a StreamSchemaProvider,
-    planner: DefaultPhysicalPlanner,
-    session_state: &'a SessionState,
-}
-
-impl<'a> Planner<'a> {
-    pub(crate) fn new(
-        schema_provider: &'a StreamSchemaProvider,
-        session_state: &'a SessionState,
-    ) -> Self {
-        let planner =
-            DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(FsExtensionPlanner {})]);
-        Self {
-            schema_provider,
-            planner,
-            session_state,
-        }
-    }
-
-    pub(crate) fn sync_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
-        let fut = self.planner.create_physical_plan(plan, self.session_state);
-        let (tx, mut rx) = oneshot::channel();
-        thread::scope(|s| {
-            let _handle = tokio::runtime::Handle::current();
-            let builder = thread::Builder::new();
-            let builder = if cfg!(debug_assertions) {
-                builder.stack_size(10_000_000)
-            } else {
-                builder
-            };
-            builder
-                .spawn_scoped(s, move || {
-                    let rt = Builder::new_current_thread().enable_all().build().unwrap();
-                    rt.block_on(async {
-                        let plan = fut.await;
-                        tx.send(plan).unwrap();
-                    });
-                })
-                .unwrap();
-        });
-
-        rx.try_recv().unwrap()
-    }
-
-    pub(crate) fn create_physical_expr(
-        &self,
-        expr: &Expr,
-        input_dfschema: &DFSchema,
-    ) -> Result<Arc<dyn PhysicalExpr>> {
-        self.planner
-            .create_physical_expr(expr, input_dfschema, self.session_state)
-    }
-
-    pub(crate) fn serialize_as_physical_expr(
-        &self,
-        expr: &Expr,
-        schema: &DFSchema,
-    ) -> Result<Vec<u8>> {
-        let physical = self.create_physical_expr(expr, schema)?;
-        let proto = serialize_physical_expr(&physical, &DefaultPhysicalExtensionCodec {})?;
-        Ok(proto.encode_to_vec())
-    }
-
-    pub(crate) fn split_physical_plan(
-        &self,
-        key_indices: Vec<usize>,
-        aggregate: &LogicalPlan,
-        add_timestamp_field: bool,
-    ) -> Result<SplitPlanOutput> {
-        let physical_plan = self.sync_plan(aggregate)?;
-        let codec = FsPhysicalExtensionCodec {
-            context: DecodingContext::Planning,
-        };
-        let mut physical_plan_node =
-            PhysicalPlanNode::try_from_physical_plan(physical_plan.clone(), &codec)?;
-        let PhysicalPlanType::Aggregate(mut final_aggregate_proto) = physical_plan_node
-            .physical_plan_type
-            .take()
-            .ok_or_else(|| DataFusionError::Plan("missing physical plan type".to_string()))?
-        else {
-            return plan_err!("unexpected physical plan type");
-        };
-        let AggregateMode::Final = final_aggregate_proto.mode() else {
-            return plan_err!("unexpected physical plan type");
-        };
-
-        let partial_aggregation_plan = *final_aggregate_proto
-            .input
-            .take()
-            .ok_or_else(|| DataFusionError::Plan("missing input".to_string()))?;
-
-        let partial_aggregation_exec_plan = partial_aggregation_plan.try_into_physical_plan(
-            self.schema_provider,
-            &RuntimeEnvBuilder::new().build().unwrap(),
-            &codec,
-        )?;
-
-        let partial_schema = partial_aggregation_exec_plan.schema();
-        let final_input_table_provider = FsMemExec::new("partial".into(), partial_schema.clone());
-
-        final_aggregate_proto.input = Some(Box::new(PhysicalPlanNode::try_from_physical_plan(
-            Arc::new(final_input_table_provider),
-            &codec,
-        )?));
-
-        let finish_plan = PhysicalPlanNode {
-            physical_plan_type: Some(PhysicalPlanType::Aggregate(final_aggregate_proto)),
-        };
-
-        let (partial_schema, timestamp_index) = if add_timestamp_field {
-            (
-                add_timestamp_field_arrow((*partial_schema).clone()),
-                partial_schema.fields().len(),
-            )
-        } else {
-            (partial_schema.clone(), partial_schema.fields().len() - 1)
-        };
-
-        let partial_schema = FsSchema::new_keyed(partial_schema, timestamp_index, key_indices);
-
-        Ok(SplitPlanOutput {
-            partial_aggregation_plan,
-            partial_schema,
-            finish_plan,
-        })
-    }
-
-    pub fn binning_function_proto(
-        &self,
-        width: Duration,
-        input_schema: DFSchemaRef,
-    ) -> Result<PhysicalExprNode> {
-        let date_bin = date_bin().call(vec![
-            Expr::Literal(
-                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(
-                    0,
-                    0,
-                    width.as_nanos() as i64,
-                ))),
-                None,
-            ),
-            Expr::Column(datafusion::common::Column {
-                relation: None,
-                name: "_timestamp".into(),
-                spans: Spans::new(),
-            }),
-        ]);
-
-        let binning_function = self.create_physical_expr(&date_bin, &input_schema)?;
-        serialize_physical_expr(&binning_function, &DefaultPhysicalExtensionCodec {})
-    }
-}
-
-struct FsExtensionPlanner {}
-
-#[async_trait]
-impl ExtensionPlanner for FsExtensionPlanner {
-    async fn plan_extension(
-        &self,
-        _planner: &dyn PhysicalPlanner,
-        node: &dyn UserDefinedLogicalNode,
-        _logical_inputs: &[&LogicalPlan],
-        physical_inputs: &[Arc<dyn ExecutionPlan>],
-        _session_state: &SessionState,
-    ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-        let schema = node.schema().as_ref().into();
-        if let Ok::<&dyn StreamExtension, _>(stream_extension) = node.try_into() {
-            if stream_extension.transparent() {
-                match node.name() {
-                    DEBEZIUM_UNROLLING_EXTENSION_NAME => {
-                        let node = node
-                            .as_any()
-                            .downcast_ref::<DebeziumUnrollingExtension>()
-                            .unwrap();
-                        let input = physical_inputs[0].clone();
-                        return Ok(Some(Arc::new(DebeziumUnrollingExec::try_new(
-                            input,
-                            node.primary_keys.clone(),
-                        )?)));
-                    }
-                    TO_DEBEZIUM_EXTENSION_NAME => {
-                        let input = physical_inputs[0].clone();
-                        return Ok(Some(Arc::new(ToDebeziumExec::try_new(input)?)));
-                    }
-                    _ => return Ok(None),
-                }
-            }
-        };
-        let name =
-            if let Some(key_extension) = node.as_any().downcast_ref::<KeyCalculationExtension>() {
-                key_extension.name.clone()
-            } else {
-                None
-            };
-        Ok(Some(Arc::new(FsMemExec::new(
-            name.unwrap_or("memory".to_string()),
-            Arc::new(schema),
-        ))))
-    }
-}
-
-impl PlanToGraphVisitor<'_> {
-    fn add_index_to_traversal(&mut self, index: NodeIndex) {
-        if let Some(last) = self.traversal.last_mut() {
-            last.push(index);
-        }
-    }
-
-    pub(crate) fn add_plan(&mut self, plan: LogicalPlan) -> Result<()> {
-        self.traversal.clear();
-        plan.visit(self)?;
-        Ok(())
-    }
-
-    pub fn into_graph(self) -> LogicalGraph {
-        self.graph
-    }
-
-    pub fn build_extension(
-        &mut self,
-        input_nodes: Vec<NodeIndex>,
-        extension: &dyn StreamExtension,
-    ) -> Result<()> {
-        if let Some(node_name) = extension.node_name() {
-            if self.named_nodes.contains_key(&node_name) {
-                return plan_err!(
-                    "extension {:?} has already been planned, shouldn't try again.",
-                    node_name
-                );
-            }
-        }
-
-        let input_schemas = input_nodes
-            .iter()
-            .map(|index| {
-                Ok(self
-                    .output_schemas
-                    .get(index)
-                    .ok_or_else(|| DataFusionError::Plan("missing input node".to_string()))?
-                    .clone())
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        let NodeWithIncomingEdges { node, edges } = extension
-            .plan_node(&self.planner, self.graph.node_count(), input_schemas)
-            .map_err(|e| e.context(format!("planning operator {extension:?}")))?;
-
-        let node_index = self.graph.add_node(node);
-        self.add_index_to_traversal(node_index);
-
-        for (source, edge) in input_nodes.into_iter().zip(edges.into_iter()) {
-            self.graph.add_edge(source, node_index, edge);
-        }
-
-        self.output_schemas
-            .insert(node_index, extension.output_schema().into());
-
-        if let Some(node_name) = extension.node_name() {
-            self.named_nodes.insert(node_name, node_index);
-        }
-        Ok(())
-    }
-}
-
-impl TreeNodeVisitor<'_> for PlanToGraphVisitor<'_> {
-    type Node = LogicalPlan;
-
-    fn f_down(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
-        let LogicalPlan::Extension(Extension { node }) = node else {
-            return Ok(TreeNodeRecursion::Continue);
-        };
-
-        let stream_extension: &dyn StreamExtension = node
-            .try_into()
-            .map_err(|e: DataFusionError| e.context("converting extension"))?;
-        if stream_extension.transparent() {
-            return Ok(TreeNodeRecursion::Continue);
-        }
-
-        if let Some(name) = stream_extension.node_name() {
-            if let Some(node_index) = self.named_nodes.get(&name) {
-                self.add_index_to_traversal(*node_index);
-                return Ok(TreeNodeRecursion::Jump);
-            }
-        }
-
-        if !node.inputs().is_empty() {
-            self.traversal.push(vec![]);
-        }
-
-        Ok(TreeNodeRecursion::Continue)
-    }
-
-    fn f_up(&mut self, node: &Self::Node) -> Result<TreeNodeRecursion> {
-        let LogicalPlan::Extension(Extension { node }) = node else {
-            return Ok(TreeNodeRecursion::Continue);
-        };
-
-        let stream_extension: &dyn StreamExtension = node
-            .try_into()
-            .map_err(|e: DataFusionError| e.context("planning extension"))?;
-
-        if stream_extension.transparent() {
-            return Ok(TreeNodeRecursion::Continue);
-        }
-
-        if let Some(name) = stream_extension.node_name() {
-            if self.named_nodes.contains_key(&name) {
-                return Ok(TreeNodeRecursion::Continue);
-            }
-        }
-
-        let input_nodes = if !node.inputs().is_empty() {
-            self.traversal.pop().unwrap_or_default()
-        } else {
-            vec![]
-        };
-        let stream_extension: &dyn StreamExtension = node
-            .try_into()
-            .map_err(|e: DataFusionError| e.context("converting extension"))?;
-        self.build_extension(input_nodes, stream_extension)?;
-
-        Ok(TreeNodeRecursion::Continue)
-    }
-}
-
-pub(crate) struct SplitPlanOutput {
-    pub(crate) partial_aggregation_plan: PhysicalPlanNode,
-    pub(crate) partial_schema: FsSchema,
-    pub(crate) finish_plan: PhysicalPlanNode,
-}
diff --git a/src/sql/physical/readers.rs b/src/sql/physical/readers.rs
new file mode 100644
index 00000000..67a250fd
--- /dev/null
+++ b/src/sql/physical/readers.rs
@@ -0,0 +1,421 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Unbounded / lock-guarded `RecordBatch` sources and the planning-time placeholder `FsMemExec`.
+
+use std::any::Any;
+use std::mem;
+use std::sync::Arc;
+
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::catalog::memory::MemorySourceConfig;
+use datafusion::common::{DataFusionError, Result, Statistics, not_impl_err, plan_err};
+use datafusion::datasource::memory::DataSourceExec;
+use datafusion::execution::{SendableRecordBatchStream, TaskContext};
+use datafusion::physical_expr::EquivalenceProperties;
+use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
+use datafusion::physical_plan::memory::MemoryStream;
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
+use datafusion::physical_plan::{
+    DisplayAs, ExecutionPlan, Partitioning, PlanProperties,
+};
+use futures::StreamExt;
+use tokio::sync::mpsc::UnboundedReceiver;
+use tokio_stream::wrappers::UnboundedReceiverStream;
+
+use crate::sql::common::constants::physical_plan_node_name;
+
+/// Plan properties shared by every reader in this module.
+///
+/// Describes one partition with unknown partitioning, incremental emission, and unbounded
+/// input that does not require infinite memory.
+pub(crate) fn make_stream_properties(schema: SchemaRef) -> PlanProperties {
+    PlanProperties::new(
+        EquivalenceProperties::new(schema),
+        Partitioning::UnknownPartitioning(1),
+        EmissionType::Incremental,
+        Boundedness::Unbounded {
+            requires_infinite_memory: false,
+        },
+    )
+}
+
+/// Error returned when the lock guarding a reader's input has been poisoned.
+fn lock_poisoned(reader: &str) -> DataFusionError {
+    DataFusionError::Internal(format!("{reader} lock is poisoned"))
+}
+
+/// Leaf plan that yields the single `RecordBatch` parked in a shared slot.
+///
+/// `execute` takes the batch out of the slot, so a batch must be stored before each call.
+#[derive(Debug)]
+pub(crate) struct RwLockRecordBatchReader {
+    schema: SchemaRef,
+    locked_batch: Arc<std::sync::RwLock<Option<RecordBatch>>>,
+    properties: PlanProperties,
+}
+
+impl RwLockRecordBatchReader {
+    /// Creates a reader over `locked_batch` that reports `schema` as its output schema.
+    pub(crate) fn new(
+        schema: SchemaRef,
+        locked_batch: Arc<std::sync::RwLock<Option<RecordBatch>>>,
+    ) -> Self {
+        Self {
+            schema: schema.clone(),
+            locked_batch,
+            properties: make_stream_properties(schema),
+        }
+    }
+}
+
+impl DisplayAs for RwLockRecordBatchReader {
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        write!(f, "RW Lock RecordBatchReader")
+    }
+}
+
+impl ExecutionPlan for RwLockRecordBatchReader {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        Err(DataFusionError::Internal("not supported".into()))
+    }
+
+    /// Streams the parked batch, leaving the slot empty.
+    ///
+    /// # Errors
+    ///
+    /// Returns an internal error when the lock is poisoned or no batch has been stored.
+    fn execute(
+        &self,
+        _partition: usize,
+        _context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let batch = self
+            .locked_batch
+            .write()
+            .map_err(|_| lock_poisoned("RW lock record batch reader"))?
+            .take()
+            .ok_or_else(|| {
+                DataFusionError::Internal(
+                    "should have set a record batch before calling execute()".to_string(),
+                )
+            })?;
+        let stream = MemoryStream::try_new(vec![batch], self.schema.clone(), None)?;
+        Ok(Box::pin(stream))
+    }
+
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema))
+    }
+
+    fn reset(&self) -> Result<()> {
+        Ok(())
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn name(&self) -> &str {
+        physical_plan_node_name::RW_LOCK_READER
+    }
+}
+
+/// Leaf plan that streams batches from an unbounded channel receiver.
+///
+/// `execute` takes the receiver out of its slot, so it can only be started once per receiver.
+#[derive(Debug)]
+pub(crate) struct UnboundedRecordBatchReader {
+    schema: SchemaRef,
+    receiver: Arc<std::sync::RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    properties: PlanProperties,
+}
+
+impl UnboundedRecordBatchReader {
+    /// Creates a reader over `receiver` that reports `schema` as its output schema.
+    pub(crate) fn new(
+        schema: SchemaRef,
+        receiver: Arc<std::sync::RwLock<Option<UnboundedReceiver<RecordBatch>>>>,
+    ) -> Self {
+        Self {
+            schema: schema.clone(),
+            receiver,
+            properties: make_stream_properties(schema),
+        }
+    }
+}
+
+impl DisplayAs for UnboundedRecordBatchReader {
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        write!(f, "unbounded record batch reader")
+    }
+}
+
+impl ExecutionPlan for UnboundedRecordBatchReader {
+    fn name(&self) -> &str {
+        physical_plan_node_name::UNBOUNDED_READER
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        Err(DataFusionError::Internal("not supported".into()))
+    }
+
+    /// Streams every batch received on the channel, taking ownership of the receiver.
+    ///
+    /// # Errors
+    ///
+    /// Returns an internal error when the lock is poisoned or the receiver is missing.
+    fn execute(
+        &self,
+        _partition: usize,
+        _context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let receiver = self
+            .receiver
+            .write()
+            .map_err(|_| lock_poisoned("unbounded record batch reader"))?
+            .take()
+            .ok_or_else(|| {
+                DataFusionError::Internal(
+                    "unbounded receiver should be present before calling exec".to_string(),
+                )
+            })?;
+        Ok(Box::pin(RecordBatchStreamAdapter::new(
+            self.schema.clone(),
+            UnboundedReceiverStream::new(receiver).map(Ok),
+        )))
+    }
+
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema))
+    }
+
+    fn reset(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Leaf plan that replays the batches accumulated in a shared vector.
+///
+/// `execute` drains the vector, leaving it empty for the next round of batches.
+#[derive(Debug)]
+pub(crate) struct RecordBatchVecReader {
+    schema: SchemaRef,
+    receiver: Arc<std::sync::RwLock<Vec<RecordBatch>>>,
+    properties: PlanProperties,
+}
+
+impl RecordBatchVecReader {
+    /// Creates a reader over `receiver` that reports `schema` as its output schema.
+    pub(crate) fn new(
+        schema: SchemaRef,
+        receiver: Arc<std::sync::RwLock<Vec<RecordBatch>>>,
+    ) -> Self {
+        Self {
+            schema: schema.clone(),
+            receiver,
+            properties: make_stream_properties(schema),
+        }
+    }
+}
+
+impl DisplayAs for RecordBatchVecReader {
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        write!(f, "record batch vec reader")
+    }
+}
+
+impl ExecutionPlan for RecordBatchVecReader {
+    fn name(&self) -> &str {
+        physical_plan_node_name::VEC_READER
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        Err(DataFusionError::Internal("not supported".into()))
+    }
+
+    /// Drains the accumulated batches and executes them as a single in-memory partition.
+    ///
+    /// # Errors
+    ///
+    /// Returns an internal error when the lock is poisoned, and propagates any error from
+    /// building or executing the in-memory source.
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let batches = {
+            let mut pending = self
+                .receiver
+                .write()
+                .map_err(|_| lock_poisoned("record batch vec reader"))?;
+            mem::take(&mut *pending)
+        };
+        let memory = MemorySourceConfig::try_new(&[batches], self.schema.clone(), None)?;
+
+        DataSourceExec::new(Arc::new(memory)).execute(partition, context)
+    }
+
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema))
+    }
+
+    fn reset(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Placeholder leaf used only during physical planning, before the plan is serialized.
+///
+/// It carries a table name and a schema but cannot be executed.
+#[derive(Debug, Clone)]
+pub struct FsMemExec {
+    pub table_name: String,
+    pub schema: SchemaRef,
+    properties: PlanProperties,
+}
+
+impl DisplayAs for FsMemExec {
+    fn fmt_as(
+        &self,
+        _t: datafusion::physical_plan::DisplayFormatType,
+        f: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        write!(f, "EmptyPartitionStream: schema={}", self.schema)
+    }
+}
+
+impl FsMemExec {
+    /// Creates a placeholder named `table_name` with the given output `schema`.
+    pub fn new(table_name: String, schema: SchemaRef) -> Self {
+        Self {
+            schema: schema.clone(),
+            table_name,
+            properties: make_stream_properties(schema),
+        }
+    }
+}
+
+impl ExecutionPlan for FsMemExec {
+    fn name(&self) -> &str {
+        physical_plan_node_name::MEM_EXEC
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    fn properties(&self) -> &PlanProperties {
+        &self.properties
+    }
+
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        not_impl_err!("with_new_children is not implemented for mem_exec; should not be called")
+    }
+
+    /// Always fails: this node exists only for planning and is never meant to run.
+    fn execute(
+        &self,
+        _partition: usize,
+        _context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        plan_err!(
+            "EmptyPartitionStream cannot be executed, this is only used for physical planning before serialization"
+        )
+    }
+
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema))
+    }
+
+    fn reset(&self) -> Result<()> {
+        Ok(())
+    }
+}
diff --git a/src/sql/physical/udfs.rs b/src/sql/physical/udfs.rs
new file mode 100644
index 00000000..267cb6e3
--- /dev/null
+++ b/src/sql/physical/udfs.rs
@@ -0,0 +1,133 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Streaming `window(start, end)` scalar UDF.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use datafusion::arrow::array::StructArray;
+use datafusion::arrow::datatypes::{DataType, Field, TimeUnit};
+use datafusion::common::{Result, ScalarValue, plan_err};
+use datafusion::logical_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
+};
+
+use crate::make_udf_function;
+use crate::sql::common::constants::{window_function_udf, window_interval_field};
+use crate::sql::schema::utils::window_arrow_struct;
+
+/// Scalar UDF that packs a window's start and end timestamps into one struct value.
+///
+/// Both arguments must be `Timestamp(Nanosecond, None)`; the signature is exact and immutable.
+#[derive(Debug)]
+pub struct WindowFunctionUdf {
+    signature: Signature,
+}
+
+impl Default for WindowFunctionUdf {
+    /// Builds the UDF with its exact two-timestamp signature.
+    fn default() -> Self {
+        Self {
+            signature: Signature::new(
+                TypeSignature::Exact(vec![
+                    DataType::Timestamp(TimeUnit::Nanosecond, None),
+                    DataType::Timestamp(TimeUnit::Nanosecond, None),
+                ]),
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for WindowFunctionUdf {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        window_function_udf::NAME
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// The result type is always the window struct from [`window_arrow_struct`].
+    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
+        Ok(window_arrow_struct())
+    }
+
+    /// Combines the `start` and `end` arguments into a two-field struct.
+    ///
+    /// A scalar argument is expanded to the length of the array argument; when both
+    /// arguments are scalars the result is a scalar struct, otherwise an array.
+    ///
+    /// # Errors
+    ///
+    /// Returns a plan error when the argument count is not 2 or either argument is not
+    /// `Timestamp(Nanosecond, None)`, and propagates the Arrow error when the struct
+    /// cannot be assembled (for example a null in a non-nullable child).
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        let columns = args.args;
+        let [start, end] = columns.as_slice() else {
+            return plan_err!(
+                "window function expected 2 arguments, got {}",
+                columns.len()
+            );
+        };
+        let timestamp = DataType::Timestamp(TimeUnit::Nanosecond, None);
+        if start.data_type() != timestamp {
+            return plan_err!(
+                "window function expected first argument to be a timestamp, got {:?}",
+                start.data_type()
+            );
+        }
+        if end.data_type() != timestamp {
+            return plan_err!(
+                "window function expected second argument to be a timestamp, got {:?}",
+                end.data_type()
+            );
+        }
+        let fields = vec![
+            Arc::new(Field::new(window_interval_field::START, timestamp.clone(), false)),
+            Arc::new(Field::new(window_interval_field::END, timestamp, false)),
+        ]
+        .into();
+
+        // Normalise both arguments to arrays, remembering whether the result must stay scalar.
+        let (start, end, all_scalar) = match (start, end) {
+            (ColumnarValue::Array(start), ColumnarValue::Array(end)) => {
+                (Arc::clone(start), Arc::clone(end), false)
+            }
+            (ColumnarValue::Array(start), ColumnarValue::Scalar(end)) => {
+                (Arc::clone(start), end.to_array_of_size(start.len())?, false)
+            }
+            (ColumnarValue::Scalar(start), ColumnarValue::Array(end)) => {
+                (start.to_array_of_size(end.len())?, Arc::clone(end), false)
+            }
+            (ColumnarValue::Scalar(start), ColumnarValue::Scalar(end)) => {
+                (start.to_array()?, end.to_array()?, true)
+            }
+        };
+
+        // `try_new` reports invalid children as an error; `StructArray::new` would panic.
+        let window = StructArray::try_new(fields, vec![start, end], None)?;
+        Ok(if all_scalar {
+            ColumnarValue::Scalar(ScalarValue::Struct(Arc::new(window)))
+        } else {
+            ColumnarValue::Array(Arc::new(window))
+        })
+    }
+}
+
+make_udf_function!(WindowFunctionUdf, WINDOW_FUNCTION, window);
diff --git a/src/sql/schema/data_encoding_format.rs b/src/sql/schema/data_encoding_format.rs
index 67e6d7e3..29828c86 100644
--- a/src/sql/schema/data_encoding_format.rs
+++ b/src/sql/schema/data_encoding_format.rs
@@ -16,6 +16,7 @@ use datafusion::arrow::datatypes::{DataType, Field};
 use datafusion::common::{Result, plan_err};
 
 use super::column_descriptor::ColumnDescriptor;
+use crate::sql::common::constants::{cdc, connection_format_value, with_opt_bool_str};
 use crate::sql::common::with_option_keys as opt;
 use crate::sql::common::Format;
 
@@ -38,14 +39,15 @@ impl DataEncodingFormat {
         let is_debezium = opts
             .get(opt::FORMAT_DEBEZIUM_FLAG)
             .or_else(|| opts.get(opt::JSON_DEBEZIUM))
-            .map(|s| s == "true")
+            .map(|s| s == with_opt_bool_str::TRUE)
             .unwrap_or(false);
 
         match (format_str, is_debezium) {
-            ("json", true) | ("debezium_json", _) => Ok(Self::DebeziumJson),
-            ("json", false) => Ok(Self::StandardJson),
-            ("avro", _) => Ok(Self::Avro),
-            ("parquet", _) => Ok(Self::Parquet),
+            (f, true) if f == connection_format_value::JSON => Ok(Self::DebeziumJson),
+            (f, _) if f == connection_format_value::DEBEZIUM_JSON => Ok(Self::DebeziumJson),
+            (f, false) if f == connection_format_value::JSON => Ok(Self::StandardJson),
+            (f, _) if f == connection_format_value::AVRO => Ok(Self::Avro),
+            (f, _) if f == connection_format_value::PARQUET => Ok(Self::Parquet),
             _ => Ok(Self::Raw),
         }
     }
@@ -78,9 +80,9 @@ impl DataEncodingFormat {
         let struct_type = DataType::Struct(fields.into());
 
         Ok(vec![
-            ColumnDescriptor::new_physical(Field::new("before", struct_type.clone(), true)),
-            ColumnDescriptor::new_physical(Field::new("after", struct_type.clone(), true)),
-            ColumnDescriptor::new_physical(Field::new("op", DataType::Utf8, true)),
+            ColumnDescriptor::new_physical(Field::new(cdc::BEFORE, struct_type.clone(), true)),
+            ColumnDescriptor::new_physical(Field::new(cdc::AFTER, struct_type.clone(), true)),
+            ColumnDescriptor::new_physical(Field::new(cdc::OP, DataType::Utf8, true)),
         ])
     }
 }
diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs
index 2bb5ef87..5ecde68e 100644
--- a/src/sql/schema/schema_provider.rs
+++ b/src/sql/schema/schema_provider.rs
@@ -26,6 +26,7 @@ use datafusion::sql::TableReference;
 use unicase::UniCase;
 
 use crate::sql::logical_node::logical::{DylibUdfConfig, LogicalProgram};
+use crate::sql::common::constants::{planning_placeholder_udf, window_fn};
 use crate::sql::schema::table::Table as CatalogTable;
 use crate::sql::schema::utils::window_arrow_struct;
 use crate::sql::types::{PlaceholderUdf, PlanningOptions};
@@ -95,7 +96,7 @@ impl TableProvider for LogicalBatchInput {
         _filters: &[Expr],
         _limit: Option<usize>,
     ) -> Result<Arc<dyn datafusion::physical_plan::ExecutionPlan>> {
-        Ok(Arc::new(crate::sql::logical_planner::FsMemExec::new(
+        Ok(Arc::new(crate::sql::physical::FsMemExec::new(
             self.table_name.clone(),
             Arc::clone(&self.schema),
         )))
@@ -378,7 +379,7 @@ impl StreamPlanningContextBuilder {
     pub fn with_streaming_extensions(mut self) -> Result<Self> {
         let extensions = vec![
             PlaceholderUdf::with_return(
-                "hop",
+                window_fn::HOP,
                 vec![
                     DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
                     DataType::Interval(datatypes::IntervalUnit::MonthDayNano),
@@ -386,22 +387,26 @@ impl StreamPlanningContextBuilder {
                 window_arrow_struct(),
             ),
             PlaceholderUdf::with_return(
-                "tumble",
+                window_fn::TUMBLE,
                 vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
                 window_arrow_struct(),
             ),
             PlaceholderUdf::with_return(
-                "session",
+                window_fn::SESSION,
                 vec![DataType::Interval(datatypes::IntervalUnit::MonthDayNano)],
                 window_arrow_struct(),
             ),
             PlaceholderUdf::with_return(
-                "unnest",
-                vec![DataType::List(Arc::new(Field::new("field", DataType::Utf8, true)))],
+                planning_placeholder_udf::UNNEST,
+                vec![DataType::List(Arc::new(Field::new(
+                    planning_placeholder_udf::LIST_ELEMENT_FIELD,
+                    DataType::Utf8,
+                    true,
+                )))],
                 DataType::Utf8,
             ),
             PlaceholderUdf::with_return(
-                "row_time",
+                planning_placeholder_udf::ROW_TIME,
                 vec![],
                 DataType::Timestamp(datatypes::TimeUnit::Nanosecond, None),
             ),
diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs
index 9e0caddf..63baca2a 100644
--- a/src/sql/schema/source_table.rs
+++ b/src/sql/schema/source_table.rs
@@ -37,6 +37,9 @@ use super::temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineCo
 use super::StreamSchemaProvider;
 use crate::multifield_partial_ord;
 use crate::sql::api::{ConnectionProfile, ConnectionSchema, SourceField};
+use crate::sql::common::constants::{
+    connection_table_role, connector_type, kafka_with_value, sql_field,
+};
 use crate::sql::common::connector_options::ConnectorOptions;
 use crate::sql::common::kafka_catalog::{
     KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode,
@@ -266,7 +269,7 @@ impl SourceTable {
 
         if let Some(Format::Json(JsonFormat { compression, .. })) = &format
             && !matches!(compression, JsonCompression::Uncompressed)
-            && connector_name != "filesystem"
+            && connector_name != connector_type::FILESYSTEM
         {
             return plan_err!("'json.compression' is only supported for the filesystem connector");
         }
@@ -275,8 +278,8 @@ impl SourceTable {
             .map_err(|e| DataFusionError::Plan(format!("invalid framing: '{e}'")))?;
 
         if temporary
-            && let Some(t) = options.insert_str(opt::TYPE, "lookup")?
-            && t != "lookup"
+            && let Some(t) = options.insert_str(opt::TYPE, connection_table_role::LOOKUP)?
+            && t != connection_table_role::LOOKUP
         {
             return plan_err!(
                 "Cannot have a temporary table with type '{t}'; temporary tables must be type 'lookup'"
@@ -323,9 +326,9 @@ impl SourceTable {
             t.into()
         } else {
             match options.pull_opt_str(opt::TYPE)?.as_deref() {
-                None | Some("source") => TableRole::Ingestion,
-                Some("sink") => TableRole::Egress,
-                Some("lookup") => TableRole::Reference,
+                None | Some(connection_table_role::SOURCE) => TableRole::Ingestion,
+                Some(connection_table_role::SINK) => TableRole::Egress,
+                Some(connection_table_role::LOOKUP) => TableRole::Reference,
                 Some(other) => {
                     return plan_err!("invalid connection type '{other}' in WITH options");
                 }
@@ -405,13 +408,14 @@ impl SourceTable {
 
                 table.schema_specs.push(ColumnDescriptor::new_computed(
                     Field::new(
-                        "__watermark",
+                        sql_field::COMPUTED_WATERMARK,
                         logical_expr.get_type(&df_schema)?,
                         false,
                     ),
                     logical_expr,
                 ));
-                table.temporal_config.watermark_strategy_column = Some("__watermark".to_string());
+                table.temporal_config.watermark_strategy_column =
+                    Some(sql_field::COMPUTED_WATERMARK.to_string());
             } else {
                 table.temporal_config.watermark_strategy_column = Some(time_field);
             }
@@ -428,7 +432,7 @@ impl SourceTable {
 
         table.lookup_cache_ttl = options.pull_opt_duration(opt::LOOKUP_CACHE_TTL)?;
 
-        if connector_name.eq_ignore_ascii_case("kafka") {
+        if connector_name.eq_ignore_ascii_case(connector_type::KAFKA) {
             let physical = table.produce_physical_schema();
             let op_cfg = wire_kafka_operator_config(
                 options,
@@ -612,9 +616,15 @@ fn wire_kafka_operator_config(
     let kind = match role {
         TableRole::Ingestion => {
             let offset = match options.pull_opt_str(opt::KAFKA_SCAN_STARTUP_MODE)?.as_deref() {
-                Some("latest") => KafkaTableSourceOffset::Latest,
-                Some("earliest") => KafkaTableSourceOffset::Earliest,
-                None | Some("group-offsets") | Some("group") => KafkaTableSourceOffset::Group,
+                Some(s) if s == kafka_with_value::SCAN_LATEST => KafkaTableSourceOffset::Latest,
+                Some(s) if s == kafka_with_value::SCAN_EARLIEST => KafkaTableSourceOffset::Earliest,
+                Some(s)
+                    if s == kafka_with_value::SCAN_GROUP_OFFSETS
+                        || s == kafka_with_value::SCAN_GROUP =>
+                {
+                    KafkaTableSourceOffset::Group
+                }
+                None => KafkaTableSourceOffset::Group,
                 Some(other) => {
                     return plan_err!(
                         "invalid scan.startup.mode '{other}'; expected latest, earliest, or group-offsets"
@@ -622,8 +632,12 @@ fn wire_kafka_operator_config(
                 }
             };
             let read_mode = match options.pull_opt_str(opt::KAFKA_ISOLATION_LEVEL)?.as_deref() {
-                Some("read_committed") => Some(ReadMode::ReadCommitted),
-                Some("read_uncommitted") => Some(ReadMode::ReadUncommitted),
+                Some(s) if s == kafka_with_value::ISOLATION_READ_COMMITTED => {
+                    Some(ReadMode::ReadCommitted)
+                }
+                Some(s) if s == kafka_with_value::ISOLATION_READ_UNCOMMITTED => {
+                    Some(ReadMode::ReadUncommitted)
+                }
                 None => None,
                 Some(other) => {
                     return plan_err!("invalid isolation.level '{other}'");
@@ -643,8 +657,19 @@ fn wire_kafka_operator_config(
         }
         TableRole::Egress => {
             let commit_mode = match options.pull_opt_str(opt::KAFKA_SINK_COMMIT_MODE)?.as_deref() {
-                Some("exactly-once") | Some("exactly_once") => SinkCommitMode::ExactlyOnce,
-                None | Some("at-least-once") | Some("at_least_once") => SinkCommitMode::AtLeastOnce,
+                Some(s)
+                    if s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_HYPHEN
+                        || s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_UNDERSCORE =>
+                {
+                    SinkCommitMode::ExactlyOnce
+                }
+                None => SinkCommitMode::AtLeastOnce,
+                Some(s)
+                    if s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_HYPHEN
+                        || s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_UNDERSCORE =>
+                {
+                    SinkCommitMode::AtLeastOnce
+                }
                 Some(other) => {
                     return plan_err!("invalid sink.commit.mode '{other}'");
                 }
diff --git a/src/sql/schema/table_role.rs b/src/sql/schema/table_role.rs
index 12bd8068..bf3fed74 100644
--- a/src/sql/schema/table_role.rs
+++ b/src/sql/schema/table_role.rs
@@ -18,6 +18,9 @@ use datafusion::error::DataFusionError;
 
 use super::column_descriptor::ColumnDescriptor;
 use super::connection_type::ConnectionType;
+use crate::sql::common::constants::{
+    connection_table_role, connector_type, SUPPORTED_CONNECTOR_ADAPTERS,
+};
 use crate::sql::common::with_option_keys as opt;
 
 /// Role of a connector-backed table in the pipeline (ingest / egress / lookup).
@@ -49,25 +52,7 @@ impl From<ConnectionType> for TableRole {
 }
 
 pub fn validate_adapter_availability(adapter: &str) -> Result<()> {
-    let supported = [
-        "kafka",
-        "kinesis",
-        "filesystem",
-        "delta",
-        "iceberg",
-        "pulsar",
-        "nats",
-        "redis",
-        "mqtt",
-        "websocket",
-        "sse",
-        "nexmark",
-        "blackhole",
-        "lookup",
-        "memory",
-        "postgres",
-    ];
-    if !supported.contains(&adapter) {
+    if !SUPPORTED_CONNECTOR_ADAPTERS.contains(&adapter) {
         return Err(DataFusionError::Plan(format!("Unknown adapter '{adapter}'")));
     }
     Ok(())
@@ -75,7 +60,7 @@ pub fn validate_adapter_availability(adapter: &str) -> Result<()> {
 
 pub fn apply_adapter_specific_rules(adapter: &str, mut cols: Vec<ColumnDescriptor>) -> Vec<ColumnDescriptor> {
     match adapter {
-        "delta" | "iceberg" => {
+        a if a == connector_type::DELTA || a == connector_type::ICEBERG => {
             for c in &mut cols {
                 if matches!(c.data_type(), DataType::Timestamp(_, _)) {
                     c.force_precision(TimeUnit::Microsecond);
@@ -89,9 +74,9 @@ pub fn apply_adapter_specific_rules(adapter: &str, mut cols: Vec<ColumnDescripto
 
 pub fn deduce_role(options: &HashMap<String, String>) -> Result<TableRole> {
     match options.get(opt::TYPE).map(|s| s.as_str()) {
-        None | Some("source") => Ok(TableRole::Ingestion),
-        Some("sink") => Ok(TableRole::Egress),
-        Some("lookup") => Ok(TableRole::Reference),
+        None | Some(connection_table_role::SOURCE) => Ok(TableRole::Ingestion),
+        Some(connection_table_role::SINK) => Ok(TableRole::Egress),
+        Some(connection_table_role::LOOKUP) => Ok(TableRole::Reference),
         Some(other) => plan_err!("Invalid role '{other}'"),
     }
 }
diff --git a/src/sql/schema/temporal_pipeline_config.rs b/src/sql/schema/temporal_pipeline_config.rs
index eb29f71c..f672e552 100644
--- a/src/sql/schema/temporal_pipeline_config.rs
+++ b/src/sql/schema/temporal_pipeline_config.rs
@@ -16,6 +16,7 @@ use datafusion::common::{Result, plan_err};
 use datafusion::logical_expr::Expr;
 
 use super::column_descriptor::ColumnDescriptor;
+use crate::sql::common::constants::sql_field;
 
 /// Event-time and watermark configuration for streaming tables.
 #[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
@@ -47,7 +48,7 @@ pub fn resolve_temporal_logic(
         config.event_column = Some(meta.time_field.clone());
 
         if meta.watermark_expr.is_some() {
-            config.watermark_strategy_column = Some("__watermark".to_string());
+            config.watermark_strategy_column = Some(sql_field::COMPUTED_WATERMARK.to_string());
         } else {
             config.watermark_strategy_column = Some(meta.time_field);
         }
diff --git a/src/sql/schema/utils.rs b/src/sql/schema/utils.rs
index c0b8a7d0..ba408f22 100644
--- a/src/sql/schema/utils.rs
+++ b/src/sql/schema/utils.rs
@@ -16,6 +16,7 @@ use std::sync::Arc;
 use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
 use datafusion::common::{DFSchema, DFSchemaRef, Result as DFResult, TableReference};
 
+use crate::sql::common::constants::window_interval_field;
 use crate::sql::types::{DFField, TIMESTAMP_FIELD};
 
 /// Returns the Arrow struct type for a window (start, end) pair.
@@ -23,12 +24,12 @@ pub fn window_arrow_struct() -> DataType {
     DataType::Struct(
         vec![
             Arc::new(Field::new(
-                "start",
+                window_interval_field::START,
                 DataType::Timestamp(TimeUnit::Nanosecond, None),
                 false,
             )),
             Arc::new(Field::new(
-                "end",
+                window_interval_field::END,
                 DataType::Timestamp(TimeUnit::Nanosecond, None),
                 false,
             )),
diff --git a/src/sql/types/data_type.rs b/src/sql/types/data_type.rs
index 1fc55ecc..4736f812 100644
--- a/src/sql/types/data_type.rs
+++ b/src/sql/types/data_type.rs
@@ -17,6 +17,7 @@ use datafusion::arrow::datatypes::{
 };
 use datafusion::common::{Result, plan_datafusion_err, plan_err};
 
+use crate::sql::common::constants::planning_placeholder_udf;
 use crate::sql::common::FsExtensionType;
 
 pub fn convert_data_type(
@@ -33,7 +34,7 @@ pub fn convert_data_type(
             Ok((
                 DataType::List(Arc::new(FsExtensionType::add_metadata(
                     extension,
-                    Field::new("field", data_type, true),
+                    Field::new(planning_placeholder_udf::LIST_ELEMENT_FIELD, data_type, true),
                 ))),
                 None,
             ))

From 9b41175685bae4f26ba4b9d66b1f58bba871d7b8 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 15:56:22 +0800
Subject: [PATCH 26/44] update

---
 src/sql/common/constants.rs | 12 ++++++++++++
 src/sql/common/mod.rs       |  3 +--
 src/sql/types/mod.rs        |  8 +++++---
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs
index f5dd56ef..5f249227 100644
--- a/src/sql/common/constants.rs
+++ b/src/sql/common/constants.rs
@@ -122,6 +122,18 @@ pub mod sql_field {
     pub const DEFAULT_PROJECTION_LABEL: &str = "projection";
     /// `WATERMARK FOR … AS expr` 生成的计算列名（与 `TemporalPipelineConfig` 一致）。
     pub const COMPUTED_WATERMARK: &str = "__watermark";
+    /// Physical event-time column name of a streaming table (matches the column injected into DataFusion plans).
+    pub const TIMESTAMP_FIELD: &str = "_timestamp";
+    /// Metadata column name used in changelog / updating mode.
+    pub const UPDATING_META_FIELD: &str = "_updating_meta";
+}
+
+// ── Default values for `SqlConfig` / `PlanningOptions` ────────────────────────
+
+pub mod sql_planning_default {
+    pub const DEFAULT_PARALLELISM: usize = 4; // value used for `default_parallelism` by `SqlConfig::default`
+    /// TTL, in seconds, used by [`PlanningOptions::default`]: 24 hours.
+    pub const PLANNING_TTL_SECS: u64 = 24 * 60 * 60;
+}
 
 // ── `ConnectorOptions` / WITH 解析用到的字面量 ────────────────────────────────
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index 7a4b4ee4..3302eb10 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -61,8 +61,7 @@ pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat};
 pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
 
 // ── Well-known column names ──
-pub const TIMESTAMP_FIELD: &str = "_timestamp";
-pub const UPDATING_META_FIELD: &str = "_updating_meta";
+pub use constants::sql_field::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
 
 // ── Environment variables ──
 pub const JOB_ID_ENV: &str = "JOB_ID";
diff --git a/src/sql/types/mod.rs b/src/sql/types/mod.rs
index 16d7033b..41753e38 100644
--- a/src/sql/types/mod.rs
+++ b/src/sql/types/mod.rs
@@ -18,6 +18,8 @@ mod window;
 
 use std::time::Duration;
 
+use crate::sql::common::constants::sql_planning_default;
+
 pub use data_type::convert_data_type;
 pub use df_field::{
     DFField, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata,
@@ -27,7 +29,7 @@ pub use stream_schema::StreamSchema;
 pub(crate) use window::WindowBehavior;
 pub use window::{WindowType, find_window, get_duration};
 
-pub const TIMESTAMP_FIELD: &str = "_timestamp";
+pub use crate::sql::common::constants::sql_field::TIMESTAMP_FIELD;
 
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ProcessingMode {
@@ -43,7 +45,7 @@ pub struct SqlConfig {
 impl Default for SqlConfig {
     fn default() -> Self {
         Self {
-            default_parallelism: 4,
+            default_parallelism: sql_planning_default::DEFAULT_PARALLELISM,
         }
     }
 }
@@ -56,7 +58,7 @@ pub struct PlanningOptions {
 impl Default for PlanningOptions {
     fn default() -> Self {
         Self {
-            ttl: Duration::from_secs(24 * 60 * 60),
+            ttl: Duration::from_secs(sql_planning_default::PLANNING_TTL_SECS),
         }
     }
 }

From 5b596f2c59432652d6e533b95466df6d4eab2eb2 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 17:15:58 +0800
Subject: [PATCH 27/44] update

---
 Cargo.lock                                    | 53 ------------
 Cargo.toml                                    |  4 -
 src/coordinator/execution/executor.rs         |  3 +-
 src/coordinator/plan/logical_plan_visitor.rs  |  1 -
 src/coordinator/plan/streaming_table_plan.rs  |  2 -
 src/runtime/mod.rs                            |  1 +
 src/runtime/streaming/api/mod.rs              |  2 +-
 src/runtime/streaming/api/operator.rs         | 59 -------------
 .../factory/connector/dispatchers.rs          | 58 +++++++++++++
 .../kafka_factory.rs => connector/kafka.rs}   | 12 +--
 .../streaming/factory/connector/mod.rs        | 19 +++++
 src/runtime/streaming/factory/global/mod.rs   | 17 ++++
 .../factory/global/session_registry.rs        | 64 ++++++++++++++
 src/runtime/streaming/factory/mod.rs          | 53 +++++++++++-
 .../streaming/factory/operator_constructor.rs | 26 ++++++
 .../{registry/mod.rs => operator_factory.rs}  | 83 ++-----------------
 src/runtime/streaming/job/job_manager.rs      |  7 +-
 src/runtime/streaming/lib.rs                  |  1 -
 src/runtime/streaming/mod.rs                  |  1 -
 .../grouping/incremental_aggregate.rs         |  5 +-
 .../operators/joins/join_instance.rs          |  3 +-
 .../operators/joins/join_with_expiration.rs   |  3 +-
 .../streaming/operators/sink/kafka/mod.rs     |  3 +-
 .../operators/stateless_physical_executor.rs  |  2 +-
 .../watermark/watermark_generator.rs          |  3 +-
 .../windows/session_aggregating_window.rs     |  2 +-
 .../windows/sliding_aggregating_window.rs     |  2 +-
 .../windows/tumbling_aggregating_window.rs    |  2 +-
 .../operators/windows/window_function.rs      |  3 +-
 src/runtime/util/mod.rs                       | 17 ++++
 .../mod.rs => util/physical_aggregate.rs}     |  9 +-
 src/server/initializer.rs                     |  2 +-
 src/sql/common/constants.rs                   |  9 ++
 33 files changed, 300 insertions(+), 231 deletions(-)
 create mode 100644 src/runtime/streaming/factory/connector/dispatchers.rs
 rename src/runtime/streaming/factory/{registry/kafka_factory.rs => connector/kafka.rs} (95%)
 create mode 100644 src/runtime/streaming/factory/connector/mod.rs
 create mode 100644 src/runtime/streaming/factory/global/mod.rs
 create mode 100644 src/runtime/streaming/factory/global/session_registry.rs
 create mode 100644 src/runtime/streaming/factory/operator_constructor.rs
 rename src/runtime/streaming/factory/{registry/mod.rs => operator_factory.rs} (72%)
 create mode 100644 src/runtime/util/mod.rs
 rename src/runtime/{streaming/arrow/mod.rs => util/physical_aggregate.rs} (95%)

diff --git a/Cargo.lock b/Cargo.lock
index e9ce4109..c6994ec0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2264,14 +2264,12 @@ dependencies = [
  "base64",
  "bincode",
  "chrono",
- "clap",
  "crossbeam-channel",
  "datafusion",
  "datafusion-common",
  "datafusion-execution",
  "datafusion-expr",
  "datafusion-functions",
- "datafusion-functions-aggregate",
  "datafusion-functions-window",
  "datafusion-physical-expr",
  "datafusion-physical-plan",
@@ -2286,8 +2284,6 @@ dependencies = [
  "num_cpus",
  "parking_lot",
  "parquet 55.2.0 (git+https://github.com/ArroyoSystems/arrow-rs?branch=55.2.0%2Fparquet)",
- "pest",
- "pest_derive",
  "petgraph 0.7.1",
  "proctitle",
  "prost",
@@ -3762,49 +3758,6 @@ version = "2.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
 
-[[package]]
-name = "pest"
-version = "2.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7"
-dependencies = [
- "memchr",
- "ucd-trie",
-]
-
-[[package]]
-name = "pest_derive"
-version = "2.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed"
-dependencies = [
- "pest",
- "pest_generator",
-]
-
-[[package]]
-name = "pest_generator"
-version = "2.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5"
-dependencies = [
- "pest",
- "pest_meta",
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "pest_meta"
-version = "2.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365"
-dependencies = [
- "pest",
- "sha2",
-]
-
 [[package]]
 name = "petgraph"
 version = "0.6.5"
@@ -5431,12 +5384,6 @@ dependencies = [
  "typify-impl",
 ]
 
-[[package]]
-name = "ucd-trie"
-version = "0.1.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
-
 [[package]]
 name = "unicase"
 version = "2.9.0"
diff --git a/Cargo.toml b/Cargo.toml
index 8e343baa..7c49d04c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,9 +38,6 @@ protocol = { path = "./protocol" }
 prost = "0.13"
 rdkafka = { version = "0.38", features = ["cmake-build", "ssl", "gssapi"] }
 crossbeam-channel = "0.5"
-pest = "2.7"
-pest_derive = "2.7"
-clap = { version = "4.5", features = ["derive"] }
 wasmtime = { version = "41.0.3", features = ["component-model", "async"] }
 base64 = "0.22"
 wasmtime-wasi = "41.0.3"
@@ -63,7 +60,6 @@ petgraph = "0.7"
 rand = { version = "0.8", features = ["small_rng"] }
 itertools = "0.14"
 strum = { version = "0.26", features = ["derive"] }
-datafusion-functions-aggregate = {git = 'https://github.com/ArroyoSystems/arrow-datafusion', branch = '48.0.1/arroyo'}
 
 typify = { git = 'https://github.com/ArroyoSystems/typify.git', branch = 'arroyo' }
 parquet = {git = 'https://github.com/ArroyoSystems/arrow-rs', branch = '55.2.0/parquet'}
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 28082abe..f9f26cd0 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -284,8 +284,9 @@ impl PlanVisitor for Executor {
             let fs_program: FsProgram = plan.program.clone().into();
             let job_manager: Arc<JobManager> = Arc::clone(&self.job_manager);
 
+            let job_id = plan.name.clone();
             let job_id = tokio::task::block_in_place(|| {
-                tokio::runtime::Handle::current().block_on(job_manager.submit_job(fs_program))
+                tokio::runtime::Handle::current().block_on(job_manager.submit_job(job_id, fs_program))
             })
             .map_err(|e| ExecuteError::Internal(format!("Failed to submit streaming job: {e}")))?;
 
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 9e95c5bd..e2e457eb 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -155,7 +155,6 @@ impl LogicalPlanVisitor {
         Ok(StreamingTable {
             name: sink_table_name,
             comment: comment.clone(),
-            source_table: sink_definition,
             program: validated_program,
         })
     }
diff --git a/src/coordinator/plan/streaming_table_plan.rs b/src/coordinator/plan/streaming_table_plan.rs
index c7b09c26..512ec266 100644
--- a/src/coordinator/plan/streaming_table_plan.rs
+++ b/src/coordinator/plan/streaming_table_plan.rs
@@ -12,14 +12,12 @@
 
 use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
 use crate::sql::logical_node::logical::LogicalProgram;
-use crate::sql::schema::source_table::SourceTable;
 
 /// Plan node representing a fully resolved streaming table (DDL).
 #[derive(Debug)]
 pub struct StreamingTable {
     pub name: String,
     pub comment: Option<String>,
-    pub source_table: SourceTable,
     pub program: LogicalProgram,
 }
 
diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs
index 0dce921e..61b67e1f 100644
--- a/src/runtime/mod.rs
+++ b/src/runtime/mod.rs
@@ -15,6 +15,7 @@
 pub mod buffer_and_event;
 pub mod common;
 pub mod streaming;
+pub mod util;
 pub mod task;
 pub mod taskexecutor;
 pub mod wasm;
diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs
index 49e45328..aec4b8fb 100644
--- a/src/runtime/streaming/api/mod.rs
+++ b/src/runtime/streaming/api/mod.rs
@@ -17,5 +17,5 @@ pub mod operator;
 pub mod source;
 
 pub use context::TaskContext;
-pub use operator::{ConstructedOperator, MessageOperator, Registry};
+pub use operator::{ConstructedOperator, MessageOperator};
 pub use source::{SourceEvent, SourceOffset, SourceOperator};
diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs
index eabeff85..29d24f82 100644
--- a/src/runtime/streaming/api/operator.rs
+++ b/src/runtime/streaming/api/operator.rs
@@ -15,68 +15,9 @@ use crate::runtime::streaming::api::source::SourceOperator;
 use crate::runtime::streaming::protocol::stream_out::StreamOutput;
 use arrow_array::RecordBatch;
 use async_trait::async_trait;
-use datafusion::common::Result as DfResult;
-use datafusion::execution::context::SessionContext;
-use datafusion::execution::FunctionRegistry;
-use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF};
-use datafusion::logical_expr::planner::ExprPlanner;
-use std::collections::HashSet;
-use std::sync::Arc;
 use std::time::Duration;
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
-// ---------------------------------------------------------------------------
-// Registry — 算子 / UDF 注册表（取代 tracing_subscriber::Registry）
-// ---------------------------------------------------------------------------
-
-/// 运行时函数与状态注册表。
-///
-/// 包装 DataFusion [`SessionContext`]，为物理计划反序列化提供 UDF / UDAF / UDWF 查询能力。
-/// `Arc<Registry>` 在工厂中创建后，由各构造器共享。
-pub struct Registry {
-    ctx: SessionContext,
-}
-
-impl Registry {
-    pub fn new() -> Self {
-        Self {
-            ctx: SessionContext::new(),
-        }
-    }
-
-    pub fn session_context(&self) -> &SessionContext {
-        &self.ctx
-    }
-}
-
-impl Default for Registry {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl FunctionRegistry for Registry {
-    fn udfs(&self) -> HashSet<String> {
-        self.ctx.udfs()
-    }
-
-    fn udf(&self, name: &str) -> DfResult<Arc<ScalarUDF>> {
-        self.ctx.udf(name)
-    }
-
-    fn udaf(&self, name: &str) -> DfResult<Arc<AggregateUDF>> {
-        self.ctx.udaf(name)
-    }
-
-    fn udwf(&self, name: &str) -> DfResult<Arc<WindowUDF>> {
-        self.ctx.udwf(name)
-    }
-
-    fn expr_planners(&self) -> Vec<Arc<dyn ExprPlanner>> {
-        self.ctx.expr_planners()
-    }
-}
-
 // ---------------------------------------------------------------------------
 // ConstructedOperator
 // ---------------------------------------------------------------------------
diff --git a/src/runtime/streaming/factory/connector/dispatchers.rs b/src/runtime/streaming/factory/connector/dispatchers.rs
new file mode 100644
index 00000000..dcdd1e32
--- /dev/null
+++ b/src/runtime/streaming/factory/connector/dispatchers.rs
@@ -0,0 +1,58 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Source / Sink connector protocol: dispatches to a concrete implementation based on [`ConnectorOp::connector`].
+
+use anyhow::{anyhow, Result};
+use prost::Message;
+use std::sync::Arc;
+
+use protocol::grpc::api::ConnectorOp;
+
+use crate::runtime::streaming::api::operator::ConstructedOperator;
+use crate::runtime::streaming::factory::global::Registry;
+use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor;
+use crate::sql::common::constants::connector_type;
+
+use super::kafka::{KafkaSinkDispatcher, KafkaSourceDispatcher};
+
+pub struct ConnectorSourceDispatcher; // Unit struct that routes source construction on the `connector` field of `ConnectorOp`.
+
+impl OperatorConstructor for ConnectorSourceDispatcher {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> { // decode, then delegate by connector type
+        let op = ConnectorOp::decode(config)
+            .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?;
+
+        match op.connector.as_str() { // the decoded op is consulted only for its `connector` field
+            ct if ct == connector_type::KAFKA => KafkaSourceDispatcher.with_config(config, registry), // forwards the raw bytes
+            ct if ct == connector_type::REDIS => Err(anyhow!( // recognised type, but no constructor is wired up yet
+                "ConnectorSource '{}' factory wiring not yet implemented",
+                op.connector
+            )),
+            other => Err(anyhow!("Unsupported source connector type: {}", other)), // anything else is rejected
+        }
+    }
+}
+
+pub struct ConnectorSinkDispatcher; // Unit struct that routes sink construction on the `connector` field of `ConnectorOp`.
+
+impl OperatorConstructor for ConnectorSinkDispatcher {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> { // decode, then delegate by connector type
+        let op = ConnectorOp::decode(config)
+            .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?;
+
+        match op.connector.as_str() { // the decoded op is consulted only for its `connector` field
+            ct if ct == connector_type::KAFKA => KafkaSinkDispatcher.with_config(config, registry), // forwards the raw bytes
+            other => Err(anyhow!("Unsupported sink connector type: {}", other)), // anything else is rejected
+        }
+    }
+}
diff --git a/src/runtime/streaming/factory/registry/kafka_factory.rs b/src/runtime/streaming/factory/connector/kafka.rs
similarity index 95%
rename from src/runtime/streaming/factory/registry/kafka_factory.rs
rename to src/runtime/streaming/factory/connector/kafka.rs
index 8f42acd9..ab72ea9e 100644
--- a/src/runtime/streaming/factory/registry/kafka_factory.rs
+++ b/src/runtime/streaming/factory/connector/kafka.rs
@@ -21,9 +21,10 @@ use std::sync::Arc;
 use protocol::grpc::api::ConnectorOp;
 use tracing::{info, warn};
 
-use super::OperatorConstructor;
-use crate::runtime::streaming::api::operator::{ConstructedOperator, Registry};
+use crate::runtime::streaming::api::operator::ConstructedOperator;
 use crate::runtime::streaming::api::source::SourceOffset;
+use crate::runtime::streaming::factory::global::Registry;
+use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor;
 use crate::runtime::streaming::format::{
     BadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding, Format as RuntimeFormat,
     JsonFormat as RuntimeJsonFormat, TimestampFormat as RtTimestampFormat,
@@ -332,10 +333,3 @@ impl OperatorConstructor for KafkaSinkDispatcher {
         Ok(ConstructedOperator::Operator(Box::new(sink_op)))
     }
 }
-
-/// 注册 `KafkaSource` / `KafkaSink` 构造器（由 [`super::OperatorFactory::register_builtins`] 调用）。
-pub fn register_kafka_plugins(factory: &mut super::OperatorFactory) {
-    factory.register("KafkaSource", Box::new(KafkaSourceDispatcher));
-    factory.register("KafkaSink", Box::new(KafkaSinkDispatcher));
-    info!("Registered Kafka connector plugins (KafkaSource, KafkaSink)");
-}
diff --git a/src/runtime/streaming/factory/connector/mod.rs b/src/runtime/streaming/factory/connector/mod.rs
new file mode 100644
index 00000000..3b8af292
--- /dev/null
+++ b/src/runtime/streaming/factory/connector/mod.rs
@@ -0,0 +1,19 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Source / Sink connectors: `ConnectorOp` dispatch plus the individual connector implementations (e.g. Kafka).
+
+mod dispatchers;
+pub mod kafka;
+
+pub use dispatchers::{ConnectorSinkDispatcher, ConnectorSourceDispatcher};
+pub use kafka::{KafkaSinkDispatcher, KafkaSourceDispatcher};
diff --git a/src/runtime/streaming/factory/global/mod.rs b/src/runtime/streaming/factory/global/mod.rs
new file mode 100644
index 00000000..9434c157
--- /dev/null
+++ b/src/runtime/streaming/factory/global/mod.rs
@@ -0,0 +1,17 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Global runtime resources: the shared [`Registry`] (session + UDF table), independent of the connector protocol.
+
+mod session_registry;
+
+pub use session_registry::Registry;
diff --git a/src/runtime/streaming/factory/global/session_registry.rs b/src/runtime/streaming/factory/global/session_registry.rs
new file mode 100644
index 00000000..ef32c30e
--- /dev/null
+++ b/src/runtime/streaming/factory/global/session_registry.rs
@@ -0,0 +1,64 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Runtime lookup table for UDFs / UDAFs / UDWFs (backed by a DataFusion [`SessionContext`]).
+
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use datafusion::common::Result as DfResult;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::FunctionRegistry;
+use datafusion::logical_expr::planner::ExprPlanner;
+use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF};
+
+/// Provides a [`FunctionRegistry`] implementation for paths such as physical-plan deserialization.
+///
+/// An `Arc<Registry>` is held by [`crate::runtime::streaming::factory::OperatorFactory`] and
+/// shared with every [`crate::runtime::streaming::factory::OperatorConstructor`]; it must be built explicitly via [`Self::new`] (there is no default instance).
+pub struct Registry {
+    ctx: SessionContext, // wrapped DataFusion session; function lookups are forwarded to it
+}
+
+impl Registry {
+    pub fn new() -> Self { // wraps a fresh `SessionContext::new()`
+        Self {
+            ctx: SessionContext::new(),
+        }
+    }
+
+    pub fn session_context(&self) -> &SessionContext { // borrows the wrapped context
+        &self.ctx
+    }
+}
+
+impl FunctionRegistry for Registry { // every method below forwards to the wrapped `SessionContext`
+    fn udfs(&self) -> HashSet<String> {
+        self.ctx.udfs()
+    }
+
+    fn udf(&self, name: &str) -> DfResult<Arc<ScalarUDF>> { // scalar function lookup by name
+        self.ctx.udf(name)
+    }
+
+    fn udaf(&self, name: &str) -> DfResult<Arc<AggregateUDF>> { // aggregate function lookup by name
+        self.ctx.udaf(name)
+    }
+
+    fn udwf(&self, name: &str) -> DfResult<Arc<WindowUDF>> { // window function lookup by name
+        self.ctx.udwf(name)
+    }
+
+    fn expr_planners(&self) -> Vec<Arc<dyn ExprPlanner>> {
+        self.ctx.expr_planners()
+    }
+}
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
index 8c03c298..8a7e686a 100644
--- a/src/runtime/streaming/factory/mod.rs
+++ b/src/runtime/streaming/factory/mod.rs
@@ -10,9 +10,54 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-pub mod registry;
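+//! Streaming operator factory: [`global`] holds the shared registry; [`connector`] holds the Source/Sink protocol and implementations;
+//! [`OperatorFactory`] and [`OperatorConstructor`] live in the root module to avoid a circular dependency with `connector`.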
 
-pub use registry::{
-    ConnectorSinkDispatcher, ConnectorSourceDispatcher, OperatorConstructor, OperatorFactory,
-    PassthroughConstructor,
+pub mod connector;
+pub mod global;
+
+mod operator_constructor;
+mod operator_factory;
+
+use tracing::info;
+
+use crate::sql::common::constants::factory_operator_name;
+
+#[allow(unused_imports)]
+pub use connector::{
+    ConnectorSinkDispatcher, ConnectorSourceDispatcher, KafkaSinkDispatcher, KafkaSourceDispatcher,
 };
+pub use global::Registry;
+pub use operator_constructor::OperatorConstructor;
+pub use operator_factory::OperatorFactory;
+#[allow(unused_imports)]
+pub use operator_factory::PassthroughConstructor;
+
+/// Registers the `ConnectorSource` / `ConnectorSink` dispatchers (breaks the `operator_factory` ↔ `connector` dependency cycle).
+fn register_builtin_connectors(factory: &mut OperatorFactory) {
+    factory.register(
+        factory_operator_name::CONNECTOR_SOURCE,
+        Box::new(connector::ConnectorSourceDispatcher),
+    );
+    factory.register(
+        factory_operator_name::CONNECTOR_SINK,
+        Box::new(connector::ConnectorSinkDispatcher),
+    );
+}
+
+/// Registers the direct Kafka operators (names are defined in [`crate::sql::common::constants::factory_operator_name`]).
+fn register_kafka_connector_plugins(factory: &mut OperatorFactory) {
+    factory.register(
+        factory_operator_name::KAFKA_SOURCE,
+        Box::new(connector::KafkaSourceDispatcher),
+    );
+    factory.register(
+        factory_operator_name::KAFKA_SINK,
+        Box::new(connector::KafkaSinkDispatcher),
+    );
+    info!(
+        "Registered Kafka connector plugins ({}, {})",
+        factory_operator_name::KAFKA_SOURCE,
+        factory_operator_name::KAFKA_SINK
+    );
+}
diff --git a/src/runtime/streaming/factory/operator_constructor.rs b/src/runtime/streaming/factory/operator_constructor.rs
new file mode 100644
index 00000000..b6b6203f
--- /dev/null
+++ b/src/runtime/streaming/factory/operator_constructor.rs
@@ -0,0 +1,26 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Operator construction protocol: decoupled from concrete connector implementations and shared by [`super::OperatorFactory`] and `connector`.
+
+use anyhow::Result;
+use std::sync::Arc;
+
+use crate::runtime::streaming::api::operator::ConstructedOperator;
+use crate::runtime::streaming::factory::global::Registry;
+
+/// Operator-constructor trait: each implementor deserializes its configuration from protobuf bytes and builds a [`ConstructedOperator`].
+///
+/// External plugins can implement this trait and inject it via [`crate::runtime::streaming::factory::OperatorFactory::register`].
+pub trait OperatorConstructor: Send + Sync {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator>;
+}
diff --git a/src/runtime/streaming/factory/registry/mod.rs b/src/runtime/streaming/factory/operator_factory.rs
similarity index 72%
rename from src/runtime/streaming/factory/registry/mod.rs
rename to src/runtime/streaming/factory/operator_factory.rs
index d129f644..dfc6bb87 100644
--- a/src/runtime/streaming/factory/registry/mod.rs
+++ b/src/runtime/streaming/factory/operator_factory.rs
@@ -10,15 +10,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//! Global operator factory: built-in bridges for windows, joins, KeyBy, etc.
+
 use anyhow::{anyhow, Result};
 use prost::Message;
 use std::collections::HashMap;
 use std::sync::Arc;
 
-use crate::sql::common::constants::connector_type;
-use crate::runtime::streaming::api::operator::Registry;
-
+use super::operator_constructor::OperatorConstructor;
 use crate::runtime::streaming::api::operator::ConstructedOperator;
+use crate::runtime::streaming::factory::global::Registry;
 use crate::runtime::streaming::operators::PassthroughOperator;
 use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor;
 use crate::runtime::streaming::operators::joins::{
@@ -31,34 +32,13 @@ use crate::runtime::streaming::operators::windows::{
     TumblingAggregateWindowConstructor, WindowFunctionConstructor,
 };
 
-pub mod kafka_factory;
-
-use kafka_factory::{register_kafka_plugins, KafkaSinkDispatcher, KafkaSourceDispatcher};
-
 use protocol::grpc::api::{
-    ConnectorOp, ExpressionWatermarkConfig,
-    JoinOperator as JoinOperatorProto,
-    KeyPlanOperator as KeyByProto,
-    SessionWindowAggregateOperator, SlidingWindowAggregateOperator,
+    ExpressionWatermarkConfig, JoinOperator as JoinOperatorProto,
+    KeyPlanOperator as KeyByProto, SessionWindowAggregateOperator, SlidingWindowAggregateOperator,
     TumblingWindowAggregateOperator, UpdatingAggregateOperator,
     WindowFunctionOperator as WindowFunctionProto,
 };
 
-// ---------------------------------------------------------------------------
-// 1. Core Trait (工厂契约)
-// ---------------------------------------------------------------------------
-
-/// 算子构造器 trait：每个实现者负责从 protobuf 字节流反序列化配置并构造 [`ConstructedOperator`]。
-///
-/// 外部插件可实现此 trait 并通过 [`OperatorFactory::register`] 注入。
-pub trait OperatorConstructor: Send + Sync {
-    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator>;
-}
-
-// ---------------------------------------------------------------------------
-// 2. 工业级工厂注册表
-// ---------------------------------------------------------------------------
-
 /// 持有 `name → OperatorConstructor` 映射与共享 [`Registry`]。
 ///
 /// `JobManager` 在部署任务时调用 [`create_operator`]，完成从字节流到运行时算子的
@@ -126,23 +106,16 @@ impl OperatorFactory {
         // ─── 物理网络路由 ───
         self.register("KeyBy", Box::new(KeyByBridge));
 
-        // ─── 连接器 Source / Sink（分发器模式，不硬编码具体连接器） ───
-        self.register("ConnectorSource", Box::new(ConnectorSourceDispatcher));
-        self.register("ConnectorSink", Box::new(ConnectorSinkDispatcher));
-
         // ─── 透传类算子 ───
         self.register("Projection", Box::new(PassthroughConstructor("Projection")));
         self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue")));
         self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey")));
 
-        register_kafka_plugins(self);
+        crate::runtime::streaming::factory::register_builtin_connectors(self);
+        crate::runtime::streaming::factory::register_kafka_connector_plugins(self);
     }
 }
 
-// ---------------------------------------------------------------------------
-// 3. 构造器适配 — 解码 protobuf 后委托给各算子模块的 Constructor
-// ---------------------------------------------------------------------------
-
 struct TumblingWindowBridge;
 impl OperatorConstructor for TumblingWindowBridge {
     fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
@@ -240,46 +213,6 @@ impl OperatorConstructor for KeyByBridge {
     }
 }
 
-// ---------------------------------------------------------------------------
-// 4. 连接器分发抽象 (Connector Dispatcher) — 不硬编码具体连接器
-// ---------------------------------------------------------------------------
-
-pub struct ConnectorSourceDispatcher;
-
-impl OperatorConstructor for ConnectorSourceDispatcher {
-    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
-        let op = ConnectorOp::decode(config)
-            .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?;
-
-        match op.connector.as_str() {
-            ct if ct == connector_type::KAFKA => KafkaSourceDispatcher.with_config(config, registry),
-            ct if ct == connector_type::REDIS => Err(anyhow!(
-                "ConnectorSource '{}' factory wiring not yet implemented",
-                op.connector
-            )),
-            other => Err(anyhow!("Unsupported source connector type: {}", other)),
-        }
-    }
-}
-
-pub struct ConnectorSinkDispatcher;
-
-impl OperatorConstructor for ConnectorSinkDispatcher {
-    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
-        let op = ConnectorOp::decode(config)
-            .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?;
-
-        match op.connector.as_str() {
-            ct if ct == connector_type::KAFKA => KafkaSinkDispatcher.with_config(config, registry),
-            other => Err(anyhow!("Unsupported sink connector type: {}", other)),
-        }
-    }
-}
-
-// ---------------------------------------------------------------------------
-// 5. 透传类算子
-// ---------------------------------------------------------------------------
-
 pub struct PassthroughConstructor(pub &'static str);
 
 impl OperatorConstructor for PassthroughConstructor {
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
index 844131a0..e13279e1 100644
--- a/src/runtime/streaming/job/job_manager.rs
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -61,9 +61,10 @@ impl JobManager {
             .ok_or_else(|| anyhow!("JobManager not initialized. Call init() first."))
     }
 
-    /// 核心主干：从逻辑计划点火物理流水线
-    pub async fn submit_job(&self, program: FsProgram) -> anyhow::Result<String> {
-        let job_id = format!("job-{}", chrono::Utc::now().timestamp_millis());
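+    /// Core entry point: ignites the physical pipeline from the logical plan.
+    ///
+    /// `job_id` is supplied by the caller (must be globally unique); it is used for thread naming, `PhysicalExecutionGraph`, [`Self::stop_job`], etc.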
+    pub async fn submit_job(&self, job_id: String, program: FsProgram) -> anyhow::Result<String> {
         let mut edge_manager = EdgeManager::build(&program.nodes, &program.edges);
         let mut pipelines = HashMap::new();
 
diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs
index 06cab2ee..4dd6316b 100644
--- a/src/runtime/streaming/lib.rs
+++ b/src/runtime/streaming/lib.rs
@@ -14,7 +14,6 @@
 //! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`).
 
 pub mod api;
-pub mod arrow;
 pub mod error;
 pub mod execution;
 pub mod factory;
diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs
index 4a761460..1da5f952 100644
--- a/src/runtime/streaming/mod.rs
+++ b/src/runtime/streaming/mod.rs
@@ -14,7 +14,6 @@
 //! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`).
 
 pub mod api;
-pub mod arrow;
 pub mod error;
 pub mod execution;
 pub mod factory;
diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
index 2d2abf18..5d174323 100644
--- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -44,8 +44,9 @@ use protocol::grpc::api::UpdatingAggregateOperator;
 // 引入全新的 Actor 框架核心协议 (取代了老旧的 ArrowOperator 和 Collector)
 // =========================================================================
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
-use crate::runtime::streaming::arrow::decode_aggregate;
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::factory::Registry;
+use crate::runtime::util::decode_aggregate;
 use crate::runtime::streaming::operators::{Key, UpdatingCache};
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{to_nanos, CheckpointBarrier, FsSchema, Watermark, TIMESTAMP_FIELD, UPDATING_META_FIELD};
diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs
index cd5b3764..7fe32d6e 100644
--- a/src/runtime/streaming/operators/joins/join_instance.rs
+++ b/src/runtime/streaming/operators/joins/join_instance.rs
@@ -30,7 +30,8 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::factory::Registry;
 use async_trait::async_trait;
 use protocol::grpc::api::JoinOperator;
 use crate::runtime::streaming::StreamOutput;
diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs
index 34d15932..9e6de6c9 100644
--- a/src/runtime/streaming/operators/joins/join_with_expiration.rs
+++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs
@@ -28,7 +28,8 @@ use std::time::{Duration, SystemTime};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::factory::Registry;
 use async_trait::async_trait;
 use protocol::grpc::api::JoinOperator;
 use crate::runtime::streaming::StreamOutput;
diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs
index dccc561d..aecef032 100644
--- a/src/runtime/streaming/operators/sink/kafka/mod.rs
+++ b/src/runtime/streaming/operators/sink/kafka/mod.rs
@@ -31,6 +31,7 @@ use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
 use crate::runtime::streaming::format::DataSerializer;
 use crate::runtime::streaming::StreamOutput;
+use crate::sql::common::constants::factory_operator_name;
 use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark};
 // ============================================================================
 // 1. 领域模型：一致性级别与事务状态机
@@ -210,7 +211,7 @@ fn row_key_bytes(batch: &RecordBatch, row: usize, col: usize) -> Option<Vec<u8>>
 #[async_trait]
 impl MessageOperator for KafkaSinkOperator {
     fn name(&self) -> &str {
-        "KafkaSink"
+        factory_operator_name::KAFKA_SINK
     }
 
     async fn on_start(&mut self, ctx: &mut TaskContext) -> Result<()> {
diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs
index dee92eb3..188015e2 100644
--- a/src/runtime/streaming/operators/stateless_physical_executor.rs
+++ b/src/runtime/streaming/operators/stateless_physical_executor.rs
@@ -26,7 +26,7 @@ use datafusion_proto::protobuf::PhysicalPlanNode;
 use futures::StreamExt;
 use prost::Message;
 
-use crate::runtime::streaming::api::operator::Registry;
+use crate::runtime::streaming::factory::Registry;
 use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 
 pub struct StatelessPhysicalExecutor {
diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs
index 2b255f9b..63f5acec 100644
--- a/src/runtime/streaming/operators/watermark/watermark_generator.rs
+++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs
@@ -28,7 +28,8 @@ use std::time::{Duration, SystemTime};
 use tracing::{debug, info};
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::factory::Registry;
 use async_trait::async_trait;
 use protocol::grpc::api::ExpressionWatermarkConfig;
 use crate::runtime::streaming::StreamOutput;
diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
index 73642992..aaf65cf1 100644
--- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
@@ -39,7 +39,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
 use async_trait::async_trait;
-use crate::runtime::streaming::api::operator::Registry;
+use crate::runtime::streaming::factory::Registry;
 use protocol::grpc::api::SessionWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
index 7d801fd6..6f0aa7f9 100644
--- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
@@ -39,7 +39,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
 use async_trait::async_trait;
-use crate::runtime::streaming::api::operator::Registry;
+use crate::runtime::streaming::factory::Registry;
 use protocol::grpc::api::SlidingWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
index 004cc205..30724f59 100644
--- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
@@ -39,7 +39,7 @@ use tracing::warn;
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
 use async_trait::async_trait;
-use crate::runtime::streaming::api::operator::Registry;
+use crate::runtime::streaming::factory::Registry;
 use protocol::grpc::api::TumblingWindowAggregateOperator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs
index 641b0ef6..f6ae2a1a 100644
--- a/src/runtime/streaming/operators/windows/window_function.rs
+++ b/src/runtime/streaming/operators/windows/window_function.rs
@@ -32,7 +32,8 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{MessageOperator, Registry};
+use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::factory::Registry;
 use async_trait::async_trait;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
diff --git a/src/runtime/util/mod.rs b/src/runtime/util/mod.rs
new file mode 100644
index 00000000..3b4c7e60
--- /dev/null
+++ b/src/runtime/util/mod.rs
@@ -0,0 +1,17 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Utility functions reused across runtime subsystems (physical-plan proto decoding, etc.).
+
+mod physical_aggregate;
+
+pub use physical_aggregate::decode_aggregate;
diff --git a/src/runtime/streaming/arrow/mod.rs b/src/runtime/util/physical_aggregate.rs
similarity index 95%
rename from src/runtime/streaming/arrow/mod.rs
rename to src/runtime/util/physical_aggregate.rs
index d706199f..83a6e3bd 100644
--- a/src/runtime/streaming/arrow/mod.rs
+++ b/src/runtime/util/physical_aggregate.rs
@@ -10,9 +10,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Arrow / DataFusion 辅助：聚合表达式解码等。
-//!
-//! `UpdatingCache` 位于 [`crate::runtime::streaming::operators::updating_cache`]。
+//! Decodes aggregate (UDAF) expressions from DataFusion proto physical-expression nodes.
+
+use std::sync::Arc;
 
 use arrow::datatypes::SchemaRef;
 use datafusion::common::internal_err;
@@ -25,9 +25,8 @@ use datafusion_proto::physical_plan::{DefaultPhysicalExtensionCodec, PhysicalExt
 use datafusion_proto::protobuf::physical_aggregate_expr_node::AggregateFunction;
 use datafusion_proto::protobuf::physical_expr_node::ExprType;
 use datafusion_proto::protobuf::{PhysicalExprNode, proto_error};
-use std::sync::Arc;
 
-/// 从 `PhysicalExprNode` 解码 UDAF 聚合表达式（与 worker `arrow/mod` 一致）。
+/// Decodes a UDAF aggregate expression from a `PhysicalExprNode` (semantically consistent with the historical worker `arrow/mod` path).
 pub fn decode_aggregate(
     schema: &SchemaRef,
     name: &str,
diff --git a/src/server/initializer.rs b/src/server/initializer.rs
index 7786169a..a73ec14a 100644
--- a/src/server/initializer.rs
+++ b/src/server/initializer.rs
@@ -152,7 +152,7 @@ fn initialize_python_service(config: &GlobalConfig) -> Result<()> {
 }
 
 fn initialize_job_manager(config: &GlobalConfig) -> Result<()> {
-    use crate::runtime::streaming::api::operator::Registry;
+    use crate::runtime::streaming::factory::Registry;
     use crate::runtime::streaming::factory::OperatorFactory;
     use crate::runtime::streaming::job::JobManager;
     use std::sync::Arc;
diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs
index 5f249227..cf2a39cc 100644
--- a/src/sql/common/constants.rs
+++ b/src/sql/common/constants.rs
@@ -98,6 +98,15 @@ pub mod runtime_operator_kind {
     pub const STREAMING_WINDOW_EVALUATOR: &str = "streaming_window_evaluator";
 }
 
+// ── Worker [`OperatorFactory`] 注册键（须与任务包 `operator_name`、`OperatorName::Display` 一致）──
+
+pub mod factory_operator_name {
+    pub const CONNECTOR_SOURCE: &str = "ConnectorSource";
+    pub const CONNECTOR_SINK: &str = "ConnectorSink";
+    pub const KAFKA_SOURCE: &str = "KafkaSource";
+    pub const KAFKA_SINK: &str = "KafkaSink";
+}
+
 // ── Debezium CDC 信封字段 ───────────────────────────────────────────────────
 
 pub mod cdc {

From 18a19f1d8a33f6d005f3e39b7c85daad8abce10a Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 17:20:20 +0800
Subject: [PATCH 28/44] update

---
 src/runtime/streaming/api/context.rs          |  9 ----
 src/runtime/streaming/api/mod.rs              |  1 -
 src/runtime/streaming/api/operator.rs         |  6 ---
 src/runtime/streaming/api/source.rs           |  5 ---
 src/runtime/streaming/error.rs                |  9 ----
 src/runtime/streaming/execution/mod.rs        |  1 -
 src/runtime/streaming/execution/runner.rs     |  5 ---
 src/runtime/streaming/execution/source.rs     |  2 -
 .../execution/tracker/barrier_aligner.rs      |  3 --
 .../streaming/execution/tracker/mod.rs        |  1 -
 .../factory/connector/dispatchers.rs          |  1 -
 .../streaming/factory/connector/kafka.rs      |  4 --
 .../streaming/factory/connector/mod.rs        |  1 -
 src/runtime/streaming/factory/global/mod.rs   |  1 -
 .../factory/global/session_registry.rs        |  4 --
 src/runtime/streaming/factory/mod.rs          |  4 --
 .../streaming/factory/operator_constructor.rs |  3 --
 .../streaming/factory/operator_factory.rs     | 11 -----
 src/runtime/streaming/format/config.rs        |  3 --
 src/runtime/streaming/format/deserializer.rs  |  2 -
 src/runtime/streaming/format/json_encoder.rs  |  7 ----
 src/runtime/streaming/format/serializer.rs    |  1 -
 src/runtime/streaming/job/edge_manager.rs     |  1 -
 src/runtime/streaming/job/job_manager.rs      |  3 --
 src/runtime/streaming/job/models.rs           |  3 --
 src/runtime/streaming/memory/pool.rs          |  1 -
 src/runtime/streaming/memory/ticket.rs        |  3 --
 src/runtime/streaming/network/endpoint.rs     |  7 ----
 src/runtime/streaming/network/environment.rs  |  1 -
 .../grouping/incremental_aggregate.rs         | 14 -------
 .../operators/grouping/updating_cache.rs      |  2 -
 .../operators/joins/join_instance.rs          |  5 ---
 .../operators/joins/join_with_expiration.rs   |  5 ---
 src/runtime/streaming/operators/key_by.rs     |  6 ---
 .../streaming/operators/key_operator.rs       | 18 --------
 src/runtime/streaming/operators/mod.rs        |  2 -
 src/runtime/streaming/operators/projection.rs |  2 -
 .../streaming/operators/sink/kafka/mod.rs     |  4 --
 src/runtime/streaming/operators/sink/mod.rs   |  1 -
 .../streaming/operators/source/kafka/mod.rs   | 11 -----
 src/runtime/streaming/operators/source/mod.rs |  1 -
 .../operators/stateless_physical_executor.rs  |  1 -
 .../streaming/operators/value_execution.rs    |  2 -
 .../watermark/watermark_generator.rs          |  7 ----
 .../windows/session_aggregating_window.rs     |  7 +---
 .../windows/sliding_aggregating_window.rs     |  8 ----
 .../windows/tumbling_aggregating_window.rs    |  1 -
 .../operators/windows/window_function.rs      |  7 ----
 src/runtime/streaming/protocol/control.rs     |  4 --
 src/runtime/streaming/protocol/event.rs       |  1 -
 src/runtime/streaming/protocol/mod.rs         |  1 -
 src/runtime/streaming/protocol/stream_out.rs  |  5 ---
 src/runtime/streaming/protocol/tracked.rs     |  4 --
 src/runtime/streaming/protocol/watermark.rs   |  4 --
 src/runtime/util/mod.rs                       |  1 -
 src/runtime/util/physical_aggregate.rs        |  2 -
 src/sql/common/constants.rs                   | 41 -------------------
 src/sql/common/kafka_catalog.rs               |  8 ----
 src/sql/common/operator_config.rs             |  1 -
 src/sql/common/with_option_keys.rs            |  6 ---
 src/sql/logical_node/logical/operator_name.rs |  1 -
 src/sql/physical/cdc/encode.rs                |  2 -
 src/sql/physical/cdc/mod.rs                   |  1 -
 src/sql/physical/cdc/unroll.rs                |  2 -
 src/sql/physical/codec.rs                     |  1 -
 src/sql/physical/meta.rs                      |  1 -
 src/sql/physical/mod.rs                       |  1 -
 src/sql/physical/readers.rs                   |  1 -
 src/sql/physical/udfs.rs                      |  1 -
 69 files changed, 1 insertion(+), 295 deletions(-)

diff --git a/src/runtime/streaming/api/context.rs b/src/runtime/streaming/api/context.rs
index b70d40df..f0c3dfcb 100644
--- a/src/runtime/streaming/api/context.rs
+++ b/src/runtime/streaming/api/context.rs
@@ -52,15 +52,12 @@ impl TaskContext {
     }
 
     // ========================================================================
-    // 水位线与时间流管理 API
     // ========================================================================
 
-    /// 供业务算子调用：获取当前任务的安全水位线
     pub fn last_present_watermark(&self) -> Option<std::time::SystemTime> {
         self.current_watermark
     }
 
-    /// 供底座框架 (SubtaskRunner) 调用：推进本地时间，保证单调递增
     pub fn advance_watermark(&mut self, watermark: std::time::SystemTime) {
         if let Some(current) = self.current_watermark {
             if watermark > current {
@@ -72,10 +69,8 @@ impl TaskContext {
     }
 
     // ========================================================================
-    // 可观测性 API (Observability)
     // ========================================================================
 
-    /// 格式化当前 Task 的唯一标识，用于分布式追踪和日志打印
     pub fn task_identity(&self) -> String {
         format!(
             "Job[{}], Vertex[{}], Subtask[{}/{}]",
@@ -84,10 +79,8 @@ impl TaskContext {
     }
 
     // ========================================================================
-    // 背压网络发送 API
     // ========================================================================
 
-    /// 受内存池管控的数据发送：申请精准字节的内存船票后广播到所有下游
     pub async fn collect(&self, batch: RecordBatch) -> anyhow::Result<()> {
         if self.outboxes.is_empty() {
             return Ok(());
@@ -103,7 +96,6 @@ impl TaskContext {
         Ok(())
     }
 
-    /// 按 Key 哈希路由到单分区（用于 Shuffle / KeyBy）
     pub async fn collect_keyed(
         &self,
         key_hash: u64,
@@ -122,7 +114,6 @@ impl TaskContext {
         Ok(())
     }
 
-    /// 广播控制信号（如 Watermark, Barrier：不申请内存船票，保证在拥堵时畅通无阻）
     pub async fn broadcast(&self, event: StreamEvent) -> anyhow::Result<()> {
         let tracked_event = TrackedEvent::control(event);
         for outbox in &self.outboxes {
diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs
index aec4b8fb..f004de58 100644
--- a/src/runtime/streaming/api/mod.rs
+++ b/src/runtime/streaming/api/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 接口层：算子与源实现需遵循的 trait 与运行时上下文。
 
 pub mod context;
 pub mod operator;
diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs
index 29d24f82..4683379b 100644
--- a/src/runtime/streaming/api/operator.rs
+++ b/src/runtime/streaming/api/operator.rs
@@ -22,13 +22,11 @@ use crate::sql::common::{CheckpointBarrier, Watermark};
 // ConstructedOperator
 // ---------------------------------------------------------------------------
 
-/// 工厂反射产出的具体算子实例
 pub enum ConstructedOperator {
     Source(Box<dyn SourceOperator>),
     Operator(Box<dyn MessageOperator>),
 }
 
-/// 多上游、被动驱动的消息算子。
 #[async_trait]
 pub trait MessageOperator: Send + 'static {
     fn name(&self) -> &str;
@@ -37,7 +35,6 @@ pub trait MessageOperator: Send + 'static {
         Ok(())
     }
 
-    /// `input_idx`：多输入拓扑下第几条边（与 `SubtaskRunner` 的 inbox 下标一致；单输入恒为 0）。
     async fn process_data(
         &mut self,
         input_idx: usize,
@@ -57,7 +54,6 @@ pub trait MessageOperator: Send + 'static {
         ctx: &mut TaskContext,
     ) -> anyhow::Result<()>;
 
-    /// 全局 checkpoint 确认后由 `SubtaskRunner` 在 [`ControlCommand::Commit`] 上调用（如 Kafka EOS 二阶段提交）。
     async fn commit_checkpoint(
         &mut self,
         _epoch: u32,
@@ -66,12 +62,10 @@ pub trait MessageOperator: Send + 'static {
         Ok(())
     }
 
-    /// 周期性时钟（如 Idle 检测）；`None` 表示不注册 tick。
     fn tick_interval(&self) -> Option<Duration> {
         None
     }
 
-    /// 与 [`Self::tick_interval`] 配套，由 `SubtaskRunner` 按固定间隔调用。
     async fn process_tick(
         &mut self,
         _tick_index: u64,
diff --git a/src/runtime/streaming/api/source.rs b/src/runtime/streaming/api/source.rs
index 1f79de38..f46f3de7 100644
--- a/src/runtime/streaming/api/source.rs
+++ b/src/runtime/streaming/api/source.rs
@@ -10,14 +10,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 源算子：由 [`crate::runtime::streaming::execution::SourceRunner`] 驱动 `fetch_next`，不得在内部死循环阻塞控制面。
 
 use crate::runtime::streaming::api::context::TaskContext;
 use arrow_array::RecordBatch;
 use async_trait::async_trait;
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
-/// Kafka 等外部源在 **无已存位点** 时的起始消费策略（与 `arroyo-connectors` 语义对齐）。
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
 pub enum SourceOffset {
     Earliest,
@@ -30,7 +28,6 @@ pub enum SourceOffset {
 pub enum SourceEvent {
     Data(RecordBatch),
     Watermark(Watermark),
-    /// 无数据可读：必须由 Runner 调度退避，禁止在 `fetch_next` 内长时间阻塞。
     Idle,
     EndOfStream,
 }
@@ -43,10 +40,8 @@ pub trait SourceOperator: Send + 'static {
         Ok(())
     }
 
-    /// 核心拉取：无数据时必须返回 [`SourceEvent::Idle`]，严禁内部阻塞控制面。
     async fn fetch_next(&mut self, ctx: &mut TaskContext) -> anyhow::Result<SourceEvent>;
 
-    /// 独立于 `fetch_next` 的水位线脉搏（例如解决 Idle 时仍要推进水印）。
     fn poll_watermark(&mut self) -> Option<Watermark> {
         None
     }
diff --git a/src/runtime/streaming/error.rs b/src/runtime/streaming/error.rs
index c8d1944a..178f5bbb 100644
--- a/src/runtime/streaming/error.rs
+++ b/src/runtime/streaming/error.rs
@@ -13,42 +13,33 @@
 use std::fmt::Display;
 use thiserror::Error;
 
-/// 流水线 / 子任务运行期间的错误定义。
 #[derive(Debug, Error)]
 pub enum RunError {
-    /// 算子内部业务逻辑抛出的错误
     #[error("Operator execution failed: {0:#}")]
     Operator(#[from] anyhow::Error),
 
-    /// 向下游 Task 发送数据/信号时通道阻塞或断开
     #[error("Downstream send failed: {0}")]
     DownstreamSend(String),
 
-    /// 引擎内部状态机错误或拓扑规划错误（如：DAG 为空、在链条中间发生 Shuffle）
     #[error("Internal engine error: {0}")]
     Internal(String),
 
-    /// Checkpoint 状态持久化或恢复时发生的错误
     #[error("State backend error: {0}")]
     State(String),
 
-    /// 底层网络或文件 I/O 错误
     #[error("I/O error: {0}")]
     Io(#[from] std::io::Error),
 }
 
 impl RunError {
-    /// 快捷构造器：引擎内部错误（常用于防御性编程和边界校验）
     pub fn internal<T: Display>(msg: T) -> Self {
         Self::Internal(msg.to_string())
     }
 
-    /// 快捷构造器：下游发送异常
     pub fn downstream<T: Display>(msg: T) -> Self {
         Self::DownstreamSend(msg.to_string())
     }
 
-    /// 快捷构造器：状态后端异常
     pub fn state<T: Display>(msg: T) -> Self {
         Self::State(msg.to_string())
     }
diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs
index a4fb6d95..40beabe4 100644
--- a/src/runtime/streaming/execution/mod.rs
+++ b/src/runtime/streaming/execution/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 执行层：Tokio Actor 运行容器。
 
 pub mod runner;
 pub mod source;
diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs
index 994d8c04..d43f052d 100644
--- a/src/runtime/streaming/execution/runner.rs
+++ b/src/runtime/streaming/execution/runner.rs
@@ -32,7 +32,6 @@ use crate::runtime::streaming::execution::tracker::{
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
 // ==========================================
-// 第一部分：逻辑处理层 - 算子融合链 (Logical Driver)
 // ==========================================
 
 #[async_trait]
@@ -62,7 +61,6 @@ impl ChainedDriver {
         Self { operator, next }
     }
 
-    /// 从后往前组装算子，构建责任链
     pub fn build_chain(mut operators: Vec<Box<dyn MessageOperator>>) -> Option<Box<dyn OperatorDrive>> {
         if operators.is_empty() {
             return None;
@@ -227,7 +225,6 @@ impl OperatorDrive for ChainedDriver {
 }
 
 // ==========================================
-// 第二部分：物理执行层 - 流水线 (Physical Driver)
 // ==========================================
 
 pub struct Pipeline {
@@ -238,7 +235,6 @@ pub struct Pipeline {
 
     wm_tracker: WatermarkTracker,
     barrier_aligner: BarrierAligner,
-    /// Barrier 未对齐时从轮询池移除的输入流（背压）
     paused_streams: Vec<Option<BoxedEventStream>>,
 }
 
@@ -376,5 +372,4 @@ impl Pipeline {
     }
 }
 
-/// 与执行引擎语义对齐的别名
 pub type SubtaskRunner = Pipeline;
diff --git a/src/runtime/streaming/execution/source.rs b/src/runtime/streaming/execution/source.rs
index a9fbd561..a85b0839 100644
--- a/src/runtime/streaming/execution/source.rs
+++ b/src/runtime/streaming/execution/source.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 源任务物理驱动：控制面优先、`fetch_next` 非阻塞契约、可选融合算子链下推。
 
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::source::{SourceEvent, SourceOperator};
@@ -30,7 +29,6 @@ pub const WATERMARK_EMIT_INTERVAL: Duration = Duration::from_millis(200);
 
 pub struct SourceRunner {
     operator: Box<dyn SourceOperator>,
-    /// 有链时数据与信号经链尾再 `collect` / `broadcast`；无链则直接走 `TaskContext`。
     chain_head: Option<Box<dyn OperatorDrive>>,
     ctx: TaskContext,
     control_rx: Receiver<ControlCommand>,
diff --git a/src/runtime/streaming/execution/tracker/barrier_aligner.rs b/src/runtime/streaming/execution/tracker/barrier_aligner.rs
index 34b5380a..b227e439 100644
--- a/src/runtime/streaming/execution/tracker/barrier_aligner.rs
+++ b/src/runtime/streaming/execution/tracker/barrier_aligner.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Chandy–Lamport 风格屏障对齐（零内存缓冲：未对齐时从轮询池移除输入流，依赖底层背压）。
 
 use std::collections::HashSet;
 
@@ -18,9 +17,7 @@ use crate::sql::common::CheckpointBarrier;
 
 #[derive(Debug)]
 pub enum AlignmentStatus {
-    /// 未对齐：外层应将当前通道从 `StreamMap` 挂起（Pause）。
     Pending,
-    /// 已对齐：外层触发快照并唤醒所有挂起通道（Resume）。
     Complete,
 }
 
diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/runtime/streaming/execution/tracker/mod.rs
index 81329c27..b00ee0a2 100644
--- a/src/runtime/streaming/execution/tracker/mod.rs
+++ b/src/runtime/streaming/execution/tracker/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 协调层：屏障对齐与多路水位线追踪。
 
 pub mod barrier_aligner;
 pub mod watermark_tracker;
diff --git a/src/runtime/streaming/factory/connector/dispatchers.rs b/src/runtime/streaming/factory/connector/dispatchers.rs
index dcdd1e32..cca85c1a 100644
--- a/src/runtime/streaming/factory/connector/dispatchers.rs
+++ b/src/runtime/streaming/factory/connector/dispatchers.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Source / Sink 连接器协议：按 [`ConnectorOp::connector`] 分发到具体实现。
 
 use anyhow::{anyhow, Result};
 use prost::Message;
diff --git a/src/runtime/streaming/factory/connector/kafka.rs b/src/runtime/streaming/factory/connector/kafka.rs
index ab72ea9e..7e548cec 100644
--- a/src/runtime/streaming/factory/connector/kafka.rs
+++ b/src/runtime/streaming/factory/connector/kafka.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Kafka Source/Sink：从 [`ConnectorOp`] + [`OperatorConfig`] 构造物理算子（鉴权与 client 配置合并）。
 
 use anyhow::{anyhow, bail, Context, Result};
 use prost::Message;
@@ -43,7 +42,6 @@ use crate::sql::common::{FsSchema, OperatorConfig};
 
 const DEFAULT_SOURCE_BATCH_SIZE: usize = 1024;
 
-/// 合并连接级鉴权、全局 `connection_properties` 与表级 `client_configs`（表级覆盖同名键）。
 pub fn build_client_configs(config: &KafkaConfig, table: &KafkaTable) -> Result<HashMap<String, String>> {
     let mut client_configs = HashMap::new();
 
@@ -184,7 +182,6 @@ fn decode_operator_config(op: &ConnectorOp) -> Result<OperatorConfig> {
     })
 }
 
-/// 由 [`ConnectorOp`] 构造 Kafka Source（`connector` 须为 `kafka`）。
 pub struct KafkaSourceDispatcher;
 
 impl OperatorConstructor for KafkaSourceDispatcher {
@@ -263,7 +260,6 @@ impl OperatorConstructor for KafkaSourceDispatcher {
     }
 }
 
-/// 由 [`ConnectorOp`] 构造 Kafka Sink（`connector` 须为 `kafka`）。
 pub struct KafkaSinkDispatcher;
 
 impl OperatorConstructor for KafkaSinkDispatcher {
diff --git a/src/runtime/streaming/factory/connector/mod.rs b/src/runtime/streaming/factory/connector/mod.rs
index 3b8af292..be63478d 100644
--- a/src/runtime/streaming/factory/connector/mod.rs
+++ b/src/runtime/streaming/factory/connector/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Source / Sink 连接器：`ConnectorOp` 分发与各连接器实现（如 Kafka）。
 
 mod dispatchers;
 pub mod kafka;
diff --git a/src/runtime/streaming/factory/global/mod.rs b/src/runtime/streaming/factory/global/mod.rs
index 9434c157..0dc2130e 100644
--- a/src/runtime/streaming/factory/global/mod.rs
+++ b/src/runtime/streaming/factory/global/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 全局运行时资源：共享 [`Registry`]（Session + UDF 表），与连接器协议无关。
 
 mod session_registry;
 
diff --git a/src/runtime/streaming/factory/global/session_registry.rs b/src/runtime/streaming/factory/global/session_registry.rs
index ef32c30e..4b7895a2 100644
--- a/src/runtime/streaming/factory/global/session_registry.rs
+++ b/src/runtime/streaming/factory/global/session_registry.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 运行时 UDF / UDAF / UDWF 查询表（基于 DataFusion [`SessionContext`]）。
 
 use std::collections::HashSet;
 use std::sync::Arc;
@@ -21,10 +20,7 @@ use datafusion::execution::FunctionRegistry;
 use datafusion::logical_expr::planner::ExprPlanner;
 use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF};
 
-/// 为物理计划反序列化等路径提供 [`FunctionRegistry`] 实现。
 ///
-/// 由 [`crate::runtime::streaming::factory::OperatorFactory`] 持有 `Arc<Registry>`，
-/// 与各 [`crate::runtime::streaming::factory::OperatorConstructor`] 共享；须显式 [`Self::new`] 构造，无默认实例。
 pub struct Registry {
     ctx: SessionContext,
 }
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
index 8a7e686a..024dfb14 100644
--- a/src/runtime/streaming/factory/mod.rs
+++ b/src/runtime/streaming/factory/mod.rs
@@ -10,8 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 流算子工厂：[`global`] 为共享注册表；[`connector`] 为 Source/Sink 协议与实现；
-//! [`OperatorFactory`]、[`OperatorConstructor`] 在根模块，避免与 `connector` 循环依赖。
 
 pub mod connector;
 pub mod global;
@@ -33,7 +31,6 @@ pub use operator_factory::OperatorFactory;
 #[allow(unused_imports)]
 pub use operator_factory::PassthroughConstructor;
 
-/// 注册 `ConnectorSource` / `ConnectorSink` 分发器（打破 `operator_factory` ↔ `connector` 依赖环）。
 fn register_builtin_connectors(factory: &mut OperatorFactory) {
     factory.register(
         factory_operator_name::CONNECTOR_SOURCE,
@@ -45,7 +42,6 @@ fn register_builtin_connectors(factory: &mut OperatorFactory) {
     );
 }
 
-/// 注册直连 Kafka 算子（名称见 [`crate::sql::common::constants::factory_operator_name`]）。
 fn register_kafka_connector_plugins(factory: &mut OperatorFactory) {
     factory.register(
         factory_operator_name::KAFKA_SOURCE,
diff --git a/src/runtime/streaming/factory/operator_constructor.rs b/src/runtime/streaming/factory/operator_constructor.rs
index b6b6203f..832fe734 100644
--- a/src/runtime/streaming/factory/operator_constructor.rs
+++ b/src/runtime/streaming/factory/operator_constructor.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 算子构造协议：与具体连接器实现解耦，供 [`super::OperatorFactory`] 与 `connector` 共用。
 
 use anyhow::Result;
 use std::sync::Arc;
@@ -18,9 +17,7 @@ use std::sync::Arc;
 use crate::runtime::streaming::api::operator::ConstructedOperator;
 use crate::runtime::streaming::factory::global::Registry;
 
-/// 算子构造器 trait：每个实现者负责从 protobuf 字节流反序列化配置并构造 [`ConstructedOperator`]。
 ///
-/// 外部插件可实现此 trait 并通过 [`crate::runtime::streaming::factory::OperatorFactory::register`] 注入。
 pub trait OperatorConstructor: Send + Sync {
     fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator>;
 }
diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs
index dfc6bb87..a95c0241 100644
--- a/src/runtime/streaming/factory/operator_factory.rs
+++ b/src/runtime/streaming/factory/operator_factory.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 全局算子工厂：内置窗口 / Join / KeyBy 等 Bridge。
 
 use anyhow::{anyhow, Result};
 use prost::Message;
@@ -39,10 +38,7 @@ use protocol::grpc::api::{
     WindowFunctionOperator as WindowFunctionProto,
 };
 
-/// 持有 `name → OperatorConstructor` 映射与共享 [`Registry`]。
 ///
-/// `JobManager` 在部署任务时调用 [`create_operator`]，完成从字节流到运行时算子的
-/// 反射式实例化。
 pub struct OperatorFactory {
     constructors: HashMap<String, Box<dyn OperatorConstructor>>,
     registry: Arc<Registry>,
@@ -62,7 +58,6 @@ impl OperatorFactory {
         self.constructors.insert(name.to_string(), constructor);
     }
 
-    /// 反射与实例化：从 TDD 的字节流中拉起运行时的业务算子
     pub fn create_operator(&self, name: &str, payload: &[u8]) -> Result<ConstructedOperator> {
         let ctor = self
             .constructors
@@ -78,18 +73,15 @@ impl OperatorFactory {
         ctor.with_config(payload, self.registry.clone())
     }
 
-    /// 列出已注册的所有算子名称（调试用）。
     pub fn registered_operators(&self) -> Vec<&str> {
         self.constructors.keys().map(|s| s.as_str()).collect()
     }
 
     fn register_builtins(&mut self) {
-        // ─── 窗口聚合 ───
         self.register("TumblingWindowAggregate", Box::new(TumblingWindowBridge));
         self.register("SlidingWindowAggregate", Box::new(SlidingWindowBridge));
         self.register("SessionWindowAggregate", Box::new(SessionWindowBridge));
 
-        // ─── 水位 ───
         self.register("ExpressionWatermark", Box::new(WatermarkBridge));
 
         // ─── SQL Window Function ───
@@ -100,13 +92,10 @@ impl OperatorFactory {
         self.register("InstantJoin", Box::new(InstantJoinBridge));
         self.register("LookupJoin", Box::new(LookupJoinBridge));
 
-        // ─── 增量聚合 ───
         self.register("UpdatingAggregate", Box::new(IncrementalAggregateBridge));
 
-        // ─── 物理网络路由 ───
         self.register("KeyBy", Box::new(KeyByBridge));
 
-        // ─── 透传类算子 ───
         self.register("Projection", Box::new(PassthroughConstructor("Projection")));
         self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue")));
         self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey")));
diff --git a/src/runtime/streaming/format/config.rs b/src/runtime/streaming/format/config.rs
index 235e1d82..15a58008 100644
--- a/src/runtime/streaming/format/config.rs
+++ b/src/runtime/streaming/format/config.rs
@@ -26,12 +26,9 @@ pub enum DecimalEncoding {
     Bytes,
 }
 
-/// 数据容错策略
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub enum BadDataPolicy {
-    /// 遇到脏数据直接报错，导致算子 Panic 和重启
     Fail,
-    /// 丢弃脏数据，并记录监控 Metrics
     Drop,
 }
 
diff --git a/src/runtime/streaming/format/deserializer.rs b/src/runtime/streaming/format/deserializer.rs
index 1c32d48a..3e9e6d66 100644
--- a/src/runtime/streaming/format/deserializer.rs
+++ b/src/runtime/streaming/format/deserializer.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 数据反序列化器：将外界收到的字节流转化为结构化 [`RecordBatch`]。
 
 use anyhow::{anyhow, Result};
 use arrow_array::builder::StringBuilder;
@@ -36,7 +35,6 @@ impl DataDeserializer {
         }
     }
 
-    /// 工业级反序列化：包含完整的脏数据容错兜底
     pub fn deserialize_batch(&self, messages: &[&[u8]]) -> Result<RecordBatch> {
         match &self.format {
             Format::Json(_) => self.deserialize_json(messages),
diff --git a/src/runtime/streaming/format/json_encoder.rs b/src/runtime/streaming/format/json_encoder.rs
index 8d34e9ef..f834a192 100644
--- a/src/runtime/streaming/format/json_encoder.rs
+++ b/src/runtime/streaming/format/json_encoder.rs
@@ -10,9 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 极致优化的 Arrow JSON 编码器。
 //!
-//! 解决 Arrow 原生 JSON 导出时不兼容 Kafka / 时间戳 / Decimal 的痛点。
 
 use arrow_array::{
     Array, Decimal128Array, TimestampMicrosecondArray,
@@ -44,7 +42,6 @@ impl EncoderFactory for CustomEncoderFactory {
             &self.timestamp_format,
             array.data_type(),
         ) {
-            // ── Timestamp → Unix 毫秒 ──
             (_, TimestampFormat::UnixMillis, DataType::Timestamp(TimeUnit::Nanosecond, _)) => {
                 let arr = array
                     .as_any()
@@ -106,7 +103,6 @@ impl EncoderFactory for CustomEncoderFactory {
                 Box::new(BinaryEncoder(arr))
             }
 
-            // 其他类型：降级使用 Arrow 原生 encoder
             _ => return Ok(None),
         };
 
@@ -115,7 +111,6 @@ impl EncoderFactory for CustomEncoderFactory {
 }
 
 // ---------------------------------------------------------------------------
-// UnixMillisEncoder — 各精度 Timestamp → i64 毫秒
 // ---------------------------------------------------------------------------
 
 enum UnixMillisEncoder {
@@ -138,7 +133,6 @@ impl Encoder for UnixMillisEncoder {
 }
 
 // ---------------------------------------------------------------------------
-// DecimalEncoder — Decimal128 → JSON 字符串 / Base64 Bytes
 // ---------------------------------------------------------------------------
 
 enum DecimalEncoder {
@@ -168,7 +162,6 @@ impl Encoder for DecimalEncoder {
 }
 
 // ---------------------------------------------------------------------------
-// BinaryEncoder — Binary → Base64 字符串
 // ---------------------------------------------------------------------------
 
 struct BinaryEncoder(arrow_array::BinaryArray);
diff --git a/src/runtime/streaming/format/serializer.rs b/src/runtime/streaming/format/serializer.rs
index 7b61d055..bb123499 100644
--- a/src/runtime/streaming/format/serializer.rs
+++ b/src/runtime/streaming/format/serializer.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 数据序列化器：将内存 [`RecordBatch`] 转换为二进制消息流，供 Sink 连接器发送。
 
 use anyhow::{anyhow, Result};
 use arrow_array::{Array, RecordBatch, StructArray};
diff --git a/src/runtime/streaming/job/edge_manager.rs b/src/runtime/streaming/job/edge_manager.rs
index 53f82cb9..b57b761f 100644
--- a/src/runtime/streaming/job/edge_manager.rs
+++ b/src/runtime/streaming/job/edge_manager.rs
@@ -18,7 +18,6 @@ use tokio::sync::mpsc;
 use crate::runtime::streaming::protocol::tracked::TrackedEvent;
 
 pub struct EdgeManager {
-    // PipelineID -> (输入 Receiver, 输出 Sender 列表)
     endpoints: HashMap<u32, (Option<mpsc::Receiver<TrackedEvent>>, Vec<mpsc::Sender<TrackedEvent>>)>,
 }
 
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
index e13279e1..20ecad9f 100644
--- a/src/runtime/streaming/job/job_manager.rs
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -61,9 +61,7 @@ impl JobManager {
             .ok_or_else(|| anyhow!("JobManager not initialized. Call init() first."))
     }
 
-    /// 核心主干：从逻辑计划点火物理流水线。
     ///
-    /// `job_id` 由调用方指定（须全局唯一），用于线程命名、`PhysicalExecutionGraph` 与 [`Self::stop_job`] 等。
     pub async fn submit_job(&self, job_id: String, program: FsProgram) -> anyhow::Result<String> {
         let mut edge_manager = EdgeManager::build(&program.nodes, &program.edges);
         let mut pipelines = HashMap::new();
@@ -150,7 +148,6 @@ impl JobManager {
     }
 
     // ========================================================================
-    // 内部私有方法
     // ========================================================================
 
     fn build_operator_chain(
diff --git a/src/runtime/streaming/job/models.rs b/src/runtime/streaming/job/models.rs
index 35b48da7..45ea3bb7 100644
--- a/src/runtime/streaming/job/models.rs
+++ b/src/runtime/streaming/job/models.rs
@@ -20,7 +20,6 @@ use tokio::sync::mpsc;
 
 use crate::runtime::streaming::protocol::control::ControlCommand;
 
-/// 物理 Pipeline 的实时状态
 #[derive(Debug, Clone, PartialEq)]
 pub enum PipelineStatus {
     Initializing,
@@ -30,7 +29,6 @@ pub enum PipelineStatus {
     Stopping,
 }
 
-/// 物理执行图中的一个执行单元
 pub struct PhysicalPipeline {
     pub pipeline_id: u32,
     pub handle: Option<JoinHandle<()>>,
@@ -38,7 +36,6 @@ pub struct PhysicalPipeline {
     pub control_tx: mpsc::Sender<ControlCommand>,
 }
 
-/// 一个 SQL Job 的物理执行图
 pub struct PhysicalExecutionGraph {
     pub job_id: String,
     pub program: FsProgram,
diff --git a/src/runtime/streaming/memory/pool.rs b/src/runtime/streaming/memory/pool.rs
index 54276088..4813a63e 100644
--- a/src/runtime/streaming/memory/pool.rs
+++ b/src/runtime/streaming/memory/pool.rs
@@ -18,7 +18,6 @@ use tracing::{debug, warn};
 
 use super::ticket::MemoryTicket;
 
-/// 工业级全局内存池 (Global Memory Pool)
 #[derive(Debug)]
 pub struct MemoryPool {
     max_bytes: usize,
diff --git a/src/runtime/streaming/memory/ticket.rs b/src/runtime/streaming/memory/ticket.rs
index 1c9d2798..cb105be0 100644
--- a/src/runtime/streaming/memory/ticket.rs
+++ b/src/runtime/streaming/memory/ticket.rs
@@ -14,9 +14,6 @@ use std::sync::Arc;
 
 use super::pool::MemoryPool;
 
-/// 内存船票 (RAII Guard)
-/// 不实现 Clone：生命周期严格对应唯一的字节扣减。
-/// 跨多路广播时应包裹在 `Arc<MemoryTicket>` 中。
 #[derive(Debug)]
 pub struct MemoryTicket {
     bytes: usize,
diff --git a/src/runtime/streaming/network/endpoint.rs b/src/runtime/streaming/network/endpoint.rs
index a8525e1e..7448e9cd 100644
--- a/src/runtime/streaming/network/endpoint.rs
+++ b/src/runtime/streaming/network/endpoint.rs
@@ -19,7 +19,6 @@ use tokio_stream::Stream;
 use tracing::debug;
 
 // ========================================================================
-// 1. 网络桩 (Stub)：为后续 gRPC/TCP 扩展预留孔位
 // ========================================================================
 
 #[derive(Clone)]
@@ -34,16 +33,11 @@ impl RemoteSenderStub {
 }
 
 // ========================================================================
-// 2. 物理发送端点 (Physical Sender Endpoint)
 // ========================================================================
 
-/// 统一的物理发送端点。
-/// 算子无需知道目标是同机还是异机，只管调用 `send`。
 #[derive(Clone)]
 pub enum PhysicalSender {
-    /// 本地线程间传输，携带内存船票，零开销
     Local(mpsc::Sender<TrackedEvent>),
-    /// 跨机网络传输，需要序列化，并在发送后丢弃本地船票
     Remote(RemoteSenderStub),
 }
 
@@ -65,7 +59,6 @@ impl PhysicalSender {
 }
 
 // ========================================================================
-// 3. 物理接收端点 (Physical Receiver Endpoint)
 // ========================================================================
 
 pub type BoxedEventStream = Pin<Box<dyn Stream<Item = TrackedEvent> + Send>>;
diff --git a/src/runtime/streaming/network/environment.rs b/src/runtime/streaming/network/environment.rs
index 07ea0cab..fe8544c5 100644
--- a/src/runtime/streaming/network/environment.rs
+++ b/src/runtime/streaming/network/environment.rs
@@ -16,7 +16,6 @@ use std::collections::HashMap;
 pub type VertexId = u32;
 pub type SubtaskIndex = u32;
 
-/// 物理网络路由注册表
 pub struct NetworkEnvironment {
     pub outboxes: HashMap<(VertexId, SubtaskIndex), Vec<PhysicalSender>>,
     pub inboxes: HashMap<(VertexId, SubtaskIndex), Vec<BoxedEventStream>>,
diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
index 5d174323..4b1af6b3 100644
--- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -41,7 +41,6 @@ use std::{collections::HashMap, mem, sync::Arc};
 use tracing::{debug, warn};
 use protocol::grpc::api::UpdatingAggregateOperator;
 // =========================================================================
-// 引入全新的 Actor 框架核心协议 (取代了老旧的 ArrowOperator 和 Collector)
 // =========================================================================
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::MessageOperator;
@@ -175,7 +174,6 @@ struct Aggregator {
 }
 
 // =========================================================================
-// 核心算子结构体
 // =========================================================================
 
 pub struct IncrementalAggregatingFunc {
@@ -185,21 +183,18 @@ pub struct IncrementalAggregatingFunc {
     accumulators: UpdatingCache<Vec<IncrementalState>>,
     updated_keys: HashMap<Key, Option<Vec<ScalarValue>>>,
     
-    // 【新增】：算子自身持有输入元数据，不再依赖外部动态传入
     input_schema: Arc<FsSchema>,
     has_routing_keys: bool,
 
     sliding_state_schema: Arc<FsSchema>,
     batch_state_schema: Arc<FsSchema>,
     schema_without_metadata: Arc<Schema>,
-    /// 下游 changelog 批次 schema（与 planner `final_schema` 一致）。
     final_output_schema: Arc<Schema>,
     ttl: Duration,
     key_converter: RowConverter,
     new_generation: u64,
 }
 
-/// 全局聚合使用的空 key（单分区无 routing key）。
 static GLOBAL_KEY: LazyLock<Arc<Vec<u8>>> = LazyLock::new(|| Arc::new(Vec::new()));
 
 impl IncrementalAggregatingFunc {
@@ -389,7 +384,6 @@ impl IncrementalAggregatingFunc {
     }
 
     // =========================================================================
-    // 状态读写逻辑 (Checkpointing & Restore)
     // =========================================================================
 
     fn checkpoint_sliding(&mut self) -> DFResult<Option<Vec<ArrayRef>>> {
@@ -592,13 +586,11 @@ impl IncrementalAggregatingFunc {
         Ok(())
     }
 
-    /// 核心逻辑：从内存中提取这段时间的所有变更，生成 Changelog（追加与撤回）
     fn generate_changelog(&mut self) -> Result<Option<RecordBatch>> {
         let mut output_keys = Vec::with_capacity(self.updated_keys.len() * 2);
         let mut output_values = vec![Vec::with_capacity(self.updated_keys.len() * 2); self.aggregates.len()];
         let mut is_retracts = Vec::with_capacity(self.updated_keys.len() * 2);
 
-        // 提取变更
         let (updated_keys, updated_values): (Vec<_>, Vec<_>) = mem::take(&mut self.updated_keys).into_iter().unzip();
         let mut deleted_keys = vec![];
 
@@ -606,7 +598,6 @@ impl IncrementalAggregatingFunc {
             let append = self.evaluate(&k.0)?;
 
             if let Some(v) = retract {
-                // 如果没有变化，直接跳过
                 if v.iter().zip(append.iter()).take(v.len() - 1).all(|(a, b)| a == b) { continue; }
                 is_retracts.push(true);
                 output_keys.push(k.clone());
@@ -663,7 +654,6 @@ fn set_retract_metadata(metadata: ArrayRef, is_retract: Arc<BooleanArray>) -> Ar
 }
 
 // =========================================================================
-// 实现全新的 Actor MessageOperator 接口
 // =========================================================================
 
 #[async_trait::async_trait]
@@ -683,7 +673,6 @@ impl MessageOperator for IncrementalAggregatingFunc {
         batch: RecordBatch,
         _ctx: &mut TaskContext,
     ) -> Result<Vec<StreamOutput>> {
-        // 数据进入仅更新内存中的 HashMap，暂不发送数据
         if self.has_routing_keys {
             self.keyed_aggregate(&batch)?;
         } else {
@@ -698,9 +687,7 @@ impl MessageOperator for IncrementalAggregatingFunc {
         _watermark: Watermark,
         _ctx: &mut TaskContext,
     ) -> Result<Vec<StreamOutput>> {
-        // 如果是基于时间的 flush (可根据业务决定是否在水位线推进时 flush)
         if let Some(changelog_batch) = self.generate_changelog()? {
-            // Forward 表示按原路直连发送给下游
             Ok(vec![StreamOutput::Forward(changelog_batch)])
         } else {
             Ok(vec![])
@@ -747,7 +734,6 @@ impl MessageOperator for IncrementalAggregatingFunc {
 }
 
 // =========================================================================
-// 算子构造器保持对外 API 兼容
 // =========================================================================
 
 pub struct IncrementalAggregatingConstructor;
diff --git a/src/runtime/streaming/operators/grouping/updating_cache.rs b/src/runtime/streaming/operators/grouping/updating_cache.rs
index bdba9fa7..2172535b 100644
--- a/src/runtime/streaming/operators/grouping/updating_cache.rs
+++ b/src/runtime/streaming/operators/grouping/updating_cache.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 按 key 的增量状态缓存：LRU + TTL（idle），供 [`super::incremental_aggregate`] 等使用。
 
 use std::borrow::Borrow;
 use std::collections::HashMap;
@@ -35,7 +34,6 @@ struct Node<T> {
     next: Option<usize>,
 }
 
-/// 基于数组槽位 + 双向链表（LRU）的 UpdatingCache，支持按代更新与 TTL 逐出。
 pub struct UpdatingCache<T: Send + Sync> {
     map: HashMap<Key, usize>,
     nodes: Vec<Node<T>>,
diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs
index 7fe32d6e..ef49c323 100644
--- a/src/runtime/streaming/operators/joins/join_instance.rs
+++ b/src/runtime/streaming/operators/joins/join_instance.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 瞬时 JOIN：双通道喂入 DataFusion 物理计划，水位线推进时闭合实例并抽干结果（纯内存版）。
 
 use anyhow::{anyhow, Result};
 use arrow::compute::{max, min, partition, sort_to_indices, take};
@@ -55,7 +54,6 @@ impl JoinSide {
     }
 }
 
-/// 瞬时 JOIN 执行实例：保存通道；窗口闭合时关闭通道并同步抽干 `SendableRecordBatchStream`。
 struct JoinInstance {
     left_tx: UnboundedSender<RecordBatch>,
     right_tx: UnboundedSender<RecordBatch>,
@@ -76,7 +74,6 @@ impl JoinInstance {
         }
     }
 
-    /// 关闭输入流，促使执行计划结束，并拉取全部 JOIN 结果。
     async fn close_and_drain(self) -> Result<Vec<RecordBatch>> {
         drop(self.left_tx);
         drop(self.right_tx);
@@ -264,8 +261,6 @@ impl MessageOperator for InstantJoinOperator {
     }
 }
 
-/// 与 `OperatorConstructor` 类似的配置入口；返回 [`InstantJoinOperator`]（实现 [`MessageOperator`]），
-/// 而非 `ConstructedOperator`（后者仅包装 `ArrowOperator`）。
 pub struct InstantJoinConstructor;
 
 impl InstantJoinConstructor {
diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs
index 9e6de6c9..91fd38a6 100644
--- a/src/runtime/streaming/operators/joins/join_with_expiration.rs
+++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs
@@ -10,8 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 带 TTL 的 Key-Time Join：纯内存状态版 + DataFusion 物理计划成对计算。
-//! 完全移除了底层 TableManager 和持久化状态依赖。
 
 use anyhow::{anyhow, Result};
 use arrow::compute::concat_batches;
@@ -43,7 +41,6 @@ enum JoinSide {
 }
 
 // ============================================================================
-// 纯内存状态缓冲区 (In-Memory TTL Buffer)
 // ============================================================================
 
 struct StateBuffer {
@@ -82,7 +79,6 @@ impl StateBuffer {
 }
 
 // ============================================================================
-// 算子主体
 // ============================================================================
 
 pub struct JoinWithExpirationOperator {
@@ -229,7 +225,6 @@ impl MessageOperator for JoinWithExpirationOperator {
 }
 
 // ============================================================================
-// 构造器
 // ============================================================================
 
 pub struct JoinWithExpirationConstructor;
diff --git a/src/runtime/streaming/operators/key_by.rs b/src/runtime/streaming/operators/key_by.rs
index a432011d..8d0da441 100644
--- a/src/runtime/streaming/operators/key_by.rs
+++ b/src/runtime/streaming/operators/key_by.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 物理网络路由算子：利用 DataFusion 物理表达式提取 Key，基于 Hash 排序执行零拷贝切片路由。
 
 use anyhow::{anyhow, Result};
 use arrow_array::{Array, RecordBatch, UInt64Array};
@@ -65,7 +64,6 @@ impl MessageOperator for KeyByOperator {
             return Ok(vec![]);
         }
 
-        // 1. 执行物理表达式，提取所有 Key 列
         let mut key_columns = Vec::with_capacity(self.key_extractors.len());
         for expr in &self.key_extractors {
             let column_array = expr
@@ -76,18 +74,15 @@ impl MessageOperator for KeyByOperator {
             key_columns.push(column_array);
         }
 
-        // 2. 向量化计算 Hash 数组
         let mut hash_buffer = vec![0u64; num_rows];
         create_hashes(&key_columns, &self.random_state, &mut hash_buffer)
             .map_err(|e| anyhow!("Failed to compute hashes: {}", e))?;
 
         let hash_array = UInt64Array::from(hash_buffer);
 
-        // 3. 基于 Hash 值排序，获取重排 Indices
         let sorted_indices = sort_to_indices(&hash_array, None, None)
             .map_err(|e| anyhow!("Failed to sort hashes: {}", e))?;
 
-        // 4. 对齐重排 Hash 数组和原始 Batch
         let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?;
         let sorted_hashes = sorted_hashes_ref
             .as_any()
@@ -101,7 +96,6 @@ impl MessageOperator for KeyByOperator {
             .collect();
         let sorted_batch = RecordBatch::try_new(batch.schema(), sorted_columns?)?;
 
-        // 5. 零拷贝微批切片 —— 按 Hash 值连续段切分并标记路由意图
         let mut outputs = Vec::new();
         let mut start_idx = 0;
 
diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/runtime/streaming/operators/key_operator.rs
index 5dfd66f6..0202f924 100644
--- a/src/runtime/streaming/operators/key_operator.rs
+++ b/src/runtime/streaming/operators/key_operator.rs
@@ -10,12 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 物理网络路由算子：利用 DataFusion 物理表达式提取 Key，基于 Hash 排序执行零拷贝切片路由。
 //!
-//! 提供两种算子：
-//! - [`KeyByOperator`]：纯 Key 提取 + Hash 路由，适用于简单的 GROUP BY / PARTITION BY。
-//! - [`KeyExecutionOperator`]：先执行完整物理计划，再按指定列 Hash 路由，适用于需要先做
-//!   计算（如聚合结果映射）再分区的场景。
 
 use anyhow::{anyhow, Result};
 use arrow_array::{Array, ArrayRef, RecordBatch, UInt64Array};
@@ -72,7 +67,6 @@ impl MessageOperator for KeyByOperator {
             return Ok(vec![]);
         }
 
-        // 1. 执行物理表达式，提取所有 Key 列
         let mut key_columns = Vec::with_capacity(self.key_extractors.len());
         for expr in &self.key_extractors {
             let column_array = expr
@@ -83,18 +77,15 @@ impl MessageOperator for KeyByOperator {
             key_columns.push(column_array);
         }
 
-        // 2. 向量化计算 Hash 数组
         let mut hash_buffer = vec![0u64; num_rows];
         create_hashes(&key_columns, &self.random_state, &mut hash_buffer)
             .map_err(|e| anyhow!("Failed to compute hashes: {}", e))?;
 
         let hash_array = UInt64Array::from(hash_buffer);
 
-        // 3. 基于 Hash 值排序，获取重排 Indices
         let sorted_indices = sort_to_indices(&hash_array, None, None)
             .map_err(|e| anyhow!("Failed to sort hashes: {}", e))?;
 
-        // 4. 对齐重排 Hash 数组和原始 Batch
         let sorted_hashes_ref = take(&hash_array, &sorted_indices, None)?;
         let sorted_hashes = sorted_hashes_ref
             .as_any()
@@ -108,7 +99,6 @@ impl MessageOperator for KeyByOperator {
             .collect();
         let sorted_batch = RecordBatch::try_new(batch.schema(), sorted_columns?)?;
 
-        // 5. 零拷贝微批切片 —— 按 Hash 值连续段切分并标记路由意图
         let mut outputs = Vec::new();
         let mut start_idx = 0;
 
@@ -177,12 +167,8 @@ impl KeyByConstructor {
 }
 
 // ===========================================================================
-// KeyExecutionOperator — 先执行物理计划，再按 Key 列 Hash 路由
 // ===========================================================================
 
-/// 键控路由执行算子：先驱动 DataFusion 物理计划完成计算（如聚合结果映射），
-/// 再根据 `key_fields` 指定列计算 Hash 并以 [`StreamOutput::Keyed`] 输出，
-/// 实现算子内部分区。
 pub struct KeyExecutionOperator {
     name: String,
     executor: StatelessPhysicalExecutor,
@@ -219,7 +205,6 @@ impl MessageOperator for KeyExecutionOperator {
     ) -> Result<Vec<StreamOutput>> {
         let mut outputs = Vec::new();
 
-        // 1. 执行物理转换
         let mut stream = self.executor.process_batch(batch).await?;
 
         while let Some(batch_result) = stream.next().await {
@@ -229,7 +214,6 @@ impl MessageOperator for KeyExecutionOperator {
                 continue;
             }
 
-            // 2. 提取 Key 列并计算 Hash
             let key_columns: Vec<ArrayRef> = self
                 .key_fields
                 .iter()
@@ -241,7 +225,6 @@ impl MessageOperator for KeyExecutionOperator {
                 .map_err(|e| anyhow!("hash compute: {e}"))?;
             let hash_array = UInt64Array::from(hash_buffer);
 
-            // 3. 基于 Hash 排序，获取重排 Indices
             let sorted_indices = sort_to_indices(&hash_array, None, None)
                 .map_err(|e| anyhow!("sort hashes: {e}"))?;
 
@@ -259,7 +242,6 @@ impl MessageOperator for KeyExecutionOperator {
             let sorted_batch =
                 RecordBatch::try_new(out_batch.schema(), sorted_columns?)?;
 
-            // 4. 零拷贝切片 —— 按 Hash 连续段分组，标记 Keyed 路由意图
             let mut start_idx = 0;
             while start_idx < num_rows {
                 let current_hash = sorted_hashes.value(start_idx);
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
index dc8b39b7..cb8412d8 100644
--- a/src/runtime/streaming/operators/mod.rs
+++ b/src/runtime/streaming/operators/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 内置算子。
 
 pub mod grouping;
 pub mod joins;
@@ -44,7 +43,6 @@ use async_trait::async_trait;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
-/// 透传数据。
 pub struct PassthroughOperator {
     name: String,
 }
diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs
index 33fe0d51..3afb93ef 100644
--- a/src/runtime/streaming/operators/projection.rs
+++ b/src/runtime/streaming/operators/projection.rs
@@ -10,8 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 高性能投影算子：直接操作 Arrow Array 执行列映射与标量运算，
-//! 避开 DataFusion 执行树开销，适用于 SELECT 字段筛选和简单标量计算。
 
 use anyhow::Result;
 use arrow_array::RecordBatch;
diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs
index aecef032..9f82a4ce 100644
--- a/src/runtime/streaming/operators/sink/kafka/mod.rs
+++ b/src/runtime/streaming/operators/sink/kafka/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Kafka Sink：实现 [`crate::runtime::streaming::api::operator::MessageOperator`]，支持 At-Least-Once 与 Exactly-Once（事务 + 二阶段提交）。
 
 use anyhow::{anyhow, bail, Result};
 use arrow_array::cast::AsArray;
@@ -34,7 +33,6 @@ use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::constants::factory_operator_name;
 use crate::sql::common::{CheckpointBarrier, FsSchema, Watermark};
 // ============================================================================
-// 1. 领域模型：一致性级别与事务状态机
 // ============================================================================
 
 #[derive(Debug, Clone)]
@@ -50,7 +48,6 @@ struct TransactionalState {
 }
 
 // ============================================================================
-// 2. 核心算子外壳
 // ============================================================================
 
 pub struct KafkaSinkOperator {
@@ -205,7 +202,6 @@ fn row_key_bytes(batch: &RecordBatch, row: usize, col: usize) -> Option<Vec<u8>>
 }
 
 // ============================================================================
-// 3. 实现 MessageOperator 协议
 // ============================================================================
 
 #[async_trait]
diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/runtime/streaming/operators/sink/mod.rs
index 93b3b0ee..8abd2985 100644
--- a/src/runtime/streaming/operators/sink/mod.rs
+++ b/src/runtime/streaming/operators/sink/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 与外部系统对接的 Sink 实现（Kafka 等）。
 
 pub mod kafka;
 
diff --git a/src/runtime/streaming/operators/source/kafka/mod.rs b/src/runtime/streaming/operators/source/kafka/mod.rs
index 59507c2e..d0de692a 100644
--- a/src/runtime/streaming/operators/source/kafka/mod.rs
+++ b/src/runtime/streaming/operators/source/kafka/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Kafka 源算子：实现 [`crate::runtime::streaming::api::source::SourceOperator`]，由 [`crate::runtime::streaming::execution::SourceRunner`] 轮询 `fetch_next`。
 
 use anyhow::{anyhow, Context as _, Result};
 use arrow_array::RecordBatch;
@@ -31,7 +30,6 @@ use crate::runtime::streaming::format::{BadDataPolicy, DataDeserializer, Format}
 use crate::sql::common::{CheckpointBarrier, MetadataField};
 use crate::sql::common::fs_schema::FieldValueType;
 // ============================================================================
-// 1. 领域模型：Kafka 状态与配置
 // ============================================================================
 
 #[derive(Copy, Clone, Debug, Encode, Decode, PartialEq, PartialOrd)]
@@ -40,7 +38,6 @@ pub struct KafkaState {
     offset: i64,
 }
 
-/// 增量反序列化缓冲 trait：Source 逐条 `deserialize_slice`，攒满或超时后 `flush_buffer` 输出 [`RecordBatch`]。
 pub trait BatchDeserializer: Send + 'static {
     fn deserialize_slice(
         &mut self,
@@ -53,15 +50,12 @@ pub trait BatchDeserializer: Send + 'static {
 
     fn flush_buffer(&mut self) -> Result<Option<RecordBatch>>;
 
-    /// 缓冲区是否无任何待反序列化数据。
     fn is_empty(&self) -> bool;
 }
 
 // ---------------------------------------------------------------------------
-// BufferedDeserializer — 基于 DataDeserializer 的默认 BatchDeserializer 实现
 // ---------------------------------------------------------------------------
 
-/// 将 [`DataDeserializer`] 包装为 [`BatchDeserializer`]：逐条缓存 payload，达到阈值后批量反序列化。
 pub struct BufferedDeserializer {
     inner: DataDeserializer,
     buffer: Vec<Vec<u8>>,
@@ -120,7 +114,6 @@ impl SourceOffset {
 }
 
 // ============================================================================
-// 2. 核心算子外壳
 // ============================================================================
 
 const KAFKA_POLL_TIMEOUT: Duration = Duration::from_millis(100);
@@ -144,7 +137,6 @@ pub struct KafkaSourceOperator {
     current_offsets: HashMap<i32, i64>,
     is_empty_assignment: bool,
 
-    /// 上次成功 flush 出 batch 的时间，用于低流量时按逗留时间强制发车。
     last_flush_time: Instant,
 }
 
@@ -251,7 +243,6 @@ impl KafkaSourceOperator {
 }
 
 // ============================================================================
-// 3. 实现 SourceOperator 协议
 // ============================================================================
 
 #[async_trait]
@@ -288,7 +279,6 @@ impl SourceOperator for KafkaSourceOperator {
                 let offset = msg.offset();
                 let timestamp = msg.timestamp().to_millis().unwrap_or(0);
 
-                // 无论是否有 payload（含 Tombstone），都必须推进位点，否则会永久卡在墓碑消息上。
                 self.current_offsets.insert(partition, offset);
 
                 if let Some(payload) = msg.payload() {
@@ -345,7 +335,6 @@ impl SourceOperator for KafkaSourceOperator {
                 Err(anyhow!("Kafka error: {}", e))
             }
             Err(_) => {
-                // 超时内无新消息：若缓冲区仍有积压，强制 flush，避免低流量下数据长期滞留。
                 if !self.deserializer.is_empty() {
                     if let Some(batch) = self.deserializer.flush_buffer()? {
                         self.last_flush_time = Instant::now();
diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs
index 687e2289..76f3639a 100644
--- a/src/runtime/streaming/operators/source/mod.rs
+++ b/src/runtime/streaming/operators/source/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 与外部系统对接的源实现（Kafka 等）。
 
 pub mod kafka;
 
diff --git a/src/runtime/streaming/operators/stateless_physical_executor.rs b/src/runtime/streaming/operators/stateless_physical_executor.rs
index 188015e2..6c1e5c90 100644
--- a/src/runtime/streaming/operators/stateless_physical_executor.rs
+++ b/src/runtime/streaming/operators/stateless_physical_executor.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 无状态物理计划执行器：将单批次写入 `SingleLockedBatch` 并让 DataFusion 计划消费。
 
 use std::sync::{Arc, RwLock};
 
diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/runtime/streaming/operators/value_execution.rs
index c3b3d525..86596512 100644
--- a/src/runtime/streaming/operators/value_execution.rs
+++ b/src/runtime/streaming/operators/value_execution.rs
@@ -10,8 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 通用无状态执行算子：驱动 DataFusion 物理计划（Filter, Case When, Scalar UDF 等），
-//! 不改变分区状态，适用于 Map / Filter 阶段。
 
 use anyhow::Result;
 use arrow_array::RecordBatch;
diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs
index 63f5acec..b512f842 100644
--- a/src/runtime/streaming/operators/watermark/watermark_generator.rs
+++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 表达式水位生成器：与 worker `arrow/watermark_generator` 对齐，通过 [`StreamOutput::Watermark`] 向下游广播。
 
 use anyhow::{anyhow, Result};
 use arrow::compute::kernels::aggregate;
@@ -35,7 +34,6 @@ use protocol::grpc::api::ExpressionWatermarkConfig;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_millis, CheckpointBarrier, FsSchema, Watermark};
 
-/// 需持久化到 Checkpoint 的状态（与 worker `WatermarkGeneratorState` 语义一致）。
 #[derive(Debug, Copy, Clone, Encode, Decode, PartialEq, Eq)]
 pub struct WatermarkGeneratorState {
     pub last_watermark_emitted_at: SystemTime,
@@ -86,8 +84,6 @@ impl WatermarkGeneratorOperator {
         Some(from_nanos(max_ts as u128))
     }
 
-    /// 水位线计算必须取评估后数组的 **Max**，不能取 Min：同一 Batch 内多行时，
-    /// Min 会低估“已见事件时间”的安全基线（例如 ts-5s 在两行上 min 会偏早）。
     fn evaluate_watermark(&self, batch: &RecordBatch) -> Result<SystemTime> {
         let watermark_array = self
             .expression
@@ -137,14 +133,12 @@ impl MessageOperator for WatermarkGeneratorOperator {
 
         let new_watermark = self.evaluate_watermark(&batch)?;
 
-        // 死守单调递增底线，绝不倒流
         self.state.max_watermark = self.state.max_watermark.max(new_watermark);
 
         let time_since_last_emit = max_batch_ts
             .duration_since(self.state.last_watermark_emitted_at)
             .unwrap_or(Duration::ZERO);
 
-        // 空闲唤醒或达到发射间隔则发射水印
         if self.is_idle || time_since_last_emit > self.interval {
             debug!(
                 "[{}] emitting expression watermark {}",
@@ -181,7 +175,6 @@ impl MessageOperator for WatermarkGeneratorOperator {
                 .last_event_wall
                 .elapsed()
                 .unwrap_or(Duration::ZERO);
-            // 系统时钟超时，发射 Idle 水印，避免下游一直等不到推进
             if !self.is_idle && elapsed > idle_timeout {
                 info!(
                     "task [{}] entering Idle after {:?}",
diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
index aaf65cf1..d7257223 100644
--- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
@@ -10,8 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 会话窗口聚合：纯内存版，完全脱离持久化状态存储。
-//! 利用 BTreeMap 充当优先队列，数据天然在内存中进行 Gap 合并与触发。
 
 use anyhow::{anyhow, bail, Context, Result};
 use arrow::compute::{
@@ -47,7 +45,6 @@ use crate::sql::common::converter::Converter;
 use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 use crate::sql::schema::utils::window_arrow_struct;
 // ============================================================================
-// 领域模型与纯内存状态
 // ============================================================================
 
 struct SessionWindowConfig {
@@ -179,7 +176,7 @@ struct SessionWindowResult {
 struct KeySessionState {
     config: Arc<SessionWindowConfig>,
     active_session: Option<ActiveSession>,
-    buffered_batches: BTreeMap<SystemTime, Vec<RecordBatch>>, // 纯内存缓冲
+    buffered_batches: BTreeMap<SystemTime, Vec<RecordBatch>>,
 }
 
 impl KeySessionState {
@@ -335,7 +332,6 @@ fn build_session_output_schema(
 }
 
 // ============================================================================
-// 算子本体：负责处理输入数据与时间流，路由给具体的 KeySessionState
 // ============================================================================
 
 pub struct SessionWindowOperator {
@@ -662,7 +658,6 @@ impl MessageOperator for SessionWindowOperator {
 }
 
 // ============================================================================
-// 构造器
 // ============================================================================
 
 pub struct SessionAggregatingWindowConstructor;
diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
index 6f0aa7f9..7bad21bc 100644
--- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
@@ -10,9 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 滑动窗口聚合：纯内存版。
-//! 完全依赖内部的 TieredRecordBatchHolder 和 ActiveBin 在内存中进行计算，
-//! 摆脱 TableManager 依赖，遇到 Barrier 自动透传。
 
 use anyhow::{anyhow, bail, Result};
 use arrow::compute::{partition, sort_to_indices, take};
@@ -45,8 +42,6 @@ use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{from_nanos, to_nanos, CheckpointBarrier, FsSchema, Watermark};
 use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 // ============================================================================
-// 纯内存状态：阶梯式时间面板 (Tiered panes)
-// 这部分本身就是极佳的内存数据结构，原样保留！
 // ============================================================================
 
 #[derive(Default, Debug)]
@@ -217,7 +212,6 @@ impl TieredRecordBatchHolder {
 }
 
 // ============================================================================
-// Per-bin partial aggregation (纯内存缓冲区)
 // ============================================================================
 
 struct ActiveBin {
@@ -264,7 +258,6 @@ impl ActiveBin {
 }
 
 // ============================================================================
-// 算子主体
 // ============================================================================
 
 pub struct SlidingWindowOperator {
@@ -473,7 +466,6 @@ impl MessageOperator for SlidingWindowOperator {
 }
 
 // ============================================================================
-// 构造器
 // ============================================================================
 
 pub struct SlidingAggregatingWindowConstructor;
diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
index 30724f59..093823bb 100644
--- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 滚动（tumbling）窗口聚合：与 worker `arrow/tumbling_aggregating_window` 对齐，实现 [`MessageOperator`]。
 
 use anyhow::{anyhow, Result};
 use arrow::compute::{partition, sort_to_indices, take};
diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs
index f6ae2a1a..d067eccf 100644
--- a/src/runtime/streaming/operators/windows/window_function.rs
+++ b/src/runtime/streaming/operators/windows/window_function.rs
@@ -10,9 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 窗口函数（按事件时间分桶的瞬时执行）：纯内存版。
-//! 完全依赖内部的 ActiveWindowExec 通道在内存中缓冲数据，
-//! 摆脱持久化状态存储的依赖，遇到 Barrier 自动透传。
 
 use anyhow::{anyhow, Result};
 use arrow::compute::{max, min};
@@ -41,7 +38,6 @@ use crate::sql::common::time_utils::print_time;
 use crate::sql::physical::{DecodingContext, FsPhysicalExtensionCodec};
 
 // ============================================================================
-// 纯内存执行缓冲区
 // ============================================================================
 
 struct ActiveWindowExec {
@@ -77,7 +73,6 @@ impl ActiveWindowExec {
 }
 
 // ============================================================================
-// 算子主体
 // ============================================================================
 
 pub struct WindowFunctionOperator {
@@ -199,7 +194,6 @@ impl MessageOperator for WindowFunctionOperator {
 
         let mut final_outputs = Vec::new();
 
-        // 与 worker 一致：仅当桶时间戳 **严格小于** 当前事件时间水位时才结算（`watermark <= ts` 时保留）。
         let mut expired_timestamps = Vec::new();
         for &k in self.active_execs.keys() {
             if k < current_time {
@@ -233,7 +227,6 @@ impl MessageOperator for WindowFunctionOperator {
 }
 
 // ============================================================================
-// 构造器
 // ============================================================================
 
 pub struct WindowFunctionConstructor;
diff --git a/src/runtime/streaming/protocol/control.rs b/src/runtime/streaming/protocol/control.rs
index d225e2e8..d337046e 100644
--- a/src/runtime/streaming/protocol/control.rs
+++ b/src/runtime/streaming/protocol/control.rs
@@ -10,14 +10,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 控制平面：与 [`super::event::StreamEvent`] 队列分离的高优先级指令。
 
 use serde::{Deserialize, Serialize};
 use std::time::Duration;
 use tokio::sync::mpsc::{self, Receiver, Sender};
 use crate::sql::common::CheckpointBarrier;
 
-/// 可序列化的 barrier 载荷（`CheckpointBarrier` 本身未实现 `serde`，供 RPC / 持久化使用）。
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct CheckpointBarrierWire {
     pub epoch: u32,
@@ -55,7 +53,6 @@ impl From<CheckpointBarrierWire> for CheckpointBarrier {
     }
 }
 
-/// JobManager / 调度器下发的高优控制指令。
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum ControlCommand {
     Start,
@@ -63,7 +60,6 @@ pub enum ControlCommand {
     DropState,
     Commit { epoch: u32 },
     UpdateConfig { config_json: String },
-    /// 通常由 [`crate::runtime::streaming::SourceRunner`] 接收，源头落盘后向下游注入 `Barrier`。
     TriggerCheckpoint { barrier: CheckpointBarrierWire },
 }
 
diff --git a/src/runtime/streaming/protocol/event.rs b/src/runtime/streaming/protocol/event.rs
index efd43952..b78b7fbc 100644
--- a/src/runtime/streaming/protocol/event.rs
+++ b/src/runtime/streaming/protocol/event.rs
@@ -13,7 +13,6 @@
 use arrow_array::RecordBatch;
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
-/// 核心数据面事件
 #[derive(Debug, Clone)]
 pub enum StreamEvent {
     Data(RecordBatch),
diff --git a/src/runtime/streaming/protocol/mod.rs b/src/runtime/streaming/protocol/mod.rs
index f859df28..63f7f0bf 100644
--- a/src/runtime/streaming/protocol/mod.rs
+++ b/src/runtime/streaming/protocol/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 协议层：数据事件、控制命令、水位线合并与比较语义。
 
 pub mod control;
 pub mod event;
diff --git a/src/runtime/streaming/protocol/stream_out.rs b/src/runtime/streaming/protocol/stream_out.rs
index 0f6619f9..fc7b9bba 100644
--- a/src/runtime/streaming/protocol/stream_out.rs
+++ b/src/runtime/streaming/protocol/stream_out.rs
@@ -13,15 +13,10 @@
 use arrow_array::RecordBatch;
 use crate::sql::common::Watermark;
 
-/// 算子产出的数据及下游 **路由意图**（由 `SubtaskRunner` 选择 `collect` / `collect_keyed` / `broadcast` / 水位广播）。
 #[derive(Debug, Clone)]
 pub enum StreamOutput {
-    /// 发往所有下游（与 `TaskContext::collect` 一致：当前实现为每条边各发一份 `Data`）。
     Forward(RecordBatch),
-    /// 按 `key_hash % outboxes.len()` 发往单一分区（KeyBy / Shuffle）。
     Keyed(u64, RecordBatch),
-    /// 广播同一份数据到所有下游边（如 broadcast join）。
     Broadcast(RecordBatch),
-    /// 向所有下游广播水位线（如表达式水位生成器）。
     Watermark(Watermark),
 }
diff --git a/src/runtime/streaming/protocol/tracked.rs b/src/runtime/streaming/protocol/tracked.rs
index 5034abd2..d4360627 100644
--- a/src/runtime/streaming/protocol/tracked.rs
+++ b/src/runtime/streaming/protocol/tracked.rs
@@ -15,11 +15,7 @@ use std::sync::Arc;
 use crate::runtime::streaming::memory::MemoryTicket;
 use crate::runtime::streaming::protocol::event::StreamEvent;
 
-/// 在 Channel 中实际传输的事件，完美解决多路广播 (Broadcast) 的内存管理问题。
 ///
-/// `MemoryTicket` 包在 `Arc` 中：如果 Event 被发送给 N 个下游分区（Broadcast 路由），
-/// 只需 Clone 此 `TrackedEvent`，底层数据共享一块内存，Arc 引用计数 +N。
-/// 只有当所有下游全部处理完并 Drop 后，Arc 归零，内存才被真正释放给 Pool。
 #[derive(Debug, Clone)]
 pub struct TrackedEvent {
     pub event: StreamEvent,
diff --git a/src/runtime/streaming/protocol/watermark.rs b/src/runtime/streaming/protocol/watermark.rs
index 9c039aa5..f6e8388a 100644
--- a/src/runtime/streaming/protocol/watermark.rs
+++ b/src/runtime/streaming/protocol/watermark.rs
@@ -10,12 +10,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 水位线类型来自 `arroyo_types::Watermark`；此处提供 **多路对齐合并** 与 **单调推进** 判断。
 
 use crate::sql::common::Watermark;
 
-/// 多输入对齐：`Idle` 不参与事件时间取最小；若全部为 `Idle` 则输出 `Idle`。
-/// 任一路尚未有水位线时返回 `None`（木桶短板未齐）。
 pub fn merge_watermarks(per_input: &[Option<Watermark>]) -> Option<Watermark> {
     if per_input.iter().any(|w| w.is_none()) {
         return None;
@@ -46,7 +43,6 @@ pub fn merge_watermarks(per_input: &[Option<Watermark>]) -> Option<Watermark> {
     }
 }
 
-/// `new` 相对 `previous` 是否为 **严格推进**；`previous == None` 时恒为真。
 pub fn watermark_strictly_advances(new: Watermark, previous: Option<Watermark>) -> bool {
     match previous {
         None => true,
diff --git a/src/runtime/util/mod.rs b/src/runtime/util/mod.rs
index 3b4c7e60..0e3a3f7b 100644
--- a/src/runtime/util/mod.rs
+++ b/src/runtime/util/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 运行时跨子系统复用的工具函数（物理计划 proto 解码等）。
 
 mod physical_aggregate;
 
diff --git a/src/runtime/util/physical_aggregate.rs b/src/runtime/util/physical_aggregate.rs
index 83a6e3bd..33dd1e9f 100644
--- a/src/runtime/util/physical_aggregate.rs
+++ b/src/runtime/util/physical_aggregate.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 从 DataFusion proto 物理表达式节点解码聚合（UDAF）表达式。
 
 use std::sync::Arc;
 
@@ -26,7 +25,6 @@ use datafusion_proto::protobuf::physical_aggregate_expr_node::AggregateFunction;
 use datafusion_proto::protobuf::physical_expr_node::ExprType;
 use datafusion_proto::protobuf::{PhysicalExprNode, proto_error};
 
-/// 从 `PhysicalExprNode` 解码 UDAF 聚合表达式（与 worker `arrow/mod` 历史路径语义一致）。
 pub fn decode_aggregate(
     schema: &SchemaRef,
     name: &str,
diff --git a/src/sql/common/constants.rs b/src/sql/common/constants.rs
index cf2a39cc..8eb697e2 100644
--- a/src/sql/common/constants.rs
+++ b/src/sql/common/constants.rs
@@ -10,10 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! SQL / 流算子相关的**名称与标识符常量**（标量函数名、窗口 TVF、逻辑扩展节点名、CDC 字段、
-//! 运行时 blueprint 字符串、`OperatorName` 特性标签等）；与 [`super::with_option_keys`]（WITH 选项键）分工。
 
-// ── 内置标量 UDF（`register_all` / `ScalarUDFImpl::name`）──────────────────────
 
 pub mod scalar_fn {
     pub const GET_FIRST_JSON_OBJECT: &str = "get_first_json_object";
@@ -23,7 +20,6 @@ pub mod scalar_fn {
     pub const MULTI_HASH: &str = "multi_hash";
 }
 
-// ── 窗口 TVF（`hop` / `tumble` / `session` 等，与 DataFusion 解析一致）──────────
 
 pub mod window_fn {
     pub const HOP: &str = "hop";
@@ -31,16 +27,13 @@ pub mod window_fn {
     pub const SESSION: &str = "session";
 }
 
-// ── 流规划期占位标量 UDF（`StreamPlanningContextBuilder::with_streaming_extensions`）──
 
 pub mod planning_placeholder_udf {
     pub const UNNEST: &str = "unnest";
     pub const ROW_TIME: &str = "row_time";
-    /// `List` 内元素字段名，仅用于占位签名的 Arrow 形态
     pub const LIST_ELEMENT_FIELD: &str = "field";
 }
 
-// ── `OperatorName` 在指标 / 特性集合中使用的 kebab-case 标签 ─────────────────
 
 pub mod operator_feature {
     pub const ASYNC_UDF: &str = "async-udf";
@@ -57,7 +50,6 @@ pub mod operator_feature {
     pub const CONNECTOR_SINK: &str = "connector-sink";
 }
 
-// ── 逻辑计划扩展节点的 `UserDefinedLogicalNodeCore::name` / 类型字符串 ────────
 
 pub mod extension_node {
     pub const STREAM_WINDOW_AGGREGATE: &str = "StreamWindowAggregateNode";
@@ -78,19 +70,16 @@ pub mod extension_node {
     pub const PACK_DEBEZIUM_ENVELOPE: &str = "PackDebeziumEnvelopeNode";
 }
 
-// ── gRPC / proto 算子配置里的 `name` 字段（与 `OperatorName` 展示相关）──────────
 
 pub mod proto_operator_name {
     pub const TUMBLING_WINDOW: &str = "TumblingWindow";
     pub const UPDATING_AGGREGATE: &str = "UpdatingAggregate";
     pub const WINDOW_FUNCTION: &str = "WindowFunction";
-    /// 滑动窗口 human-readable 描述片段（非固定 id）
     pub const SLIDING_WINDOW_LABEL: &str = "sliding window";
     pub const INSTANT_WINDOW: &str = "InstantWindow";
     pub const INSTANT_WINDOW_LABEL: &str = "instant window";
 }
 
-// ── 下发到运行时的 blueprint / 算子种类字符串 ──────────────────────────────────
 
 pub mod runtime_operator_kind {
     pub const STREAMING_JOIN: &str = "streaming_join";
@@ -98,7 +87,6 @@ pub mod runtime_operator_kind {
     pub const STREAMING_WINDOW_EVALUATOR: &str = "streaming_window_evaluator";
 }
 
-// ── Worker [`OperatorFactory`] 注册键（须与任务包 `operator_name`、`OperatorName::Display` 一致）──
 
 pub mod factory_operator_name {
     pub const CONNECTOR_SOURCE: &str = "ConnectorSource";
@@ -107,7 +95,6 @@ pub mod factory_operator_name {
     pub const KAFKA_SINK: &str = "KafkaSink";
 }
 
-// ── Debezium CDC 信封字段 ───────────────────────────────────────────────────
 
 pub mod cdc {
     pub const BEFORE: &str = "before";
@@ -115,39 +102,29 @@ pub mod cdc {
     pub const OP: &str = "op";
 }
 
-// ── updating aggregate 状态元数据 struct 字段 ────────────────────────────────
 
 pub mod updating_state_field {
     pub const IS_RETRACT: &str = "is_retract";
     pub const ID: &str = "id";
 }
 
-// ── 计划里常用的列名 / 别名 ───────────────────────────────────────────────────
 
 pub mod sql_field {
-    /// 异步 UDF 重写后的结果列名。
     pub const ASYNC_RESULT: &str = "__async_result";
     pub const DEFAULT_KEY_LABEL: &str = "key";
     pub const DEFAULT_PROJECTION_LABEL: &str = "projection";
-    /// `WATERMARK FOR … AS expr` 生成的计算列名（与 `TemporalPipelineConfig` 一致）。
     pub const COMPUTED_WATERMARK: &str = "__watermark";
-    /// 流表事件时间物理列名（与 DataFusion 计划注入列一致）。
     pub const TIMESTAMP_FIELD: &str = "_timestamp";
-    /// Changelog / updating 模式下的元数据列名。
     pub const UPDATING_META_FIELD: &str = "_updating_meta";
 }
 
-// ── `SqlConfig` / `PlanningOptions` 默认值 ────────────────────────────────────
 
 pub mod sql_planning_default {
     pub const DEFAULT_PARALLELISM: usize = 4;
-    /// [`PlanningOptions::default`] 的 TTL（秒）：24h。
     pub const PLANNING_TTL_SECS: u64 = 24 * 60 * 60;
 }
 
-// ── `ConnectorOptions` / WITH 解析用到的字面量 ────────────────────────────────
 
-/// 单引号字符串形式的布尔取值（见 [`super::connector_options::ConnectorOptions::pull_opt_bool`]）。
 pub mod with_opt_bool_str {
     pub const TRUE: &str = "true";
     pub const YES: &str = "yes";
@@ -155,7 +132,6 @@ pub mod with_opt_bool_str {
     pub const NO: &str = "no";
 }
 
-/// `INTERVAL '…'` / 间隔字符串解析中的单位 token（小写；解析前会对单位做 `to_lowercase`）。
 pub mod interval_duration_unit {
     pub const SECOND: &str = "second";
     pub const SECONDS: &str = "seconds";
@@ -171,9 +147,7 @@ pub mod interval_duration_unit {
     pub const D: &str = "d";
 }
 
-// ── `format` / `framing.method` / `bad_data` 的 WITH 取值（见 `format_from_opts`）──────
 
-/// `format = '…'` 的名称（小写；`Format::from_opts` 会对值做 `to_lowercase`）。
 pub mod connection_format_value {
     pub const JSON: &str = "json";
     pub const DEBEZIUM_JSON: &str = "debezium_json";
@@ -184,19 +158,16 @@ pub mod connection_format_value {
     pub const RAW_BYTES: &str = "raw_bytes";
 }
 
-/// `framing.method` 合法取值（与 `Framing::from_opts` 一致；当前不做大小写折叠）。
 pub mod framing_method_value {
     pub const NEWLINE: &str = "newline";
     pub const NEWLINE_DELIMITED: &str = "newline_delimited";
 }
 
-/// `bad_data = '…'`（小写；解析前 `to_lowercase`）。
 pub mod bad_data_value {
     pub const FAIL: &str = "fail";
     pub const DROP: &str = "drop";
 }
 
-// ── `formats.rs` 里枚举的 wire 名（与 serde `snake_case` / `TryFrom` / `FromStr` 一致）────
 
 pub mod timestamp_format_value {
     pub const RFC3339_SNAKE: &str = "rfc3339";
@@ -225,7 +196,6 @@ pub mod parquet_compression_value {
     pub const LZ4_RAW: &str = "lz4_raw";
 }
 
-// ── `date_part` / `date_trunc` SQL 关键字（小写；解析前对输入做 `to_lowercase`）────────
 
 pub mod date_part_keyword {
     pub const YEAR: &str = "year";
@@ -253,15 +223,12 @@ pub mod date_trunc_keyword {
     pub const SECOND: &str = "second";
 }
 
-// ── `logical_planner/mod.rs` 物理计划与 Debezium 流水线 ───────────────────────
 
-/// `FsMemExec` / codec 里表示 join 左右输入的 `table_name`。
 pub mod mem_exec_join_side {
     pub const LEFT: &str = "left";
     pub const RIGHT: &str = "right";
 }
 
-/// 自定义 `ExecutionPlan::name()`（与 DataFusion explain / 调试一致）。
 pub mod physical_plan_node_name {
     pub const RW_LOCK_READER: &str = "rw_lock_reader";
     pub const UNBOUNDED_READER: &str = "unbounded_reader";
@@ -271,18 +238,15 @@ pub mod physical_plan_node_name {
     pub const TO_DEBEZIUM_EXEC: &str = "to_debezium_exec";
 }
 
-/// 流式 `window(start, end)` 标量 UDF 的注册名。
 pub mod window_function_udf {
     pub const NAME: &str = "window";
 }
 
-/// `window()` UDF 返回 struct 的字段名（与 `window_arrow_struct` 一致）。
 pub mod window_interval_field {
     pub const START: &str = "start";
     pub const END: &str = "end";
 }
 
-/// Debezium `op` 列中的单字母取值（unroll / pack 路径）。
 pub mod debezium_op_short {
     pub const CREATE: &str = "c";
     pub const READ: &str = "r";
@@ -290,7 +254,6 @@ pub mod debezium_op_short {
     pub const DELETE: &str = "d";
 }
 
-// ── 连接器类型短名（工厂注册等）──────────────────────────────────────────────
 
 pub mod connector_type {
     pub const KAFKA: &str = "kafka";
@@ -310,21 +273,17 @@ pub mod connector_type {
     pub const POSTGRES: &str = "postgres";
 }
 
-// ── 连接表 `WITH type = 'source'|'sink'|'lookup'`（`SourceTable::from_options` / `deduce_role`）──
 
 pub mod connection_table_role {
     pub const SOURCE: &str = "source";
     pub const SINK: &str = "sink";
-    /// 与虚拟 `lookup` 连接器短名相同（亦在 [`SUPPORTED_CONNECTOR_ADAPTERS`] 中）。
     pub const LOOKUP: &str = "lookup";
 }
 
-/// [`crate::sql::schema::table_role::validate_adapter_availability`] 白名单（与 SQL `connector = '…'` 短名一致）。
 pub const SUPPORTED_CONNECTOR_ADAPTERS: &[&str] = &[
     connector_type::KAFKA,
 ];
 
-// ── Kafka 连接器 WITH 选项取值（`wire_kafka_operator_config`）────────────────
 
 pub mod kafka_with_value {
     pub const SCAN_LATEST: &str = "latest";
diff --git a/src/sql/common/kafka_catalog.rs b/src/sql/common/kafka_catalog.rs
index e54e6901..5d54b1b2 100644
--- a/src/sql/common/kafka_catalog.rs
+++ b/src/sql/common/kafka_catalog.rs
@@ -10,22 +10,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Kafka 表级与连接级配置（与 JSON Schema / Catalog 对齐）。
 //!
-//! 放在 [`crate::sql::common`] 而非 `runtime::streaming`，以便 **SQL 规划、Coordinator、连接配置存储**
-//! 与 **运行时工厂**（如 `ConnectorSourceDispatcher`）共用同一套类型，避免循环依赖。
 //!
-//! 与 [`crate::runtime::streaming::api::source::SourceOffset`] 语义相同但独立定义，运行时可用 `From`/`match` 做映射。
 
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 
-// ── KafkaTable：单表 Source/Sink ─────────────────────────────────────────
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct KafkaTable {
     pub topic: String,
-    /// Source / Sink 判别及各自字段；与顶层 JSON 扁平字段共用 `type` 标签。
     #[serde(flatten)]
     pub kind: TableType,
     #[serde(default)]
@@ -34,7 +28,6 @@ pub struct KafkaTable {
 }
 
 impl KafkaTable {
-    /// Schema Registry subject；未配置时与常见约定一致：`{topic}-value`。
     pub fn subject(&self) -> String {
         self.value_subject
             .clone()
@@ -82,7 +75,6 @@ pub enum SinkCommitMode {
     ExactlyOnce,
 }
 
-// ── KafkaConfig：集群 / 鉴权 / Schema Registry ───────────────────────────
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "camelCase")]
diff --git a/src/sql/common/operator_config.rs b/src/sql/common/operator_config.rs
index b9e40391..ba61b36a 100644
--- a/src/sql/common/operator_config.rs
+++ b/src/sql/common/operator_config.rs
@@ -40,7 +40,6 @@ pub struct OperatorConfig {
     pub rate_limit: Option<RateLimit>,
     #[serde(default)]
     pub metadata_fields: Vec<MetadataField>,
-    /// Arrow 行 schema（Kafka Source/Sink 反序列化、序列化必需）。
     #[serde(default)]
     pub input_schema: Option<FsSchema>,
 }
diff --git a/src/sql/common/with_option_keys.rs b/src/sql/common/with_option_keys.rs
index e48d9b7a..a42f7405 100644
--- a/src/sql/common/with_option_keys.rs
+++ b/src/sql/common/with_option_keys.rs
@@ -10,14 +10,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! SQL `WITH` 子句中的选项名，以及部分连接器序列化 JSON 的字段名（单一来源）。
 
-// ── 通用 / 表级 ─────────────────────────────────────────────────────────────
 
 pub const CONNECTOR: &str = "connector";
 pub const TYPE: &str = "type";
 pub const FORMAT: &str = "format";
-/// 未指定 `format` 选项时的默认格式名（值，非键）。
 pub const DEFAULT_FORMAT_VALUE: &str = "json";
 pub const BAD_DATA: &str = "bad_data";
 pub const PARTITION_BY: &str = "partition_by";
@@ -31,11 +28,9 @@ pub const IDLE_TIME: &str = "idle_time";
 pub const LOOKUP_CACHE_MAX_BYTES: &str = "lookup.cache.max_bytes";
 pub const LOOKUP_CACHE_TTL: &str = "lookup.cache.ttl";
 
-// ── 非 Kafka 连接器的 opaque JSON（`CONNECTOR` 与 WITH 选项同名）────────────
 
 pub const CONNECTION_SCHEMA: &str = "connection_schema";
 
-// ── 后端参数序列化（如 lookup）──────────────────────────────────────────────
 
 pub const ADAPTER: &str = "adapter";
 
@@ -92,6 +87,5 @@ pub const PROTOBUF_LENGTH_DELIMITED: &str = "protobuf.length_delimited";
 pub const FRAMING_METHOD: &str = "framing.method";
 pub const FRAMING_MAX_LINE_LENGTH: &str = "framing.max_line_length";
 
-// ── 从字符串 map 推断编码（catalog 等）──────────────────────────────────────
 
 pub const FORMAT_DEBEZIUM_FLAG: &str = "format.debezium";
diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs
index 6cb00914..d157234b 100644
--- a/src/sql/logical_node/logical/operator_name.rs
+++ b/src/sql/logical_node/logical/operator_name.rs
@@ -38,7 +38,6 @@ pub enum OperatorName {
 }
 
 impl OperatorName {
-    /// 特性 / 指标聚合使用的 kebab-case 标签（与 [`crate::sql::common::constants::operator_feature`] 一致）。
     pub fn feature_tag(self) -> Option<&'static str> {
         match self {
             Self::ExpressionWatermark | Self::ArrowValue | Self::ArrowKey | Self::Projection => None,
diff --git a/src/sql/physical/cdc/encode.rs b/src/sql/physical/cdc/encode.rs
index b1a1cc2e..07495a38 100644
--- a/src/sql/physical/cdc/encode.rs
+++ b/src/sql/physical/cdc/encode.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 内部回撤流压回 Debezium `before` / `after` / `op` 信封。
 
 use std::any::Any;
 use std::collections::HashMap;
@@ -169,7 +168,6 @@ struct ToDebeziumStream {
     struct_projection: Vec<usize>,
 }
 
-/// 按主键 id 归并一行内的 changelog，输出 before/after 行索引与 op 字母。
 fn compact_changelog_by_id<'a>(
     num_rows: usize,
     is_retract: &'a BooleanArray,
diff --git a/src/sql/physical/cdc/mod.rs b/src/sql/physical/cdc/mod.rs
index 743ca966..9e32e67a 100644
--- a/src/sql/physical/cdc/mod.rs
+++ b/src/sql/physical/cdc/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Debezium 与内部 changelog 行格式的互转。
 
 mod encode;
 mod unroll;
diff --git a/src/sql/physical/cdc/unroll.rs b/src/sql/physical/cdc/unroll.rs
index 345d2642..f40beb06 100644
--- a/src/sql/physical/cdc/unroll.rs
+++ b/src/sql/physical/cdc/unroll.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Debezium 信封展开为内部带 retract 语义的行流。
 
 use std::any::Any;
 use std::pin::Pin;
@@ -86,7 +85,6 @@ impl DebeziumUnrollingExec {
         })
     }
 
-    /// 分布式反序列化路径：跳过 `try_new` 的 schema 校验（proto 已约定形态）。
     pub(crate) fn from_decoded_parts(
         input: Arc<dyn ExecutionPlan>,
         schema: SchemaRef,
diff --git a/src/sql/physical/codec.rs b/src/sql/physical/codec.rs
index e90e4b3a..1301ef09 100644
--- a/src/sql/physical/codec.rs
+++ b/src/sql/physical/codec.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 分布式物理计划 proto 编解码（`FsExecNode`）。
 
 use std::sync::Arc;
 
diff --git a/src/sql/physical/meta.rs b/src/sql/physical/meta.rs
index 5828593c..95dd8fd8 100644
--- a/src/sql/physical/meta.rs
+++ b/src/sql/physical/meta.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Changelog 元数据列：`is_retract`、行 `id`（FixedSizeBinary）。
 
 use std::sync::{Arc, OnceLock};
 
diff --git a/src/sql/physical/mod.rs b/src/sql/physical/mod.rs
index 1ba09eb0..ee63a2be 100644
--- a/src/sql/physical/mod.rs
+++ b/src/sql/physical/mod.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 流式物理执行扩展：元数据列、UDF、内存/无界 Reader、CDC 与 proto 编解码。
 
 mod cdc;
 mod codec;
diff --git a/src/sql/physical/readers.rs b/src/sql/physical/readers.rs
index 67a250fd..1c785464 100644
--- a/src/sql/physical/readers.rs
+++ b/src/sql/physical/readers.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 无界/锁控 `RecordBatch` 数据源与规划期占位 `FsMemExec`。
 
 use std::any::Any;
 use std::mem;
diff --git a/src/sql/physical/udfs.rs b/src/sql/physical/udfs.rs
index 267cb6e3..03895fda 100644
--- a/src/sql/physical/udfs.rs
+++ b/src/sql/physical/udfs.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! 流式 `window(start, end)` 标量 UDF。
 
 use std::any::Any;
 use std::sync::Arc;

From 561da59c1761d9ea4546ecbdc3c68b2a9fe85fc7 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 18:13:31 +0800
Subject: [PATCH 29/44] feat(catalog): add SHOW TABLES / SHOW CREATE TABLE and persistent stream catalog

---
 Cargo.lock                                    |   1 -
 cli/cli/Cargo.toml                            |   1 -
 cli/cli/src/repl.rs                           |  52 ++++-
 conf/config.yaml                              |   7 +
 protocol/proto/storage.proto                  |   2 +
 src/config/global_config.rs                   |   2 +
 src/config/storage.rs                         |  24 +++
 src/coordinator/analyze/analyzer.rs           |  21 +-
 src/coordinator/dataset/mod.rs                |   4 +
 .../dataset/show_catalog_tables_result.rs     |  92 ++++++++
 .../dataset/show_create_table_result.rs       |  51 +++++
 src/coordinator/execution/executor.rs         |  53 ++++-
 src/coordinator/mod.rs                        |   3 +-
 src/coordinator/plan/logical_plan_visitor.rs  |  25 ++-
 src/coordinator/plan/mod.rs                   |   4 +
 .../plan/show_catalog_tables_plan.rs          |  28 +++
 .../plan/show_create_table_plan.rs            |  30 +++
 src/coordinator/plan/visitor.rs               |  16 +-
 src/coordinator/statement/mod.rs              |   4 +
 .../statement/show_catalog_tables.rs          |  33 +++
 .../statement/show_create_table.rs            |  35 +++
 src/coordinator/statement/visitor.rs          |  15 +-
 src/server/handler.rs                         |   7 +-
 src/sql/common/connector_options.rs           |  27 ++-
 src/sql/frontend_sql_coverage_tests.rs        |  16 ++
 src/sql/parse.rs                              |  37 +++-
 src/sql/schema/catalog_ddl.rs                 | 199 ++++++++++++++++++
 src/sql/schema/mod.rs                         |   2 +
 src/sql/schema/schema_provider.rs             |   5 +-
 src/sql/schema/source_table.rs                |  22 +-
 src/storage/stream_catalog/manager.rs         |  93 +++++++-
 src/storage/stream_catalog/mod.rs             |   2 +
 .../stream_catalog/rocksdb_meta_store.rs      | 131 ++++++++++++
 33 files changed, 1001 insertions(+), 43 deletions(-)
 create mode 100644 src/coordinator/dataset/show_catalog_tables_result.rs
 create mode 100644 src/coordinator/dataset/show_create_table_result.rs
 create mode 100644 src/coordinator/plan/show_catalog_tables_plan.rs
 create mode 100644 src/coordinator/plan/show_create_table_plan.rs
 create mode 100644 src/coordinator/statement/show_catalog_tables.rs
 create mode 100644 src/coordinator/statement/show_create_table.rs
 create mode 100644 src/sql/schema/catalog_ddl.rs
 create mode 100644 src/storage/stream_catalog/rocksdb_meta_store.rs

diff --git a/Cargo.lock b/Cargo.lock
index c6994ec0..9cdca7e8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2324,7 +2324,6 @@ dependencies = [
  "comfy-table",
  "protocol",
  "rustyline",
- "thiserror 2.0.17",
  "tokio",
  "tonic",
 ]
diff --git a/cli/cli/Cargo.toml b/cli/cli/Cargo.toml
index e3c1c591..3c05d6b4 100644
--- a/cli/cli/Cargo.toml
+++ b/cli/cli/Cargo.toml
@@ -14,7 +14,6 @@ arrow-schema = "52"
 comfy-table = "7"
 protocol = { path = "../../protocol" }
 clap = { version = "4.5", features = ["derive"] }
-thiserror = "2"
 tokio = { version = "1.0", features = ["full", "signal"] }
 tonic = { version = "0.12", features = ["default"] }
 rustyline = { version = "14.0", features = ["with-dirs"] }
diff --git a/cli/cli/src/repl.rs b/cli/cli/src/repl.rs
index 7f8087b3..8c3882b2 100644
--- a/cli/cli/src/repl.rs
+++ b/cli/cli/src/repl.rs
@@ -20,26 +20,62 @@ use comfy_table::{Attribute, Cell, Color, ContentArrangement, Table, TableCompon
 use protocol::cli::{function_stream_service_client::FunctionStreamServiceClient, SqlRequest};
 use rustyline::error::ReadlineError;
 use rustyline::{Config, DefaultEditor, EditMode};
+use std::fmt;
 use std::io::{self, Cursor, Write};
 use std::sync::Arc;
 use tokio::sync::Mutex;
 use tonic::Request;
 
-#[derive(Debug, thiserror::Error)]
+/// Errors surfaced by the CLI REPL.
+///
+/// **Important:** [`tonic::Status`] must not be formatted with `{}` — its [`fmt::Display`] dumps
+/// `details` / `metadata` (e.g. HTTP headers). [`ReplError::Rpc`] keeps only the text of
+/// [`tonic::Status::message`], or a status-code fallback when that message is empty.
+#[derive(Debug)]
 pub enum ReplError {
-    #[error("RPC error: {0}")]
-    Rpc(Box<tonic::Status>),
-    #[error("Connection failed: {0}")]
+    Rpc(String),
     Connection(String),
-    #[error("Internal error: {0}")]
     Internal(String),
-    #[error("IO error: {0}")]
-    Io(#[from] io::Error),
+    Io(io::Error),
+}
+
+impl fmt::Display for ReplError { // user-facing text for each variant
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ReplError::Rpc(s) => f.write_str(s), // stored text is written as-is, no prefix
+            ReplError::Connection(s) => f.write_str(s), // likewise written without a prefix
+            ReplError::Internal(s) => write!(f, "Internal error: {s}"),
+            ReplError::Io(e) => write!(f, "IO error: {e}"),
+        }
+    }
+}
+
+impl std::error::Error for ReplError { // only `Io` exposes an underlying source error
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            ReplError::Io(e) => Some(e),
+            _ => None, // the remaining variants carry plain strings
+        }
+    }
+}
+
+impl From<io::Error> for ReplError { // lets `?` turn an `io::Error` into `ReplError::Io`
+    fn from(e: io::Error) -> Self {
+        ReplError::Io(e)
+    }
 }
 
 impl From<tonic::Status> for ReplError {
     fn from(s: tonic::Status) -> Self {
-        ReplError::Rpc(Box::new(s))
+        let msg = s.message(); // keep the message text only; the status itself is not stored
+        if msg.is_empty() { // no message: fall back to the status code so the text is not empty
+            ReplError::Rpc(format!(
+                "gRPC {} (server returned no message)",
+                s.code()
+            ))
+        } else {
+            ReplError::Rpc(msg.to_string())
+        }
     }
 }
 
diff --git a/conf/config.yaml b/conf/config.yaml
index 3f19493d..9d0f625e 100644
--- a/conf/config.yaml
+++ b/conf/config.yaml
@@ -117,3 +117,10 @@ task_storage:
     
     # Maximum bytes for level base in bytes (optional)
     max_bytes_for_level_base: 268435456
+
+# Stream table catalog (SQL: CREATE TABLE connector sources, SHOW TABLES, SHOW CREATE TABLE).
+# When persist is true (default), metadata is stored in RocksDB at db_path (default: data/stream_catalog)
+# and reloaded after a process restart. Set persist: false only for tests or ephemeral nodes.
+stream_catalog:
+  persist: true
+  # db_path: data/stream_catalog
diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto
index cace3107..5ad09d38 100644
--- a/protocol/proto/storage.proto
+++ b/protocol/proto/storage.proto
@@ -27,6 +27,8 @@ message StreamSource {
   bytes arrow_schema_ipc = 1;
   optional string event_time_field = 2;
   optional string watermark_field = 3;
+  // Original CREATE TABLE ... WITH ('k'='v', ...) pairs (best-effort; keys sorted in DDL).
+  map<string, string> with_options = 4;
 }
 
 message StreamSink {
diff --git a/src/config/global_config.rs b/src/config/global_config.rs
index 33676125..c76bf4b0 100644
--- a/src/config/global_config.rs
+++ b/src/config/global_config.rs
@@ -40,6 +40,8 @@ pub struct GlobalConfig {
     pub task_storage: crate::config::storage::TaskStorageConfig,
     #[serde(default)]
     pub streaming: StreamingConfig,
+    #[serde(default)]
+    pub stream_catalog: crate::config::storage::StreamCatalogConfig,
 }
 
 impl GlobalConfig {
diff --git a/src/config/storage.rs b/src/config/storage.rs
index e5186648..28396d7d 100644
--- a/src/config/storage.rs
+++ b/src/config/storage.rs
@@ -118,3 +118,27 @@ impl Default for TaskStorageConfig {
         }
     }
 }
+
+/// Storage settings for the stream table catalog (`CREATE TABLE` / `SHOW TABLES`).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StreamCatalogConfig {
+    /// Defaults to `true`. When `false`, the catalog is in-memory only and is **lost on process restart**.
+    #[serde(default = "default_stream_catalog_persist")]
+    pub persist: bool,
+    /// RocksDB directory for the persisted catalog. When unset, defaults to `{data_dir}/stream_catalog`.
+    #[serde(default)]
+    pub db_path: Option<String>,
+}
+
+fn default_stream_catalog_persist() -> bool { // serde default for `StreamCatalogConfig::persist`
+    true
+}
+
+impl Default for StreamCatalogConfig { // mirrors the per-field serde defaults
+    fn default() -> Self {
+        Self {
+            persist: default_stream_catalog_persist(), // same helper serde calls for a missing key
+            db_path: None,
+        }
+    }
+}
diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs
index 3889431e..dbac78cf 100644
--- a/src/coordinator/analyze/analyzer.rs
+++ b/src/coordinator/analyze/analyzer.rs
@@ -14,8 +14,9 @@ use super::Analysis;
 use crate::coordinator::execution_context::ExecutionContext;
 use crate::coordinator::statement::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
-    ShowFunctions, StartFunction, Statement, StatementVisitor, StatementVisitorContext,
-    StatementVisitorResult, StopFunction, StreamingTableStatement,
+    ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, Statement,
+    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction,
+    StreamingTableStatement,
 };
 use std::fmt;
 
@@ -109,6 +110,22 @@ impl StatementVisitor for Analyzer<'_> {
         StatementVisitorResult::Analyze(Box::new(stmt.clone()))
     }
 
+    fn visit_show_catalog_tables(
+        &self,
+        stmt: &ShowCatalogTables,
+        _context: &StatementVisitorContext, // unused here
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Analyze(Box::new(stmt.clone())) // statement is cloned and returned as-is
+    }
+
+    fn visit_show_create_table(
+        &self,
+        stmt: &ShowCreateTable,
+        _context: &StatementVisitorContext, // unused here
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Analyze(Box::new(stmt.clone())) // statement is cloned and returned as-is
+    }
+
     fn visit_create_python_function(
         &self,
         stmt: &CreatePythonFunction,
diff --git a/src/coordinator/dataset/mod.rs b/src/coordinator/dataset/mod.rs
index b72613da..f09c24ca 100644
--- a/src/coordinator/dataset/mod.rs
+++ b/src/coordinator/dataset/mod.rs
@@ -12,8 +12,12 @@
 
 mod data_set;
 mod execute_result;
+mod show_catalog_tables_result;
+mod show_create_table_result;
 mod show_functions_result;
 
 pub use data_set::{DataSet, empty_record_batch};
 pub use execute_result::ExecuteResult;
+pub use show_catalog_tables_result::ShowCatalogTablesResult;
+pub use show_create_table_result::ShowCreateTableResult;
 pub use show_functions_result::ShowFunctionsResult;
diff --git a/src/coordinator/dataset/show_catalog_tables_result.rs b/src/coordinator/dataset/show_catalog_tables_result.rs
new file mode 100644
index 00000000..77792517
--- /dev/null
+++ b/src/coordinator/dataset/show_catalog_tables_result.rs
@@ -0,0 +1,105 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use arrow_array::{Int32Array, StringArray};
+use arrow_schema::{DataType, Field, Schema};
+
+use super::DataSet;
+use crate::sql::schema::{schema_columns_one_line, stream_table_row_detail, StreamTable};
+
+/// Columnar result of `SHOW TABLES`: one entry per stream-catalog table.
+///
+/// The five vectors are parallel (index `i` of each describes the same table); the only
+/// constructor, [`ShowCatalogTablesResult::from_tables`], fills them in lockstep.
+#[derive(Clone, Debug)]
+pub struct ShowCatalogTablesResult {
+    names: Vec<String>,
+    kinds: Vec<String>,
+    column_counts: Vec<i32>,
+    schema_lines: Vec<String>,
+    details: Vec<String>,
+}
+
+impl ShowCatalogTablesResult {
+    /// Builds the result from catalog entries, keeping the order of `tables`.
+    pub fn from_tables(tables: &[Arc<StreamTable>]) -> Self {
+        let mut names = Vec::with_capacity(tables.len());
+        let mut kinds = Vec::with_capacity(tables.len());
+        let mut column_counts = Vec::with_capacity(tables.len());
+        let mut schema_lines = Vec::with_capacity(tables.len());
+        let mut details = Vec::with_capacity(tables.len());
+
+        for t in tables {
+            let schema = t.schema();
+            // Saturate rather than wrap (as `as i32` would) on an out-of-range column count.
+            let ncols = i32::try_from(schema.fields().len()).unwrap_or(i32::MAX);
+            names.push(t.name().to_string());
+            kinds.push(
+                match t.as_ref() {
+                    StreamTable::Source { .. } => "SOURCE",
+                    StreamTable::Sink { .. } => "SINK",
+                }
+                .to_string(),
+            );
+            column_counts.push(ncols);
+            schema_lines.push(schema_columns_one_line(&schema));
+            details.push(stream_table_row_detail(t.as_ref()));
+        }
+
+        Self {
+            names,
+            kinds,
+            column_counts,
+            schema_lines,
+            details,
+        }
+    }
+}
+
+impl DataSet for ShowCatalogTablesResult {
+    /// Renders the rows as an Arrow batch with the non-nullable columns `table_name`, `kind`,
+    /// `column_count`, `schema_columns` and `details`.
+    ///
+    /// If batch construction fails, an empty batch that still carries this schema is returned.
+    fn to_record_batch(&self) -> arrow_array::RecordBatch {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("table_name", DataType::Utf8, false),
+            Field::new("kind", DataType::Utf8, false),
+            Field::new("column_count", DataType::Int32, false),
+            Field::new("schema_columns", DataType::Utf8, false),
+            Field::new("details", DataType::Utf8, false),
+        ]));
+
+        arrow_array::RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![
+                Arc::new(StringArray::from(
+                    self.names.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
+                )),
+                Arc::new(StringArray::from(
+                    self.kinds.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
+                )),
+                Arc::new(Int32Array::from(self.column_counts.clone())),
+                Arc::new(StringArray::from(
+                    self.schema_lines.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
+                )),
+                Arc::new(StringArray::from(
+                    self.details.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
+                )),
+            ],
+        )
+        // Keep the declared columns on the fallback so readers never get a zero-column batch.
+        .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(schema))
+    }
+}
diff --git a/src/coordinator/dataset/show_create_table_result.rs b/src/coordinator/dataset/show_create_table_result.rs
new file mode 100644
index 00000000..47f49d59
--- /dev/null
+++ b/src/coordinator/dataset/show_create_table_result.rs
@@ -0,0 +1,57 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use arrow_array::StringArray;
+use arrow_schema::{DataType, Field, Schema};
+
+use super::DataSet;
+
+/// Single-row result of `SHOW CREATE TABLE`: the table name and its rendered DDL text.
+#[derive(Clone, Debug)]
+pub struct ShowCreateTableResult {
+    table_name: String,
+    create_sql: String,
+}
+
+impl ShowCreateTableResult {
+    /// Wraps a table name together with the DDL string that was rendered for it.
+    pub fn new(table_name: String, create_sql: String) -> Self {
+        Self {
+            table_name,
+            create_sql,
+        }
+    }
+}
+
+impl DataSet for ShowCreateTableResult {
+    /// Renders one row with the non-nullable string columns `Table` and `Create Table`.
+    ///
+    /// If batch construction fails, an empty batch that still carries this schema is returned.
+    fn to_record_batch(&self) -> arrow_array::RecordBatch {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("Table", DataType::Utf8, false),
+            Field::new("Create Table", DataType::Utf8, false),
+        ]));
+
+        arrow_array::RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![
+                Arc::new(StringArray::from(vec![self.table_name.as_str()])),
+                Arc::new(StringArray::from(vec![self.create_sql.as_str()])),
+            ],
+        )
+        // Keep the declared columns on the fallback so readers never get a zero-column batch.
+        .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(schema))
+    }
+}
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index f9f26cd0..4a7fc273 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -16,17 +16,20 @@ use protocol::grpc::api::FsProgram;
 use thiserror::Error;
 use tracing::{debug, info};
 
-use crate::coordinator::dataset::{empty_record_batch, ExecuteResult, ShowFunctionsResult};
+use crate::coordinator::dataset::{
+    empty_record_batch, ExecuteResult, ShowCatalogTablesResult, ShowCreateTableResult,
+    ShowFunctionsResult,
+};
 use crate::coordinator::plan::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, CreateTablePlanBody,
     DropFunctionPlan, DropTablePlan, LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext,
-    PlanVisitorResult, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
-    StreamingTableConnectorPlan,
+    PlanVisitorResult, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan,
+    StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan,
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::streaming::job::JobManager;
 use crate::runtime::taskexecutor::TaskManager;
-use crate::sql::schema::StreamTable;
+use crate::sql::schema::{show_create_stream_table, StreamTable};
 use crate::storage::stream_catalog::CatalogManager;
 
 #[derive(Error, Debug)]
@@ -171,6 +174,44 @@ impl PlanVisitor for Executor {
         PlanVisitorResult::Execute(Ok(result))
     }
 
+    /// Handles `SHOW TABLES`: lists the stream catalog's tables as a `ShowCatalogTablesResult`.
+    /// The status message carries the table count.
+    fn visit_show_catalog_tables(
+        &self,
+        _plan: &ShowCatalogTablesPlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        let tables = self.catalog_manager.list_stream_tables();
+        let n = tables.len();
+        let rows = ShowCatalogTablesResult::from_tables(&tables);
+        let result = ExecuteResult::ok_with_data(format!("{n} stream catalog table(s)"), rows);
+        PlanVisitorResult::Execute(Ok(result))
+    }
+
+    /// Handles `SHOW CREATE TABLE <name>` by rendering DDL for a table in the stream catalog.
+    ///
+    /// A missing table is reported as `ExecuteError::Validation`.
+    fn visit_show_create_table(
+        &self,
+        plan: &ShowCreateTablePlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        let outcome = match self.catalog_manager.get_stream_table(&plan.table_name) {
+            Some(table) => {
+                let ddl = show_create_stream_table(table.as_ref());
+                Ok(ExecuteResult::ok_with_data(
+                    format!("SHOW CREATE TABLE {}", plan.table_name),
+                    ShowCreateTableResult::new(plan.table_name.clone(), ddl),
+                ))
+            }
+            None => Err(ExecuteError::Validation(format!(
+                "Table '{}' not found in stream catalog",
+                plan.table_name
+            ))),
+        };
+        PlanVisitorResult::Execute(outcome)
+    }
+
     fn visit_create_python_function(
         &self,
         plan: &CreatePythonFunctionPlan,
@@ -233,6 +274,7 @@ impl PlanVisitor for Executor {
                         schema,
                         event_time_field: source_table.event_time_field().map(str::to_string),
                         watermark_field: source_table.watermark_field().map(str::to_string),
+                        with_options: source_table.catalog_with_options().clone(),
                     };
                     (table_name, *if_not_exists, table_instance)
                 }
@@ -254,7 +296,8 @@ impl PlanVisitor for Executor {
                 .add_table(stream_table)
                 .map_err(|e| {
                     ExecuteError::Internal(format!(
-                        "Failed to register connector source table '{table_name}': {e}"
+                        "Failed to register connector source table '{}': {}",
+                        table_name, e
                     ))
                 })?;
 
diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs
index a781f1e1..23cd2925 100644
--- a/src/coordinator/mod.rs
+++ b/src/coordinator/mod.rs
@@ -26,5 +26,6 @@ pub use runtime_context::CoordinatorRuntimeContext;
 pub use dataset::{DataSet, ShowFunctionsResult};
 pub use statement::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
-    PythonModule, ShowFunctions, StartFunction, Statement, StopFunction, StreamingTableStatement,
+    PythonModule, ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, Statement,
+    StopFunction, StreamingTableStatement,
 };
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index e2e457eb..e5b5e36a 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -27,12 +27,13 @@ use tracing::debug;
 use crate::coordinator::analyze::analysis::Analysis;
 use crate::coordinator::plan::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan,
-    PlanNode, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
+    PlanNode, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan, StartFunctionPlan,
+    StopFunctionPlan, StreamingTable,
 };
 use crate::coordinator::statement::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
-    ShowFunctions, StartFunction, StatementVisitor, StatementVisitorContext,
-    StatementVisitorResult, StopFunction, StreamingTableStatement,
+    ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, StatementVisitor,
+    StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingTableStatement,
 };
 use crate::coordinator::tool::ConnectorOptions;
 use crate::sql::analysis::{
@@ -352,6 +353,24 @@ impl StatementVisitor for LogicalPlanVisitor {
         StatementVisitorResult::Plan(Box::new(ShowFunctionsPlan::new()))
     }
 
+    fn visit_show_catalog_tables(
+        &self,
+        _stmt: &ShowCatalogTables,
+        _ctx: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Plan(Box::new(ShowCatalogTablesPlan::new()))
+    }
+
+    /// Lowers the statement into a `ShowCreateTablePlan` carrying the requested table name.
+    fn visit_show_create_table(
+        &self,
+        stmt: &ShowCreateTable,
+        _ctx: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        let plan = ShowCreateTablePlan::new(stmt.table_name.clone());
+        StatementVisitorResult::Plan(Box::new(plan))
+    }
+
     fn visit_create_python_function(
         &self,
         stmt: &CreatePythonFunction,
diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs
index 2dbbab77..b04234d5 100644
--- a/src/coordinator/plan/mod.rs
+++ b/src/coordinator/plan/mod.rs
@@ -18,6 +18,8 @@ mod drop_table_plan;
 mod logical_plan_visitor;
 mod lookup_table_plan;
 mod optimizer;
+mod show_catalog_tables_plan;
+mod show_create_table_plan;
 mod show_functions_plan;
 mod start_function_plan;
 mod stop_function_plan;
@@ -33,6 +35,8 @@ pub use drop_table_plan::DropTablePlan;
 pub use logical_plan_visitor::LogicalPlanVisitor;
 pub use lookup_table_plan::LookupTablePlan;
 pub use optimizer::LogicalPlanner;
+pub use show_catalog_tables_plan::ShowCatalogTablesPlan;
+pub use show_create_table_plan::ShowCreateTablePlan;
 pub use show_functions_plan::ShowFunctionsPlan;
 pub use start_function_plan::StartFunctionPlan;
 pub use stop_function_plan::StopFunctionPlan;
diff --git a/src/coordinator/plan/show_catalog_tables_plan.rs b/src/coordinator/plan/show_catalog_tables_plan.rs
new file mode 100644
index 00000000..420fdb40
--- /dev/null
+++ b/src/coordinator/plan/show_catalog_tables_plan.rs
@@ -0,0 +1,31 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+/// Plan node for listing the stream catalog's tables; it carries no parameters.
+#[derive(Debug, Default)]
+pub struct ShowCatalogTablesPlan;
+
+impl ShowCatalogTablesPlan {
+    /// Creates the (field-less) plan node.
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl PlanNode for ShowCatalogTablesPlan {
+    /// Dispatches to [`PlanVisitor::visit_show_catalog_tables`].
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_show_catalog_tables(self, context)
+    }
+}
diff --git a/src/coordinator/plan/show_create_table_plan.rs b/src/coordinator/plan/show_create_table_plan.rs
new file mode 100644
index 00000000..c5fe6376
--- /dev/null
+++ b/src/coordinator/plan/show_create_table_plan.rs
@@ -0,0 +1,34 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+/// Plan node for `SHOW CREATE TABLE <name>`.
+#[derive(Debug, Clone)]
+pub struct ShowCreateTablePlan {
+    /// Name of the table whose DDL is requested.
+    pub table_name: String,
+}
+
+impl ShowCreateTablePlan {
+    /// Creates a plan for the given table name.
+    pub fn new(table_name: String) -> Self {
+        Self { table_name }
+    }
+}
+
+impl PlanNode for ShowCreateTablePlan {
+    /// Dispatches to [`PlanVisitor::visit_show_create_table`].
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_show_create_table(self, context)
+    }
+}
diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs
index e8efcf32..28f11f53 100644
--- a/src/coordinator/plan/visitor.rs
+++ b/src/coordinator/plan/visitor.rs
@@ -12,8 +12,8 @@
 
 use super::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan,
-    LookupTablePlan, ShowFunctionsPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
-    StreamingTableConnectorPlan,
+    LookupTablePlan, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan,
+    StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan,
 };
 
 /// Context passed to PlanVisitor methods
@@ -80,6 +80,18 @@ pub trait PlanVisitor {
         context: &PlanVisitorContext,
     ) -> PlanVisitorResult;
 
+    fn visit_show_catalog_tables(
+        &self,
+        plan: &ShowCatalogTablesPlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
+
+    fn visit_show_create_table(
+        &self,
+        plan: &ShowCreateTablePlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
+
     fn visit_create_python_function(
         &self,
         plan: &CreatePythonFunctionPlan,
diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs
index 7b39787d..83f6ca0e 100644
--- a/src/coordinator/statement/mod.rs
+++ b/src/coordinator/statement/mod.rs
@@ -15,6 +15,8 @@ mod create_python_function;
 mod create_table;
 mod drop_function;
 mod drop_table;
+mod show_catalog_tables;
+mod show_create_table;
 mod show_functions;
 mod start_function;
 mod stop_function;
@@ -26,6 +28,8 @@ pub use create_python_function::{CreatePythonFunction, PythonModule};
 pub use create_table::CreateTable;
 pub use drop_function::DropFunction;
 pub use drop_table::DropTableStatement;
+pub use show_catalog_tables::ShowCatalogTables;
+pub use show_create_table::ShowCreateTable;
 pub use show_functions::ShowFunctions;
 pub use start_function::StartFunction;
 pub use stop_function::StopFunction;
diff --git a/src/coordinator/statement/show_catalog_tables.rs b/src/coordinator/statement/show_catalog_tables.rs
new file mode 100644
index 00000000..1f034562
--- /dev/null
+++ b/src/coordinator/statement/show_catalog_tables.rs
@@ -0,0 +1,35 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// `SHOW TABLES` over the stream catalog (connector sources + streaming sinks).
+#[derive(Debug, Clone, Default)]
+pub struct ShowCatalogTables;
+
+impl ShowCatalogTables {
+    /// Creates the statement; it carries no parameters.
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl Statement for ShowCatalogTables {
+    /// Dispatches to [`StatementVisitor::visit_show_catalog_tables`].
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_show_catalog_tables(self, context)
+    }
+}
diff --git a/src/coordinator/statement/show_create_table.rs b/src/coordinator/statement/show_create_table.rs
new file mode 100644
index 00000000..5b54a726
--- /dev/null
+++ b/src/coordinator/statement/show_create_table.rs
@@ -0,0 +1,38 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// `SHOW CREATE TABLE <name>` for a stream-catalog table.
+#[derive(Debug, Clone)]
+pub struct ShowCreateTable {
+    /// Name of the table whose DDL is requested.
+    pub table_name: String,
+}
+
+impl ShowCreateTable {
+    /// Creates the statement for the given table name.
+    pub fn new(table_name: String) -> Self {
+        Self { table_name }
+    }
+}
+
+impl Statement for ShowCreateTable {
+    /// Dispatches to [`StatementVisitor::visit_show_create_table`].
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_show_create_table(self, context)
+    }
+}
diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs
index 641abf98..f24c85be 100644
--- a/src/coordinator/statement/visitor.rs
+++ b/src/coordinator/statement/visitor.rs
@@ -12,7 +12,8 @@
 
 use super::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
-    ShowFunctions, StartFunction, StopFunction, StreamingTableStatement,
+    ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, StopFunction,
+    StreamingTableStatement,
 };
 use crate::coordinator::plan::PlanNode;
 use crate::coordinator::statement::Statement;
@@ -83,6 +84,18 @@ pub trait StatementVisitor {
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
 
+    fn visit_show_catalog_tables(
+        &self,
+        stmt: &ShowCatalogTables,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
+
+    fn visit_show_create_table(
+        &self,
+        stmt: &ShowCreateTable,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
+
     fn visit_create_python_function(
         &self,
         stmt: &CreatePythonFunction,
diff --git a/src/server/handler.rs b/src/server/handler.rs
index 8ed484d6..2ef6b529 100644
--- a/src/server/handler.rs
+++ b/src/server/handler.rs
@@ -112,8 +112,9 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
         let req = request.into_inner();
 
         let statements = parse_sql(&req.sql).map_err(|e| {
-            warn!("SQL parse rejection: {}", e);
-            Status::invalid_argument("Provided SQL syntax is invalid")
+            let detail = e.to_string();
+            warn!("SQL parse rejection: {}", detail);
+            Status::invalid_argument(detail)
         })?;
 
         if statements.is_empty() {
@@ -229,7 +230,7 @@ impl FunctionStreamService for FunctionStreamServiceImpl {
             error!("show_functions execution failed: {}", result.message);
             return Ok(TonicResponse::new(ShowFunctionsResponse {
                 status_code: StatusCode::InternalServerError as i32,
-                message: "Failed to retrieve function definitions".to_string(),
+                message: result.message,
                 functions: vec![],
             }));
         }
diff --git a/src/sql/common/connector_options.rs b/src/sql/common/connector_options.rs
index bffa766a..98e3299e 100644
--- a/src/sql/common/connector_options.rs
+++ b/src/sql/common/connector_options.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::num::{NonZero, NonZeroU64};
 use std::str::FromStr;
 use std::time::Duration;
@@ -31,6 +31,22 @@ pub struct ConnectorOptions {
     partitions: Vec<Expr>,
 }
 
+fn sql_expr_to_catalog_string(e: &Expr) -> String {
+    match e {
+        Expr::Value(ValueWithSpan { value, .. }) => match value {
+            SqlValue::SingleQuotedString(s) | SqlValue::DoubleQuotedString(s) => s.clone(),
+            SqlValue::NationalStringLiteral(s) => s.clone(),
+            SqlValue::HexStringLiteral(s) => s.clone(),
+            SqlValue::Number(n, _) => n.clone(),
+            SqlValue::Boolean(b) => b.to_string(),
+            SqlValue::Null => "NULL".to_string(),
+            other => other.to_string(),
+        },
+        Expr::Identifier(ident) => ident.value.clone(),
+        other => other.to_string(),
+    }
+}
+
 impl ConnectorOptions {
     pub fn new(sql_opts: &[SqlOption], partition_by: &Option<Vec<Expr>>) -> DFResult<Self> {
         let mut options = HashMap::new();
@@ -331,6 +347,15 @@ impl ConnectorOptions {
         }
         Ok(out)
     }
+
+    /// Snapshot of all current `WITH` key/value pairs for catalog persistence (`SHOW CREATE TABLE`).
+    /// Call before any `pull_*` consumes options.
+    pub fn snapshot_for_catalog(&self) -> BTreeMap<String, String> {
+        self.options
+            .iter()
+            .map(|(k, v)| (k.clone(), sql_expr_to_catalog_string(v)))
+            .collect()
+    }
 }
 
 fn duration_from_sql_expr(expr: &Expr) -> Result<Duration, DataFusionError> {
diff --git a/src/sql/frontend_sql_coverage_tests.rs b/src/sql/frontend_sql_coverage_tests.rs
index cee4d82e..0a201f9e 100644
--- a/src/sql/frontend_sql_coverage_tests.rs
+++ b/src/sql/frontend_sql_coverage_tests.rs
@@ -359,6 +359,22 @@ fn compile_plan_show_functions() {
         .expect("ShowFunctions plan");
 }
 
+#[test]
+fn compile_plan_show_tables() {
+    let stmts = parse_sql("SHOW TABLES").unwrap();
+    Coordinator::new()
+        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
+        .expect("ShowCatalogTables plan");
+}
+
+#[test]
+fn compile_plan_show_create_table() {
+    let stmts = parse_sql("SHOW CREATE TABLE my_table").unwrap();
+    Coordinator::new()
+        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
+        .expect("ShowCreateTable plan");
+}
+
 #[test]
 fn compile_plan_start_stop_drop_function() {
     for sql in [
diff --git a/src/sql/parse.rs b/src/sql/parse.rs
index 1feff64a..33bb13ad 100644
--- a/src/sql/parse.rs
+++ b/src/sql/parse.rs
@@ -19,6 +19,8 @@
 //! - **`CREATE TABLE ...`** other forms (including `CREATE TABLE ... AS SELECT` where DataFusion accepts it)
 //! - **`CREATE STREAMING TABLE ... WITH (...) AS SELECT ...`** (streaming sink DDL)
 //! - **`DROP TABLE`** / **`DROP TABLE IF EXISTS`** / **`DROP STREAMING TABLE`** (alias for `DROP TABLE` on the stream catalog)
+//! - **`SHOW TABLES`** — list stream catalog tables (connector sources and streaming sinks)
+//! - **`SHOW CREATE TABLE <name>`** — best-effort DDL text (full `WITH` / `AS SELECT` may not be stored)
 //!
 //! **`INSERT` is not supported** here — use `CREATE TABLE ... AS SELECT` or
 //! `CREATE STREAMING TABLE ... AS SELECT` to define the query shape instead.
@@ -29,13 +31,16 @@ use std::collections::HashMap;
 
 use datafusion::common::{Result, plan_err};
 use datafusion::error::DataFusionError;
-use datafusion::sql::sqlparser::ast::{ObjectType, SqlOption, Statement as DFStatement};
+use datafusion::sql::sqlparser::ast::{
+    ObjectType, ShowCreateObject, SqlOption, Statement as DFStatement,
+};
 use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
 use datafusion::sql::sqlparser::parser::Parser;
 
 use crate::coordinator::{
-    CreateFunction, CreateTable, DropFunction, DropTableStatement, ShowFunctions, StartFunction,
-    Statement as CoordinatorStatement, StopFunction, StreamingTableStatement,
+    CreateFunction, CreateTable, DropFunction, DropTableStatement, ShowCatalogTables,
+    ShowCreateTable, ShowFunctions, StartFunction, Statement as CoordinatorStatement, StopFunction,
+    StreamingTableStatement,
 };
 
 /// `DROP STREAMING TABLE t` is accepted as sugar for `DROP TABLE t` against the same catalog.
@@ -89,6 +94,15 @@ fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>
             Ok(Box::new(DropFunction::new(name)))
         }
         DFStatement::ShowFunctions { .. } => Ok(Box::new(ShowFunctions::new())),
+        DFStatement::ShowTables { .. } => Ok(Box::new(ShowCatalogTables::new())),
+        DFStatement::ShowCreate { obj_type, obj_name } => {
+            if obj_type != ShowCreateObject::Table {
+                return plan_err!(
+                    "SHOW CREATE {obj_type} is not supported; use SHOW CREATE TABLE <name>"
+                );
+            }
+            Ok(Box::new(ShowCreateTable::new(obj_name.to_string())))
+        }
         s @ DFStatement::CreateTable(_) => Ok(Box::new(CreateTable::new(s))),
         s @ DFStatement::CreateStreamingTable { .. } => {
             Ok(Box::new(StreamingTableStatement::new(s)))
@@ -116,10 +130,7 @@ fn classify_statement(stmt: DFStatement) -> Result<Box<dyn CoordinatorStatement>
             "INSERT is not supported; only CREATE TABLE and CREATE STREAMING TABLE (with AS SELECT) \
              are supported for defining table/query pipelines in this SQL frontend"
         ),
-        other => plan_err!(
-            "Unsupported SQL statement: {other}. \
-             For tables/pipelines use CREATE TABLE or CREATE STREAMING TABLE ... AS SELECT; INSERT is not supported."
-        ),
+        other => plan_err!("Unsupported SQL statement: {other}"),
     }
 }
 
@@ -190,6 +201,18 @@ mod tests {
         assert!(is_type(stmt.as_ref(), "ShowFunctions"));
     }
 
+    #[test]
+    fn test_parse_show_tables() {
+        let stmt = first_stmt("SHOW TABLES");
+        assert!(is_type(stmt.as_ref(), "ShowCatalogTables"));
+    }
+
+    #[test]
+    fn test_parse_show_create_table() {
+        let stmt = first_stmt("SHOW CREATE TABLE my_src");
+        assert!(is_type(stmt.as_ref(), "ShowCreateTable"));
+    }
+
     #[test]
     fn test_parse_create_table() {
         let stmt = first_stmt("CREATE TABLE foo (id INT, name VARCHAR)");
diff --git a/src/sql/schema/catalog_ddl.rs b/src/sql/schema/catalog_ddl.rs
new file mode 100644
index 00000000..2eea78f9
--- /dev/null
+++ b/src/sql/schema/catalog_ddl.rs
@@ -0,0 +1,199 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Best-effort SQL text for catalog introspection (`SHOW CREATE TABLE`).
+
+use std::collections::BTreeMap;
+
+use datafusion::arrow::datatypes::{DataType, TimeUnit};
+
+use super::schema_provider::StreamTable;
+use crate::sql::logical_node::logical::LogicalProgram;
+
+/// SQL type name for an Arrow [`DataType`]; unmapped types use their `Display` form.
+fn data_type_sql(dt: &DataType) -> String {
+    let name = match dt {
+        DataType::Null => "NULL",
+        DataType::Boolean => "BOOLEAN",
+        DataType::Int8 => "TINYINT",
+        DataType::Int16 => "SMALLINT",
+        DataType::Int32 => "INT",
+        DataType::Int64 => "BIGINT",
+        DataType::UInt8 => "TINYINT UNSIGNED",
+        DataType::UInt16 => "SMALLINT UNSIGNED",
+        DataType::UInt32 => "INT UNSIGNED",
+        DataType::UInt64 => "BIGINT UNSIGNED",
+        DataType::Float16 => "FLOAT",
+        DataType::Float32 => "REAL",
+        DataType::Float64 => "DOUBLE",
+        DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => "VARCHAR",
+        DataType::Binary | DataType::LargeBinary => "VARBINARY",
+        DataType::Date32 | DataType::Date64 => "DATE",
+        DataType::Timestamp(_, Some(_)) => "TIMESTAMP WITH TIME ZONE",
+        DataType::Timestamp(TimeUnit::Second, None) => "TIMESTAMP(0)",
+        DataType::Timestamp(TimeUnit::Millisecond, None) => "TIMESTAMP(3)",
+        DataType::Timestamp(TimeUnit::Microsecond, None) => "TIMESTAMP(6)",
+        DataType::Timestamp(TimeUnit::Nanosecond, None) => "TIMESTAMP(9)",
+        DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => {
+            return format!("DECIMAL({p},{s})");
+        }
+        _ => return dt.to_string(),
+    };
+    name.to_string()
+}
+
+/// Renders each schema field as one column-definition line for `CREATE TABLE` output.
+///
+/// Every entry has the form `  <name> <SQL type>`, with ` NOT NULL` appended for non-nullable
+/// fields; entries carry no trailing comma or newline.
+fn format_columns(schema: &datafusion::arrow::datatypes::Schema) -> Vec<String> {
+    let mut columns = Vec::with_capacity(schema.fields().len());
+    for field in schema.fields().iter() {
+        let sql_type = data_type_sql(field.data_type());
+        // Nullable is left implicit; only the `NOT NULL` constraint is spelled out.
+        let constraint = if field.is_nullable() { "" } else { " NOT NULL" };
+        columns.push(format!("  {} {sql_type}{constraint}", field.name()));
+    }
+    columns
+}
+
+/// Renders the `WITH (...)` clause with one `'key' = 'value'` pair per line; an empty map
+/// yields a placeholder clause followed by an explanatory SQL comment.
+fn format_with_clause(opts: &BTreeMap<String, String>) -> String {
+    if opts.is_empty() {
+        return "WITH ('connector' = '...', 'format' = '...');\n/* Original WITH options are not persisted in the stream catalog. */\n"
+            .to_string();
+    }
+    let mut pairs = Vec::with_capacity(opts.len());
+    for (key, value) in opts {
+        // Double embedded single quotes so both sides stay valid SQL string literals.
+        let (key, value) = (key.replace('\'', "''"), value.replace('\'', "''"));
+        pairs.push(format!("  '{key}' = '{value}'"));
+    }
+    format!("WITH (\n{}\n);\n", pairs.join(",\n"))
+}
+
+/// Compact `name:TYPE, name:TYPE, ...` rendering of a schema for result grids.
+/// An empty schema yields an empty string.
+pub fn schema_columns_one_line(schema: &datafusion::arrow::datatypes::Schema) -> String {
+    let mut cells = Vec::with_capacity(schema.fields().len());
+    for field in schema.fields().iter() {
+        cells.push(format!("{}:{}", field.name(), data_type_sql(field.data_type())));
+    }
+    cells.join(", ")
+}
+
+/// One-line pipeline summary for result grids.
+///
+/// Lists the task count and program hash, then one `n<node id>:<op>-><op>...` entry per graph
+/// node, all joined with ` | `.
+fn pipeline_summary_short(program: &LogicalProgram) -> String {
+    let mut parts = vec![
+        format!("tasks={}", program.task_count()),
+        format!("hash={}", program.get_hash()),
+    ];
+    for node in program.graph.node_weights() {
+        let ops = &node.operator_chain.operators;
+        let names: Vec<String> = ops.iter().map(|op| op.operator_name.to_string()).collect();
+        parts.push(format!("n{}:{}", node.node_id, names.join("->")));
+    }
+    parts.join(" | ")
+}
+
+/// Detail cell for one `SHOW TABLES` row.
+///
+/// Sources report their event-time and watermark fields (both in `Debug` form, so a missing
+/// field shows as `None`) plus the number of stored `WITH` options. Sinks report the short
+/// pipeline summary rather than the full Graphviz text.
+pub fn stream_table_row_detail(table: &StreamTable) -> String {
+    let (event_time, watermark, option_count) = match table {
+        // A sink has no source metadata; its pipeline summary is the whole cell.
+        StreamTable::Sink { program, .. } => return pipeline_summary_short(program),
+        StreamTable::Source {
+            event_time_field,
+            watermark_field,
+            with_options,
+            ..
+        } => (event_time_field, watermark_field, with_options.len()),
+    };
+
+    format!("event_time={event_time:?}, watermark={watermark:?}, with_options={option_count}")
+}
+
+/// Multi-line pipeline description: task count, program hash, one line per graph node, then
+/// the Graphviz DOT text, cut to at most `MAX_DOT` bytes without splitting a UTF-8 character.
+fn pipeline_text(program: &LogicalProgram) -> String {
+    const MAX_DOT: usize = 12_000;
+    let mut lines: Vec<String> = Vec::new();
+    lines.push(format!("tasks_total: {}", program.task_count()));
+    lines.push(format!("program_hash: {}", program.get_hash()));
+    for nw in program.graph.node_weights() {
+        let chain = nw
+            .operator_chain
+            .operators
+            .iter()
+            .map(|o| format!("{}[{}]", o.operator_name, o.operator_id))
+            .collect::<Vec<_>>()
+            .join(" -> ");
+        let (id, par) = (&nw.node_id, &nw.parallelism);
+        lines.push(format!("node {id} (parallelism={par}): {chain}"));
+    }
+    let dot = program.dot();
+    if dot.len() > MAX_DOT {
+        // `&dot[..MAX_DOT]` panics when byte MAX_DOT falls inside a multi-byte character, so
+        // back up to the nearest char boundary (index 0 always is one).
+        let cut = (0..=MAX_DOT).rev().find(|&i| dot.is_char_boundary(i)).unwrap_or(0);
+        let (head, rest) = (&dot[..cut], dot.len() - cut);
+        lines.push(format!("graphviz_dot_truncated:\n{head}... [{rest} more bytes]"));
+    } else {
+        lines.push(format!("graphviz_dot:\n{dot}"));
+    }
+    lines.join("\n")
+}
+
+/// Human-readable `SHOW CREATE TABLE` text (sink `AS SELECT` is not stored).
+/// Sources render as `CREATE TABLE` DDL; sinks as a placeholder statement plus their pipeline.
+pub fn show_create_stream_table(table: &StreamTable) -> String {
+    match table {
+        StreamTable::Source {
+            name,
+            schema,
+            event_time_field,
+            watermark_field,
+            with_options,
+        } => {
+            let cols = format_columns(schema);
+            // Always end the column list with a newline so that neither the watermark comment
+            // nor the `WITH` clause gets glued onto the closing parenthesis.
+            let mut ddl = format!("CREATE TABLE {name} (\n{}\n)\n", cols.join(",\n"));
+            if let Some(e) = event_time_field {
+                ddl.push_str(&format!("/* EVENT TIME COLUMN: {e} */\n"));
+            }
+            if let Some(w) = watermark_field {
+                ddl.push_str(&format!("/* WATERMARK: {w} */\n"));
+            }
+            ddl + &format_with_clause(with_options)
+        }
+        StreamTable::Sink { name, program } => {
+            let schema = program
+                .egress_arrow_schema()
+                .unwrap_or_else(|| std::sync::Arc::new(datafusion::arrow::datatypes::Schema::empty()));
+            let cols = format_columns(&schema);
+            let mut ddl = format!(
+                "CREATE STREAMING TABLE {name}\nWITH ('connector' = '...') AS SELECT ...\n/* Sink WITH / AS SELECT text is not stored. Output schema:\n{}\n*/\n\n",
+                cols.join(",\n")
+            );
+            ddl.push_str("-- Resolved logical pipeline:\n");
+            ddl + &pipeline_text(program) + "\n"
+        }
+    }
+}
diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs
index cac86d52..a4aa3747 100644
--- a/src/sql/schema/mod.rs
+++ b/src/sql/schema/mod.rs
@@ -10,6 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+pub mod catalog_ddl;
 pub mod column_descriptor;
 pub mod connection_type;
 pub mod source_table;
@@ -22,6 +23,7 @@ pub mod table_role;
 pub mod temporal_pipeline_config;
 pub mod utils;
 
+pub use catalog_ddl::{schema_columns_one_line, show_create_stream_table, stream_table_row_detail};
 pub use column_descriptor::ColumnDescriptor;
 pub use connection_type::ConnectionType;
 pub use source_table::{SourceOperator, SourceTable};
diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs
index 5ecde68e..f93aead1 100644
--- a/src/sql/schema/schema_provider.rs
+++ b/src/sql/schema/schema_provider.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::{HashMap, HashSet};
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::sync::Arc;
 
 use datafusion::arrow::datatypes::{self as datatypes, DataType, Field, Schema};
@@ -45,6 +45,8 @@ pub enum StreamTable {
         schema: Arc<Schema>,
         event_time_field: Option<String>,
         watermark_field: Option<String>,
+        /// Persisted `WITH` options for `SHOW CREATE TABLE`.
+        with_options: BTreeMap<String, String>,
     },
     Sink {
         name: String,
@@ -202,6 +204,7 @@ impl StreamPlanningContext {
             schema,
             event_time_field,
             watermark_field,
+            with_options: BTreeMap::new(),
         });
     }
 
diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs
index 63baca2a..9c975573 100644
--- a/src/sql/schema/source_table.rs
+++ b/src/sql/schema/source_table.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::{HashMap, HashSet};
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::sync::Arc;
 use std::time::Duration;
 
@@ -73,6 +73,8 @@ pub struct SourceTable {
     pub lookup_cache_max_bytes: Option<u64>,
     pub lookup_cache_ttl: Option<Duration>,
     pub inferred_fields: Option<Vec<FieldRef>>,
+    /// Original `WITH` options for catalog / `SHOW CREATE TABLE` (snapshot at DDL parse time).
+    pub catalog_with_options: BTreeMap<String, String>,
 }
 
 multifield_partial_ord!(
@@ -84,7 +86,8 @@ multifield_partial_ord!(
     opaque_config,
     description,
     key_constraints,
-    connection_format
+    connection_format,
+    catalog_with_options
 );
 
 impl SourceTable {
@@ -114,6 +117,7 @@ impl SourceTable {
             lookup_cache_max_bytes: None,
             lookup_cache_ttl: None,
             inferred_fields: None,
+            catalog_with_options: BTreeMap::new(),
         }
     }
 
@@ -135,6 +139,11 @@ impl SourceTable {
         self.temporal_config.watermark_strategy_column.as_deref()
     }
 
+    #[inline]
+    pub fn catalog_with_options(&self) -> &BTreeMap<String, String> {
+        &self.catalog_with_options
+    }
+
     pub fn idle_time(&self) -> Option<Duration> {
         self.temporal_config.liveness_timeout
     }
@@ -150,6 +159,11 @@ impl SourceTable {
     ) -> Result<Self> {
         validate_adapter_availability(adapter)?;
 
+        let catalog_with_options: BTreeMap<String, String> = options
+            .iter()
+            .map(|(k, v)| (k.clone(), v.clone()))
+            .collect();
+
         let encoding = DataEncodingFormat::extract_from_map(options)?;
 
         let mut refined_columns = apply_adapter_specific_rules(adapter, raw_columns);
@@ -179,6 +193,7 @@ impl SourceTable {
             lookup_cache_max_bytes: None,
             lookup_cache_ttl: None,
             inferred_fields: None,
+            catalog_with_options,
         })
     }
 
@@ -250,6 +265,8 @@ impl SourceTable {
     ) -> Result<Self> {
         let _ = connection_profile;
 
+        let catalog_with_options = options.snapshot_for_catalog();
+
         if let Some(c) = options.pull_opt_str(opt::CONNECTOR)? {
             if c != connector_name {
                 return plan_err!(
@@ -351,6 +368,7 @@ impl SourceTable {
             lookup_cache_max_bytes: None,
             lookup_cache_ttl: None,
             inferred_fields: None,
+            catalog_with_options,
         };
 
         if let Some(event_time_field) = options.pull_opt_field(opt::EVENT_TIME_FIELD)? {
diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs
index 7e75f786..a9d68977 100644
--- a/src/storage/stream_catalog/manager.rs
+++ b/src/storage/stream_catalog/manager.rs
@@ -19,7 +19,7 @@ use datafusion::common::{internal_err, plan_err, Result as DFResult};
 use parking_lot::RwLock;
 use prost::Message;
 use protocol::storage::{self as pb, table_definition};
-use tracing::warn;
+use tracing::{info, warn};
 use unicase::UniCase;
 
 use crate::sql::schema::{ObjectName, StreamPlanningContext, StreamTable};
@@ -144,17 +144,35 @@ impl CatalogManager {
         ctx
     }
 
+    /// Snapshot of every stream catalog entry (connector sources + streaming sinks), sorted by
+    /// table name.
+    pub fn list_stream_tables(&self) -> Vec<Arc<StreamTable>> {
+        let mut tables: Vec<_> = self.cache.read().streams.values().cloned().collect();
+        tables.sort_by(|lhs, rhs| lhs.name().cmp(rhs.name()));
+        tables
+    }
+
+    pub fn get_stream_table(&self, name: &str) -> Option<Arc<StreamTable>> {
+        let key = UniCase::new(name.to_string());
+        self.cache.read().streams.get(&key).cloned()
+    }
+
     fn encode_table(&self, table: &StreamTable) -> DFResult<pb::TableDefinition> {
         let table_type = match table {
             StreamTable::Source {
                 schema,
                 event_time_field,
                 watermark_field,
+                with_options,
                 ..
             } => table_definition::TableType::Source(pb::StreamSource {
                 arrow_schema_ipc: CatalogCodec::encode_schema(schema)?,
                 event_time_field: event_time_field.clone(),
                 watermark_field: watermark_field.clone(),
+                with_options: with_options
+                    .iter()
+                    .map(|(k, v)| (k.clone(), v.clone()))
+                    .collect(),
             }),
             StreamTable::Sink { program, .. } => {
                 let logical_program_bincode = CatalogCodec::encode_logical_program(program)?;
@@ -189,6 +207,7 @@ impl CatalogManager {
                 schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?,
                 event_time_field: src.event_time_field,
                 watermark_field: src.watermark_field,
+                with_options: src.with_options.into_iter().collect(),
             }),
             table_definition::TableType::Sink(sink) => {
                 if sink.logical_program_bincode.is_empty() {
@@ -211,13 +230,45 @@ pub fn restore_global_catalog_from_store() {
     let Some(mgr) = CatalogManager::try_global() else {
         return;
     };
-    if let Err(e) = mgr.restore_from_store() {
-        warn!("Stream catalog restore_from_store skipped or failed: {e:#}");
+    match mgr.restore_from_store() {
+        Ok(()) => {
+            let n = mgr.list_stream_tables().len();
+            info!(stream_tables = n, "Stream catalog loaded from durable store");
+        }
+        Err(e) => warn!("Stream catalog restore_from_store failed: {e:#}"),
     }
 }
 
-pub fn initialize_stream_catalog(_config: &crate::config::GlobalConfig) -> anyhow::Result<()> {
-    CatalogManager::init_global_in_memory().context("Stream catalog (CatalogManager) init failed")
+/// Initializes the global [`CatalogManager`].
+///
+/// With `stream_catalog.persist` disabled the catalog lives in memory only. Otherwise a
+/// RocksDB store is opened at `stream_catalog.db_path` (passed through
+/// `crate::config::resolve_path`), or at `<data dir>/stream_catalog` when no path is
+/// configured; the directory is created first.
+///
+/// # Errors
+/// Fails when the directory cannot be created, RocksDB cannot be opened, or the manager's own
+/// initialization fails.
+pub fn initialize_stream_catalog(config: &crate::config::GlobalConfig) -> anyhow::Result<()> {
+    let catalog_cfg = &config.stream_catalog;
+    if !catalog_cfg.persist {
+        return CatalogManager::init_global_in_memory()
+            .context("Stream catalog (CatalogManager) in-memory init failed");
+    }
+
+    let path = match catalog_cfg.db_path.as_ref() {
+        Some(p) => crate::config::resolve_path(p),
+        None => crate::config::get_data_dir().join("stream_catalog"),
+    };
+
+    let dir_err = || format!("Failed to create stream catalog directory {}", path.display());
+    std::fs::create_dir_all(&path).with_context(dir_err)?;
+
+    let open_err = || format!("Failed to open stream catalog RocksDB at {}", path.display());
+    let store = super::RocksDbMetaStore::open(&path).with_context(open_err)?;
+
+    CatalogManager::init_global(std::sync::Arc::new(store))
+        .context("Stream catalog (CatalogManager) init failed")
+}
 
 pub fn planning_schema_provider() -> StreamPlanningContext {
@@ -228,6 +279,7 @@ pub fn planning_schema_provider() -> StreamPlanningContext {
 
 #[cfg(test)]
 mod tests {
+    use std::collections::BTreeMap;
     use std::sync::Arc;
 
     use datafusion::arrow::datatypes::{DataType, Field, Schema};
@@ -252,6 +304,7 @@ mod tests {
             schema: Arc::clone(&schema),
             event_time_field: Some("ts".into()),
             watermark_field: None,
+            with_options: BTreeMap::new(),
         };
 
         mgr.add_table(table).unwrap();
@@ -274,6 +327,35 @@ mod tests {
         }
     }
 
+    #[test]
+    fn add_table_roundtrip_with_options() {
+        let mgr = create_test_manager();
+        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
+
+        let mut opts = BTreeMap::new();
+        opts.insert("connector".to_string(), "kafka".to_string());
+        opts.insert("topic".to_string(), "my-topic".to_string());
+
+        let table = StreamTable::Source {
+            name: "t_with".into(),
+            schema,
+            event_time_field: None,
+            watermark_field: None,
+            with_options: opts.clone(),
+        };
+
+        mgr.add_table(table).unwrap();
+
+        let ctx = mgr.acquire_planning_context();
+        let got = ctx.get_stream_table("t_with").expect("table present");
+
+        if let StreamTable::Source { with_options, .. } = got.as_ref() {
+            assert_eq!(with_options, &opts);
+        } else {
+            panic!("expected Source");
+        }
+    }
+
     #[test]
     fn drop_table_if_exists() {
         let mgr = create_test_manager();
@@ -284,6 +366,7 @@ mod tests {
             schema,
             event_time_field: None,
             watermark_field: None,
+            with_options: BTreeMap::new(),
         })
         .unwrap();
 
diff --git a/src/storage/stream_catalog/mod.rs b/src/storage/stream_catalog/mod.rs
index f4f84469..fea2e39f 100644
--- a/src/storage/stream_catalog/mod.rs
+++ b/src/storage/stream_catalog/mod.rs
@@ -15,9 +15,11 @@
 mod codec;
 mod manager;
 mod meta_store;
+mod rocksdb_meta_store;
 
 pub use manager::{
     CatalogManager, initialize_stream_catalog, planning_schema_provider,
     restore_global_catalog_from_store,
 };
 pub use meta_store::{InMemoryMetaStore, MetaStore};
+pub use rocksdb_meta_store::RocksDbMetaStore;
diff --git a/src/storage/stream_catalog/rocksdb_meta_store.rs b/src/storage/stream_catalog/rocksdb_meta_store.rs
new file mode 100644
index 00000000..98a518a3
--- /dev/null
+++ b/src/storage/stream_catalog/rocksdb_meta_store.rs
@@ -0,0 +1,131 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! RocksDB-backed [`super::MetaStore`] for durable stream catalog rows.
+
+use std::path::Path;
+use std::sync::Arc;
+
+use anyhow::Context;
+use datafusion::common::Result;
+use rocksdb::{DB, Direction, IteratorMode, Options};
+
+use super::MetaStore;
+
+/// Single-node durable KV used by [`crate::storage::stream_catalog::CatalogManager`].
+pub struct RocksDbMetaStore {
+    db: Arc<DB>,
+}
+
+impl RocksDbMetaStore {
+    /// Opens the RocksDB database at `path`, creating it and its parent directories if missing.
+    /// Fails when the parent directory cannot be created or RocksDB cannot open the database.
+    pub fn open<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
+        let path = path.as_ref();
+        if let Some(parent) = path.parent() {
+            std::fs::create_dir_all(parent)
+                .with_context(|| format!("stream catalog: create parent directory {parent:?}"))?;
+        }
+        let mut opts = Options::default();
+        opts.create_if_missing(true);
+        let db = DB::open(&opts, path)
+            .with_context(|| format!("stream catalog: open RocksDB at {}", path.display()))?;
+        Ok(Self { db: Arc::new(db) })
+    }
+}
+
+impl MetaStore for RocksDbMetaStore {
+    /// Writes `value` under `key`.
+    fn put(&self, key: &str, value: Vec<u8>) -> Result<()> {
+        self.db
+            .put(key.as_bytes(), value.as_slice())
+            .map_err(|e| datafusion::common::DataFusionError::Execution(format!(
+                "stream catalog store put: {e}"
+            )))
+    }
+
+    /// Reads the value stored under `key`; `None` when the key is absent.
+    fn get(&self, key: &str) -> Result<Option<Vec<u8>>> {
+        self.db
+            .get(key.as_bytes())
+            .map_err(|e| datafusion::common::DataFusionError::Execution(format!(
+                "stream catalog store get: {e}"
+            )))
+    }
+
+    /// Removes `key` from the store.
+    fn delete(&self, key: &str) -> Result<()> {
+        self.db
+            .delete(key.as_bytes())
+            .map_err(|e| datafusion::common::DataFusionError::Execution(format!(
+                "stream catalog store delete: {e}"
+            )))
+    }
+
+    /// Collects every `(key, value)` pair whose key starts with `prefix`.
+    fn scan_prefix(&self, prefix: &str) -> Result<Vec<(String, Vec<u8>)>> {
+        let fail = datafusion::common::DataFusionError::Execution;
+        let mut out = Vec::new();
+        let iter = self
+            .db
+            .iterator(IteratorMode::From(prefix.as_bytes(), Direction::Forward));
+        for item in iter {
+            let (k, v) = item.map_err(|e| fail(format!("stream catalog store scan: {e}")))?;
+            // Compare raw bytes before decoding: a non-UTF-8 key past the prefix range must
+            // end the scan, not fail it.
+            if !k.starts_with(prefix.as_bytes()) {
+                break;
+            }
+            let key = String::from_utf8(k.into_vec())
+                .map_err(|e| fail(format!("stream catalog store: invalid utf8 key: {e}")))?;
+            out.push((key, v.into_vec()));
+        }
+        Ok(out)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::path::PathBuf;
+
+    use uuid::Uuid;
+
+    use super::*;
+
+    /// Exercises put / get / scan_prefix / delete against a throwaway on-disk database.
+    #[test]
+    fn put_get_scan_roundtrip() {
+        // The UUID suffix makes the directory unique, so no pre-cleanup is needed.
+        let dir: PathBuf =
+            std::env::temp_dir().join(format!("fs_stream_catalog_test_{}", Uuid::new_v4()));
+
+        let store = RocksDbMetaStore::open(&dir).expect("open");
+        store.put("catalog:stream_table:a", vec![1, 2, 3]).unwrap();
+        store.put("catalog:stream_table:b", vec![4]).unwrap();
+        store.put("other:x", vec![9]).unwrap();
+
+        assert_eq!(
+            store.get("catalog:stream_table:a").unwrap(),
+            Some(vec![1, 2, 3])
+        );
+
+        let prefixed = store.scan_prefix("catalog:stream_table:").unwrap();
+        assert_eq!(prefixed.len(), 2);
+        assert!(prefixed.iter().any(|(k, _)| k.ends_with(":a")));
+        assert!(prefixed.iter().any(|(k, _)| k.ends_with(":b")));
+
+        store.delete("catalog:stream_table:a").unwrap();
+        assert!(store.get("catalog:stream_table:a").unwrap().is_none());
+
+        // Close the database (dropping its open handles) before deleting its directory.
+        drop(store);
+        let _ = std::fs::remove_dir_all(&dir);
+    }
+}

From 728c750ae6946d42254a0b2bda791e49fe058f81 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Sun, 29 Mar 2026 22:52:03 +0800
Subject: [PATCH 30/44] update

---
 src/coordinator/execution/executor.rs         |  24 +-
 src/coordinator/plan/logical_plan_visitor.rs  |   9 +-
 src/runtime/streaming/factory/mod.rs          |   2 +-
 .../streaming/factory/operator_factory.rs     |  51 +++--
 src/sql/analysis/mod.rs                       |   4 +
 src/sql/analysis/source_rewriter.rs           | 209 ++++++++++++++++--
 src/sql/logical_node/logical/operator_name.rs |  10 +-
 .../logical_planner/optimizers/chaining.rs    | 195 +++++++++-------
 src/sql/physical/codec.rs                     |  13 +-
 src/sql/schema/column_descriptor.rs           |  10 +
 src/sql/schema/source_table.rs                |  35 ++-
 src/storage/stream_catalog/manager.rs         |  10 +-
 12 files changed, 444 insertions(+), 128 deletions(-)

diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 4a7fc273..8329d498 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -82,6 +82,28 @@ impl Executor {
     }
 }
 
+/// Recursively strips `operatorConfig` / `operator_config` keys from a JSON value, in place.
+fn strip_noisy_fields(value: &mut serde_json::Value) {
+    match value {
+        serde_json::Value::Object(map) => {
+            // Cover both the camelCase and snake_case spellings and drop the config entry outright.
+            map.remove("operatorConfig");
+            map.remove("operator_config");
+
+            // Keep recursing into the remaining child values.
+            for (_, v) in map.iter_mut() {
+                strip_noisy_fields(v);
+            }
+        }
+        serde_json::Value::Array(arr) => {
+            for v in arr.iter_mut() {
+                strip_noisy_fields(v);
+            }
+        }
+        _ => {}
+    }
+}
+
 impl PlanVisitor for Executor {
     fn visit_create_function(
         &self,
@@ -273,7 +295,7 @@ impl PlanVisitor for Executor {
                         name: table_name.clone(),
                         schema,
                         event_time_field: source_table.event_time_field().map(str::to_string),
-                        watermark_field: source_table.watermark_field().map(str::to_string),
+                        watermark_field: source_table.stream_catalog_watermark_field(),
                         with_options: source_table.catalog_with_options().clone(),
                     };
                     (table_name, *if_not_exists, table_instance)
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index e5b5e36a..3bd117e2 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -104,6 +104,13 @@ impl LogicalPlanVisitor {
 
         let partition_keys = Self::extract_partitioning_keys(&mut sink_properties)?;
 
+        let sink_description = comment
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .map(str::to_string)
+            .unwrap_or_else(|| format!("sink `{}` ({connector_type})", sink_table_name));
+
         let mut query_logical_plan = rewrite_plan(
             produce_optimized_plan(&Statement::Query(query.clone()), &self.schema_provider)?,
             &self.schema_provider,
@@ -131,7 +138,7 @@ impl LogicalPlanVisitor {
             None,
             &self.schema_provider,
             Some(ConnectionType::Sink),
-            comment.clone().unwrap_or_default(),
+            sink_description,
         )?;
         sink_definition.partition_exprs = Arc::new(partition_keys);
 
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
index 024dfb14..f82cc657 100644
--- a/src/runtime/streaming/factory/mod.rs
+++ b/src/runtime/streaming/factory/mod.rs
@@ -56,4 +56,4 @@ fn register_kafka_connector_plugins(factory: &mut OperatorFactory) {
         factory_operator_name::KAFKA_SOURCE,
         factory_operator_name::KAFKA_SINK
     );
-}
+}
\ No newline at end of file
diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs
index a95c0241..e1bdd635 100644
--- a/src/runtime/streaming/factory/operator_factory.rs
+++ b/src/runtime/streaming/factory/operator_factory.rs
@@ -38,6 +38,8 @@ use protocol::grpc::api::{
     WindowFunctionOperator as WindowFunctionProto,
 };
 
+use crate::sql::logical_node::logical::OperatorName;
+
 ///
 pub struct OperatorFactory {
     constructors: HashMap<String, Box<dyn OperatorConstructor>>,
@@ -58,6 +60,10 @@ impl OperatorFactory {
         self.constructors.insert(name.to_string(), constructor);
     }
 
+    pub fn register_named(&mut self, name: OperatorName, constructor: Box<dyn OperatorConstructor>) {
+        self.register(name.as_registry_key(), constructor);
+    }
+
     pub fn create_operator(&self, name: &str, payload: &[u8]) -> Result<ConstructedOperator> {
         let ctor = self
             .constructors
@@ -78,27 +84,36 @@ impl OperatorFactory {
     }
 
     fn register_builtins(&mut self) {
-        self.register("TumblingWindowAggregate", Box::new(TumblingWindowBridge));
-        self.register("SlidingWindowAggregate", Box::new(SlidingWindowBridge));
-        self.register("SessionWindowAggregate", Box::new(SessionWindowBridge));
+        self.register_named(OperatorName::TumblingWindowAggregate, Box::new(TumblingWindowBridge));
+        self.register_named(OperatorName::SlidingWindowAggregate, Box::new(SlidingWindowBridge));
+        self.register_named(OperatorName::SessionWindowAggregate, Box::new(SessionWindowBridge));
 
-        self.register("ExpressionWatermark", Box::new(WatermarkBridge));
+        self.register_named(OperatorName::ExpressionWatermark, Box::new(WatermarkBridge));
 
         // ─── SQL Window Function ───
-        self.register("WindowFunction", Box::new(WindowFunctionBridge));
+        self.register_named(OperatorName::WindowFunction, Box::new(WindowFunctionBridge));
 
         // ─── Join ───
-        self.register("Join", Box::new(JoinWithExpirationBridge));
-        self.register("InstantJoin", Box::new(InstantJoinBridge));
-        self.register("LookupJoin", Box::new(LookupJoinBridge));
-
-        self.register("UpdatingAggregate", Box::new(IncrementalAggregateBridge));
-
-        self.register("KeyBy", Box::new(KeyByBridge));
-
-        self.register("Projection", Box::new(PassthroughConstructor("Projection")));
-        self.register("ArrowValue", Box::new(PassthroughConstructor("ArrowValue")));
-        self.register("ArrowKey", Box::new(PassthroughConstructor("ArrowKey")));
+        self.register_named(OperatorName::Join, Box::new(JoinWithExpirationBridge));
+        self.register_named(OperatorName::InstantJoin, Box::new(InstantJoinBridge));
+        self.register_named(OperatorName::LookupJoin, Box::new(LookupJoinBridge));
+
+        self.register_named(OperatorName::UpdatingAggregate, Box::new(IncrementalAggregateBridge));
+
+        self.register_named(OperatorName::KeyBy, Box::new(KeyByBridge));
+
+        self.register_named(
+            OperatorName::Projection,
+            Box::new(PassthroughConstructor(OperatorName::Projection)),
+        );
+        self.register_named(
+            OperatorName::ArrowValue,
+            Box::new(PassthroughConstructor(OperatorName::ArrowValue)),
+        );
+        self.register_named(
+            OperatorName::ArrowKey,
+            Box::new(PassthroughConstructor(OperatorName::ArrowKey)),
+        );
 
         crate::runtime::streaming::factory::register_builtin_connectors(self);
         crate::runtime::streaming::factory::register_kafka_connector_plugins(self);
@@ -202,12 +217,12 @@ impl OperatorConstructor for KeyByBridge {
     }
 }
 
-pub struct PassthroughConstructor(pub &'static str);
+pub struct PassthroughConstructor(pub OperatorName);
 
 impl OperatorConstructor for PassthroughConstructor {
     fn with_config(&self, _config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
         Ok(ConstructedOperator::Operator(Box::new(
-            PassthroughOperator::new(self.0),
+            PassthroughOperator::new(self.0.as_registry_key()),
         )))
     }
 }
diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs
index d417ebd1..653cb601 100644
--- a/src/sql/analysis/mod.rs
+++ b/src/sql/analysis/mod.rs
@@ -207,6 +207,10 @@ pub fn rewrite_plan(
 ) -> Result<LogicalPlan> {
     info!("Starting streaming plan rewrite pipeline");
 
+    let Transformed {
+        data: plan, ..
+    } = plan.rewrite_with_subqueries(&mut source_rewriter::SourceRewriter::new(schema_provider))?;
+
     let mut rewriter = stream_rewriter::StreamRewriter::new(schema_provider);
     let Transformed {
         data: rewritten_plan,
diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs
index d642afd5..d96a47bf 100644
--- a/src/sql/analysis/source_rewriter.rs
+++ b/src/sql/analysis/source_rewriter.rs
@@ -10,6 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::HashSet;
 use std::sync::Arc;
 use std::time::Duration;
 
@@ -24,6 +25,8 @@ use crate::sql::schema::source_table::SourceTable;
 use crate::sql::schema::ColumnDescriptor;
 use crate::sql::schema::table::Table;
 use crate::sql::schema::StreamSchemaProvider;
+use crate::sql::schema::StreamTable;
+use crate::sql::common::constants::sql_field;
 use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
 use crate::sql::extensions::watermark_node::EventTimeWatermarkNode;
 use crate::sql::types::TIMESTAMP_FIELD;
@@ -33,7 +36,42 @@ pub struct SourceRewriter<'a> {
     pub(crate) schema_provider: &'a StreamSchemaProvider,
 }
 
+impl<'a> SourceRewriter<'a> {
+    pub fn new(schema_provider: &'a StreamSchemaProvider) -> Self {
+        Self { schema_provider }
+    }
+}
+
 impl SourceRewriter<'_> {
+    /// Names of the columns produced by the stream-catalog source projection: every physical
+    /// field, plus the `_timestamp` alias when event time is declared under another name.
+    fn stream_source_projected_column_names(
+        schema: &datafusion::arrow::datatypes::Schema,
+        event_time_field: Option<&str>,
+    ) -> HashSet<String> {
+        // The alias exists only when an event-time column is declared and is not `_timestamp`.
+        let aliased = matches!(event_time_field, Some(et) if et != TIMESTAMP_FIELD);
+        schema
+            .fields()
+            .iter()
+            .map(|field| field.name().clone())
+            .chain(aliased.then(|| TIMESTAMP_FIELD.to_string()))
+            .collect()
+    }
+
+    /// Picks the watermark column usable for a [`StreamTable::Source`]. Yields `None` for the
+    /// computed `__watermark` column and for any name missing from the projected columns.
+    fn stream_source_effective_watermark_field<'b>(
+        watermark_field: Option<&'b str>,
+        projected: &HashSet<String>,
+    ) -> Option<&'b str> {
+        // Keep the candidate only if it is a real, projected column.
+        watermark_field.filter(|candidate| {
+            let is_computed = *candidate == sql_field::COMPUTED_WATERMARK;
+            !is_computed && projected.contains(*candidate)
+        })
+    }
+
     fn projection_expr_for_column(col: &ColumnDescriptor, qualifier: &TableReference) -> Expr {
         if let Some(logic) = col.computation_logic() {
             logic
@@ -117,6 +155,17 @@ impl SourceRewriter<'_> {
             expressions
                 .push(expr.alias_qualified(Some(qualifier.clone()), TIMESTAMP_FIELD.to_string()));
         } else {
+            let has_ts = table
+                .schema_specs
+                .iter()
+                .any(|c| c.arrow_field().name() == TIMESTAMP_FIELD);
+            if !has_ts {
+                return plan_err!(
+                    "Connector table '{}' has no `{}` column; declare WATERMARK FOR <event_time> AS ... in CREATE TABLE",
+                    table.table_identifier,
+                    TIMESTAMP_FIELD
+                );
+            }
             expressions.push(Expr::Column(Column::new(
                 Some(qualifier.clone()),
                 TIMESTAMP_FIELD,
@@ -126,6 +175,132 @@ impl SourceRewriter<'_> {
         Ok(expressions)
     }
 
+    /// Plans a stream-catalog [`StreamTable::Source`] scan (Kafka/… registered via coordinator):
+    /// re-exposes the event-time column as `_timestamp` and adds boundary and watermark nodes.
+    fn mutate_stream_catalog_source(
+        &self,
+        table_scan: &TableScan,
+        st: &StreamTable,
+    ) -> DFResult<Transformed<LogicalPlan>> {
+        let StreamTable::Source {
+            schema,
+            event_time_field,
+            watermark_field,
+            ..
+        } = st
+        else {
+            return Ok(Transformed::no(LogicalPlan::TableScan(table_scan.clone())));
+        };
+
+        let qualifier = table_scan.table_name.clone();
+        let qualified_column = |name: &str| {
+            Expr::Column(Column {
+                relation: Some(qualifier.clone()),
+                name: name.to_string(),
+                spans: Default::default(),
+            })
+        };
+        let has_field = |name: &str| schema.fields().iter().any(|f| f.name().as_str() == name);
+
+        let mut expressions: Vec<Expr> = schema
+            .fields()
+            .iter()
+            .map(|f| qualified_column(f.name().as_str()))
+            .collect();
+
+        match event_time_field.as_deref() {
+            // A declared event-time column must exist, even when it is `_timestamp` itself.
+            Some(et) if !has_field(et) => {
+                return Err(DataFusionError::Plan(format!(
+                    "Stream source `{}`: event_time_field `{et}` is not in the table schema",
+                    table_scan.table_name.table()
+                )));
+            }
+            // Event time stored under another name: re-expose it as `_timestamp`.
+            Some(et) if et != TIMESTAMP_FIELD => {
+                expressions.push(
+                    qualified_column(et)
+                        .alias_qualified(Some(qualifier.clone()), TIMESTAMP_FIELD.to_string()),
+                );
+            }
+            None if !has_field(TIMESTAMP_FIELD) => {
+                return plan_err!(
+                    "Stream source `{}` has no `{}` column; declare WATERMARK FOR <event_time> AS ... on CREATE TABLE, or add a `{}` column",
+                    table_scan.table_name.table(),
+                    TIMESTAMP_FIELD,
+                    TIMESTAMP_FIELD
+                );
+            }
+            _ => {}
+        }
+
+        let source_input = LogicalPlan::TableScan(table_scan.clone());
+        let projection = LogicalPlan::Projection(Projection::try_new(
+            expressions,
+            Arc::new(source_input),
+        )?);
+
+        let schema_ref = projection.schema().clone();
+        let remote = LogicalPlan::Extension(Extension {
+            node: Arc::new(RemoteTableBoundaryNode {
+                upstream_plan: projection,
+                table_identifier: table_scan.table_name.to_owned(),
+                resolved_schema: schema_ref,
+                requires_materialization: true,
+            }),
+        });
+
+        // Only a watermark column that survives the projection is usable; with none, the
+        // expression builder falls back to `_timestamp` minus a fixed delay.
+        let projected = Self::stream_source_projected_column_names(
+            schema.as_ref(),
+            event_time_field.as_deref(),
+        );
+        let wf = Self::stream_source_effective_watermark_field(
+            watermark_field.as_deref(),
+            &projected,
+        );
+        let wm_expr = Self::watermark_expression_for_stream_source(wf, &qualifier)?;
+
+        let watermark_node = EventTimeWatermarkNode::try_new(
+            remote,
+            table_scan.table_name.clone(),
+            wm_expr,
+        )
+        .map_err(|err| {
+            DataFusionError::Internal(format!("failed to create watermark node: {err}"))
+        })?;
+
+        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+            node: Arc::new(watermark_node),
+        })))
+    }
+
+    fn watermark_expression_for_stream_source(
+        watermark_field: Option<&str>,
+        qualifier: &TableReference,
+    ) -> DFResult<Expr> {
+        let qualified = |name: &str| {
+            Expr::Column(Column {
+                relation: Some(qualifier.clone()),
+                name: name.to_string(),
+                spans: Default::default(),
+            })
+        };
+        // An explicit watermark column is used verbatim.
+        if let Some(wf) = watermark_field {
+            return Ok(qualified(wf));
+        }
+        // Otherwise fall back to `_timestamp` minus one second, expressed in nanoseconds.
+        let delay_nanos = Duration::from_secs(1).as_nanos() as i64;
+        let delay = Expr::Literal(ScalarValue::DurationNanosecond(Some(delay_nanos)), None);
+        Ok(Expr::BinaryExpr(BinaryExpr {
+            left: Box::new(qualified(TIMESTAMP_FIELD)),
+            op: logical_expr::Operator::Minus,
+            right: Box::new(delay),
+        }))
+    }
+
     fn projection(&self, table_scan: &TableScan, table: &SourceTable) -> DFResult<LogicalPlan> {
         let qualifier = table_scan.table_name.clone();
 
@@ -224,21 +399,25 @@ impl TreeNodeRewriter for SourceRewriter<'_> {
         };
 
         let table_name = table_scan.table_name.table();
-        let table = self
-            .schema_provider
-            .get_catalog_table(table_name)
-            .ok_or_else(|| DataFusionError::Plan(format!("Table {table_name} not found")))?;
-
-        match table {
-            Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table),
-            Table::LookupTable(_table) => {
-                // TODO: implement LookupSource extension
-                plan_err!("Lookup tables are not yet supported")
-            }
-            Table::TableFromQuery {
-                name: _,
-                logical_plan,
-            } => self.mutate_table_from_query(&table_scan, logical_plan),
+
+        if let Some(table) = self.schema_provider.get_catalog_table(table_name) {
+            return match table {
+                Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table),
+                Table::LookupTable(_table) => {
+                    // TODO: implement LookupSource extension
+                    plan_err!("Lookup tables are not yet supported")
+                }
+                Table::TableFromQuery {
+                    name: _,
+                    logical_plan,
+                } => self.mutate_table_from_query(&table_scan, logical_plan),
+            };
         }
+
+        if let Some(st) = self.schema_provider.get_stream_table(table_name) {
+            return self.mutate_stream_catalog_source(&table_scan, st.as_ref());
+        }
+
+        Ok(Transformed::no(LogicalPlan::TableScan(table_scan.clone())))
     }
 }
diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs
index d157234b..224562ea 100644
--- a/src/sql/logical_node/logical/operator_name.rs
+++ b/src/sql/logical_node/logical/operator_name.rs
@@ -13,11 +13,11 @@
 use std::str::FromStr;
 
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
-use strum::{Display, EnumString};
+use strum::{Display, EnumString, IntoStaticStr};
 
 use crate::sql::common::constants::operator_feature;
 
-#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display, IntoStaticStr)]
 pub enum OperatorName {
     ExpressionWatermark,
     ArrowValue,
@@ -38,6 +38,12 @@ pub enum OperatorName {
 }
 
 impl OperatorName {
+    /// Static name under which this operator is registered and looked up by workers.
+    #[inline]
+    pub fn as_registry_key(self) -> &'static str {
+        <&'static str>::from(self)
+    }
+
     pub fn feature_tag(self) -> Option<&'static str> {
         match self {
             Self::ExpressionWatermark | Self::ArrowValue | Self::ArrowKey | Self::Projection => None,
diff --git a/src/sql/logical_planner/optimizers/chaining.rs b/src/sql/logical_planner/optimizers/chaining.rs
index 11c072d3..8c1534a6 100644
--- a/src/sql/logical_planner/optimizers/chaining.rs
+++ b/src/sql/logical_planner/optimizers/chaining.rs
@@ -10,121 +10,164 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::mem;
+
 use petgraph::graph::{EdgeIndex, NodeIndex};
-use petgraph::visit::EdgeRef;
-use petgraph::Direction::{Incoming, Outgoing};
+use petgraph::prelude::*;
+use petgraph::visit::NodeRef;
+
 
 use crate::sql::logical_node::logical::{LogicalEdgeType, LogicalGraph, Optimizer};
 
 pub type NodeId = NodeIndex;
 pub type EdgeId = EdgeIndex;
 
-pub struct ChainingOptimizer;
+pub struct ChainingOptimizer {}
 
-impl ChainingOptimizer {
-    fn find_fusion_candidate(plan: &LogicalGraph) -> Option<(NodeId, NodeId, EdgeId)> {
-        let node_ids: Vec<NodeId> = plan.node_indices().collect();
+fn remove_in_place<N, E>(graph: &mut DiGraph<N, E>, node: NodeIndex) {
+    let incoming = graph.edges_directed(node, Incoming).next().unwrap();
 
-        for upstream_id in node_ids {
-            let upstream_node = plan.node_weight(upstream_id)?;
+    let parent = incoming.source().id();
+    let incoming = incoming.id();
+    graph.remove_edge(incoming);
 
-            if upstream_node.operator_chain.is_source() {
-                continue;
-            }
+    let outgoing: Vec<_> = graph
+        .edges_directed(node, Outgoing)
+        .map(|e| (e.id(), e.target().id()))
+        .collect();
+
+    for (edge, target) in outgoing {
+        let weight = graph.remove_edge(edge).unwrap();
+        graph.add_edge(parent, target, weight);
+    }
+
+    graph.remove_node(node);
+}
+
+impl Optimizer for ChainingOptimizer {
+    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool {
+        let node_indices: Vec<NodeIndex> = plan.node_indices().collect();
 
-            let outgoing_edges: Vec<_> = plan.edges_directed(upstream_id, Outgoing).collect();
+        for &node_idx in &node_indices {
+            let cur = plan.node_weight(node_idx).unwrap();
 
-            if outgoing_edges.len() != 1 {
+            // sources can't be chained
+            if cur.operator_chain.is_source() {
                 continue;
             }
 
-            let bridging_edge = &outgoing_edges[0];
+            let mut successors = plan.edges_directed(node_idx, Outgoing).collect::<Vec<_>>();
 
-            if bridging_edge.weight().edge_type != LogicalEdgeType::Forward {
+            if successors.len() != 1 {
                 continue;
             }
 
-            let downstream_id = bridging_edge.target();
-            let downstream_node = plan.node_weight(downstream_id)?;
+            let edge = successors.remove(0);
+            let edge_type = edge.weight().edge_type;
 
-            if downstream_node.operator_chain.is_sink() {
+            if edge_type != LogicalEdgeType::Forward {
                 continue;
             }
 
-            if upstream_node.parallelism != downstream_node.parallelism {
+            let successor_idx = edge.target();
+
+            let successor_node = plan.node_weight(successor_idx).unwrap();
+
+            // skip if parallelism doesn't match or successor is a sink
+            if cur.parallelism != successor_node.parallelism
+                || successor_node.operator_chain.is_sink()
+            {
                 continue;
             }
 
-            let incoming_edges: Vec<_> = plan.edges_directed(downstream_id, Incoming).collect();
-            if incoming_edges.len() != 1 {
+            // skip successors with multiple predecessors
+            if plan.edges_directed(successor_idx, Incoming).count() > 1 {
                 continue;
             }
 
-            return Some((upstream_id, downstream_id, bridging_edge.id()));
+            // construct the new node
+            let mut new_cur = cur.clone();
+
+            new_cur.description = format!("{} -> {}", cur.description, successor_node.description);
+
+            new_cur
+                .operator_chain
+                .operators
+                .extend(successor_node.operator_chain.operators.clone());
+
+            new_cur
+                .operator_chain
+                .edges
+                .push(edge.weight().schema.clone());
+
+            mem::swap(&mut new_cur, plan.node_weight_mut(node_idx).unwrap());
+
+            // remove the old successor
+            remove_in_place(plan, successor_idx);
+            return true;
         }
 
-        None
+        false
     }
+}
 
-    fn apply_fusion(
-        plan: &mut LogicalGraph,
-        upstream_id: NodeId,
-        downstream_id: NodeId,
-        bridging_edge_id: EdgeId,
-    ) {
-        let bridging_edge = plan
-            .remove_edge(bridging_edge_id)
-            .expect("Graph Integrity Violation: Bridging edge missing");
-
-        let propagated_schema = bridging_edge.schema.clone();
-
-        let downstream_outgoing: Vec<_> = plan
-            .edges_directed(downstream_id, Outgoing)
-            .map(|e| (e.id(), e.target()))
-            .collect();
-
-        for (edge_id, target_id) in downstream_outgoing {
-            let edge_weight = plan
-                .remove_edge(edge_id)
-                .expect("Graph Integrity Violation: Outgoing edge missing");
-
-            plan.add_edge(upstream_id, target_id, edge_weight);
-        }
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
 
-        let downstream_node = plan
-            .remove_node(downstream_id)
-            .expect("Graph Integrity Violation: Downstream node missing");
+    use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
 
-        let upstream_node = plan
-            .node_weight_mut(upstream_id)
-            .expect("Graph Integrity Violation: Upstream node missing");
+    use crate::sql::common::FsSchema;
+    use crate::sql::logical_node::logical::{
+        LogicalEdge, LogicalEdgeType, LogicalGraph, LogicalNode, Optimizer, OperatorName,
+    };
 
-        upstream_node.description = format!(
-            "{} -> {}",
-            upstream_node.description, downstream_node.description
-        );
+    use super::ChainingOptimizer;
 
-        upstream_node
-            .operator_chain
-            .operators
-            .extend(downstream_node.operator_chain.operators);
+    fn forward_edge() -> LogicalEdge {
+        let s = Arc::new(Schema::new(vec![Field::new(
+            "_timestamp",
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            false,
+        )]));
+        LogicalEdge::new(LogicalEdgeType::Forward, FsSchema::new_unkeyed(s, 0))
+    }
 
-        upstream_node
-            .operator_chain
-            .edges
-            .push(propagated_schema);
+    fn proj_node(id: u32, label: &str) -> LogicalNode {
+        LogicalNode::single(
+            id,
+            format!("op_{label}"),
+            OperatorName::Projection,
+            vec![],
+            label.to_string(),
+            1,
+        )
     }
-}
 
-impl Optimizer for ChainingOptimizer {
-    fn optimize_once(&self, plan: &mut LogicalGraph) -> bool {
-        if let Some((upstream_id, downstream_id, bridging_edge_id)) =
-            Self::find_fusion_candidate(plan)
-        {
-            Self::apply_fusion(plan, upstream_id, downstream_id, bridging_edge_id);
-            true
-        } else {
-            false
-        }
+    fn source_node() -> LogicalNode {
+        LogicalNode::single(
+            0,
+            "src".into(),
+            OperatorName::ConnectorSource,
+            vec![],
+            "source".into(),
+            1,
+        )
+    }
+
+    /// Regression: upstream at last `NodeIndex` + remove non-last downstream swaps indices.
+    #[test]
+    fn fusion_remaps_when_upstream_was_last_node_index() {
+        let mut g = LogicalGraph::new();
+        let n0 = g.add_node(source_node());
+        let n1 = g.add_node(proj_node(1, "downstream"));
+        let n2 = g.add_node(proj_node(2, "upstream_last_index"));
+        let e = forward_edge();
+        g.add_edge(n0, n2, e.clone());
+        g.add_edge(n2, n1, e);
+
+        let changed = ChainingOptimizer {}.optimize_once(&mut g);
+        assert!(changed);
+        assert_eq!(g.node_count(), 2);
     }
 }
diff --git a/src/sql/physical/codec.rs b/src/sql/physical/codec.rs
index 1301ef09..c8349dc6 100644
--- a/src/sql/physical/codec.rs
+++ b/src/sql/physical/codec.rs
@@ -15,8 +15,9 @@ use std::sync::Arc;
 
 use datafusion::arrow::array::RecordBatch;
 use datafusion::arrow::datatypes::Schema;
-use datafusion::common::{DataFusionError, Result, UnnestOptions};
+use datafusion::common::{DataFusionError, Result, UnnestOptions, not_impl_err};
 use datafusion::execution::FunctionRegistry;
+use datafusion::logical_expr::ScalarUDF;
 use datafusion::physical_plan::ExecutionPlan;
 use datafusion::physical_plan::unnest::{ListUnnest, UnnestExec};
 use datafusion_proto::physical_plan::PhysicalExtensionCodec;
@@ -28,7 +29,8 @@ use protocol::grpc::api::{
 use tokio::sync::mpsc::UnboundedReceiver;
 
 use crate::sql::analysis::UNNESTED_COL;
-use crate::sql::common::constants::mem_exec_join_side;
+use crate::sql::common::constants::{mem_exec_join_side, window_function_udf};
+use crate::sql::physical::udfs::window;
 use crate::sql::physical::cdc::{DebeziumUnrollingExec, ToDebeziumExec};
 use crate::sql::physical::readers::{
     FsMemExec, RecordBatchVecReader, RwLockRecordBatchReader, UnboundedRecordBatchReader,
@@ -138,6 +140,13 @@ impl PhysicalExtensionCodec for FsPhysicalExtensionCodec {
             )))
         }
     }
+
+    fn try_decode_udf(&self, name: &str, _buf: &[u8]) -> Result<Arc<ScalarUDF>> {
+        if name == window_function_udf::NAME {
+            return Ok(window());
+        }
+        not_impl_err!("PhysicalExtensionCodec is not provided for scalar function {name}")
+    }
 }
 
 impl FsPhysicalExtensionCodec {
diff --git a/src/sql/schema/column_descriptor.rs b/src/sql/schema/column_descriptor.rs
index 941a7500..533708cc 100644
--- a/src/sql/schema/column_descriptor.rs
+++ b/src/sql/schema/column_descriptor.rs
@@ -99,6 +99,16 @@ impl ColumnDescriptor {
         self.arrow_field().data_type()
     }
 
+    /// Sets the nullability of the Arrow field backing this column, whichever variant it is.
+    pub fn set_nullable(&mut self, nullable: bool) {
+        let field = match self {
+            Self::Physical(field) => field,
+            Self::SystemMeta { field, .. } | Self::Computed { field, .. } => field,
+        };
+        // Mutate in place so every other property of the field is left exactly as it was.
+        field.set_nullable(nullable);
+    }
+
     pub fn force_precision(&mut self, unit: TimeUnit) {
         match self {
             Self::Physical(f) => {
diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs
index 9c975573..d10e39dc 100644
--- a/src/sql/schema/source_table.rs
+++ b/src/sql/schema/source_table.rs
@@ -139,6 +139,17 @@ impl SourceTable {
         self.temporal_config.watermark_strategy_column.as_deref()
     }
 
+    /// Watermark column name that may be persisted for a [`StreamTable::Source`]. The computed
+    /// [`sql_field::COMPUTED_WATERMARK`] column is left out: the stream catalog stores only Arrow
+    /// physical fields, so `__watermark` cannot be resolved when planning from the catalog.
+    pub fn stream_catalog_watermark_field(&self) -> Option<String> {
+        let column = self.temporal_config.watermark_strategy_column.as_deref()?;
+        if column == sql_field::COMPUTED_WATERMARK {
+            return None;
+        }
+        Some(column.to_string())
+    }
+
     #[inline]
     pub fn catalog_with_options(&self) -> &BTreeMap<String, String> {
         &self.catalog_with_options
@@ -382,10 +393,6 @@ impl SourceTable {
         }
 
         if let Some((time_field, watermark_expr)) = watermark {
-            let table_ref = TableReference::bare(table.table_identifier.as_str());
-            let df_schema =
-                DFSchema::try_from_qualified_schema(table_ref, &table.produce_physical_schema())?;
-
             let field = table
                 .schema_specs
                 .iter()
@@ -404,6 +411,19 @@ impl SourceTable {
                 );
             }
 
+            // The time column referenced by WATERMARK must be non-null by definition, so force it
+            // to NOT NULL, so that omitting NOT NULL in CREATE TABLE does not fail later nullability checks.
+            for col in table.schema_specs.iter_mut() {
+                if col.arrow_field().name().as_str() == time_field.as_str() {
+                    col.set_nullable(false);
+                    break;
+                }
+            }
+
+            let table_ref = TableReference::bare(table.table_identifier.as_str());
+            let df_schema =
+                DFSchema::try_from_qualified_schema(table_ref, &table.produce_physical_schema())?;
+
             table.temporal_config.event_column = Some(time_field.clone());
 
             if let Some(expr) = watermark_expr {
@@ -412,17 +432,12 @@ impl SourceTable {
                         DataFusionError::Plan(format!("could not plan watermark expression: {e}"))
                     })?;
 
-                let (data_type, nullable) = logical_expr.data_type_and_nullable(&df_schema)?;
+                let (data_type, _nullable) = logical_expr.data_type_and_nullable(&df_schema)?;
                 if !matches!(data_type, DataType::Timestamp(_, _)) {
                     return plan_err!(
                         "the type of the WATERMARK FOR expression must be TIMESTAMP, but was {data_type}"
                     );
                 }
-                if nullable {
-                    return plan_err!(
-                        "the type of the WATERMARK FOR expression must be NOT NULL"
-                    );
-                }
 
                 table.schema_specs.push(ColumnDescriptor::new_computed(
                     Field::new(
diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs
index a9d68977..5f40240a 100644
--- a/src/storage/stream_catalog/manager.rs
+++ b/src/storage/stream_catalog/manager.rs
@@ -22,6 +22,7 @@ use protocol::storage::{self as pb, table_definition};
 use tracing::{info, warn};
 use unicase::UniCase;
 
+use crate::sql::common::constants::sql_field;
 use crate::sql::schema::{ObjectName, StreamPlanningContext, StreamTable};
 
 use super::codec::CatalogCodec;
@@ -168,7 +169,10 @@ impl CatalogManager {
             } => table_definition::TableType::Source(pb::StreamSource {
                 arrow_schema_ipc: CatalogCodec::encode_schema(schema)?,
                 event_time_field: event_time_field.clone(),
-                watermark_field: watermark_field.clone(),
+                watermark_field: watermark_field
+                    .as_ref()
+                    .filter(|w| *w != sql_field::COMPUTED_WATERMARK)
+                    .cloned(),
                 with_options: with_options
                     .iter()
                     .map(|(k, v)| (k.clone(), v.clone()))
@@ -206,7 +210,9 @@ impl CatalogManager {
                 name: proto_def.table_name,
                 schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?,
                 event_time_field: src.event_time_field,
-                watermark_field: src.watermark_field,
+                watermark_field: src
+                    .watermark_field
+                    .filter(|w| w != sql_field::COMPUTED_WATERMARK),
                 with_options: src.with_options.into_iter().collect(),
             }),
             table_definition::TableType::Sink(sink) => {

From b64e4ceda5b8e5a17563001c1c4b4bdc75f86077 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Mon, 30 Mar 2026 22:58:27 +0800
Subject: [PATCH 31/44] update

---
 src/coordinator/coordinator.rs                | 363 +-------
 src/coordinator/plan/logical_plan_visitor.rs  |  56 +-
 src/coordinator/runtime_context.rs            |  44 +-
 src/runtime/streaming/factory/mod.rs          |   2 -
 .../streaming/factory/operator_factory.rs     | 110 ++-
 src/runtime/streaming/operators/mod.rs        |  51 +-
 src/sql/analysis/source_rewriter.rs           | 200 ++---
 src/sql/extensions/key_calculation.rs         |   2 +-
 src/sql/extensions/remote_table.rs            |   2 +-
 src/sql/frontend_sql_coverage_tests.rs        | 823 ------------------
 src/sql/logical_node/logical/operator_name.rs |   7 +-
 src/sql/mod.rs                                |   2 -
 12 files changed, 190 insertions(+), 1472 deletions(-)
 delete mode 100644 src/sql/frontend_sql_coverage_tests.rs

diff --git a/src/coordinator/coordinator.rs b/src/coordinator/coordinator.rs
index f21b12ca..b86b1070 100644
--- a/src/coordinator/coordinator.rs
+++ b/src/coordinator/coordinator.rs
@@ -145,365 +145,4 @@ impl Coordinator {
             }
         }
     }
-}
-
-// ---------------------------------------------------------------------------
-// Test-only helpers (used by `create_streaming_table_coordinator_tests` below)
-// ---------------------------------------------------------------------------
-
-#[cfg(test)]
-use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
-
-#[cfg(test)]
-use crate::sql::common::TIMESTAMP_FIELD;
-#[cfg(test)]
-use crate::sql::parse::parse_sql;
-
-#[cfg(test)]
-fn fake_stream_schema_provider() -> StreamSchemaProvider {
-    let mut provider = StreamSchemaProvider::new();
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int64, false),
-        Field::new(
-            TIMESTAMP_FIELD,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            false,
-        ),
-    ]));
-    provider.add_source_table(
-        "src".to_string(),
-        schema,
-        Some(TIMESTAMP_FIELD.to_string()),
-        None,
-    );
-    provider
-}
-
-#[cfg(test)]
-fn fake_stream_schema_provider_with_v() -> StreamSchemaProvider {
-    let mut provider = StreamSchemaProvider::new();
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int64, false),
-        Field::new("v", DataType::Utf8, true),
-        Field::new(
-            TIMESTAMP_FIELD,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            false,
-        ),
-    ]));
-    provider.add_source_table(
-        "src".to_string(),
-        schema,
-        Some(TIMESTAMP_FIELD.to_string()),
-        None,
-    );
-    provider
-}
-
-#[cfg(test)]
-fn fake_src_dim_provider() -> StreamSchemaProvider {
-    let mut provider = fake_stream_schema_provider_with_v();
-    let dim = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int64, false),
-        Field::new("name", DataType::Utf8, true),
-        Field::new("amt", DataType::Float64, true),
-        Field::new(
-            TIMESTAMP_FIELD,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            false,
-        ),
-    ]));
-    provider.add_source_table(
-        "dim".to_string(),
-        dim,
-        Some(TIMESTAMP_FIELD.to_string()),
-        None,
-    );
-    provider
-}
-
-#[cfg(test)]
-fn assert_coordinator_streaming_build_ok(
-    sql: &str,
-    provider: StreamSchemaProvider,
-    expect_sink_substring: &str,
-    expect_connector_substring: &str,
-) {
-    let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
-    assert_eq!(stmts.len(), 1);
-    let plan = Coordinator::new()
-        .compile_plan(stmts[0].as_ref(), provider)
-        .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}"));
-    let rendered = format!("{plan:?}");
-    assert!(rendered.contains("StreamingTable"), "{rendered}");
-    assert!(
-        rendered.contains(expect_sink_substring),
-        "expected sink name fragment {expect_sink_substring:?} in:\n{rendered}"
-    );
-    assert!(
-        rendered.contains(expect_connector_substring),
-        "expected connector fragment {expect_connector_substring:?} in:\n{rendered}"
-    );
-}
-
-#[cfg(test)]
-mod create_streaming_table_coordinator_tests {
-    use super::{
-        assert_coordinator_streaming_build_ok, fake_src_dim_provider,
-        fake_stream_schema_provider, fake_stream_schema_provider_with_v,
-    };
-    use crate::sql::common::TIMESTAMP_FIELD;
-
-    #[test]
-    fn coordinator_build_create_streaming_table_select_star_kafka() {
-        assert_coordinator_streaming_build_ok(
-            concat!(
-                "CREATE STREAMING TABLE my_sink ",
-                "WITH ('connector' = 'kafka') ",
-                "AS SELECT * FROM src",
-            ),
-            fake_stream_schema_provider(),
-            "my_sink",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_memory_connector() {
-        assert_coordinator_streaming_build_ok(
-            "CREATE STREAMING TABLE mem_out WITH ('connector'='memory') AS SELECT * FROM src",
-            fake_stream_schema_provider(),
-            "mem_out",
-            "memory",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_postgres_connector() {
-        assert_coordinator_streaming_build_ok(
-            "CREATE STREAMING TABLE pg_out WITH ('connector'='postgres') AS SELECT id FROM src",
-            fake_stream_schema_provider(),
-            "pg_out",
-            "postgres",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_partition_by_and_idle_time() {
-        assert_coordinator_streaming_build_ok(
-            concat!(
-                "CREATE STREAMING TABLE part_idle ",
-                "WITH ('connector'='kafka', 'partition_by'='id', 'idle_time'='30 seconds') ",
-                "AS SELECT * FROM src",
-            ),
-            fake_stream_schema_provider(),
-            "part_idle",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_project_timestamp_columns() {
-        let sql = format!(
-            "CREATE STREAMING TABLE ts_cols WITH ('connector'='kafka') AS SELECT id, {ts} FROM src",
-            ts = TIMESTAMP_FIELD
-        );
-        assert_coordinator_streaming_build_ok(
-            &sql,
-            fake_stream_schema_provider(),
-            "ts_cols",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_where_filters() {
-        let p = fake_stream_schema_provider_with_v();
-        for (label, body) in [
-            ("eq", "SELECT * FROM src WHERE id = 1"),
-            ("range", "SELECT * FROM src WHERE id > 0 AND id < 100"),
-            ("in_list", "SELECT * FROM src WHERE id IN (1, 2, 3)"),
-            ("between", "SELECT * FROM src WHERE id BETWEEN 1 AND 10"),
-            ("like", "SELECT * FROM src WHERE v LIKE 'a%'"),
-            ("null", "SELECT * FROM src WHERE v IS NULL"),
-        ] {
-            let sql = format!(
-                "CREATE STREAMING TABLE sink_w_{label} WITH ('connector'='kafka') AS {body}"
-            );
-            assert_coordinator_streaming_build_ok(
-                &sql,
-                p.clone(),
-                &format!("sink_w_{label}"),
-                "kafka",
-            );
-        }
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_case_coalesce_cast() {
-        let ts = TIMESTAMP_FIELD;
-        let sql = format!(
-            "CREATE STREAMING TABLE sink_expr WITH ('connector'='kafka') AS \
-             SELECT CASE WHEN id < 0 THEN 0 ELSE id END AS c, COALESCE(v, 'x') AS v2, \
-             CAST(id AS DOUBLE) AS id_f, {ts} FROM src"
-        );
-        assert_coordinator_streaming_build_ok(
-            &sql,
-            fake_stream_schema_provider_with_v(),
-            "sink_expr",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_row_time_projection() {
-        let ts = TIMESTAMP_FIELD;
-        let sql = format!(
-            "CREATE STREAMING TABLE sink_rt WITH ('connector'='kafka') AS \
-             SELECT row_time(), id, {ts} FROM src"
-        );
-        assert_coordinator_streaming_build_ok(
-            &sql,
-            fake_stream_schema_provider(),
-            "sink_rt",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_scalar_funcs_projection() {
-        let ts = TIMESTAMP_FIELD;
-        let sql = format!(
-            "CREATE STREAMING TABLE sink_scalar WITH ('connector'='kafka') AS \
-             SELECT ABS(id), UPPER(v), LOWER(v), BTRIM(v), CHARACTER_LENGTH(v), {ts} FROM src"
-        );
-        assert_coordinator_streaming_build_ok(
-            &sql,
-            fake_stream_schema_provider_with_v(),
-            "sink_scalar",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_cte() {
-        let ts = TIMESTAMP_FIELD;
-        let sql = format!(
-            "CREATE STREAMING TABLE sink_cte WITH ('connector'='kafka') AS \
-             WITH t AS (SELECT id, {ts} FROM src WHERE id > 0) SELECT * FROM t"
-        );
-        assert_coordinator_streaming_build_ok(
-            &sql,
-            fake_stream_schema_provider(),
-            "sink_cte",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_cte_chain() {
-        let sql = "CREATE STREAMING TABLE sink_cte2 WITH ('connector'='kafka') AS \
-             WITH a AS (SELECT id FROM src), b AS (SELECT id FROM a WHERE id > 1) SELECT * FROM b";
-        assert_coordinator_streaming_build_ok(
-            sql,
-            fake_stream_schema_provider(),
-            "sink_cte2",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_sink_name_with_digits() {
-        assert_coordinator_streaming_build_ok(
-            "CREATE STREAMING TABLE out_sink_01 WITH ('connector'='kafka') AS SELECT * FROM src",
-            fake_stream_schema_provider(),
-            "out_sink_01",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_subquery_in_from() {
-        let ts = TIMESTAMP_FIELD;
-        let sql = format!(
-            "CREATE STREAMING TABLE sink_sq WITH ('connector'='kafka') AS \
-             SELECT * FROM (SELECT id, {ts} FROM src WHERE id >= 0) AS x"
-        );
-        assert_coordinator_streaming_build_ok(
-            &sql,
-            fake_stream_schema_provider(),
-            "sink_sq",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_nested_subqueries() {
-        let sql = "CREATE STREAMING TABLE sink_nest WITH ('connector'='kafka') AS \
-             SELECT * FROM (SELECT * FROM (SELECT id FROM src) AS i2) AS i1";
-        assert_coordinator_streaming_build_ok(
-            sql,
-            fake_stream_schema_provider(),
-            "sink_nest",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_union_all() {
-        let ts = TIMESTAMP_FIELD;
-        let sql = format!(
-            "CREATE STREAMING TABLE sink_union WITH ('connector'='kafka') AS \
-             SELECT id, v, {ts} FROM src \
-             UNION ALL \
-             SELECT id, name AS v, {ts} FROM dim"
-        );
-        assert_coordinator_streaming_build_ok(
-            &sql,
-            fake_src_dim_provider(),
-            "sink_union",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_nullif_regexp() {
-        let ts = TIMESTAMP_FIELD;
-        let sql = format!(
-            "CREATE STREAMING TABLE sink_re WITH ('connector'='kafka') AS \
-             SELECT id, NULLIF(v, ''), REGEXP_LIKE(v, '^x'), {ts} FROM src"
-        );
-        assert_coordinator_streaming_build_ok(
-            &sql,
-            fake_stream_schema_provider_with_v(),
-            "sink_re",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_build_create_streaming_table_not_and_or_where() {
-        let p = fake_stream_schema_provider_with_v();
-        assert_coordinator_streaming_build_ok(
-            "CREATE STREAMING TABLE sink_bool WITH ('connector'='kafka') AS \
-             SELECT * FROM src WHERE NOT (id = 0) AND (v IS NOT NULL OR id > 0)",
-            p,
-            "sink_bool",
-            "kafka",
-        );
-    }
-
-    #[test]
-    fn coordinator_sql_create_streaming_table_compiles_full_pipeline() {
-        assert_coordinator_streaming_build_ok(
-            concat!(
-                "CREATE STREAMING TABLE my_sink ",
-                "WITH ('connector' = 'kafka') ",
-                "AS SELECT * FROM src",
-            ),
-            fake_stream_schema_provider(),
-            "my_sink",
-            "kafka",
-        );
-    }
-}
+}
\ No newline at end of file
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index 3bd117e2..b9cb4dfe 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -459,58 +459,4 @@ impl StatementVisitor for LogicalPlanVisitor {
             *if_exists,
         )))
     }
-}
-
-#[cfg(test)]
-mod create_streaming_table_tests {
-    use std::sync::Arc;
-
-    use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
-    use datafusion::sql::sqlparser::ast::Statement as DFStatement;
-    use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
-    use datafusion::sql::sqlparser::parser::Parser;
-
-    use crate::sql::common::TIMESTAMP_FIELD;
-    use crate::sql::logical_planner::optimizers::produce_optimized_plan;
-    use crate::sql::rewrite_plan;
-    use crate::sql::schema::StreamSchemaProvider;
-
-    fn schema_provider_with_src() -> StreamSchemaProvider {
-        let mut provider = StreamSchemaProvider::new();
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int64, false),
-            Field::new(
-                TIMESTAMP_FIELD,
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            ),
-        ]));
-        provider.add_source_table(
-            "src".to_string(),
-            schema,
-            Some(TIMESTAMP_FIELD.to_string()),
-            None,
-        );
-        provider
-    }
-
-    #[test]
-    fn create_streaming_table_query_plans_and_rewrites() {
-        let sql =
-            "CREATE STREAMING TABLE my_sink WITH ('connector' = 'kafka') AS SELECT * FROM src";
-        let dialect = FunctionStreamDialect {};
-        let ast = Parser::parse_sql(&dialect, sql).expect("parse CREATE STREAMING TABLE");
-        let DFStatement::CreateStreamingTable { query, .. } = &ast[0] else {
-            panic!("expected CreateStreamingTable, got {:?}", ast[0]);
-        };
-        let provider = schema_provider_with_src();
-        let base = produce_optimized_plan(&DFStatement::Query(query.clone()), &provider)
-            .expect("produce optimized logical plan for sink query");
-        let rewritten = rewrite_plan(base, &provider).expect("streaming rewrite_plan");
-        let dot = format!("{}", rewritten.display_graphviz());
-        assert!(
-            dot.contains("src") || dot.contains("Src"),
-            "rewritten plan should reference source; got subgraph:\n{dot}"
-        );
-    }
-}
+}
\ No newline at end of file
diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs
index d0f80786..af9a9ddf 100644
--- a/src/coordinator/runtime_context.rs
+++ b/src/coordinator/runtime_context.rs
@@ -18,7 +18,11 @@ use anyhow::Result;
 
 use crate::runtime::streaming::job::JobManager;
 use crate::runtime::taskexecutor::TaskManager;
-use crate::sql::schema::StreamSchemaProvider;
+use crate::sql::schema::column_descriptor::ColumnDescriptor;
+use crate::sql::schema::connection_type::ConnectionType;
+use crate::sql::schema::source_table::SourceTable;
+use crate::sql::schema::table::Table as CatalogTable;
+use crate::sql::schema::{StreamSchemaProvider, StreamTable};
 use crate::storage::stream_catalog::CatalogManager;
 
 /// Dependencies shared by analyze / plan / execute, analogous to installing globals in
@@ -60,9 +64,41 @@ impl CoordinatorRuntimeContext {
 
     /// Schema provider for [`LogicalPlanVisitor`] / [`SqlToRel`]: override if set, else catalog snapshot.
     pub fn planning_schema_provider(&self) -> StreamSchemaProvider {
-        if let Some(ref p) = self.planning_schema_override {
-            return p.clone();
+        let mut provider = self.catalog_manager.acquire_planning_context();
+
+        for (name, stream) in provider.tables.streams.clone() {
+            let StreamTable::Source {
+                name: source_name,
+                schema,
+                event_time_field,
+                watermark_field,
+                with_options,
+            } = stream.as_ref()
+            else {
+                continue;
+            };
+
+            let connector = with_options
+                .get("connector")
+                .cloned()
+                .unwrap_or_else(|| "stream_catalog".to_string());
+            let mut source = SourceTable::new(source_name.clone(), connector, ConnectionType::Source);
+            source.schema_specs = schema
+                .fields()
+                .iter()
+                .map(|f| ColumnDescriptor::new_physical((**f).clone()))
+                .collect();
+            source.inferred_fields = Some(schema.fields().iter().cloned().collect());
+            source.temporal_config.event_column = event_time_field.clone();
+            source.temporal_config.watermark_strategy_column = watermark_field.clone();
+            source.catalog_with_options = with_options.clone();
+
+            provider
+                .tables
+                .catalogs
+                .insert(name, Arc::new(CatalogTable::ConnectorTable(source)));
         }
-        self.catalog_manager.acquire_planning_context()
+
+        provider
     }
 }
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
index f82cc657..1315e4de 100644
--- a/src/runtime/streaming/factory/mod.rs
+++ b/src/runtime/streaming/factory/mod.rs
@@ -28,8 +28,6 @@ pub use connector::{
 pub use global::Registry;
 pub use operator_constructor::OperatorConstructor;
 pub use operator_factory::OperatorFactory;
-#[allow(unused_imports)]
-pub use operator_factory::PassthroughConstructor;
 
 fn register_builtin_connectors(factory: &mut OperatorFactory) {
     factory.register(
diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs
index e1bdd635..eb2afd9b 100644
--- a/src/runtime/streaming/factory/operator_factory.rs
+++ b/src/runtime/streaming/factory/operator_factory.rs
@@ -19,7 +19,6 @@ use std::sync::Arc;
 use super::operator_constructor::OperatorConstructor;
 use crate::runtime::streaming::api::operator::ConstructedOperator;
 use crate::runtime::streaming::factory::global::Registry;
-use crate::runtime::streaming::operators::PassthroughOperator;
 use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor;
 use crate::runtime::streaming::operators::joins::{
     InstantJoinConstructor, JoinWithExpirationConstructor,
@@ -30,12 +29,18 @@ use crate::runtime::streaming::operators::windows::{
     SessionAggregatingWindowConstructor, SlidingAggregatingWindowConstructor,
     TumblingAggregateWindowConstructor, WindowFunctionConstructor,
 };
-
+use crate::runtime::streaming::operators::{
+    ProjectionOperator, StatelessPhysicalExecutor, ValueExecutionOperator,
+};
+use crate::sql::common::FsSchema;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
+use datafusion_proto::protobuf::PhysicalExprNode;
 use protocol::grpc::api::{
     ExpressionWatermarkConfig, JoinOperator as JoinOperatorProto,
-    KeyPlanOperator as KeyByProto, SessionWindowAggregateOperator, SlidingWindowAggregateOperator,
-    TumblingWindowAggregateOperator, UpdatingAggregateOperator,
-    WindowFunctionOperator as WindowFunctionProto,
+    KeyPlanOperator as KeyByProto, ProjectionOperator as ProjectionOperatorProto,
+    SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator,
+    UpdatingAggregateOperator, ValuePlanOperator, WindowFunctionOperator as WindowFunctionProto,
 };
 
 use crate::sql::logical_node::logical::OperatorName;
@@ -102,18 +107,8 @@ impl OperatorFactory {
 
         self.register_named(OperatorName::KeyBy, Box::new(KeyByBridge));
 
-        self.register_named(
-            OperatorName::Projection,
-            Box::new(PassthroughConstructor(OperatorName::Projection)),
-        );
-        self.register_named(
-            OperatorName::ArrowValue,
-            Box::new(PassthroughConstructor(OperatorName::ArrowValue)),
-        );
-        self.register_named(
-            OperatorName::ArrowKey,
-            Box::new(PassthroughConstructor(OperatorName::ArrowKey)),
-        );
+        self.register_named(OperatorName::Projection, Box::new(ProjectionBridge));
+        self.register_named(OperatorName::Value, Box::new(ValueBridge));
 
         crate::runtime::streaming::factory::register_builtin_connectors(self);
         crate::runtime::streaming::factory::register_kafka_connector_plugins(self);
@@ -217,12 +212,81 @@ impl OperatorConstructor for KeyByBridge {
     }
 }
 
-pub struct PassthroughConstructor(pub OperatorName);
+struct ProjectionBridge;
+impl OperatorConstructor for ProjectionBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = ProjectionOperatorProto::decode(config)
+            .map_err(|e| anyhow!("Decode ProjectionOperator failed: {e}"))?;
+        let op = ProjectionExecutionConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
+    }
+}
 
-impl OperatorConstructor for PassthroughConstructor {
-    fn with_config(&self, _config: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
-        Ok(ConstructedOperator::Operator(Box::new(
-            PassthroughOperator::new(self.0.as_registry_key()),
-        )))
+struct ValueBridge;
+impl OperatorConstructor for ValueBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = ValuePlanOperator::decode(config)
+            .map_err(|e| anyhow!("Decode ValuePlanOperator failed: {e}"))?;
+        let op = ValueExecutionConstructor.with_config(proto, registry)?;
+        Ok(ConstructedOperator::Operator(Box::new(op)))
     }
 }
+
+struct ProjectionExecutionConstructor;
+impl ProjectionExecutionConstructor {
+    fn with_config(
+        &self,
+        config: ProjectionOperatorProto,
+        registry: Arc<Registry>,
+    ) -> Result<ProjectionOperator> {
+        let input_schema: FsSchema = config
+            .input_schema
+            .ok_or_else(|| anyhow!("missing projection input_schema"))?
+            .try_into()
+            .map_err(|e| anyhow!("projection input_schema: {e}"))?;
+        let output_schema: FsSchema = config
+            .output_schema
+            .ok_or_else(|| anyhow!("missing projection output_schema"))?
+            .try_into()
+            .map_err(|e| anyhow!("projection output_schema: {e}"))?;
+
+        let exprs = config
+            .exprs
+            .iter()
+            .map(|raw| {
+                let expr_node = PhysicalExprNode::decode(&mut raw.as_slice())
+                    .map_err(|e| anyhow!("decode projection expr: {e}"))?;
+                parse_physical_expr(
+                    &expr_node,
+                    registry.as_ref(),
+                    &input_schema.schema,
+                    &DefaultPhysicalExtensionCodec {},
+                )
+                .map_err(|e| anyhow!("parse projection expr: {e}"))
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(ProjectionOperator::new(
+            if config.name.is_empty() {
+                OperatorName::Projection.as_registry_key().to_string()
+            } else {
+                config.name
+            },
+            Arc::new(output_schema),
+            exprs,
+        ))
+    }
+}
+
+struct ValueExecutionConstructor;
+impl ValueExecutionConstructor {
+    fn with_config(
+        &self,
+        config: ValuePlanOperator,
+        registry: Arc<Registry>,
+    ) -> Result<ValueExecutionOperator> {
+        let executor = StatelessPhysicalExecutor::new(&config.physical_plan, registry.as_ref())
+            .map_err(|e| anyhow!("build value execution plan '{}': {e}", config.name))?;
+        Ok(ValueExecutionOperator::new(config.name, executor))
+    }
+}
\ No newline at end of file
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
index cb8412d8..b679f2bd 100644
--- a/src/runtime/streaming/operators/mod.rs
+++ b/src/runtime/streaming/operators/mod.rs
@@ -24,6 +24,8 @@ mod stateless_physical_executor;
 mod value_execution;
 
 pub use stateless_physical_executor::StatelessPhysicalExecutor;
+pub use projection::ProjectionOperator;
+pub use value_execution::ValueExecutionOperator;
 
 pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache};
 pub use joins::{InstantJoinOperator, JoinWithExpirationOperator};
@@ -35,52 +37,3 @@ pub use windows::{
     SessionWindowOperator, SlidingWindowOperator, TumblingWindowOperator,
     WindowFunctionOperator,
 };
-
-use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
-use arrow_array::RecordBatch;
-use async_trait::async_trait;
-use crate::runtime::streaming::StreamOutput;
-use crate::sql::common::{CheckpointBarrier, Watermark};
-
-pub struct PassthroughOperator {
-    name: String,
-}
-
-impl PassthroughOperator {
-    pub fn new(name: impl Into<String>) -> Self {
-        Self { name: name.into() }
-    }
-}
-
-#[async_trait]
-impl MessageOperator for PassthroughOperator {
-    fn name(&self) -> &str {
-        &self.name
-    }
-
-    async fn process_data(
-        &mut self,
-        _input_idx: usize,
-        batch: RecordBatch,
-        _ctx: &mut TaskContext,
-    ) -> anyhow::Result<Vec<StreamOutput>> {
-        Ok(vec![StreamOutput::Forward(batch)])
-    }
-
-    async fn process_watermark(
-        &mut self,
-        _watermark: Watermark,
-        _ctx: &mut TaskContext,
-    ) -> anyhow::Result<Vec<StreamOutput>> {
-        Ok(vec![])
-    }
-
-    async fn snapshot_state(
-        &mut self,
-        _barrier: CheckpointBarrier,
-        _ctx: &mut TaskContext,
-    ) -> anyhow::Result<()> {
-        Ok(())
-    }
-}
diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs
index d96a47bf..35804c02 100644
--- a/src/sql/analysis/source_rewriter.rs
+++ b/src/sql/analysis/source_rewriter.rs
@@ -27,11 +27,17 @@ use crate::sql::schema::table::Table;
 use crate::sql::schema::StreamSchemaProvider;
 use crate::sql::schema::StreamTable;
 use crate::sql::common::constants::sql_field;
+use crate::sql::common::UPDATING_META_FIELD;
+use crate::sql::extensions::debezium::UnrollDebeziumPayloadNode;
 use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
+use crate::sql::extensions::table_source::StreamIngestionNode;
 use crate::sql::extensions::watermark_node::EventTimeWatermarkNode;
 use crate::sql::types::TIMESTAMP_FIELD;
 
-/// Rewrites table scans into proper source nodes with projections and watermarks.
+/// Rewrites table scans: projections are lifted out of scans into a dedicated projection node
+/// (including virtual fields), using a connector table-source extension instead of a bare
+/// `TableScan`, optionally with Debezium unrolling for updating sources, then remote boundary and
+/// watermark.
 pub struct SourceRewriter<'a> {
     pub(crate) schema_provider: &'a StreamSchemaProvider,
 }
@@ -172,144 +178,50 @@ impl SourceRewriter<'_> {
             )));
         }
 
+        if table.is_updating() {
+            expressions.push(Expr::Column(Column::new(
+                Some(qualifier.clone()),
+                UPDATING_META_FIELD,
+            )));
+        }
+
         Ok(expressions)
     }
 
-    /// Stream catalog [`StreamTable::Source`] (Kafka/… registered via coordinator): inject `_timestamp`
-    /// from `event_time_field` when the physical schema uses another name (e.g. `impression_time`).
-    fn mutate_stream_catalog_source(
-        &self,
-        table_scan: &TableScan,
-        st: &StreamTable,
-    ) -> DFResult<Transformed<LogicalPlan>> {
-        let StreamTable::Source {
-            schema,
-            event_time_field,
-            watermark_field,
-            ..
-        } = st
-        else {
-            return Ok(Transformed::no(LogicalPlan::TableScan(table_scan.clone())));
-        };
 
+    /// Connector path: `StreamIngestionNode` (table source) → optional `UnrollDebeziumPayloadNode`
+    /// → `Projection`, mirroring Arroyo `TableSourceExtension` + Debezium unroll + projection.
+    fn projection(&self, table_scan: &TableScan, table: &SourceTable) -> DFResult<LogicalPlan> {
         let qualifier = table_scan.table_name.clone();
 
-        let mut expressions: Vec<Expr> = schema
-            .fields()
-            .iter()
-            .map(|f| {
-                Expr::Column(Column {
-                    relation: Some(qualifier.clone()),
-                    name: f.name().to_string(),
-                    spans: Default::default(),
-                })
-            })
-            .collect();
+        let table_source = LogicalPlan::Extension(Extension {
+            node: Arc::new(StreamIngestionNode::try_new(
+                qualifier.clone(),
+                table.clone(),
+            )?),
+        });
 
-        let has_physical_ts = schema.fields().iter().any(|f| f.name() == TIMESTAMP_FIELD);
-
-        match event_time_field.as_deref() {
-            Some(et) if et != TIMESTAMP_FIELD => {
-                if !schema.fields().iter().any(|f| f.name().as_str() == et) {
-                    return Err(DataFusionError::Plan(format!(
-                        "Stream source `{}`: event_time_field `{et}` is not in the table schema",
-                        table_scan.table_name.table()
-                    )));
-                }
-                expressions.push(
-                    Expr::Column(Column {
-                        relation: Some(qualifier.clone()),
-                        name: et.to_string(),
-                        spans: Default::default(),
-                    })
-                    .alias_qualified(Some(qualifier.clone()), TIMESTAMP_FIELD.to_string()),
-                );
-            }
-            None if !has_physical_ts => {
+        let (projection_input, scan_projection) = if table.is_updating() {
+            if table.key_constraints.is_empty() {
                 return plan_err!(
-                    "Stream source `{}` has no `{}` column; declare WATERMARK FOR <event_time> AS ... on CREATE TABLE, or add a `{}` column",
-                    table_scan.table_name.table(),
-                    TIMESTAMP_FIELD,
-                    TIMESTAMP_FIELD
+                    "Updating connector table `{}` requires at least one PRIMARY KEY for CDC unrolling",
+                    table.table_identifier
                 );
             }
-            _ => {}
-        }
-
-        let source_input = LogicalPlan::TableScan(table_scan.clone());
-        let projection = LogicalPlan::Projection(Projection::try_new(
-            expressions,
-            Arc::new(source_input),
-        )?);
-
-        let schema_ref = projection.schema().clone();
-        let remote = LogicalPlan::Extension(Extension {
-            node: Arc::new(RemoteTableBoundaryNode {
-                upstream_plan: projection,
-                table_identifier: table_scan.table_name.to_owned(),
-                resolved_schema: schema_ref,
-                requires_materialization: true,
-            }),
-        });
-
-        let projected = Self::stream_source_projected_column_names(
-            schema.as_ref(),
-            event_time_field.as_deref(),
-        );
-        let wf = Self::stream_source_effective_watermark_field(
-            watermark_field.as_deref(),
-            &projected,
-        );
-        let wm_expr = Self::watermark_expression_for_stream_source(wf, &qualifier)?;
-
-        let watermark_node = EventTimeWatermarkNode::try_new(
-            remote,
-            table_scan.table_name.clone(),
-            wm_expr,
-        )
-        .map_err(|err| {
-            DataFusionError::Internal(format!("failed to create watermark node: {err}"))
-        })?;
-
-        Ok(Transformed::yes(LogicalPlan::Extension(Extension {
-            node: Arc::new(watermark_node),
-        })))
-    }
-
-    fn watermark_expression_for_stream_source(
-        watermark_field: Option<&str>,
-        qualifier: &TableReference,
-    ) -> DFResult<Expr> {
-        match watermark_field {
-            Some(wf) => Ok(Expr::Column(Column {
-                relation: Some(qualifier.clone()),
-                name: wf.to_string(),
-                spans: Default::default(),
-            })),
-            None => Ok(Expr::BinaryExpr(BinaryExpr {
-                left: Box::new(Expr::Column(Column {
-                    relation: Some(qualifier.clone()),
-                    name: TIMESTAMP_FIELD.to_string(),
-                    spans: Default::default(),
-                })),
-                op: logical_expr::Operator::Minus,
-                right: Box::new(Expr::Literal(
-                    ScalarValue::DurationNanosecond(Some(Duration::from_secs(1).as_nanos() as i64)),
-                    None,
-                )),
-            })),
-        }
-    }
-
-    fn projection(&self, table_scan: &TableScan, table: &SourceTable) -> DFResult<LogicalPlan> {
-        let qualifier = table_scan.table_name.clone();
-
-        // TODO: replace with StreamIngestionNode when available
-        let source_input = LogicalPlan::TableScan(table_scan.clone());
+            let unrolled = LogicalPlan::Extension(Extension {
+                node: Arc::new(UnrollDebeziumPayloadNode::try_new(
+                    table_source,
+                    Arc::new(table.key_constraints.clone()),
+                )?),
+            });
+            (unrolled, None)
+        } else {
+            (table_source, table_scan.projection.clone())
+        };
 
         Ok(LogicalPlan::Projection(Projection::try_new(
-            Self::projection_expressions(table, &qualifier, &table_scan.projection)?,
-            Arc::new(source_input),
+            Self::projection_expressions(table, &qualifier, &scan_projection)?,
+            Arc::new(projection_input),
         )?))
     }
 
@@ -399,25 +311,21 @@ impl TreeNodeRewriter for SourceRewriter<'_> {
         };
 
         let table_name = table_scan.table_name.table();
-
-        if let Some(table) = self.schema_provider.get_catalog_table(table_name) {
-            return match table {
-                Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table),
-                Table::LookupTable(_table) => {
-                    // TODO: implement LookupSource extension
-                    plan_err!("Lookup tables are not yet supported")
-                }
-                Table::TableFromQuery {
-                    name: _,
-                    logical_plan,
-                } => self.mutate_table_from_query(&table_scan, logical_plan),
-            };
-        }
-
-        if let Some(st) = self.schema_provider.get_stream_table(table_name) {
-            return self.mutate_stream_catalog_source(&table_scan, st.as_ref());
+        let table = self
+            .schema_provider
+            .get_catalog_table(table_name)
+            .ok_or_else(|| DataFusionError::Plan(format!("Table {table_name} not found")))?;
+
+        match table {
+            Table::ConnectorTable(table) => self.mutate_connector_table(&table_scan, table),
+            Table::LookupTable(_table) => {
+                // TODO: implement LookupSource extension
+                plan_err!("Lookup tables are not yet supported")
+            }
+            Table::TableFromQuery {
+                name: _,
+                logical_plan,
+            } => self.mutate_table_from_query(&table_scan, logical_plan),
         }
-
-        Ok(Transformed::no(LogicalPlan::TableScan(table_scan.clone())))
     }
 }
diff --git a/src/sql/extensions/key_calculation.rs b/src/sql/extensions/key_calculation.rs
index 1d271698..25206429 100644
--- a/src/sql/extensions/key_calculation.rs
+++ b/src/sql/extensions/key_calculation.rs
@@ -107,7 +107,7 @@ impl KeyExtractionNode {
             key_fields: indices.iter().map(|&idx| idx as u64).collect(),
         };
 
-        (operator_config.encode_to_vec(), OperatorName::ArrowKey)
+        (operator_config.encode_to_vec(), OperatorName::KeyBy)
     }
 
     fn compile_expression_router(
diff --git a/src/sql/extensions/remote_table.rs b/src/sql/extensions/remote_table.rs
index 7025e254..72b6150c 100644
--- a/src/sql/extensions/remote_table.rs
+++ b/src/sql/extensions/remote_table.rs
@@ -116,7 +116,7 @@ impl StreamingOperatorBlueprint for RemoteTableBoundaryNode {
         let logical_node = LogicalNode::single(
             node_index as u32,
             format!("value_{node_index}"),
-            OperatorName::ArrowValue,
+            OperatorName::Value,
             operator_payload,
             self.table_identifier.to_string(),
             1,
diff --git a/src/sql/frontend_sql_coverage_tests.rs b/src/sql/frontend_sql_coverage_tests.rs
deleted file mode 100644
index 0a201f9e..00000000
--- a/src/sql/frontend_sql_coverage_tests.rs
+++ /dev/null
@@ -1,823 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//! SQL parse and streaming-related tests.
-
-use std::sync::Arc;
-
-use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
-use datafusion::sql::sqlparser::ast::Statement as DFStatement;
-use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
-use datafusion::sql::sqlparser::parser::Parser;
-
-use crate::coordinator::Coordinator;
-use crate::sql::common::TIMESTAMP_FIELD;
-use crate::sql::parse::parse_sql;
-use crate::sql::rewrite_plan;
-use crate::sql::logical_planner::optimizers::produce_optimized_plan;
-use crate::sql::schema::StreamSchemaProvider;
-
-fn assert_parses_as(sql: &str, type_prefix: &str) {
-    let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse failed for {sql:?}: {e}"));
-    assert!(!stmts.is_empty(), "{sql}");
-    let dbg = format!("{:?}", stmts[0]);
-    assert!(
-        dbg.starts_with(type_prefix),
-        "sql={sql:?} expected prefix {type_prefix}, got {dbg}"
-    );
-}
-
-fn assert_parse_fails(sql: &str) {
-    assert!(
-        parse_sql(sql).is_err(),
-        "expected parse/classify failure for {sql:?}"
-    );
-}
-
-fn fake_src_stream_provider() -> StreamSchemaProvider {
-    let mut provider = StreamSchemaProvider::new();
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int64, false),
-        Field::new("v", DataType::Utf8, true),
-        Field::new(
-            TIMESTAMP_FIELD,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            false,
-        ),
-    ]));
-    provider.add_source_table(
-        "src".to_string(),
-        schema,
-        Some(TIMESTAMP_FIELD.to_string()),
-        None,
-    );
-    provider
-}
-
-fn compile_first(coordinator: &Coordinator, sql: &str, provider: StreamSchemaProvider) {
-    let stmts = parse_sql(sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
-    coordinator
-        .compile_plan(stmts[0].as_ref(), provider)
-        .unwrap_or_else(|e| panic!("compile_plan {sql:?}: {e:#}"));
-}
-
-fn compile_first_streaming(sql: &str) {
-    compile_first(
-        &Coordinator::new(),
-        sql,
-        fake_src_stream_provider(),
-    );
-}
-
-fn fake_src_dim_stream_provider() -> StreamSchemaProvider {
-    let mut provider = fake_src_stream_provider();
-    let dim_schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int64, false),
-        Field::new("name", DataType::Utf8, true),
-        Field::new("amt", DataType::Float64, true),
-        Field::new(
-            TIMESTAMP_FIELD,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            false,
-        ),
-    ]));
-    provider.add_source_table(
-        "dim".to_string(),
-        dim_schema,
-        Some(TIMESTAMP_FIELD.to_string()),
-        None,
-    );
-    provider
-}
-
-fn compile_streaming_select_body(body: &str, provider: StreamSchemaProvider) {
-    let sql = format!(
-        "CREATE STREAMING TABLE sink_shape_cov WITH ('connector'='kafka') AS {body}"
-    );
-    compile_first(&Coordinator::new(), &sql, provider);
-}
-
-fn assert_streaming_select_logical_rewrites(body: &str, provider: &StreamSchemaProvider) {
-    let sql = format!(
-        "CREATE STREAMING TABLE sink_lr WITH ('connector'='kafka') AS {body}"
-    );
-    let dialect = FunctionStreamDialect {};
-    let stmts = Parser::parse_sql(&dialect, &sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
-    let DFStatement::CreateStreamingTable { query, .. } = &stmts[0] else {
-        panic!("expected CreateStreamingTable, got {:?}", stmts[0]);
-    };
-    let plan = produce_optimized_plan(&DFStatement::Query(query.clone()), provider)
-        .unwrap_or_else(|e| panic!("produce_optimized_plan {sql:?}: {e:#}"));
-    rewrite_plan(plan, provider).unwrap_or_else(|e| panic!("rewrite_plan {sql:?}: {e:#}"));
-}
-
-fn assert_streaming_select_logical_rewrite_err_contains(
-    body: &str,
-    provider: &StreamSchemaProvider,
-    needle: &str,
-) {
-    let sql = format!(
-        "CREATE STREAMING TABLE sink_lr WITH ('connector'='kafka') AS {body}"
-    );
-    let dialect = FunctionStreamDialect {};
-    let stmts = Parser::parse_sql(&dialect, &sql).unwrap_or_else(|e| panic!("parse {sql:?}: {e}"));
-    let DFStatement::CreateStreamingTable { query, .. } = &stmts[0] else {
-        panic!("expected CreateStreamingTable, got {:?}", stmts[0]);
-    };
-    let plan = produce_optimized_plan(&DFStatement::Query(query.clone()), provider)
-        .unwrap_or_else(|e| panic!("produce_optimized_plan {sql:?}: {e:#}"));
-    let err = rewrite_plan(plan, provider).unwrap_err();
-    let msg = err.to_string();
-    assert!(
-        msg.contains(needle),
-        "expected '{needle}' in rewrite error, got: {msg}"
-    );
-}
-
-#[test]
-fn parse_create_function_double_quoted_path_style() {
-    assert_parses_as(
-        r#"CREATE FUNCTION WITH ("function_path"='./a.wasm', "config_path"='./b.yml')"#,
-        "CreateFunction",
-    );
-}
-
-#[test]
-fn parse_create_function_extra_numeric_and_bool_like_strings() {
-    assert_parses_as(
-        r#"CREATE FUNCTION WITH (
-            'function_path'='./f.wasm',
-            'config_path'='./c.yml',
-            'parallelism'='8',
-            'dry_run'='false'
-        )"#,
-        "CreateFunction",
-    );
-}
-
-#[test]
-fn parse_create_function_fails_without_function_path() {
-    let err = parse_sql("CREATE FUNCTION WITH ('config_path'='./only.yml')").unwrap_err();
-    let s = err.to_string();
-    assert!(
-        s.contains("function_path") || s.contains("CREATE FUNCTION"),
-        "{s}"
-    );
-}
-
-#[test]
-fn parse_drop_function_quoted_name() {
-    assert_parses_as(r#"DROP FUNCTION "my-pipeline""#, "DropFunction");
-}
-
-#[test]
-fn parse_start_stop_function_dotted_style_name() {
-    assert_parses_as("START FUNCTION job.v1.main", "StartFunction");
-    assert_parses_as("STOP FUNCTION job.v1.main", "StopFunction");
-}
-
-#[test]
-fn parse_show_functions_extra_whitespace() {
-    assert_parses_as("  SHOW   FUNCTIONS  ", "ShowFunctions");
-}
-
-#[test]
-fn parse_create_table_multiple_columns_types() {
-    assert_parses_as(
-        "CREATE TABLE metrics (ts TIMESTAMP, name VARCHAR, val DOUBLE, ok BOOLEAN)",
-        "CreateTable",
-    );
-}
-
-#[test]
-fn parse_create_table_with_not_null_and_precision() {
-    assert_parses_as(
-        "CREATE TABLE t (id BIGINT NOT NULL, code DECIMAL(10,2))",
-        "CreateTable",
-    );
-}
-
-#[test]
-fn parse_create_table_if_not_exists_if_dialect_accepts() {
-    if let Ok(stmts) = parse_sql("CREATE TABLE IF NOT EXISTS guard (id INT)") {
-        assert!(format!("{:?}", stmts[0]).starts_with("CreateTable"));
-    }
-}
-
-#[test]
-fn parse_streaming_table_select_star() {
-    assert_parses_as(
-        "CREATE STREAMING TABLE s1 WITH ('connector'='kafka') AS SELECT * FROM src",
-        "StreamingTableStatement",
-    );
-}
-
-#[test]
-fn parse_streaming_table_select_columns() {
-    assert_parses_as(
-        "CREATE STREAMING TABLE s2 WITH ('connector'='memory') AS SELECT id, v FROM src",
-        "StreamingTableStatement",
-    );
-}
-
-#[test]
-fn parse_streaming_table_with_partition_by() {
-    let sql = format!(
-        "CREATE STREAMING TABLE s3 WITH ('connector' = 'kafka', 'partition_by' = 'id') AS SELECT id, {} FROM src",
-        TIMESTAMP_FIELD
-    );
-    assert_parses_as(&sql, "StreamingTableStatement");
-}
-
-#[test]
-fn parse_streaming_table_with_idle_time_option() {
-    assert_parses_as(
-        "CREATE STREAMING TABLE s4 WITH ('connector'='kafka', 'idle_time'='30s') AS SELECT * FROM src",
-        "StreamingTableStatement",
-    );
-}
-
-#[test]
-fn parse_streaming_table_sink_name_snake_and_digits() {
-    assert_parses_as(
-        "CREATE STREAMING TABLE sink_01_out WITH ('connector'='memory') AS SELECT 1",
-        "StreamingTableStatement",
-    );
-}
-
-#[test]
-fn parse_streaming_table_comment_before_as_if_supported() {
-    let sql = "CREATE STREAMING TABLE c1 WITH ('connector'='kafka') COMMENT 'out' AS SELECT * FROM src";
-    if let Ok(stmts) = parse_sql(sql) {
-        assert!(
-            format!("{:?}", stmts[0]).starts_with("StreamingTableStatement"),
-            "{stmts:?}"
-        );
-    }
-}
-
-#[test]
-fn parse_three_semicolon_separated_statements() {
-    let sql = concat!(
-        "CREATE FUNCTION WITH ('function_path'='./x.wasm'); ",
-        "CREATE TABLE meta (id INT); ",
-        "CREATE STREAMING TABLE out1 WITH ('connector'='kafka') AS SELECT 1",
-    );
-    let stmts = parse_sql(sql).unwrap();
-    assert_eq!(stmts.len(), 3);
-    assert!(format!("{:?}", stmts[0]).starts_with("CreateFunction"));
-    assert!(format!("{:?}", stmts[1]).starts_with("CreateTable"));
-    assert!(format!("{:?}", stmts[2]).starts_with("StreamingTableStatement"));
-}
-
-#[test]
-fn parse_rejects_insert_with_columns_list() {
-    assert_parse_fails("INSERT INTO t (a,b) VALUES (1,2)");
-}
-
-#[test]
-fn parse_rejects_update_delete() {
-    assert_parse_fails("UPDATE src SET id = 1");
-    assert_parse_fails("DELETE FROM src WHERE id = 0");
-}
-
-#[test]
-fn parse_rejects_merge_explain() {
-    assert_parse_fails("EXPLAIN SELECT 1");
-    assert_parse_fails("MERGE INTO t USING s ON true WHEN MATCHED THEN UPDATE SET x=1");
-}
-
-#[test]
-fn parse_rejects_create_schema_database() {
-    assert_parse_fails("CREATE SCHEMA s");
-    assert_parse_fails("CREATE DATABASE d");
-}
-
-#[test]
-fn compile_streaming_select_star_from_src() {
-    compile_first_streaming(concat!(
-        "CREATE STREAMING TABLE kafka_all ",
-        "WITH ('connector'='kafka') ",
-        "AS SELECT * FROM src",
-    ));
-}
-
-#[test]
-fn compile_streaming_select_id_v_from_src() {
-    let sql = format!(
-        "CREATE STREAMING TABLE kafka_cols WITH ('connector'='kafka') AS SELECT id, v, {} FROM src",
-        TIMESTAMP_FIELD
-    );
-    compile_first_streaming(&sql);
-}
-
-#[test]
-fn compile_streaming_memory_connector() {
-    compile_first_streaming(
-        "CREATE STREAMING TABLE mem_sink WITH ('connector'='memory') AS SELECT * FROM src",
-    );
-}
-
-#[test]
-fn compile_streaming_with_partition_by_id() {
-    compile_first_streaming(concat!(
-        "CREATE STREAMING TABLE part_sink ",
-        "WITH ('connector'='kafka', 'partition_by'='id') ",
-        "AS SELECT * FROM src",
-    ));
-}
-
-#[test]
-fn compile_streaming_connector_postgres_string() {
-    compile_first_streaming(
-        "CREATE STREAMING TABLE pg_sink WITH ('connector'='postgres') AS SELECT id FROM src",
-    );
-}
-
-#[test]
-#[should_panic(expected = "connector")]
-fn compile_streaming_fails_without_connector() {
-    let sql = "CREATE STREAMING TABLE bad WITH ('partition_by'='id') AS SELECT * FROM src";
-    let stmts = parse_sql(sql).unwrap();
-    let _ = Coordinator::new().compile_plan(stmts[0].as_ref(), fake_src_stream_provider());
-}
-
-#[test]
-fn compile_plan_show_functions() {
-    let stmts = parse_sql("SHOW FUNCTIONS").unwrap();
-    Coordinator::new()
-        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
-        .expect("ShowFunctions plan");
-}
-
-#[test]
-fn compile_plan_show_tables() {
-    let stmts = parse_sql("SHOW TABLES").unwrap();
-    Coordinator::new()
-        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
-        .expect("ShowCatalogTables plan");
-}
-
-#[test]
-fn compile_plan_show_create_table() {
-    let stmts = parse_sql("SHOW CREATE TABLE my_table").unwrap();
-    Coordinator::new()
-        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
-        .expect("ShowCreateTable plan");
-}
-
-#[test]
-fn compile_plan_start_stop_drop_function() {
-    for sql in [
-        "START FUNCTION t1",
-        "STOP FUNCTION t1",
-        "DROP FUNCTION t1",
-    ] {
-        let stmts = parse_sql(sql).unwrap();
-        Coordinator::new()
-            .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
-            .unwrap_or_else(|e| panic!("{sql}: {e:#}"));
-    }
-}
-
-#[test]
-fn compile_plan_create_function() {
-    let sql =
-        "CREATE FUNCTION WITH ('function_path'='./x.wasm', 'config_path'='./c.yml')";
-    let stmts = parse_sql(sql).unwrap();
-    Coordinator::new()
-        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
-        .expect("CreateFunction plan");
-}
-
-#[test]
-fn compile_plan_create_table_simple_ddl() {
-    let sql = "CREATE TABLE local_only (id INT, name VARCHAR)";
-    let stmts = parse_sql(sql).unwrap();
-    Coordinator::new()
-        .compile_plan(stmts[0].as_ref(), StreamSchemaProvider::new())
-        .expect("CreateTable plan");
-}
-
-#[test]
-fn streaming_where_eq_ne_and_or_not() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!("SELECT * FROM src WHERE id = 1 AND (v <> 'x' OR NOT (id < 0))"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT * FROM src WHERE id > 0 AND id <= 100 AND id >= 1"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT id, v, {ts} FROM src WHERE (id = 2 OR id = 3) AND v IS NOT NULL"),
-        fake_src_stream_provider(),
-    );
-}
-
-#[test]
-fn streaming_where_in_between_like_null() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!("SELECT * FROM src WHERE id IN (1, 2, 3)"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT * FROM src WHERE id NOT IN (99, 100)"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT * FROM src WHERE id BETWEEN 1 AND 10"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT * FROM src WHERE v LIKE 'pre%'"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT * FROM src WHERE v IS NULL"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT id, v, {ts} FROM src WHERE v IS NOT NULL OR id = 0"),
-        fake_src_stream_provider(),
-    );
-}
-
-#[test]
-fn streaming_where_scalar_subquery() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_dim_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT src.id, src.v, src.{ts} FROM src \
-             WHERE src.id = (SELECT MAX(dim.id) FROM dim)"
-        ),
-        &p,
-    );
-}
-
-#[test]
-#[should_panic(expected = "window")]
-fn streaming_where_in_subquery_currently_panics() {
-    let p = fake_src_dim_stream_provider();
-    compile_streaming_select_body(
-        "SELECT * FROM src WHERE id IN (SELECT id FROM dim WHERE amt IS NOT NULL)",
-        p,
-    );
-}
-
-#[test]
-#[should_panic(expected = "window")]
-fn streaming_where_exists_correlated_currently_panics() {
-    let p = fake_src_dim_stream_provider();
-    compile_streaming_select_body(
-        "SELECT * FROM src WHERE EXISTS (SELECT 1 FROM dim WHERE dim.id = src.id)",
-        p,
-    );
-}
-
-#[test]
-fn streaming_select_case_coalesce_cast() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!(
-            "SELECT CASE WHEN id < 0 THEN 0 WHEN id > 1000 THEN 1000 ELSE id END AS c, v, {ts} FROM src"
-        ),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT COALESCE(v, 'na') AS v2, id, {ts} FROM src"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!("SELECT CAST(id AS DOUBLE) AS id_f, {ts} FROM src"),
-        fake_src_stream_provider(),
-    );
-}
-
-#[test]
-fn streaming_select_row_time_distinct() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!("SELECT row_time(), id, v, {ts} FROM src"),
-        fake_src_stream_provider(),
-    );
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites("SELECT DISTINCT id FROM src", &p);
-}
-
-#[test]
-fn streaming_from_subquery_nested() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!("SELECT * FROM (SELECT id, v, {ts} FROM src WHERE id > 0) AS t"),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!(
-            "SELECT * FROM (SELECT * FROM (SELECT id FROM src) AS i2) AS i1"
-        ),
-        fake_src_stream_provider(),
-    );
-}
-
-#[test]
-fn streaming_with_cte_single_and_chain() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!(
-            "WITH a AS (SELECT id, v, {ts} FROM src WHERE id > 0) SELECT * FROM a"
-        ),
-        fake_src_stream_provider(),
-    );
-    compile_streaming_select_body(
-        &format!(
-            "WITH a AS (SELECT id FROM src), b AS (SELECT id FROM a WHERE id > 1) SELECT * FROM b"
-        ),
-        fake_src_stream_provider(),
-    );
-}
-
-#[test]
-fn streaming_group_by_updating_aggregate_bundle() {
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, COUNT(*), SUM(id), AVG(id), MIN(v), MAX(v) FROM src GROUP BY id",
-        &p,
-    );
-}
-
-#[test]
-fn streaming_group_by_count_distinct_and_stats() {
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, COUNT(DISTINCT v), STDDEV_POP(id), VAR_POP(id) FROM src GROUP BY id",
-        &p,
-    );
-}
-
-#[test]
-fn streaming_group_by_having() {
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, COUNT(*) AS c FROM src GROUP BY id HAVING COUNT(*) >= 0",
-        &p,
-    );
-}
-
-#[test]
-fn streaming_group_by_tumble_window() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT tumble(INTERVAL '1' MINUTE) AS w, id, COUNT(*) AS c, MAX({ts}) AS max_evt \
-             FROM src GROUP BY tumble(INTERVAL '1' MINUTE), id"
-        ),
-        &p,
-    );
-}
-
-#[test]
-fn streaming_group_by_hop_window() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT hop(INTERVAL '1' MINUTE, INTERVAL '3' MINUTE) AS w, id, SUM(id), MAX({ts}) AS max_evt \
-             FROM src GROUP BY hop(INTERVAL '1' MINUTE, INTERVAL '3' MINUTE), id"
-        ),
-        &p,
-    );
-}
-
-#[test]
-fn streaming_window_row_number_over_tumble_aggregate() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT ROW_NUMBER() OVER (PARTITION BY w ORDER BY max_evt) AS rn, id, w, max_evt \
-             FROM ( \
-               SELECT tumble(INTERVAL '1' MINUTE) AS w, id, MAX({ts}) AS max_evt \
-               FROM src \
-               GROUP BY tumble(INTERVAL '1' MINUTE), id \
-             ) AS x"
-        ),
-        &p,
-    );
-}
-
-#[test]
-fn streaming_inner_join_eq_and_compound_on() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_dim_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT src.id, src.v, dim.name, src.{ts} \
-             FROM src INNER JOIN dim ON src.id = dim.id"
-        ),
-        &p,
-    );
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT src.id, dim.amt, src.{ts} \
-             FROM src JOIN dim ON src.id = dim.id AND dim.amt > CAST(0 AS DOUBLE)"
-        ),
-        &p,
-    );
-}
-
-#[test]
-#[ignore]
-fn streaming_self_join_inner_ignored() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!(
-            "SELECT a.id, b.v, a.{ts} \
-             FROM src AS a JOIN src AS b ON a.id = b.id AND a.v = b.v"
-        ),
-        fake_src_stream_provider(),
-    );
-}
-
-#[test]
-fn streaming_join_subquery_branch() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_dim_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT src.id, src.v, j.name, src.{ts} \
-             FROM src JOIN (SELECT id, name FROM dim) AS j ON src.id = j.id"
-        ),
-        &p,
-    );
-}
-
-#[test]
-fn streaming_union_all_compatible_schemas() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_dim_stream_provider();
-    compile_streaming_select_body(
-        &format!(
-            "SELECT id, v, {ts} FROM src \
-             UNION ALL \
-             SELECT id, name AS v, {ts} FROM dim"
-        ),
-        p,
-    );
-}
-
-#[test]
-fn streaming_logical_group_by_two_keys_and_filter_agg() {
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, v, COUNT(*) AS c FROM src GROUP BY id, v",
-        &p,
-    );
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, SUM(id) FILTER (WHERE v IS NOT NULL) AS s FROM src GROUP BY id",
-        &p,
-    );
-}
-
-#[test]
-fn streaming_logical_more_builtin_aggregates() {
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, STDDEV_POP(CAST(id AS DOUBLE)), COVAR_SAMP(CAST(id AS DOUBLE), CAST(id AS DOUBLE)), \
-         COVAR_POP(CAST(id AS DOUBLE), CAST(id AS DOUBLE)) \
-         FROM src GROUP BY id",
-        &p,
-    );
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, CORR(CAST(id AS DOUBLE), CAST(id AS DOUBLE)) FROM src GROUP BY id",
-        &p,
-    );
-}
-
-#[test]
-fn streaming_logical_bit_and_bool_aggregates() {
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, BIT_AND(id), BIT_OR(id), BIT_XOR(id) FROM src GROUP BY id",
-        &p,
-    );
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, BOOL_AND(id > 0), BOOL_OR(id < 100000) FROM src GROUP BY id",
-        &p,
-    );
-}
-
-#[test]
-fn streaming_logical_array_agg_and_list_union() {
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        "SELECT id, ARRAY_AGG(v) FROM src GROUP BY id",
-        &p,
-    );
-}
-
-#[test]
-fn streaming_logical_scalar_funcs_on_projection() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!(
-            "SELECT ABS(id), POWER(CAST(id AS DOUBLE), 2.0), UPPER(v), LOWER(v), BTRIM(v), \
-             CHARACTER_LENGTH(v), CONCAT(v, '_x'), {ts} FROM src"
-        ),
-        fake_src_stream_provider(),
-    );
-}
-
-#[test]
-fn streaming_logical_nullif_regexp() {
-    let ts = TIMESTAMP_FIELD;
-    compile_streaming_select_body(
-        &format!(
-            "SELECT id, NULLIF(v, ''), REGEXP_LIKE(v, '^a'), {ts} FROM src WHERE v IS NOT NULL OR id = 0"
-        ),
-        fake_src_stream_provider(),
-    );
-}
-
-#[test]
-fn streaming_window_first_value_over_tumbled_subquery() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT FIRST_VALUE(id) OVER (PARTITION BY w ORDER BY max_evt) AS fv, w, id \
-             FROM ( \
-               SELECT tumble(INTERVAL '1' MINUTE) AS w, id, MAX({ts}) AS max_evt \
-               FROM src GROUP BY tumble(INTERVAL '1' MINUTE), id \
-             ) AS x"
-        ),
-        &p,
-    );
-}
-
-#[test]
-fn streaming_window_lag_over_tumbled_subquery() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT LAG(id, 1) OVER (PARTITION BY w ORDER BY max_evt) AS prev_id, w, id \
-             FROM ( \
-               SELECT tumble(INTERVAL '2' MINUTE) AS w, id, MAX({ts}) AS max_evt \
-               FROM src GROUP BY tumble(INTERVAL '2' MINUTE), id \
-             ) AS x"
-        ),
-        &p,
-    );
-}
-
-#[test]
-fn streaming_window_lead_over_tumbled_subquery() {
-    let ts = TIMESTAMP_FIELD;
-    let p = fake_src_stream_provider();
-    assert_streaming_select_logical_rewrites(
-        &format!(
-            "SELECT LEAD(id, 1) OVER (PARTITION BY w ORDER BY max_evt) AS next_id, w \
-             FROM ( \
-               SELECT tumble(INTERVAL '2' MINUTE) AS w, id, MAX({ts}) AS max_evt \
-               FROM src GROUP BY tumble(INTERVAL '2' MINUTE), id \
-             ) AS x"
-        ),
-        &p,
-    );
-}
-
-#[test]
-fn streaming_logical_full_outer_join_errors() {
-    let p = fake_src_dim_stream_provider();
-    assert_streaming_select_logical_rewrite_err_contains(
-        "SELECT src.id, dim.name FROM src FULL OUTER JOIN dim ON src.id = dim.id",
-        &p,
-        "inner",
-    );
-}
-
-#[test]
-#[should_panic(expected = "Non-inner")]
-fn streaming_left_join_errors_without_window() {
-    let ts = TIMESTAMP_FIELD;
-    let sql = format!(
-        "CREATE STREAMING TABLE sink_left WITH ('connector'='kafka') AS \
-         SELECT src.id, dim.name, src.{ts} FROM src LEFT JOIN dim ON src.id = dim.id"
-    );
-    let stmts = parse_sql(&sql).unwrap();
-    let _ = Coordinator::new().compile_plan(stmts[0].as_ref(), fake_src_dim_stream_provider());
-}
diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs
index 224562ea..79fe9a05 100644
--- a/src/sql/logical_node/logical/operator_name.rs
+++ b/src/sql/logical_node/logical/operator_name.rs
@@ -20,8 +20,8 @@ use crate::sql::common::constants::operator_feature;
 #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumString, Display, IntoStaticStr)]
 pub enum OperatorName {
     ExpressionWatermark,
-    ArrowValue,
-    ArrowKey,
+    Value,
+    KeyBy,
     Projection,
     AsyncUdf,
     Join,
@@ -32,7 +32,6 @@ pub enum OperatorName {
     SlidingWindowAggregate,
     SessionWindowAggregate,
     UpdatingAggregate,
-    KeyBy,
     ConnectorSource,
     ConnectorSink,
 }
@@ -46,7 +45,7 @@ impl OperatorName {
 
     pub fn feature_tag(self) -> Option<&'static str> {
         match self {
-            Self::ExpressionWatermark | Self::ArrowValue | Self::ArrowKey | Self::Projection => None,
+            Self::ExpressionWatermark | Self::Value | Self::KeyBy | Self::Projection => None,
             Self::AsyncUdf => Some(operator_feature::ASYNC_UDF),
             Self::Join => Some(operator_feature::JOIN_WITH_EXPIRATION),
             Self::InstantJoin => Some(operator_feature::WINDOWED_JOIN),
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index 5cb53705..c13f1c4a 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -27,5 +27,3 @@ pub use schema::{StreamPlanningContext, StreamSchemaProvider};
 pub use parse::parse_sql;
 pub use analysis::rewrite_plan;
 
-#[cfg(test)]
-mod frontend_sql_coverage_tests;

From 53655b4f52d5ee8b1d7f8c2865844be0deb6a1ad Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Mon, 30 Mar 2026 23:17:45 +0800
Subject: [PATCH 32/44] update

---
 src/coordinator/execution/executor.rs |  1 +
 src/coordinator/runtime_context.rs    | 12 ++++++------
 src/sql/schema/catalog_ddl.rs         | 11 +++++++++--
 src/sql/schema/schema_provider.rs     |  2 ++
 src/storage/stream_catalog/manager.rs | 22 ++++++++++++++++++----
 5 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 8329d498..6f7c5afb 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -293,6 +293,7 @@ impl PlanVisitor for Executor {
                     let schema = Arc::new(source_table.produce_physical_schema());
                     let table_instance = StreamTable::Source {
                         name: table_name.clone(),
+                        connector: source_table.connector().to_string(),
                         schema,
                         event_time_field: source_table.event_time_field().map(str::to_string),
                         watermark_field: source_table.stream_catalog_watermark_field(),
diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs
index af9a9ddf..91f4100c 100644
--- a/src/coordinator/runtime_context.rs
+++ b/src/coordinator/runtime_context.rs
@@ -69,6 +69,7 @@ impl CoordinatorRuntimeContext {
         for (name, stream) in provider.tables.streams.clone() {
             let StreamTable::Source {
                 name: source_name,
+                connector,
                 schema,
                 event_time_field,
                 watermark_field,
@@ -77,12 +78,11 @@ impl CoordinatorRuntimeContext {
             else {
                 continue;
             };
-
-            let connector = with_options
-                .get("connector")
-                .cloned()
-                .unwrap_or_else(|| "stream_catalog".to_string());
-            let mut source = SourceTable::new(source_name.clone(), connector, ConnectionType::Source);
+            let mut source = SourceTable::new(
+                source_name.clone(),
+                connector.clone(),
+                ConnectionType::Source,
+            );
             source.schema_specs = schema
                 .fields()
                 .iter()
diff --git a/src/sql/schema/catalog_ddl.rs b/src/sql/schema/catalog_ddl.rs
index 2eea78f9..0828a45d 100644
--- a/src/sql/schema/catalog_ddl.rs
+++ b/src/sql/schema/catalog_ddl.rs
@@ -113,13 +113,15 @@ fn pipeline_summary_short(program: &LogicalProgram) -> String {
 pub fn stream_table_row_detail(table: &StreamTable) -> String {
     match table {
         StreamTable::Source {
+            connector,
             event_time_field,
             watermark_field,
             with_options,
             ..
         } => {
             format!(
-                "event_time={:?}, watermark={:?}, with_options={}",
+                "connector={}, event_time={:?}, watermark={:?}, with_options={}",
+                connector,
                 event_time_field,
                 watermark_field,
                 with_options.len()
@@ -165,6 +167,7 @@ pub fn show_create_stream_table(table: &StreamTable) -> String {
     match table {
         StreamTable::Source {
             name,
+            connector,
             schema,
             event_time_field,
             watermark_field,
@@ -178,7 +181,11 @@ pub fn show_create_stream_table(table: &StreamTable) -> String {
             if let Some(w) = watermark_field {
                 ddl.push_str(&format!("/* WATERMARK: {w} */\n"));
             }
-            ddl.push_str(&format_with_clause(with_options));
+            let mut merged_opts = with_options.clone();
+            merged_opts
+                .entry("connector".to_string())
+                .or_insert_with(|| connector.clone());
+            ddl.push_str(&format_with_clause(&merged_opts));
             ddl
         }
         StreamTable::Sink { name, program } => {
diff --git a/src/sql/schema/schema_provider.rs b/src/sql/schema/schema_provider.rs
index f93aead1..bbe03079 100644
--- a/src/sql/schema/schema_provider.rs
+++ b/src/sql/schema/schema_provider.rs
@@ -42,6 +42,7 @@ fn object_name(s: impl Into<String>) -> ObjectName {
 pub enum StreamTable {
     Source {
         name: String,
+        connector: String,
         schema: Arc<Schema>,
         event_time_field: Option<String>,
         watermark_field: Option<String>,
@@ -201,6 +202,7 @@ impl StreamPlanningContext {
     ) {
         self.register_stream_table(StreamTable::Source {
             name,
+            connector: "stream_catalog".to_string(),
             schema,
             event_time_field,
             watermark_field,
diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs
index 5f40240a..fc7c5b2f 100644
--- a/src/storage/stream_catalog/manager.rs
+++ b/src/storage/stream_catalog/manager.rs
@@ -161,6 +161,7 @@ impl CatalogManager {
     fn encode_table(&self, table: &StreamTable) -> DFResult<pb::TableDefinition> {
         let table_type = match table {
             StreamTable::Source {
+                connector,
                 schema,
                 event_time_field,
                 watermark_field,
@@ -173,10 +174,15 @@ impl CatalogManager {
                     .as_ref()
                     .filter(|w| *w != sql_field::COMPUTED_WATERMARK)
                     .cloned(),
-                with_options: with_options
-                    .iter()
-                    .map(|(k, v)| (k.clone(), v.clone()))
-                    .collect(),
+                with_options: {
+                    let mut opts: std::collections::BTreeMap<String, String> = with_options
+                        .iter()
+                        .map(|(k, v)| (k.clone(), v.clone()))
+                        .collect();
+                    opts.entry("connector".to_string())
+                        .or_insert_with(|| connector.clone());
+                    opts.into_iter().collect()
+                },
             }),
             StreamTable::Sink { program, .. } => {
                 let logical_program_bincode = CatalogCodec::encode_logical_program(program)?;
@@ -208,6 +214,11 @@ impl CatalogManager {
         match table_type {
             table_definition::TableType::Source(src) => Ok(StreamTable::Source {
                 name: proto_def.table_name,
+                connector: src
+                    .with_options
+                    .get("connector")
+                    .cloned()
+                    .unwrap_or_else(|| "stream_catalog".to_string()),
                 schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?,
                 event_time_field: src.event_time_field,
                 watermark_field: src
@@ -307,6 +318,7 @@ mod tests {
 
         let table = StreamTable::Source {
             name: "t1".into(),
+            connector: "stream_catalog".into(),
             schema: Arc::clone(&schema),
             event_time_field: Some("ts".into()),
             watermark_field: None,
@@ -344,6 +356,7 @@ mod tests {
 
         let table = StreamTable::Source {
             name: "t_with".into(),
+            connector: "kafka".into(),
             schema,
             event_time_field: None,
             watermark_field: None,
@@ -369,6 +382,7 @@ mod tests {
 
         mgr.add_table(StreamTable::Source {
             name: "t_drop".into(),
+            connector: "stream_catalog".into(),
             schema,
             event_time_field: None,
             watermark_field: None,

From 9bed7e730a430e2e8752216c6fe1a05021d4ed7b Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Mon, 30 Mar 2026 23:39:34 +0800
Subject: [PATCH 33/44] feat(runtime): register generic ConnectorSource/ConnectorSink operator bridges

---
 .../streaming/factory/operator_factory.rs     | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs
index eb2afd9b..d11a1555 100644
--- a/src/runtime/streaming/factory/operator_factory.rs
+++ b/src/runtime/streaming/factory/operator_factory.rs
@@ -18,6 +18,9 @@ use std::sync::Arc;
 
 use super::operator_constructor::OperatorConstructor;
 use crate::runtime::streaming::api::operator::ConstructedOperator;
+use crate::runtime::streaming::factory::connector::{
+    ConnectorSinkDispatcher, ConnectorSourceDispatcher,
+};
 use crate::runtime::streaming::factory::global::Registry;
 use crate::runtime::streaming::operators::grouping::IncrementalAggregatingConstructor;
 use crate::runtime::streaming::operators::joins::{
@@ -109,6 +112,8 @@ impl OperatorFactory {
 
         self.register_named(OperatorName::Projection, Box::new(ProjectionBridge));
         self.register_named(OperatorName::Value, Box::new(ValueBridge));
+        self.register_named(OperatorName::ConnectorSource, Box::new(ConnectorSourceBridge));
+        self.register_named(OperatorName::ConnectorSink, Box::new(ConnectorSinkBridge));
 
         crate::runtime::streaming::factory::register_builtin_connectors(self);
         crate::runtime::streaming::factory::register_kafka_connector_plugins(self);
@@ -232,6 +237,22 @@ impl OperatorConstructor for ValueBridge {
     }
 }
 
+/// Bridge registered for `OperatorName::ConnectorSource`; forwards `config`/`registry` to `ConnectorSourceDispatcher`.
+struct ConnectorSourceBridge;
+impl OperatorConstructor for ConnectorSourceBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        ConnectorSourceDispatcher.with_config(config, registry)
+    }
+}
+
+/// Bridge registered for `OperatorName::ConnectorSink`; forwards `config`/`registry` to `ConnectorSinkDispatcher`.
+struct ConnectorSinkBridge;
+impl OperatorConstructor for ConnectorSinkBridge {
+    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        ConnectorSinkDispatcher.with_config(config, registry)
+    }
+}
+
 struct ProjectionExecutionConstructor;
 impl ProjectionExecutionConstructor {
     fn with_config(

From 174ebaa6c49bf12ca5f915df3c2d5434a7e2213b Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Tue, 31 Mar 2026 00:34:33 +0800
Subject: [PATCH 34/44] refactor(catalog): persist catalog tables as connector/lookup entries and drop the in-memory cache

---
 protocol/proto/storage.proto                  |  20 +-
 .../dataset/show_catalog_tables_result.rs     |  20 +-
 src/coordinator/execution/executor.rs         |  42 +-
 src/coordinator/runtime_context.rs            |  49 +-
 src/sql/schema/catalog_ddl.rs                 |  47 ++
 src/sql/schema/mod.rs                         |   5 +-
 src/storage/stream_catalog/manager.rs         | 465 ++++++++++--------
 7 files changed, 346 insertions(+), 302 deletions(-)

diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto
index 5ad09d38..6c645e75 100644
--- a/protocol/proto/storage.proto
+++ b/protocol/proto/storage.proto
@@ -10,30 +10,34 @@ syntax = "proto3";
 package function_stream.storage;
 
 // =============================================================================
-// Stream catalog (coordinator stream tables: source / sink)
+// Catalog table storage (coordinator SQL catalog)
 // =============================================================================
 
-// Top-level persisted record for one stream table.
+// Top-level persisted record for one catalog table.
 message TableDefinition {
   string table_name = 1;
   int64 updated_at_millis = 2;
   oneof table_type {
-    StreamSource source = 3;
-    StreamSink sink = 4;
+    // Connector-backed ingestion/egress table definition.
+    CatalogSourceTable connector_table = 3;
+    // Connector-backed lookup table definition.
+    CatalogSourceTable lookup_table = 5;
   }
+  // Tag 4 and the name `sink` belonged to the removed `StreamSink sink` entry; keep them
+  // reserved so they can never be reused with a different meaning.
+  reserved 4;
+  reserved "sink";
 }
 
-message StreamSource {
+// Shared connector-backed table payload for connector/lookup entries.
+message CatalogSourceTable {
   bytes arrow_schema_ipc = 1;
   optional string event_time_field = 2;
   optional string watermark_field = 3;
   // Original CREATE TABLE ... WITH ('k'='v', ...) pairs (best-effort; keys sorted in DDL).
   map<string, string> with_options = 4;
-}
-
-message StreamSink {
-  bytes arrow_schema_ipc = 1;
-  bytes logical_program_bincode = 2;
+  // Canonical connector identifier (e.g. kafka, postgres-cdc).
+  string connector = 5;
 }
 
 // =============================================================================
diff --git a/src/coordinator/dataset/show_catalog_tables_result.rs b/src/coordinator/dataset/show_catalog_tables_result.rs
index 77792517..74a8cd2d 100644
--- a/src/coordinator/dataset/show_catalog_tables_result.rs
+++ b/src/coordinator/dataset/show_catalog_tables_result.rs
@@ -14,9 +14,11 @@ use std::sync::Arc;
 
 use arrow_array::{Int32Array, StringArray};
 use arrow_schema::{DataType, Field, Schema};
+use datafusion::arrow::datatypes::Schema as DfSchema;
 
 use super::DataSet;
-use crate::sql::schema::{schema_columns_one_line, stream_table_row_detail, StreamTable};
+use crate::sql::schema::table::Table as CatalogTable;
+use crate::sql::schema::{catalog_table_row_detail, schema_columns_one_line};
 
 #[derive(Clone, Debug)]
 pub struct ShowCatalogTablesResult {
@@ -28,7 +30,7 @@ pub struct ShowCatalogTablesResult {
 }
 
 impl ShowCatalogTablesResult {
-    pub fn from_tables(tables: &[Arc<StreamTable>]) -> Self {
+    pub fn from_tables(tables: &[Arc<CatalogTable>]) -> Self {
         let mut names = Vec::with_capacity(tables.len());
         let mut kinds = Vec::with_capacity(tables.len());
         let mut column_counts = Vec::with_capacity(tables.len());
@@ -36,17 +38,23 @@ impl ShowCatalogTablesResult {
         let mut details = Vec::with_capacity(tables.len());
 
         for t in tables {
-            let schema = t.schema();
+            let schema = match t.as_ref() {
+                CatalogTable::ConnectorTable(source) | CatalogTable::LookupTable(source) => {
+                    source.produce_physical_schema()
+                }
+                CatalogTable::TableFromQuery { .. } => DfSchema::new(t.get_fields()),
+            };
             let ncols = schema.fields().len() as i32;
             names.push(t.name().to_string());
             kinds.push(match t.as_ref() {
-                StreamTable::Source { .. } => "SOURCE",
-                StreamTable::Sink { .. } => "SINK",
+                CatalogTable::ConnectorTable(_) => "SOURCE",
+                CatalogTable::LookupTable(_) => "LOOKUP",
+                CatalogTable::TableFromQuery { .. } => "QUERY",
             }
             .to_string());
             column_counts.push(ncols);
             schema_lines.push(schema_columns_one_line(&schema));
-            details.push(stream_table_row_detail(t.as_ref()));
+            details.push(catalog_table_row_detail(t.as_ref()));
         }
 
         Self {
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 6f7c5afb..5372ed33 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -29,7 +29,8 @@ use crate::coordinator::plan::{
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::streaming::job::JobManager;
 use crate::runtime::taskexecutor::TaskManager;
-use crate::sql::schema::{show_create_stream_table, StreamTable};
+use crate::sql::schema::table::Table as CatalogTable;
+use crate::sql::schema::show_create_catalog_table;
 use crate::storage::stream_catalog::CatalogManager;
 
 #[derive(Error, Debug)]
@@ -201,7 +202,10 @@ impl PlanVisitor for Executor {
         _plan: &ShowCatalogTablesPlan,
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
-        let tables = self.catalog_manager.list_stream_tables();
+        let tables = match self.catalog_manager.list_catalog_tables() {
+            Ok(tables) => tables,
+            Err(e) => return PlanVisitorResult::Execute(Err(ExecuteError::Internal(e.to_string()))),
+        };
         let n = tables.len();
         let result = ExecuteResult::ok_with_data(
             format!("{n} stream catalog table(s)"),
@@ -218,14 +222,15 @@ impl PlanVisitor for Executor {
         let execute = || -> Result<ExecuteResult, ExecuteError> {
             let t = self
                 .catalog_manager
-                .get_stream_table(&plan.table_name)
+                .get_catalog_table(&plan.table_name)
+                .map_err(|e| ExecuteError::Internal(e.to_string()))?
                 .ok_or_else(|| {
                     ExecuteError::Validation(format!(
                         "Table '{}' not found in stream catalog",
                         plan.table_name
                     ))
                 })?;
-            let ddl = show_create_stream_table(t.as_ref());
+            let ddl = show_create_catalog_table(t.as_ref());
             Ok(ExecuteResult::ok_with_data(
                 format!("SHOW CREATE TABLE {}", plan.table_name),
                 ShowCreateTableResult::new(plan.table_name.clone(), ddl),
@@ -284,21 +289,13 @@ impl PlanVisitor for Executor {
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
         let execute = || -> Result<ExecuteResult, ExecuteError> {
-            let (table_name, if_not_exists, stream_table) = match &plan.body {
+            let (table_name, if_not_exists, catalog_table) = match &plan.body {
                 CreateTablePlanBody::ConnectorSource {
                     source_table,
                     if_not_exists,
                 } => {
                     let table_name = source_table.name().to_string();
-                    let schema = Arc::new(source_table.produce_physical_schema());
-                    let table_instance = StreamTable::Source {
-                        name: table_name.clone(),
-                        connector: source_table.connector().to_string(),
-                        schema,
-                        event_time_field: source_table.event_time_field().map(str::to_string),
-                        watermark_field: source_table.stream_catalog_watermark_field(),
-                        with_options: source_table.catalog_with_options().clone(),
-                    };
+                    let table_instance = CatalogTable::ConnectorTable(source_table.clone());
                     (table_name, *if_not_exists, table_instance)
                 }
                 CreateTablePlanBody::DataFusion(_) => {
@@ -309,14 +306,14 @@ impl PlanVisitor for Executor {
                 }
             };
 
-            if if_not_exists && self.catalog_manager.has_stream_table(&table_name) {
+            if if_not_exists && self.catalog_manager.has_catalog_table(&table_name) {
                 return Ok(ExecuteResult::ok(format!(
                     "Table '{table_name}' already exists (skipped)"
                 )));
             }
 
             self.catalog_manager
-                .add_table(stream_table)
+                .add_catalog_table(catalog_table)
                 .map_err(|e| {
                     ExecuteError::Internal(format!(
                         "Failed to register connector source table '{}': {}",
@@ -338,15 +335,6 @@ impl PlanVisitor for Executor {
         _context: &PlanVisitorContext,
     ) -> PlanVisitorResult {
         let execute = || -> Result<ExecuteResult, ExecuteError> {
-            let sink = StreamTable::Sink {
-                name: plan.name.clone(),
-                program: plan.program.clone(),
-            };
-
-            self.catalog_manager
-                .add_table(sink)
-                .map_err(|e| ExecuteError::Internal(e.to_string()))?;
-
             let fs_program: FsProgram = plan.program.clone().into();
             let job_manager: Arc<JobManager> = Arc::clone(&self.job_manager);
 
@@ -359,7 +347,7 @@ impl PlanVisitor for Executor {
             info!(
                 job_id = %job_id,
                 table = %plan.name,
-                "Streaming table registered and job submitted"
+                "Streaming job submitted"
             );
 
             Ok(ExecuteResult::ok_with_data(
@@ -398,7 +386,7 @@ impl PlanVisitor for Executor {
     ) -> PlanVisitorResult {
         let execute = || -> Result<ExecuteResult, ExecuteError> {
             self.catalog_manager
-                .drop_table(&plan.table_name, plan.if_exists)
+                .drop_catalog_table(&plan.table_name, plan.if_exists)
                 .map_err(|e| ExecuteError::Internal(e.to_string()))?;
 
             Ok(ExecuteResult::ok(format!(
diff --git a/src/coordinator/runtime_context.rs b/src/coordinator/runtime_context.rs
index 91f4100c..5d671b98 100644
--- a/src/coordinator/runtime_context.rs
+++ b/src/coordinator/runtime_context.rs
@@ -18,11 +18,7 @@ use anyhow::Result;
 
 use crate::runtime::streaming::job::JobManager;
 use crate::runtime::taskexecutor::TaskManager;
-use crate::sql::schema::column_descriptor::ColumnDescriptor;
-use crate::sql::schema::connection_type::ConnectionType;
-use crate::sql::schema::source_table::SourceTable;
-use crate::sql::schema::table::Table as CatalogTable;
-use crate::sql::schema::{StreamSchemaProvider, StreamTable};
+use crate::sql::schema::StreamSchemaProvider;
 use crate::storage::stream_catalog::CatalogManager;
 
 /// Dependencies shared by analyze / plan / execute, analogous to installing globals in
@@ -32,7 +28,6 @@ pub struct CoordinatorRuntimeContext {
     pub task_manager: Arc<TaskManager>,
     pub catalog_manager: Arc<CatalogManager>,
     pub job_manager: Arc<JobManager>,
-    planning_schema_override: Option<StreamSchemaProvider>,
 }
 
 impl CoordinatorRuntimeContext {
@@ -44,7 +39,6 @@ impl CoordinatorRuntimeContext {
                 .map_err(|e| anyhow::anyhow!("Failed to get CatalogManager: {}", e))?,
             job_manager: JobManager::global()
                 .map_err(|e| anyhow::anyhow!("Failed to get JobManager: {}", e))?,
-            planning_schema_override: None,
         })
     }
 
@@ -52,53 +46,16 @@ impl CoordinatorRuntimeContext {
         task_manager: Arc<TaskManager>,
         catalog_manager: Arc<CatalogManager>,
         job_manager: Arc<JobManager>,
-        planning_schema_override: Option<StreamSchemaProvider>,
     ) -> Self {
         Self {
             task_manager,
             catalog_manager,
             job_manager,
-            planning_schema_override,
         }
     }
 
-    /// Schema provider for [`LogicalPlanVisitor`] / [`SqlToRel`]: override if set, else catalog snapshot.
+    /// Schema provider for [`LogicalPlanVisitor`] / [`SqlToRel`].
     pub fn planning_schema_provider(&self) -> StreamSchemaProvider {
-        let mut provider = self.catalog_manager.acquire_planning_context();
-
-        for (name, stream) in provider.tables.streams.clone() {
-            let StreamTable::Source {
-                name: source_name,
-                connector,
-                schema,
-                event_time_field,
-                watermark_field,
-                with_options,
-            } = stream.as_ref()
-            else {
-                continue;
-            };
-            let mut source = SourceTable::new(
-                source_name.clone(),
-                connector.clone(),
-                ConnectionType::Source,
-            );
-            source.schema_specs = schema
-                .fields()
-                .iter()
-                .map(|f| ColumnDescriptor::new_physical((**f).clone()))
-                .collect();
-            source.inferred_fields = Some(schema.fields().iter().cloned().collect());
-            source.temporal_config.event_column = event_time_field.clone();
-            source.temporal_config.watermark_strategy_column = watermark_field.clone();
-            source.catalog_with_options = with_options.clone();
-
-            provider
-                .tables
-                .catalogs
-                .insert(name, Arc::new(CatalogTable::ConnectorTable(source)));
-        }
-
-        provider
+        self.catalog_manager.acquire_planning_context()
     }
 }
diff --git a/src/sql/schema/catalog_ddl.rs b/src/sql/schema/catalog_ddl.rs
index 0828a45d..3729c99c 100644
--- a/src/sql/schema/catalog_ddl.rs
+++ b/src/sql/schema/catalog_ddl.rs
@@ -17,6 +17,7 @@ use std::collections::BTreeMap;
 use datafusion::arrow::datatypes::{DataType, TimeUnit};
 
 use super::schema_provider::StreamTable;
+use super::table::Table as CatalogTable;
 use crate::sql::logical_node::logical::LogicalProgram;
 
 fn data_type_sql(dt: &DataType) -> String {
@@ -204,3 +205,49 @@ pub fn show_create_stream_table(table: &StreamTable) -> String {
         }
     }
 }
+
+/// Builds the detail column of the `SHOW TABLES` result grid for one persisted catalog table.
+///
+/// Connector and lookup tables are rendered through one shared format string, so the two
+/// kinds can differ only in their `kind=` tag. Query-backed tables report just
+/// `kind=query`.
+pub fn catalog_table_row_detail(table: &CatalogTable) -> String {
+    let (kind, source) = match table {
+        CatalogTable::ConnectorTable(source) => ("connector", source),
+        CatalogTable::LookupTable(source) => ("lookup", source),
+        CatalogTable::TableFromQuery { .. } => return "kind=query".to_string(),
+    };
+    format!(
+        "kind={kind}, connector={}, event_time={:?}, watermark={:?}, with_options={}",
+        source.connector(),
+        source.event_time_field(),
+        source.temporal_config.watermark_strategy_column,
+        // Only the number of WITH options is shown, not their contents.
+        source.catalog_with_options().len()
+    )
+}
+
+/// Builds the human-readable `SHOW CREATE TABLE` text for one persisted catalog table.
+pub fn show_create_catalog_table(table: &CatalogTable) -> String {
+    let source = match table {
+        CatalogTable::ConnectorTable(source) | CatalogTable::LookupTable(source) => source,
+        CatalogTable::TableFromQuery { name, .. } => {
+            return format!(
+                "CREATE TABLE {name} AS SELECT ...;\n/* logical query text is not persisted */\n"
+            );
+        }
+    };
+    let column_lines = format_columns(&source.produce_physical_schema()).join(",\n");
+    let mut ddl = format!("CREATE TABLE {} (\n{}\n)", source.name(), column_lines);
+    if let Some(e) = source.event_time_field() {
+        ddl.push_str(&format!("\n/* EVENT TIME COLUMN: {e} */\n"));
+    }
+    if let Some(w) = source.temporal_config.watermark_strategy_column.as_deref() {
+        ddl.push_str(&format!("/* WATERMARK: {w} */\n"));
+    }
+    let mut opts = source.catalog_with_options().clone();
+    opts.entry("connector".to_string())
+        .or_insert_with(|| source.connector().to_string());
+    ddl.push_str(&format_with_clause(&opts));
+    ddl
+}
diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs
index a4aa3747..b3ec5e09 100644
--- a/src/sql/schema/mod.rs
+++ b/src/sql/schema/mod.rs
@@ -23,7 +23,10 @@ pub mod table_role;
 pub mod temporal_pipeline_config;
 pub mod utils;
 
-pub use catalog_ddl::{schema_columns_one_line, show_create_stream_table, stream_table_row_detail};
+pub use catalog_ddl::{
+    catalog_table_row_detail, schema_columns_one_line, show_create_catalog_table,
+    show_create_stream_table, stream_table_row_detail,
+};
 pub use column_descriptor::ColumnDescriptor;
 pub use connection_type::ConnectionType;
 pub use source_table::{SourceOperator, SourceTable};
diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs
index fc7c5b2f..fc6a16f8 100644
--- a/src/storage/stream_catalog/manager.rs
+++ b/src/storage/stream_catalog/manager.rs
@@ -10,44 +10,36 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::HashMap;
 use std::sync::{Arc, OnceLock};
 
 use anyhow::{anyhow, bail, Context};
-use datafusion::arrow::datatypes::Schema;
 use datafusion::common::{internal_err, plan_err, Result as DFResult};
-use parking_lot::RwLock;
 use prost::Message;
 use protocol::storage::{self as pb, table_definition};
 use tracing::{info, warn};
 use unicase::UniCase;
 
 use crate::sql::common::constants::sql_field;
-use crate::sql::schema::{ObjectName, StreamPlanningContext, StreamTable};
+use crate::sql::schema::column_descriptor::ColumnDescriptor;
+use crate::sql::schema::connection_type::ConnectionType;
+use crate::sql::schema::source_table::SourceTable;
+use crate::sql::schema::table::Table as CatalogTable;
+use crate::sql::schema::{StreamPlanningContext, StreamTable};
 
 use super::codec::CatalogCodec;
 use super::meta_store::MetaStore;
 
 const CATALOG_KEY_PREFIX: &str = "catalog:stream_table:";
 
-#[derive(Clone, Default, Debug)]
-pub struct StreamTableCatalogCache {
-    pub streams: HashMap<ObjectName, Arc<StreamTable>>,
-}
-
 pub struct CatalogManager {
     store: Arc<dyn MetaStore>,
-    cache: RwLock<StreamTableCatalogCache>,
 }
 
 static GLOBAL_CATALOG: OnceLock<Arc<CatalogManager>> = OnceLock::new();
 
 impl CatalogManager {
     pub fn new(store: Arc<dyn MetaStore>) -> Self {
-        Self {
-            store,
-            cache: RwLock::new(StreamTableCatalogCache::default()),
-        }
+        Self { store }
     }
 
     pub fn init_global_in_memory() -> anyhow::Result<()> {
@@ -80,120 +72,185 @@ impl CatalogManager {
         format!("{CATALOG_KEY_PREFIX}{}", table_name.to_lowercase())
     }
 
-    pub fn add_table(&self, table: StreamTable) -> DFResult<()> {
-        let proto_def = self.encode_table(&table)?;
+    pub fn add_catalog_table(&self, table: CatalogTable) -> DFResult<()> {
+        let proto_def = self.encode_catalog_table(&table)?;
         let payload = proto_def.encode_to_vec();
         let key = Self::build_store_key(table.name());
 
         self.store.put(&key, payload)?;
-
-        let object_name = UniCase::new(table.name().to_string());
-        self.cache.write().streams.insert(object_name, Arc::new(table));
-
         Ok(())
     }
 
-    pub fn has_stream_table(&self, name: &str) -> bool {
-        let object_name = UniCase::new(name.to_string());
-        self.cache.read().streams.contains_key(&object_name)
+    pub fn has_catalog_table(&self, name: &str) -> bool {
+        let key = Self::build_store_key(name);
+        self.store.get(&key).ok().flatten().is_some()
     }
 
-    pub fn drop_table(&self, table_name: &str, if_exists: bool) -> DFResult<()> {
-        let object_name = UniCase::new(table_name.to_string());
-
-        let exists = self.cache.read().streams.contains_key(&object_name);
-
+    pub fn drop_catalog_table(&self, table_name: &str, if_exists: bool) -> DFResult<()> {
+        let key = Self::build_store_key(table_name);
+        let exists = self.store.get(&key)?.is_some();
         if !exists {
             if if_exists {
                 return Ok(());
             }
             return plan_err!("Table '{table_name}' not found");
         }
-
-        let key = Self::build_store_key(table_name);
         self.store.delete(&key)?;
-
-        self.cache.write().streams.remove(&object_name);
-
         Ok(())
     }
 
     pub fn restore_from_store(&self) -> DFResult<()> {
-        let records = self.store.scan_prefix(CATALOG_KEY_PREFIX)?;
-        let mut restored = StreamTableCatalogCache::default();
-
-        for (_key, payload) in records {
-            let proto_def = pb::TableDefinition::decode(payload.as_slice()).map_err(|e| {
-                datafusion::common::DataFusionError::Execution(format!(
-                    "Failed to decode stream catalog protobuf: {e}"
-                ))
-            })?;
-
-            let table = self.decode_table(proto_def)?;
-            let object_name = UniCase::new(table.name().to_string());
-            restored.streams.insert(object_name, Arc::new(table));
-        }
-
-        *self.cache.write() = restored;
-
+        // No-op by design: the catalog is read-through from storage.
         Ok(())
     }
 
     pub fn acquire_planning_context(&self) -> StreamPlanningContext {
         let mut ctx = StreamPlanningContext::new();
-        ctx.tables.streams = self.cache.read().streams.clone();
+        let catalogs = self.load_catalog_tables_map().unwrap_or_default();
+        ctx.tables.catalogs = catalogs.clone();
+
+        for (name, table) in catalogs {
+            let source = match table.as_ref() {
+                CatalogTable::ConnectorTable(s) | CatalogTable::LookupTable(s) => s,
+                CatalogTable::TableFromQuery { .. } => continue,
+            };
+
+            let schema = Arc::new(source.produce_physical_schema());
+            ctx.tables.streams.insert(
+                name,
+                Arc::new(StreamTable::Source {
+                    name: source.name().to_string(),
+                    connector: source.connector().to_string(),
+                    schema,
+                    event_time_field: source.event_time_field().map(str::to_string),
+                    watermark_field: source.stream_catalog_watermark_field(),
+                    with_options: source.catalog_with_options().clone(),
+                }),
+            );
+        }
         ctx
     }
 
-    /// All stream catalog entries (connector sources + streaming sinks), sorted by table name.
-    pub fn list_stream_tables(&self) -> Vec<Arc<StreamTable>> {
-        let guard = self.cache.read();
-        let mut out: Vec<Arc<StreamTable>> = guard.streams.values().cloned().collect();
+    /// All persisted catalog tables, sorted by table name.
+    pub fn list_catalog_tables(&self) -> DFResult<Vec<Arc<CatalogTable>>> {
+        let mut out: Vec<Arc<CatalogTable>> =
+            self.load_catalog_tables_map()?.into_values().collect();
         out.sort_by(|a, b| a.name().cmp(b.name()));
-        out
+        Ok(out)
     }
 
-    pub fn get_stream_table(&self, name: &str) -> Option<Arc<StreamTable>> {
+    pub fn get_catalog_table(&self, name: &str) -> DFResult<Option<Arc<CatalogTable>>> {
         let key = UniCase::new(name.to_string());
-        self.cache.read().streams.get(&key).cloned()
+        Ok(self.load_catalog_tables_map()?.get(&key).cloned())
     }
 
-    fn encode_table(&self, table: &StreamTable) -> DFResult<pb::TableDefinition> {
-        let table_type = match table {
+    pub fn add_table(&self, table: StreamTable) -> DFResult<()> {
+        match table {
             StreamTable::Source {
+                name,
                 connector,
                 schema,
                 event_time_field,
                 watermark_field,
                 with_options,
-                ..
-            } => table_definition::TableType::Source(pb::StreamSource {
-                arrow_schema_ipc: CatalogCodec::encode_schema(schema)?,
-                event_time_field: event_time_field.clone(),
-                watermark_field: watermark_field
-                    .as_ref()
-                    .filter(|w| *w != sql_field::COMPUTED_WATERMARK)
-                    .cloned(),
-                with_options: {
-                    let mut opts: std::collections::BTreeMap<String, String> = with_options
-                        .iter()
-                        .map(|(k, v)| (k.clone(), v.clone()))
-                        .collect();
-                    opts.entry("connector".to_string())
-                        .or_insert_with(|| connector.clone());
-                    opts.into_iter().collect()
-                },
-            }),
-            StreamTable::Sink { program, .. } => {
-                let logical_program_bincode = CatalogCodec::encode_logical_program(program)?;
-                let schema = program
-                    .egress_arrow_schema()
-                    .unwrap_or_else(|| Arc::new(Schema::empty()));
-                table_definition::TableType::Sink(pb::StreamSink {
-                    arrow_schema_ipc: CatalogCodec::encode_schema(&schema)?,
-                    logical_program_bincode,
-                })
+            } => {
+                let mut source = SourceTable::new(name, connector, ConnectionType::Source);
+                source.schema_specs = schema
+                    .fields()
+                    .iter()
+                    .map(|f| ColumnDescriptor::new_physical((**f).clone()))
+                    .collect();
+                source.inferred_fields = Some(schema.fields().iter().cloned().collect());
+                source.temporal_config.event_column = event_time_field;
+                source.temporal_config.watermark_strategy_column = watermark_field;
+                source.catalog_with_options = with_options;
+                self.add_catalog_table(CatalogTable::ConnectorTable(source))
+            }
+            StreamTable::Sink { name, .. } => plan_err!(
+                "Persisting streaming sink '{name}' in stream catalog is no longer supported"
+            ),
+        }
+    }
+
+    pub fn has_stream_table(&self, name: &str) -> bool {
+        self.has_catalog_table(name)
+    }
+
+    pub fn drop_table(&self, table_name: &str, if_exists: bool) -> DFResult<()> {
+        self.drop_catalog_table(table_name, if_exists)
+    }
+
+    pub fn list_stream_tables(&self) -> Vec<Arc<StreamTable>> {
+        self.list_catalog_tables()
+            .unwrap_or_default()
+            .into_iter()
+            .filter_map(|t| match t.as_ref() {
+                CatalogTable::ConnectorTable(s) | CatalogTable::LookupTable(s) => {
+                    Some(Arc::new(StreamTable::Source {
+                        name: s.name().to_string(),
+                        connector: s.connector().to_string(),
+                        schema: Arc::new(s.produce_physical_schema()),
+                        event_time_field: s.event_time_field().map(str::to_string),
+                        watermark_field: s.stream_catalog_watermark_field(),
+                        with_options: s.catalog_with_options().clone(),
+                    }))
+                }
+                CatalogTable::TableFromQuery { .. } => None,
+            })
+            .collect()
+    }
+
+    pub fn get_stream_table(&self, name: &str) -> Option<Arc<StreamTable>> {
+        self.get_catalog_table(name)
+            .ok()
+            .flatten()
+            .and_then(|t| match t.as_ref() {
+                CatalogTable::ConnectorTable(s) | CatalogTable::LookupTable(s) => {
+                    Some(Arc::new(StreamTable::Source {
+                        name: s.name().to_string(),
+                        connector: s.connector().to_string(),
+                        schema: Arc::new(s.produce_physical_schema()),
+                        event_time_field: s.event_time_field().map(str::to_string),
+                        watermark_field: s.stream_catalog_watermark_field(),
+                        with_options: s.catalog_with_options().clone(),
+                    }))
+                }
+                CatalogTable::TableFromQuery { .. } => None,
+            })
+    }
+
+    fn encode_catalog_table(&self, table: &CatalogTable) -> DFResult<pb::TableDefinition> {
+        let table_type = match table {
+            CatalogTable::ConnectorTable(source) | CatalogTable::LookupTable(source) => {
+                let mut opts = source.catalog_with_options().clone();
+                opts.entry("connector".to_string())
+                    .or_insert_with(|| source.connector().to_string());
+                if matches!(table, CatalogTable::LookupTable(_)) {
+                    table_definition::TableType::LookupTable(pb::CatalogSourceTable {
+                        arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new(
+                            source.produce_physical_schema(),
+                        ))?,
+                        event_time_field: source.event_time_field().map(str::to_string),
+                        watermark_field: source.stream_catalog_watermark_field(),
+                        with_options: opts.into_iter().collect(),
+                        connector: source.connector().to_string(),
+                    })
+                } else {
+                    table_definition::TableType::ConnectorTable(pb::CatalogSourceTable {
+                        arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new(
+                            source.produce_physical_schema(),
+                        ))?,
+                        event_time_field: source.event_time_field().map(str::to_string),
+                        watermark_field: source.stream_catalog_watermark_field(),
+                        with_options: opts.into_iter().collect(),
+                        connector: source.connector().to_string(),
+                    })
+                }
             }
+            CatalogTable::TableFromQuery { name, .. } => return plan_err!(
+                "Persisting query-defined table '{}' is not supported by stream catalog storage",
+                name
+            ),
         };
 
         Ok(pb::TableDefinition {
@@ -203,7 +260,43 @@ impl CatalogManager {
         })
     }
 
-    fn decode_table(&self, proto_def: pb::TableDefinition) -> DFResult<StreamTable> {
+    fn decode_catalog_source_table(
+        &self,
+        table_name: String,
+        source_row: pb::CatalogSourceTable,
+        as_lookup: bool,
+    ) -> DFResult<CatalogTable> {
+        let connector = if source_row.connector.is_empty() {
+            source_row
+                .with_options
+                .get("connector")
+                .cloned()
+                .unwrap_or_else(|| "stream_catalog".to_string())
+        } else {
+            source_row.connector.clone()
+        };
+        let mut source = SourceTable::new(table_name, connector, ConnectionType::Source);
+        let schema = CatalogCodec::decode_schema(&source_row.arrow_schema_ipc)?;
+        source.schema_specs = schema
+            .fields()
+            .iter()
+            .map(|f| ColumnDescriptor::new_physical((**f).clone()))
+            .collect();
+        source.inferred_fields = Some(schema.fields().iter().cloned().collect());
+        source.temporal_config.event_column = source_row.event_time_field;
+        source.temporal_config.watermark_strategy_column = source_row
+            .watermark_field
+            .filter(|w| w != sql_field::COMPUTED_WATERMARK);
+        source.catalog_with_options = source_row.with_options.into_iter().collect();
+
+        if as_lookup {
+            Ok(CatalogTable::LookupTable(source))
+        } else {
+            Ok(CatalogTable::ConnectorTable(source))
+        }
+    }
+
+    fn decode_catalog_table(&self, proto_def: pb::TableDefinition) -> DFResult<CatalogTable> {
         let Some(table_type) = proto_def.table_type else {
             return internal_err!(
                 "Corrupted catalog row: missing table_type for {}",
@@ -212,34 +305,47 @@ impl CatalogManager {
         };
 
         match table_type {
-            table_definition::TableType::Source(src) => Ok(StreamTable::Source {
-                name: proto_def.table_name,
-                connector: src
-                    .with_options
-                    .get("connector")
-                    .cloned()
-                    .unwrap_or_else(|| "stream_catalog".to_string()),
-                schema: CatalogCodec::decode_schema(&src.arrow_schema_ipc)?,
-                event_time_field: src.event_time_field,
-                watermark_field: src
-                    .watermark_field
-                    .filter(|w| w != sql_field::COMPUTED_WATERMARK),
-                with_options: src.with_options.into_iter().collect(),
-            }),
-            table_definition::TableType::Sink(sink) => {
-                if sink.logical_program_bincode.is_empty() {
-                    return internal_err!(
-                        "Corrupted catalog row: sink '{}' missing logical_program_bincode",
-                        proto_def.table_name
+            table_definition::TableType::ConnectorTable(src) => {
+                self.decode_catalog_source_table(proto_def.table_name, src, false)
+            }
+            table_definition::TableType::LookupTable(src) => {
+                self.decode_catalog_source_table(proto_def.table_name, src, true)
+            }
+        }
+    }
+
+    fn load_catalog_tables_map(
+        &self,
+    ) -> DFResult<std::collections::HashMap<crate::sql::schema::ObjectName, Arc<CatalogTable>>> {
+        let mut out = std::collections::HashMap::new();
+        let records = self.store.scan_prefix(CATALOG_KEY_PREFIX)?;
+        for (key, payload) in records {
+            let proto_def = match pb::TableDefinition::decode(payload.as_slice()) {
+                Ok(v) => v,
+                Err(e) => {
+                    warn!(
+                        catalog_key = %key,
+                        error = %e,
+                        "Skipping corrupted stream catalog row: protobuf decode failed"
                     );
+                    continue;
                 }
-                let program = CatalogCodec::decode_logical_program(&sink.logical_program_bincode)?;
-                Ok(StreamTable::Sink {
-                    name: proto_def.table_name,
-                    program,
-                })
-            }
+            };
+            let table = match self.decode_catalog_table(proto_def) {
+                Ok(v) => v,
+                Err(e) => {
+                    warn!(
+                        catalog_key = %key,
+                        error = %e,
+                        "Skipping unsupported/corrupted stream catalog row"
+                    );
+                    continue;
+                }
+            };
+            let object_name = UniCase::new(table.name().to_string());
+            out.insert(object_name, Arc::new(table));
         }
+        Ok(out)
     }
 }
 
@@ -249,8 +355,8 @@ pub fn restore_global_catalog_from_store() {
     };
     match mgr.restore_from_store() {
         Ok(()) => {
-            let n = mgr.list_stream_tables().len();
-            info!(stream_tables = n, "Stream catalog loaded from durable store");
+            let n = mgr.list_catalog_tables().map(|t| t.len()).unwrap_or(0);
+            info!(catalog_tables = n, "Catalog loaded from durable store");
         }
         Err(e) => warn!("Stream catalog restore_from_store failed: {e:#}"),
     }
@@ -296,13 +402,14 @@ pub fn planning_schema_provider() -> StreamPlanningContext {
 
 #[cfg(test)]
 mod tests {
-    use std::collections::BTreeMap;
     use std::sync::Arc;
 
-    use datafusion::arrow::datatypes::{DataType, Field, Schema};
+    use datafusion::arrow::datatypes::{DataType, Field};
 
-    use crate::sql::logical_node::logical::LogicalProgram;
-    use crate::sql::schema::StreamTable;
+    use crate::sql::schema::column_descriptor::ColumnDescriptor;
+    use crate::sql::schema::connection_type::ConnectionType;
+    use crate::sql::schema::source_table::SourceTable;
+    use crate::sql::schema::table::Table as CatalogTable;
     use crate::storage::stream_catalog::{InMemoryMetaStore, MetaStore};
 
     use super::CatalogManager;
@@ -314,107 +421,41 @@ mod tests {
     #[test]
     fn add_table_roundtrip_snapshot() {
         let mgr = create_test_manager();
-        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
-
-        let table = StreamTable::Source {
-            name: "t1".into(),
-            connector: "stream_catalog".into(),
-            schema: Arc::clone(&schema),
-            event_time_field: Some("ts".into()),
-            watermark_field: None,
-            with_options: BTreeMap::new(),
-        };
-
-        mgr.add_table(table).unwrap();
-
-        let ctx = mgr.acquire_planning_context();
-        let got = ctx.get_stream_table("t1").expect("table present");
-
+        let mut source = SourceTable::new("t1", "kafka", ConnectionType::Source);
+        source.schema_specs = vec![ColumnDescriptor::new_physical(Field::new(
+            "a",
+            DataType::Int32,
+            false,
+        ))];
+        source.temporal_config.event_column = Some("ts".into());
+        let table = CatalogTable::ConnectorTable(source);
+
+        mgr.add_catalog_table(table).unwrap();
+
+        let got = mgr
+            .get_catalog_table("t1")
+            .unwrap()
+            .expect("table present");
         assert_eq!(got.name(), "t1");
-
-        if let StreamTable::Source {
-            event_time_field,
-            watermark_field,
-            ..
-        } = got.as_ref()
-        {
-            assert_eq!(event_time_field.as_deref(), Some("ts"));
-            assert!(watermark_field.is_none());
-        } else {
-            panic!("expected Source");
-        }
-    }
-
-    #[test]
-    fn add_table_roundtrip_with_options() {
-        let mgr = create_test_manager();
-        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
-
-        let mut opts = BTreeMap::new();
-        opts.insert("connector".to_string(), "kafka".to_string());
-        opts.insert("topic".to_string(), "my-topic".to_string());
-
-        let table = StreamTable::Source {
-            name: "t_with".into(),
-            connector: "kafka".into(),
-            schema,
-            event_time_field: None,
-            watermark_field: None,
-            with_options: opts.clone(),
-        };
-
-        mgr.add_table(table).unwrap();
-
-        let ctx = mgr.acquire_planning_context();
-        let got = ctx.get_stream_table("t_with").expect("table present");
-
-        if let StreamTable::Source { with_options, .. } = got.as_ref() {
-            assert_eq!(with_options, &opts);
-        } else {
-            panic!("expected Source");
-        }
     }
 
     #[test]
     fn drop_table_if_exists() {
         let mgr = create_test_manager();
-        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
-
-        mgr.add_table(StreamTable::Source {
-            name: "t_drop".into(),
-            connector: "stream_catalog".into(),
-            schema,
-            event_time_field: None,
-            watermark_field: None,
-            with_options: BTreeMap::new(),
-        })
-        .unwrap();
-
-        mgr.drop_table("t_drop", false).unwrap();
-        assert!(!mgr.has_stream_table("t_drop"));
-
-        mgr.drop_table("t_drop", true).unwrap();
-        assert!(mgr.drop_table("nope", false).is_err());
-        mgr.drop_table("nope", true).unwrap();
-    }
-
-    #[test]
-    fn restore_from_store_rebuilds_cache() {
-        let store: Arc<dyn MetaStore> = Arc::new(InMemoryMetaStore::new());
-
-        let mgr_a = CatalogManager::new(Arc::clone(&store));
-
-        mgr_a
-            .add_table(StreamTable::Sink {
-                name: "sink1".into(),
-                program: LogicalProgram::default(),
-            })
+        let mut source = SourceTable::new("t_drop", "kafka", ConnectionType::Source);
+        source.schema_specs = vec![ColumnDescriptor::new_physical(Field::new(
+            "a",
+            DataType::Int32,
+            false,
+        ))];
+        mgr.add_catalog_table(CatalogTable::ConnectorTable(source))
             .unwrap();
 
-        let mgr_b = CatalogManager::new(store);
-        mgr_b.restore_from_store().unwrap();
+        mgr.drop_catalog_table("t_drop", false).unwrap();
+        assert!(!mgr.has_catalog_table("t_drop"));
 
-        let ctx = mgr_b.acquire_planning_context();
-        assert!(ctx.get_stream_table("sink1").is_some());
+        mgr.drop_catalog_table("t_drop", true).unwrap();
+        assert!(mgr.drop_catalog_table("nope", false).is_err());
+        mgr.drop_catalog_table("nope", true).unwrap();
     }
 }

From 87f77222881b0bd533e9bbc0d2109ef5fc61b827 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Tue, 31 Mar 2026 23:58:48 +0800
Subject: [PATCH 35/44] update

---
 protocol/proto/fs_api.proto                   | 127 +++++-
 protocol/proto/storage.proto                  |   5 +-
 src/common/mod.rs                             |   2 +-
 src/runtime/streaming/api/mod.rs              |   2 +-
 src/runtime/streaming/api/operator.rs         |   4 +-
 src/runtime/streaming/driver.rs               | 266 -----------
 src/runtime/streaming/execution/runner.rs     |  10 +-
 .../factory/connector/dispatchers.rs          |  28 +-
 .../streaming/factory/connector/kafka.rs      | 421 ++++++++----------
 src/runtime/streaming/factory/mod.rs          |   4 +-
 .../streaming/factory/operator_factory.rs     |  72 +--
 src/runtime/streaming/job/job_manager.rs      | 134 +++++-
 src/runtime/streaming/lib.rs                  |  40 --
 src/runtime/streaming/mod.rs                  |   2 +-
 .../grouping/incremental_aggregate.rs         |   4 +-
 .../operators/joins/join_instance.rs          |   4 +-
 .../operators/joins/join_with_expiration.rs   |   4 +-
 src/runtime/streaming/operators/key_by.rs     |   4 +-
 .../streaming/operators/key_operator.rs       |   6 +-
 src/runtime/streaming/operators/mod.rs        |   2 +-
 src/runtime/streaming/operators/projection.rs |  59 ++-
 .../streaming/operators/sink/kafka/mod.rs     |   4 +-
 .../streaming/operators/value_execution.rs    |   4 +-
 .../watermark/watermark_generator.rs          |   4 +-
 .../windows/session_aggregating_window.rs     |   4 +-
 .../windows/sliding_aggregating_window.rs     |   4 +-
 .../windows/tumbling_aggregating_window.rs    |   4 +-
 .../operators/windows/window_function.rs      |   4 +-
 src/sql/common/connector_options.rs           |  15 +
 src/sql/common/mod.rs                         |   2 +-
 src/sql/common/operator_config.rs             |  33 --
 src/sql/extensions/lookup.rs                  |  18 +-
 src/sql/schema/connector_config.rs            |  82 ++++
 src/sql/schema/kafka_operator_config.rs       | 250 +++++++++++
 src/sql/schema/mod.rs                         |   3 +
 src/sql/schema/source_table.rs                | 282 ++----------
 src/storage/stream_catalog/manager.rs         |  68 ++-
 37 files changed, 983 insertions(+), 998 deletions(-)
 delete mode 100644 src/runtime/streaming/driver.rs
 delete mode 100644 src/runtime/streaming/lib.rs
 create mode 100644 src/sql/schema/connector_config.rs
 create mode 100644 src/sql/schema/kafka_operator_config.rs

diff --git a/protocol/proto/fs_api.proto b/protocol/proto/fs_api.proto
index b178f6ea..1f578ffe 100644
--- a/protocol/proto/fs_api.proto
+++ b/protocol/proto/fs_api.proto
@@ -8,8 +8,131 @@ package fs_api;
 
 message ConnectorOp {
   string connector = 1;
-  string config = 2;
-  string description = 3;
+  reserved 2;  // removed: string config
+  optional FsSchema fs_schema = 3;
+  string name = 4;
+  string description = 5;
+
+  oneof config {
+    KafkaSourceConfig kafka_source = 6;
+    KafkaSinkConfig kafka_sink = 7;
+    GenericConnectorConfig generic = 8;
+  }
+}
+
+// ─────────────────────── Kafka Connector Configs ───────────────────────
+
+message KafkaSourceConfig {
+  string topic = 1;
+  string bootstrap_servers = 2;
+  optional string group_id = 3;
+  optional string group_id_prefix = 4;
+  KafkaOffsetMode offset_mode = 5;
+  KafkaReadMode read_mode = 6;
+  KafkaAuthConfig auth = 7;
+  map<string, string> client_configs = 8;
+  FormatConfig format = 9;
+  BadDataPolicy bad_data_policy = 10;
+  uint32 rate_limit_msgs_per_sec = 11;
+  optional string value_subject = 12;
+}
+
+message KafkaSinkConfig {
+  string topic = 1;
+  string bootstrap_servers = 2;
+  KafkaSinkCommitMode commit_mode = 3;
+  optional string key_field = 4;
+  optional string timestamp_field = 5;
+  KafkaAuthConfig auth = 6;
+  map<string, string> client_configs = 7;
+  FormatConfig format = 8;
+  optional string value_subject = 9;
+}
+
+// Fallback for non-Kafka connectors that are not yet strongly typed.
+message GenericConnectorConfig {
+  map<string, string> properties = 1;
+}
+
+// ─────────────────────── Kafka Auth ───────────────────────
+
+message KafkaAuthConfig {
+  oneof auth {
+    KafkaAuthNone none = 1;
+    KafkaAuthSasl sasl = 2;
+    KafkaAuthAwsMskIam aws_msk_iam = 3;
+  }
+}
+
+message KafkaAuthNone {}
+
+message KafkaAuthSasl {
+  string protocol = 1;
+  string mechanism = 2;
+  string username = 3;
+  string password = 4;
+}
+
+message KafkaAuthAwsMskIam {
+  string region = 1;
+}
+
+// ─────────────────────── Format & Data-Quality ───────────────────────
+
+message FormatConfig {
+  oneof format {
+    JsonFormatConfig json = 1;
+    RawStringFormatConfig raw_string = 2;
+    RawBytesFormatConfig raw_bytes = 3;
+  }
+}
+
+message JsonFormatConfig {
+  TimestampFormatProto timestamp_format = 1;
+  DecimalEncodingProto decimal_encoding = 2;
+  bool include_schema = 3;
+  bool confluent_schema_registry = 4;
+  optional uint32 schema_id = 5;
+  bool debezium = 6;
+  bool unstructured = 7;
+}
+
+message RawStringFormatConfig {}
+message RawBytesFormatConfig {}
+
+// ─────────────────────── Kafka Enums ───────────────────────
+
+enum TimestampFormatProto {
+  TIMESTAMP_RFC3339 = 0;
+  TIMESTAMP_UNIX_MILLIS = 1;
+}
+
+enum DecimalEncodingProto {
+  DECIMAL_NUMBER = 0;
+  DECIMAL_STRING = 1;
+  DECIMAL_BYTES = 2;
+}
+
+enum BadDataPolicy {
+  BAD_DATA_FAIL = 0;
+  BAD_DATA_DROP = 1;
+}
+
+enum KafkaOffsetMode {
+  KAFKA_OFFSET_EARLIEST = 0;
+  KAFKA_OFFSET_LATEST = 1;
+  KAFKA_OFFSET_GROUP = 2;
+}
+
+enum KafkaReadMode {
+  KAFKA_READ_DEFAULT = 0;
+  KAFKA_READ_COMMITTED = 1;
+  KAFKA_READ_UNCOMMITTED = 2;
+}
+
+enum KafkaSinkCommitMode {
+  KAFKA_SINK_AT_LEAST_ONCE = 0;
+  KAFKA_SINK_EXACTLY_ONCE = 1;
 }
 
 message ValuePlanOperator {
diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto
index 6c645e75..9ab0995d 100644
--- a/protocol/proto/storage.proto
+++ b/protocol/proto/storage.proto
@@ -30,10 +30,13 @@ message CatalogSourceTable {
   bytes arrow_schema_ipc = 1;
   optional string event_time_field = 2;
   optional string watermark_field = 3;
-  // Original CREATE TABLE ... WITH ('k'='v', ...) pairs (best-effort; keys sorted in DDL).
+  // Original CREATE TABLE ... WITH ('k'='v', ...) pairs — single source of truth.
   map<string, string> with_options = 4;
   // Canonical connector identifier (e.g. kafka, postgres-cdc).
   string connector = 5;
+  reserved 6;  // removed: string opaque_config (JSON blob no longer needed)
+  // Human-readable note from DDL (ConnectorOp.description).
+  string description = 7;
 }
 
 // =============================================================================
diff --git a/src/common/mod.rs b/src/common/mod.rs
index e3c103a2..e0eb8d7a 100644
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -48,7 +48,7 @@ pub use control::{
 pub use fs_schema::{FsSchema, FsSchemaRef};
 pub use errors::DataflowError;
 pub use formats::{BadData, Format, Framing, JsonFormat};
-pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
+pub use operator_config::MetadataField;
 
 // ── Well-known column names ──
 pub const TIMESTAMP_FIELD: &str = "_timestamp";
diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs
index f004de58..a525c883 100644
--- a/src/runtime/streaming/api/mod.rs
+++ b/src/runtime/streaming/api/mod.rs
@@ -16,5 +16,5 @@ pub mod operator;
 pub mod source;
 
 pub use context::TaskContext;
-pub use operator::{ConstructedOperator, MessageOperator};
+pub use operator::{ConstructedOperator, Operator};
 pub use source::{SourceEvent, SourceOffset, SourceOperator};
diff --git a/src/runtime/streaming/api/operator.rs b/src/runtime/streaming/api/operator.rs
index 4683379b..9acc6e06 100644
--- a/src/runtime/streaming/api/operator.rs
+++ b/src/runtime/streaming/api/operator.rs
@@ -24,11 +24,11 @@ use crate::sql::common::{CheckpointBarrier, Watermark};
 
 pub enum ConstructedOperator {
     Source(Box<dyn SourceOperator>),
-    Operator(Box<dyn MessageOperator>),
+    Operator(Box<dyn Operator>),
 }
 
 #[async_trait]
-pub trait MessageOperator: Send + 'static {
+pub trait Operator: Send + 'static {
     fn name(&self) -> &str;
 
     async fn on_start(&mut self, _ctx: &mut TaskContext) -> anyhow::Result<()> {
diff --git a/src/runtime/streaming/driver.rs b/src/runtime/streaming/driver.rs
deleted file mode 100644
index 011e49ab..00000000
--- a/src/runtime/streaming/driver.rs
+++ /dev/null
@@ -1,266 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::future::pending;
-use std::sync::Arc;
-
-use arrow_array::RecordBatch;
-use tokio::sync::mpsc;
-
-use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{MessageOperator, OperatorContext, StreamOperator};
-use crate::runtime::streaming::context::{ChainedOperatorContext, TerminalOutputContext};
-use crate::runtime::streaming::environment::TaskEnvironment;
-use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
-use crate::runtime::streaming::protocol::event::StreamEvent;
-use crate::runtime::streaming::protocol::stream_out::StreamOutput;
-use crate::runtime::streaming::protocol::tracked::TrackedEvent;
-use crate::sql::common::CheckpointBarrier;
-
-pub struct StreamTaskDriver {
-    head_op: Box<dyn StreamOperator>,
-    head_ctx: Box<dyn OperatorContext>,
-    inbox: Option<mpsc::Receiver<TrackedEvent>>,
-    control_rx: mpsc::Receiver<ControlCommand>,
-}
-
-impl StreamTaskDriver {
-    pub fn new(
-        task_id: u32,
-        mut operators: Vec<Box<dyn StreamOperator>>,
-        inbox: Option<mpsc::Receiver<TrackedEvent>>,
-        outboxes: Vec<mpsc::Sender<TrackedEvent>>,
-        control_rx: mpsc::Receiver<ControlCommand>,
-        job_id: String,
-    ) -> Self {
-        let env = TaskEnvironment::new(job_id, task_id, 0, 1);
-        let mut current_op = operators.pop().expect("Operators pipeline cannot be empty");
-        let mut current_ctx: Box<dyn OperatorContext> =
-            Box::new(TerminalOutputContext::new(outboxes, env));
-
-        while let Some(prev_op) = operators.pop() {
-            let chained = ChainedOperatorContext::new(current_op, current_ctx);
-            current_op = prev_op;
-            current_ctx = Box::new(chained);
-        }
-
-        Self {
-            head_op: current_op,
-            head_ctx: current_ctx,
-            inbox,
-            control_rx,
-        }
-    }
-
-    pub async fn run(&mut self) -> anyhow::Result<()> {
-        self.head_op.open(self.head_ctx.env()).await?;
-
-        'main_loop: loop {
-            tokio::select! {
-                biased;
-                Some(cmd) = self.control_rx.recv() => {
-                    if self.process_control_command(cmd).await? {
-                        break 'main_loop;
-                    }
-                }
-                Some(tracked) = async {
-                    if let Some(ref mut rx) = self.inbox { rx.recv().await }
-                    else { pending().await }
-                } => {
-                    self.pump_event(tracked.event).await?;
-                }
-            }
-        }
-
-        self.head_op.close(self.head_ctx.env()).await?;
-        Ok(())
-    }
-
-    async fn process_control_command(&mut self, cmd: ControlCommand) -> anyhow::Result<bool> {
-        match cmd {
-            ControlCommand::TriggerCheckpoint { barrier } => {
-                let barrier: CheckpointBarrier = barrier.into();
-                self.pump_event(StreamEvent::Barrier(barrier)).await?;
-                Ok(false)
-            }
-            ControlCommand::Commit { epoch } => {
-                self.head_op.commit_checkpoint(epoch, self.head_ctx.env()).await?;
-                self.head_ctx.commit_checkpoint(epoch).await?;
-                Ok(false)
-            }
-            ControlCommand::Stop { mode } if mode == StopMode::Immediate => Ok(true),
-            other_cmd => {
-                let stop_head = self
-                    .head_op
-                    .handle_control(other_cmd.clone(), self.head_ctx.env())
-                    .await?;
-                let stop_rest = self.head_ctx.handle_control(other_cmd).await?;
-                Ok(stop_head || stop_rest)
-            }
-        }
-    }
-
-    async fn pump_event(&mut self, event: StreamEvent) -> anyhow::Result<()> {
-        match event {
-            StreamEvent::Data(batch) => self.head_op.process_data(batch, self.head_ctx.as_mut()).await,
-            StreamEvent::Watermark(wm) => {
-                self.head_op.process_watermark(wm, self.head_ctx.as_mut()).await
-            }
-            StreamEvent::Barrier(br) => {
-                self.head_op
-                    .snapshot_state(br.clone(), self.head_ctx.as_mut())
-                    .await?;
-                self.head_ctx.broadcast(StreamEvent::Barrier(br)).await
-            }
-            StreamEvent::EndOfStream => {
-                self.head_op.close(self.head_ctx.env()).await?;
-                self.head_ctx.broadcast(StreamEvent::EndOfStream).await
-            }
-        }
-    }
-}
-
-pub struct MessageOperatorAdapter {
-    inner: Box<dyn MessageOperator>,
-}
-
-impl MessageOperatorAdapter {
-    pub fn new(inner: Box<dyn MessageOperator>) -> Self {
-        Self { inner }
-    }
-
-    async fn emit_outputs(
-        ctx: &mut dyn OperatorContext,
-        outputs: Vec<StreamOutput>,
-    ) -> anyhow::Result<()> {
-        for out in outputs {
-            match out {
-                StreamOutput::Forward(b) | StreamOutput::Broadcast(b) | StreamOutput::Keyed(_, b) => {
-                    ctx.collect(b).await?;
-                }
-                StreamOutput::Watermark(wm) => {
-                    ctx.broadcast(StreamEvent::Watermark(wm)).await?;
-                }
-            }
-        }
-        Ok(())
-    }
-}
-
-#[async_trait::async_trait(?Send)]
-impl StreamOperator for MessageOperatorAdapter {
-    async fn open(&mut self, env: &mut TaskEnvironment) -> anyhow::Result<()> {
-        let mut ctx = TaskContext::new(
-            env.job_id.clone(),
-            env.task_id,
-            env.subtask_index,
-            env.parallelism,
-            vec![],
-            env.memory_pool.clone(),
-        );
-        self.inner.on_start(&mut ctx).await
-    }
-
-    async fn close(&mut self, env: &mut TaskEnvironment) -> anyhow::Result<()> {
-        let mut ctx = TaskContext::new(
-            env.job_id.clone(),
-            env.task_id,
-            env.subtask_index,
-            env.parallelism,
-            vec![],
-            env.memory_pool.clone(),
-        );
-        let _ = self.inner.on_close(&mut ctx).await?;
-        Ok(())
-    }
-
-    async fn process_data(
-        &mut self,
-        batch: RecordBatch,
-        ctx: &mut dyn OperatorContext,
-    ) -> anyhow::Result<()> {
-        let mut op_ctx = TaskContext::new(
-            ctx.env().job_id.clone(),
-            ctx.env().task_id,
-            ctx.env().subtask_index,
-            ctx.env().parallelism,
-            vec![],
-            ctx.env().memory_pool.clone(),
-        );
-        let outs = self.inner.process_data(0, batch, &mut op_ctx).await?;
-        Self::emit_outputs(ctx, outs).await
-    }
-
-    async fn process_watermark(
-        &mut self,
-        wm: crate::sql::common::Watermark,
-        ctx: &mut dyn OperatorContext,
-    ) -> anyhow::Result<()> {
-        let mut op_ctx = TaskContext::new(
-            ctx.env().job_id.clone(),
-            ctx.env().task_id,
-            ctx.env().subtask_index,
-            ctx.env().parallelism,
-            vec![],
-            ctx.env().memory_pool.clone(),
-        );
-        let outs = self.inner.process_watermark(wm, &mut op_ctx).await?;
-        Self::emit_outputs(ctx, outs).await
-    }
-
-    async fn snapshot_state(
-        &mut self,
-        barrier: CheckpointBarrier,
-        ctx: &mut dyn OperatorContext,
-    ) -> anyhow::Result<()> {
-        let mut op_ctx = TaskContext::new(
-            ctx.env().job_id.clone(),
-            ctx.env().task_id,
-            ctx.env().subtask_index,
-            ctx.env().parallelism,
-            vec![],
-            ctx.env().memory_pool.clone(),
-        );
-        self.inner.snapshot_state(barrier, &mut op_ctx).await
-    }
-
-    async fn commit_checkpoint(
-        &mut self,
-        epoch: u32,
-        env: &mut TaskEnvironment,
-    ) -> anyhow::Result<()> {
-        let mut ctx = TaskContext::new(
-            env.job_id.clone(),
-            env.task_id,
-            env.subtask_index,
-            env.parallelism,
-            vec![],
-            env.memory_pool.clone(),
-        );
-        self.inner.commit_checkpoint(epoch, &mut ctx).await
-    }
-
-    async fn handle_control(
-        &mut self,
-        cmd: ControlCommand,
-        _env: &mut TaskEnvironment,
-    ) -> anyhow::Result<bool> {
-        match cmd {
-            ControlCommand::Stop { mode } => Ok(mode == StopMode::Immediate),
-            ControlCommand::DropState
-            | ControlCommand::Start
-            | ControlCommand::UpdateConfig { .. }
-            | ControlCommand::TriggerCheckpoint { .. }
-            | ControlCommand::Commit { .. } => Ok(false),
-        }
-    }
-}
diff --git a/src/runtime/streaming/execution/runner.rs b/src/runtime/streaming/execution/runner.rs
index d43f052d..c4981d93 100644
--- a/src/runtime/streaming/execution/runner.rs
+++ b/src/runtime/streaming/execution/runner.rs
@@ -16,7 +16,7 @@ use tokio_stream::{StreamExt, StreamMap};
 use tracing::{info, info_span, Instrument};
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::error::RunError;
 use crate::runtime::streaming::network::endpoint::BoxedEventStream;
 use crate::runtime::streaming::protocol::{
@@ -52,16 +52,16 @@ pub trait OperatorDrive: Send {
 }
 
 pub struct ChainedDriver {
-    operator: Box<dyn MessageOperator>,
+    operator: Box<dyn Operator>,
     next: Option<Box<dyn OperatorDrive>>,
 }
 
 impl ChainedDriver {
-    pub fn new(operator: Box<dyn MessageOperator>, next: Option<Box<dyn OperatorDrive>>) -> Self {
+    pub fn new(operator: Box<dyn Operator>, next: Option<Box<dyn OperatorDrive>>) -> Self {
         Self { operator, next }
     }
 
-    pub fn build_chain(mut operators: Vec<Box<dyn MessageOperator>>) -> Option<Box<dyn OperatorDrive>> {
+    pub fn build_chain(mut operators: Vec<Box<dyn Operator>>) -> Option<Box<dyn OperatorDrive>> {
         if operators.is_empty() {
             return None;
         }
@@ -240,7 +240,7 @@ pub struct Pipeline {
 
 impl Pipeline {
     pub fn new(
-        operators: Vec<Box<dyn MessageOperator>>,
+        operators: Vec<Box<dyn Operator>>,
         ctx: TaskContext,
         inboxes: Vec<BoxedEventStream>,
         control_rx: Receiver<ControlCommand>,
diff --git a/src/runtime/streaming/factory/connector/dispatchers.rs b/src/runtime/streaming/factory/connector/dispatchers.rs
index cca85c1a..40e7242c 100644
--- a/src/runtime/streaming/factory/connector/dispatchers.rs
+++ b/src/runtime/streaming/factory/connector/dispatchers.rs
@@ -10,35 +10,21 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
-use anyhow::{anyhow, Result};
-use prost::Message;
 use std::sync::Arc;
 
-use protocol::grpc::api::ConnectorOp;
+use anyhow::Result;
 
 use crate::runtime::streaming::api::operator::ConstructedOperator;
 use crate::runtime::streaming::factory::global::Registry;
 use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor;
-use crate::sql::common::constants::connector_type;
 
-use super::kafka::{KafkaSinkDispatcher, KafkaSourceDispatcher};
+use super::kafka::ConnectorDispatcher;
 
 pub struct ConnectorSourceDispatcher;
 
 impl OperatorConstructor for ConnectorSourceDispatcher {
     fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
-        let op = ConnectorOp::decode(config)
-            .map_err(|e| anyhow!("decode ConnectorOp (source): {e}"))?;
-
-        match op.connector.as_str() {
-            ct if ct == connector_type::KAFKA => KafkaSourceDispatcher.with_config(config, registry),
-            ct if ct == connector_type::REDIS => Err(anyhow!(
-                "ConnectorSource '{}' factory wiring not yet implemented",
-                op.connector
-            )),
-            other => Err(anyhow!("Unsupported source connector type: {}", other)),
-        }
+        ConnectorDispatcher.with_config(config, registry) // dispatches on payload config variant
     }
 }
 
@@ -46,12 +32,6 @@ pub struct ConnectorSinkDispatcher;
 
 impl OperatorConstructor for ConnectorSinkDispatcher {
     fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
-        let op = ConnectorOp::decode(config)
-            .map_err(|e| anyhow!("decode ConnectorOp (sink): {e}"))?;
-
-        match op.connector.as_str() {
-            ct if ct == connector_type::KAFKA => KafkaSinkDispatcher.with_config(config, registry),
-            other => Err(anyhow!("Unsupported sink connector type: {}", other)),
-        }
+        ConnectorDispatcher.with_config(config, registry) // dispatches on payload config variant
     }
 }
diff --git a/src/runtime/streaming/factory/connector/kafka.rs b/src/runtime/streaming/factory/connector/kafka.rs
index 7e548cec..a55ef477 100644
--- a/src/runtime/streaming/factory/connector/kafka.rs
+++ b/src/runtime/streaming/factory/connector/kafka.rs
@@ -10,316 +10,213 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{bail, Context, Result};
 use prost::Message;
 use std::collections::HashMap;
 use std::num::NonZeroU32;
 use std::sync::Arc;
 
-use protocol::grpc::api::ConnectorOp;
-use tracing::{info, warn};
+use protocol::grpc::api::connector_op::Config;
+use protocol::grpc::api::{
+    BadDataPolicy, ConnectorOp, DecimalEncodingProto, FormatConfig,
+    KafkaAuthConfig, KafkaOffsetMode, KafkaReadMode, KafkaSinkCommitMode, KafkaSinkConfig,
+    KafkaSourceConfig, TimestampFormatProto,
+};
+use tracing::info;
 
 use crate::runtime::streaming::api::operator::ConstructedOperator;
 use crate::runtime::streaming::api::source::SourceOffset;
 use crate::runtime::streaming::factory::global::Registry;
 use crate::runtime::streaming::factory::operator_constructor::OperatorConstructor;
 use crate::runtime::streaming::format::{
-    BadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding, Format as RuntimeFormat,
-    JsonFormat as RuntimeJsonFormat, TimestampFormat as RtTimestampFormat,
+    BadDataPolicy as RtBadDataPolicy, DataSerializer, DecimalEncoding as RtDecimalEncoding,
+    Format as RuntimeFormat, JsonFormat as RuntimeJsonFormat,
+    TimestampFormat as RtTimestampFormat,
 };
 use crate::runtime::streaming::operators::sink::kafka::{ConsistencyMode, KafkaSinkOperator};
-use crate::runtime::streaming::operators::source::kafka::{BufferedDeserializer, KafkaSourceOperator};
-use crate::sql::common::constants::connector_type;
-use crate::sql::common::formats::{
-    BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat, JsonFormat as SqlJsonFormat,
-    TimestampFormat as SqlTimestampFormat,
+use crate::runtime::streaming::operators::source::kafka::{
+    BufferedDeserializer, KafkaSourceOperator,
 };
-use crate::sql::common::kafka_catalog::{
-    KafkaConfig, KafkaConfigAuthentication, KafkaTable, ReadMode, SinkCommitMode, TableType,
-};
-use crate::sql::common::{FsSchema, OperatorConfig};
+use crate::sql::common::FsSchema;
 
 const DEFAULT_SOURCE_BATCH_SIZE: usize = 1024;
 
-pub fn build_client_configs(config: &KafkaConfig, table: &KafkaTable) -> Result<HashMap<String, String>> {
-    let mut client_configs = HashMap::new();
-
-    match &config.authentication {
-        KafkaConfigAuthentication::None => {}
-        KafkaConfigAuthentication::Sasl {
-            protocol,
-            mechanism,
-            username,
-            password,
-        } => {
-            client_configs.insert("security.protocol".to_string(), protocol.clone());
-            client_configs.insert("sasl.mechanism".to_string(), mechanism.clone());
-            client_configs.insert("sasl.username".to_string(), username.clone());
-            client_configs.insert("sasl.password".to_string(), password.clone());
+// ─────────────── Proto → Runtime type conversions ───────────────
+
+fn proto_format_to_runtime(fmt: &Option<FormatConfig>) -> Result<RuntimeFormat> {
+    let cfg = fmt.as_ref().context("FormatConfig is required")?; // absent config is an error
+    match &cfg.format {
+        Some(protocol::grpc::api::format_config::Format::Json(j)) => {
+            Ok(RuntimeFormat::Json(RuntimeJsonFormat {
+                timestamp_format: match j.timestamp_format() {
+                    TimestampFormatProto::TimestampRfc3339 => RtTimestampFormat::RFC3339,
+                    TimestampFormatProto::TimestampUnixMillis => RtTimestampFormat::UnixMillis,
+                },
+                decimal_encoding: match j.decimal_encoding() {
+                    DecimalEncodingProto::DecimalNumber => RtDecimalEncoding::Number,
+                    DecimalEncodingProto::DecimalString => RtDecimalEncoding::String,
+                    DecimalEncodingProto::DecimalBytes => RtDecimalEncoding::Bytes,
+                },
+                include_schema: j.include_schema,
+            }))
         }
-        KafkaConfigAuthentication::AwsMskIam { region } => {
-            client_configs.insert("security.protocol".to_string(), "SASL_SSL".to_string());
-            client_configs.insert("sasl.mechanism".to_string(), "OAUTHBEARER".to_string());
-            client_configs.insert(
-                "sasl.oauthbearer.extensions".to_string(),
-                format!("logicalCluster=aws_msk;aws_region={region}"),
-            );
+        Some(protocol::grpc::api::format_config::Format::RawString(_)) => {
+            Ok(RuntimeFormat::RawString)
         }
-    }
-
-    for (k, v) in &config.connection_properties {
-        client_configs.insert(k.clone(), v.clone());
-    }
-
-    for (k, v) in &table.client_configs {
-        if client_configs.contains_key(k) {
-            warn!(
-                "Kafka config key '{}' is defined in both connection and table; using table value",
-                k
-            );
+        Some(protocol::grpc::api::format_config::Format::RawBytes(_)) => {
+            Ok(RuntimeFormat::RawBytes)
         }
-        client_configs.insert(k.clone(), v.clone());
-    }
-
-    Ok(client_configs)
-}
-
-fn bad_data_policy(b: Option<BadData>) -> BadDataPolicy {
-    match b.unwrap_or_default() {
-        BadData::Fail {} => BadDataPolicy::Fail,
-        BadData::Drop {} => BadDataPolicy::Drop,
+        None => bail!("FormatConfig has no format variant set"), // no variant chosen in the config
     }
 }
 
-fn sql_timestamp_format(t: SqlTimestampFormat) -> RtTimestampFormat {
-    match t {
-        SqlTimestampFormat::RFC3339 => RtTimestampFormat::RFC3339,
-        SqlTimestampFormat::UnixMillis => RtTimestampFormat::UnixMillis,
+fn proto_bad_data_to_runtime(policy: i32) -> RtBadDataPolicy {
+    match BadDataPolicy::try_from(policy) {
+        Ok(BadDataPolicy::BadDataDrop) => RtBadDataPolicy::Drop,
+        _ => RtBadDataPolicy::Fail, // every other value, decodable or not, maps to Fail
     }
 }
 
-fn sql_decimal_encoding(d: SqlDecimalEncoding) -> RtDecimalEncoding {
-    match d {
-        SqlDecimalEncoding::Number => RtDecimalEncoding::Number,
-        SqlDecimalEncoding::String => RtDecimalEncoding::String,
-        SqlDecimalEncoding::Bytes => RtDecimalEncoding::Bytes,
+fn proto_offset_to_runtime(mode: i32) -> SourceOffset {
+    match KafkaOffsetMode::try_from(mode) {
+        Ok(KafkaOffsetMode::KafkaOffsetLatest) => SourceOffset::Latest,
+        Ok(KafkaOffsetMode::KafkaOffsetEarliest) => SourceOffset::Earliest,
+        _ => SourceOffset::Group, // fallback for any other or undecodable mode value
     }
 }
 
-fn sql_json_format_to_runtime(j: &SqlJsonFormat) -> RuntimeJsonFormat {
-    RuntimeJsonFormat {
-        timestamp_format: sql_timestamp_format(j.timestamp_format),
-        decimal_encoding: sql_decimal_encoding(j.decimal_encoding),
-        include_schema: j.include_schema,
+fn build_auth_client_configs(auth: &Option<KafkaAuthConfig>) -> HashMap<String, String> {
+    let mut out = HashMap::new();
+    let Some(auth) = auth else { return out }; // no auth config: return the empty map
+    match &auth.auth {
+        Some(protocol::grpc::api::kafka_auth_config::Auth::Sasl(sasl)) => {
+            out.insert("security.protocol".to_string(), sasl.protocol.clone());
+            out.insert("sasl.mechanism".to_string(), sasl.mechanism.clone());
+            out.insert("sasl.username".to_string(), sasl.username.clone());
+            out.insert("sasl.password".to_string(), sasl.password.clone());
+        }
+        Some(protocol::grpc::api::kafka_auth_config::Auth::AwsMskIam(iam)) => {
+            out.insert("security.protocol".to_string(), "SASL_SSL".to_string());
+            out.insert("sasl.mechanism".to_string(), "OAUTHBEARER".to_string());
+            out.insert(
+                "sasl.oauthbearer.extensions".to_string(),
+                format!("logicalCluster=aws_msk;aws_region={}", iam.region),
+            );
+        }
+        _ => {} // any other case contributes no settings
     }
+    out
 }
 
-fn sql_format_to_runtime(f: SqlFormat) -> Result<RuntimeFormat> {
-    match f {
-        SqlFormat::Json(j) => Ok(RuntimeFormat::Json(sql_json_format_to_runtime(&j))),
-        SqlFormat::RawString(_) => Ok(RuntimeFormat::RawString),
-        SqlFormat::RawBytes(_) => Ok(RuntimeFormat::RawBytes),
-        other => bail!(
-            "Kafka connector: format '{}' is not supported for runtime deserializer/serializer yet",
-            other.name()
-        ),
+fn merge_client_configs(
+    auth: &Option<KafkaAuthConfig>,
+    extra: &HashMap<String, String>,
+) -> HashMap<String, String> {
+    let mut configs = build_auth_client_configs(auth);
+    for (k, v) in extra {
+        configs.insert(k.clone(), v.clone()); // `extra` wins on duplicate keys
     }
+    configs
 }
 
-fn kafka_table_offset_to_runtime(o: crate::sql::common::KafkaTableSourceOffset) -> SourceOffset {
-    use crate::sql::common::KafkaTableSourceOffset as KOff;
-    match o {
-        KOff::Latest => SourceOffset::Latest,
-        KOff::Earliest => SourceOffset::Earliest,
-        KOff::Group => SourceOffset::Group,
-    }
-}
+// ─────────────── Unified Connector Dispatcher ───────────────
 
-fn non_zero_rate_per_second(op: &OperatorConfig) -> NonZeroU32 {
-    op.rate_limit
-        .as_ref()
-        .and_then(|r| NonZeroU32::new(r.messages_per_second.max(1)))
-        .unwrap_or_else(|| NonZeroU32::new(1_000_000).expect("nonzero"))
-}
+pub struct ConnectorDispatcher;
 
-fn sink_fs_schema_adjusted(
-    fs: FsSchema,
-    key_field: &Option<String>,
-    timestamp_field: &Option<String>,
-) -> Result<FsSchema> {
-    if key_field.is_none() && timestamp_field.is_none() {
-        return Ok(fs);
-    }
-    let schema = fs.schema.clone();
-    let ts = if let Some(name) = timestamp_field {
-        schema
-            .column_with_name(name)
-            .ok_or_else(|| anyhow!("timestamp column '{name}' not found in schema"))?
-            .0
-    } else {
-        fs.timestamp_index
-    };
-    let keys = fs.clone_storage_key_indices();
-    let routing = if let Some(name) = key_field {
-        let k = schema
-            .column_with_name(name)
-            .ok_or_else(|| anyhow!("key column '{name}' not found in schema"))?
-            .0;
-        Some(vec![k])
-    } else {
-        fs.clone_routing_key_indices()
-    };
-    Ok(FsSchema::new(schema, ts, keys, routing))
-}
-
-fn decode_operator_config(op: &ConnectorOp) -> Result<OperatorConfig> {
-    serde_json::from_str(&op.config).with_context(|| {
-        format!(
-            "Invalid OperatorConfig JSON for connector '{}'",
-            op.connector
-        )
-    })
-}
-
-pub struct KafkaSourceDispatcher;
-
-impl OperatorConstructor for KafkaSourceDispatcher {
+impl OperatorConstructor for ConnectorDispatcher {
     fn with_config(&self, payload: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
         let op = ConnectorOp::decode(payload)
-            .context("Failed to decode ConnectorOp protobuf for Kafka Source")?;
-
-        if op.connector != connector_type::KAFKA {
-            bail!(
-                "KafkaSourceDispatcher: expected connector 'kafka', got '{}'",
-                op.connector
-            );
+            .context("Failed to decode ConnectorOp protobuf")?;
+
+        let fs_schema = op
+            .fs_schema
+            .as_ref()
+            .map(|fs| FsSchema::try_from(fs.clone()))
+            .transpose()
+            .map_err(|e| anyhow::anyhow!("{e}"))?; // conversion error re-wrapped via its Display text
+
+        match op.config { // the config variant selects which operator is built
+            Some(Config::KafkaSource(ref cfg)) => {
+                Self::build_kafka_source(&op.name, cfg, fs_schema)
+            }
+            Some(Config::KafkaSink(ref cfg)) => {
+                Self::build_kafka_sink(&op.name, cfg, fs_schema)
+            }
+            Some(Config::Generic(_)) => bail!(
+                "ConnectorOp '{}': GenericConnectorConfig dispatch not yet implemented",
+                op.name
+            ),
+            None => bail!("ConnectorOp '{}' has no configuration payload", op.name),
         }
+    }
+}
 
-        let op_config = decode_operator_config(&op)?;
-
-        let kafka_config: KafkaConfig = serde_json::from_value(op_config.connection.clone())
-            .context("Failed to parse Kafka connection configuration")?;
-
-        let kafka_table: KafkaTable = serde_json::from_value(op_config.table.clone())
-            .context("Failed to parse Kafka table configuration")?;
-
-        let TableType::Source {
-            offset,
-            read_mode,
-            group_id,
-            group_id_prefix,
-        } = &kafka_table.kind
-        else {
-            bail!(
-                "Expected Kafka Source, got Sink configuration for topic '{}'",
-                kafka_table.topic
-            );
-        };
+impl ConnectorDispatcher {
+    /// Builds a Kafka source from typed proto config; errors if `fs_schema` is absent.
+    fn build_kafka_source(
+        _name: &str,
+        cfg: &KafkaSourceConfig,
+        fs_schema: Option<FsSchema>,
+    ) -> Result<ConstructedOperator> {
+        info!(topic = %cfg.topic, "Constructing Kafka Source");
 
-        info!("Constructing Kafka Source for topic: {}", kafka_table.topic);
+        let fs = fs_schema.context("fs_schema is required for Kafka Source")?;
+        let mut client_configs = merge_client_configs(&cfg.auth, &cfg.client_configs);
 
-        let mut client_configs = build_client_configs(&kafka_config, &kafka_table)?;
-        if let Some(ReadMode::ReadCommitted) = read_mode {
-            client_configs.insert("isolation.level".to_string(), "read_committed".to_string());
+        if cfg.read_mode() == KafkaReadMode::KafkaReadCommitted {
+            client_configs.insert("isolation.level".to_string(), "read_committed".to_string());
         }
 
-        let sql_format = op_config
-            .format
-            .clone()
-            .context("Format must be specified for Kafka Source")?;
-        let runtime_format = sql_format_to_runtime(sql_format)?;
-        let fs = op_config
-            .input_schema
-            .clone()
-            .context("input_schema is required for Kafka Source")?;
-        let bad = bad_data_policy(op_config.bad_data.clone());
+        let runtime_format = proto_format_to_runtime(&cfg.format)?;
+        let bad_data = proto_bad_data_to_runtime(cfg.bad_data_policy);
 
-        let deserializer: std::boxed::Box<
-            dyn crate::runtime::streaming::operators::source::kafka::BatchDeserializer,
-        > = Box::new(BufferedDeserializer::new(
+        let deserializer = Box::new(BufferedDeserializer::new(
             runtime_format,
             fs.schema.clone(),
-            bad,
+            bad_data,
             DEFAULT_SOURCE_BATCH_SIZE,
         ));
 
+        // 0 (the unset protobuf default) selects the 1M msgs/sec fallback, not 1 msg/sec.
+        let rate = NonZeroU32::new(cfg.rate_limit_msgs_per_sec)
+            .unwrap_or_else(|| NonZeroU32::new(1_000_000).expect("nonzero"));
         let source_op = KafkaSourceOperator::new(
-            kafka_table.topic.clone(),
-            kafka_config.bootstrap_servers.clone(),
-            group_id.clone(),
-            group_id_prefix.clone(),
-            kafka_table_offset_to_runtime(*offset),
-            client_configs,
-            non_zero_rate_per_second(&op_config),
-            op_config.metadata_fields,
+            cfg.topic.clone(),
+            cfg.bootstrap_servers.clone(),
+            cfg.group_id.clone(),
+            cfg.group_id_prefix.clone(),
+            proto_offset_to_runtime(cfg.offset_mode),
+            client_configs,
+            rate,
+            vec![], // NOTE(review): metadata fields are no longer forwarded; confirm intended
             deserializer,
         );
 
         Ok(ConstructedOperator::Source(Box::new(source_op)))
     }
-}
 
-pub struct KafkaSinkDispatcher;
+    fn build_kafka_sink(
+        _name: &str,
+        cfg: &KafkaSinkConfig,
+        fs_schema: Option<FsSchema>,
+    ) -> Result<ConstructedOperator> {
+        info!(topic = %cfg.topic, "Constructing Kafka Sink");
 
-impl OperatorConstructor for KafkaSinkDispatcher {
-    fn with_config(&self, payload: &[u8], _registry: Arc<Registry>) -> Result<ConstructedOperator> {
-        let op = ConnectorOp::decode(payload)
-            .context("Failed to decode ConnectorOp protobuf for Kafka Sink")?;
+        let fs_in = fs_schema.context("fs_schema is required for Kafka Sink")?;
+        let client_configs = merge_client_configs(&cfg.auth, &cfg.client_configs);
 
-        if op.connector != connector_type::KAFKA {
-            bail!(
-                "KafkaSinkDispatcher: expected connector 'kafka', got '{}'",
-                op.connector
-            );
-        }
-
-        let op_config = decode_operator_config(&op)?;
-
-        let kafka_config: KafkaConfig = serde_json::from_value(op_config.connection.clone())
-            .context("Failed to parse Kafka connection configuration")?;
-
-        let kafka_table: KafkaTable = serde_json::from_value(op_config.table.clone())
-            .context("Failed to parse Kafka table configuration")?;
-
-        let TableType::Sink {
-            commit_mode,
-            key_field,
-            timestamp_field,
-        } = &kafka_table.kind
-        else {
-            bail!(
-                "Expected Kafka Sink, got Source configuration for topic '{}'",
-                kafka_table.topic
-            );
+        let consistency = match cfg.commit_mode() {
+            KafkaSinkCommitMode::KafkaSinkExactlyOnce => ConsistencyMode::ExactlyOnce,
+            KafkaSinkCommitMode::KafkaSinkAtLeastOnce => ConsistencyMode::AtLeastOnce,
         };
 
-        info!("Constructing Kafka Sink for topic: {}", kafka_table.topic);
-
-        let client_configs = build_client_configs(&kafka_config, &kafka_table)?;
-
-        let consistency = match commit_mode {
-            SinkCommitMode::ExactlyOnce => ConsistencyMode::ExactlyOnce,
-            SinkCommitMode::AtLeastOnce => ConsistencyMode::AtLeastOnce,
-        };
-
-        let sql_format = op_config
-            .format
-            .clone()
-            .context("Format must be specified for Kafka Sink")?;
-        let runtime_format = sql_format_to_runtime(sql_format)?;
-
-        let fs_in = op_config
-            .input_schema
-            .clone()
-            .context("input_schema is required for Kafka Sink")?;
-        let fs = sink_fs_schema_adjusted(fs_in, key_field, timestamp_field)?;
-
+        let runtime_format = proto_format_to_runtime(&cfg.format)?;
+        let fs = sink_fs_schema_adjusted(fs_in, &cfg.key_field, &cfg.timestamp_field)?;
         let serializer = DataSerializer::new(runtime_format, fs.schema.clone());
 
         let sink_op = KafkaSinkOperator::new(
-            kafka_table.topic.clone(),
-            kafka_config.bootstrap_servers.clone(),
+            cfg.topic.clone(),
+            cfg.bootstrap_servers.clone(),
             consistency,
             client_configs,
             fs,
@@ -329,3 +226,37 @@ impl OperatorConstructor for KafkaSinkDispatcher {
         Ok(ConstructedOperator::Operator(Box::new(sink_op)))
     }
 }
+
+fn sink_fs_schema_adjusted(
+    fs: FsSchema,
+    key_field: &Option<String>,
+    timestamp_field: &Option<String>,
+) -> Result<FsSchema> {
+    if key_field.is_none() && timestamp_field.is_none() {
+        return Ok(fs);
+    }
+    let schema = fs.schema.clone();
+    let ts = if let Some(name) = timestamp_field {
+        schema
+            .column_with_name(name)
+            .ok_or_else(|| anyhow::anyhow!("timestamp column '{name}' not found in schema"))?
+            .0
+    } else {
+        fs.timestamp_index
+    };
+    let keys = fs.clone_storage_key_indices();
+    let routing = if let Some(name) = key_field {
+        let k = schema
+            .column_with_name(name)
+            .ok_or_else(|| anyhow::anyhow!("key column '{name}' not found in schema"))?
+            .0;
+        Some(vec![k])
+    } else {
+        fs.clone_routing_key_indices()
+    };
+    Ok(FsSchema::new(schema, ts, keys, routing))
+}
+
+// Legacy dispatcher aliases kept for backward compatibility with factory registration.
+pub type KafkaSourceDispatcher = ConnectorDispatcher;
+pub type KafkaSinkDispatcher = ConnectorDispatcher;
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
index 1315e4de..442c9bf9 100644
--- a/src/runtime/streaming/factory/mod.rs
+++ b/src/runtime/streaming/factory/mod.rs
@@ -43,11 +43,11 @@ fn register_builtin_connectors(factory: &mut OperatorFactory) {
 fn register_kafka_connector_plugins(factory: &mut OperatorFactory) {
     factory.register(
         factory_operator_name::KAFKA_SOURCE,
-        Box::new(connector::KafkaSourceDispatcher),
+        Box::new(connector::kafka::ConnectorDispatcher),
     );
     factory.register(
         factory_operator_name::KAFKA_SINK,
-        Box::new(connector::KafkaSinkDispatcher),
+        Box::new(connector::kafka::ConnectorDispatcher),
     );
     info!(
         "Registered Kafka connector plugins ({}, {})",
diff --git a/src/runtime/streaming/factory/operator_factory.rs b/src/runtime/streaming/factory/operator_factory.rs
index d11a1555..5a2dc26f 100644
--- a/src/runtime/streaming/factory/operator_factory.rs
+++ b/src/runtime/streaming/factory/operator_factory.rs
@@ -15,7 +15,7 @@ use anyhow::{anyhow, Result};
 use prost::Message;
 use std::collections::HashMap;
 use std::sync::Arc;
-
+use protocol::grpc::api::ProjectionOperator as ProjectionOperatorProto;
 use super::operator_constructor::OperatorConstructor;
 use crate::runtime::streaming::api::operator::ConstructedOperator;
 use crate::runtime::streaming::factory::connector::{
@@ -32,16 +32,10 @@ use crate::runtime::streaming::operators::windows::{
     SessionAggregatingWindowConstructor, SlidingAggregatingWindowConstructor,
     TumblingAggregateWindowConstructor, WindowFunctionConstructor,
 };
-use crate::runtime::streaming::operators::{
-    ProjectionOperator, StatelessPhysicalExecutor, ValueExecutionOperator,
-};
-use crate::sql::common::FsSchema;
-use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
-use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
-use datafusion_proto::protobuf::PhysicalExprNode;
+use crate::runtime::streaming::operators::{ProjectionOperator, StatelessPhysicalExecutor, ValueExecutionOperator};
 use protocol::grpc::api::{
     ExpressionWatermarkConfig, JoinOperator as JoinOperatorProto,
-    KeyPlanOperator as KeyByProto, ProjectionOperator as ProjectionOperatorProto,
+    KeyPlanOperator as KeyByProto,
     SessionWindowAggregateOperator, SlidingWindowAggregateOperator, TumblingWindowAggregateOperator,
     UpdatingAggregateOperator, ValuePlanOperator, WindowFunctionOperator as WindowFunctionProto,
 };
@@ -110,12 +104,11 @@ impl OperatorFactory {
 
         self.register_named(OperatorName::KeyBy, Box::new(KeyByBridge));
 
-        self.register_named(OperatorName::Projection, Box::new(ProjectionBridge));
+        self.register_named(OperatorName::Projection, Box::new(ProjectionConstructor));
         self.register_named(OperatorName::Value, Box::new(ValueBridge));
         self.register_named(OperatorName::ConnectorSource, Box::new(ConnectorSourceBridge));
         self.register_named(OperatorName::ConnectorSink, Box::new(ConnectorSinkBridge));
 
-        crate::runtime::streaming::factory::register_builtin_connectors(self);
         crate::runtime::streaming::factory::register_kafka_connector_plugins(self);
     }
 }
@@ -217,12 +210,12 @@ impl OperatorConstructor for KeyByBridge {
     }
 }
 
-struct ProjectionBridge;
-impl OperatorConstructor for ProjectionBridge {
-    fn with_config(&self, config: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
-        let proto = ProjectionOperatorProto::decode(config)
-            .map_err(|e| anyhow!("Decode ProjectionOperator failed: {e}"))?;
-        let op = ProjectionExecutionConstructor.with_config(proto, registry)?;
+pub struct ProjectionConstructor;
+
+impl OperatorConstructor for ProjectionConstructor {
+    fn with_config(&self, payload: &[u8], registry: Arc<Registry>) -> Result<ConstructedOperator> {
+        let proto = ProjectionOperatorProto::decode(payload)?;
+        let op = ProjectionOperator::from_proto(proto, registry)?;
         Ok(ConstructedOperator::Operator(Box::new(op)))
     }
 }
@@ -253,51 +246,6 @@ impl OperatorConstructor for ConnectorSinkBridge {
     }
 }
 
-struct ProjectionExecutionConstructor;
-impl ProjectionExecutionConstructor {
-    fn with_config(
-        &self,
-        config: ProjectionOperatorProto,
-        registry: Arc<Registry>,
-    ) -> Result<ProjectionOperator> {
-        let input_schema: FsSchema = config
-            .input_schema
-            .ok_or_else(|| anyhow!("missing projection input_schema"))?
-            .try_into()
-            .map_err(|e| anyhow!("projection input_schema: {e}"))?;
-        let output_schema: FsSchema = config
-            .output_schema
-            .ok_or_else(|| anyhow!("missing projection output_schema"))?
-            .try_into()
-            .map_err(|e| anyhow!("projection output_schema: {e}"))?;
-
-        let exprs = config
-            .exprs
-            .iter()
-            .map(|raw| {
-                let expr_node = PhysicalExprNode::decode(&mut raw.as_slice())
-                    .map_err(|e| anyhow!("decode projection expr: {e}"))?;
-                parse_physical_expr(
-                    &expr_node,
-                    registry.as_ref(),
-                    &input_schema.schema,
-                    &DefaultPhysicalExtensionCodec {},
-                )
-                .map_err(|e| anyhow!("parse projection expr: {e}"))
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        Ok(ProjectionOperator::new(
-            if config.name.is_empty() {
-                OperatorName::Projection.as_registry_key().to_string()
-            } else {
-                config.name
-            },
-            Arc::new(output_schema),
-            exprs,
-        ))
-    }
-}
 
 struct ValueExecutionConstructor;
 impl ValueExecutionConstructor {
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
index 20ecad9f..88df6457 100644
--- a/src/runtime/streaming/job/job_manager.rs
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -21,8 +21,10 @@ use tracing::{error, info, warn};
 use protocol::grpc::api::{ChainedOperator, FsProgram};
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{ConstructedOperator, MessageOperator};
-use crate::runtime::streaming::execution::runner::Pipeline;
+use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator};
+use crate::runtime::streaming::api::source::SourceOperator;
+use crate::runtime::streaming::execution::runner::{ChainedDriver, Pipeline};
+use crate::runtime::streaming::execution::source::SourceRunner;
 use crate::runtime::streaming::factory::OperatorFactory;
 use crate::runtime::streaming::job::edge_manager::EdgeManager;
 use crate::runtime::streaming::job::models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
@@ -38,6 +40,11 @@ pub struct JobManager {
     memory_pool: Arc<MemoryPool>,
 }
 
+struct PreparedChain {
+    source: Option<Box<dyn SourceOperator>>,
+    operators: Vec<Box<dyn Operator>>,
+}
+
 impl JobManager {
     pub fn new(operator_factory: Arc<OperatorFactory>, max_memory_bytes: usize) -> Self {
         Self {
@@ -76,20 +83,44 @@ impl JobManager {
                 .map(|rx| Box::pin(ReceiverStream::new(rx)) as _)
                 .collect();
 
-            let operators = self.build_operator_chain(&node.operators)?;
+            let chain = self.build_operator_chain(&node.operators)?;
+            if chain.source.is_none() && physical_inboxes.is_empty() {
+                anyhow::bail!(
+                    "Topology Error: pipeline '{}' contains no source operator and has no upstream inputs.",
+                    pipeline_id
+                );
+            }
+            if chain.source.is_some() && !physical_inboxes.is_empty() {
+                anyhow::bail!(
+                    "Topology Error: source pipeline '{}' should not have upstream inputs.",
+                    pipeline_id
+                );
+            }
 
             let (control_tx, control_rx) = mpsc::channel(64);
             let status = Arc::new(RwLock::new(PipelineStatus::Initializing));
 
-            let handle = self.spawn_pipeline_thread(
-                job_id.clone(),
-                pipeline_id,
-                operators,
-                physical_inboxes,
-                physical_outboxes,
-                control_rx,
-                Arc::clone(&status),
-            )?;
+            let handle = if let Some(source) = chain.source {
+                self.spawn_source_pipeline_thread(
+                    job_id.clone(),
+                    pipeline_id,
+                    source,
+                    chain.operators,
+                    physical_outboxes,
+                    control_rx,
+                    Arc::clone(&status),
+                )?
+            } else {
+                self.spawn_pipeline_thread(
+                    job_id.clone(),
+                    pipeline_id,
+                    chain.operators,
+                    physical_inboxes,
+                    physical_outboxes,
+                    control_rx,
+                    Arc::clone(&status),
+                )?
+            };
 
             pipelines.insert(
                 pipeline_id,
@@ -153,7 +184,8 @@ impl JobManager {
     fn build_operator_chain(
         &self,
         operator_configs: &[ChainedOperator],
-    ) -> anyhow::Result<Vec<Box<dyn MessageOperator>>> {
+    ) -> anyhow::Result<PreparedChain> {
+        let mut source: Option<Box<dyn SourceOperator>> = None;
         let mut chain = Vec::with_capacity(operator_configs.len());
 
         for op_config in operator_configs {
@@ -162,22 +194,33 @@ impl JobManager {
 
             match constructed {
                 ConstructedOperator::Operator(msg_op) => chain.push(msg_op),
-                ConstructedOperator::Source(_) => {
-                    anyhow::bail!(
-                        "Topology Error: Source operator '{}' cannot be scheduled inside a MessageOperator physical chain.",
-                        op_config.operator_name
-                    );
+                ConstructedOperator::Source(src_op) => {
+                    if source.is_some() {
+                        anyhow::bail!(
+                            "Topology Error: Multiple source operators detected in one physical chain."
+                        );
+                    }
+                    if !chain.is_empty() {
+                        anyhow::bail!(
+                            "Topology Error: Source operator '{}' cannot be scheduled inside a MessageOperator physical chain.",
+                            op_config.operator_name
+                        );
+                    }
+                    source = Some(src_op);
                 }
             }
         }
-        Ok(chain)
+        Ok(PreparedChain {
+            source,
+            operators: chain,
+        })
     }
 
     fn spawn_pipeline_thread(
         &self,
         job_id: String,
         pipeline_id: u32,
-        operators: Vec<Box<dyn MessageOperator>>,
+        operators: Vec<Box<dyn Operator>>,
         inboxes: Vec<BoxedEventStream>,
         outboxes: Vec<PhysicalSender>,
         control_rx: mpsc::Receiver<ControlCommand>,
@@ -221,6 +264,57 @@ impl JobManager {
         Ok(handle)
     }
 
+    fn spawn_source_pipeline_thread(
+        &self,
+        job_id: String,
+        pipeline_id: u32,
+        source: Box<dyn SourceOperator>,
+        operators: Vec<Box<dyn Operator>>,
+        outboxes: Vec<PhysicalSender>,
+        control_rx: mpsc::Receiver<ControlCommand>,
+        status: Arc<RwLock<PipelineStatus>>,
+    ) -> anyhow::Result<std::thread::JoinHandle<()>> {
+        let memory_pool = Arc::clone(&self.memory_pool);
+        let thread_name = format!("Task-{job_id}-{pipeline_id}");
+
+        let handle = std::thread::Builder::new()
+            .name(thread_name)
+            .spawn(move || {
+                *status.write().unwrap() = PipelineStatus::Running;
+
+                let rt = tokio::runtime::Builder::new_current_thread()
+                    .enable_all()
+                    .build()
+                    .expect("Failed to build current-thread Tokio runtime for source pipeline");
+
+                let job_id_inner = job_id.clone();
+                let execution_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+                    rt.block_on(async move {
+                        let ctx = TaskContext::new(
+                            job_id_inner,
+                            pipeline_id,
+                            0,
+                            1,
+                            outboxes,
+                            memory_pool,
+                        );
+
+                        let chain_head = ChainedDriver::build_chain(operators);
+                        let runner = SourceRunner::new(source, chain_head, ctx, control_rx);
+
+                        runner
+                            .run()
+                            .await
+                            .map_err(|e| anyhow::anyhow!("Source pipeline execution failed: {e}"))
+                    })
+                }));
+
+                Self::handle_pipeline_exit(&job_id, pipeline_id, execution_result, &status);
+            })?;
+
+        Ok(handle)
+    }
+
     fn handle_pipeline_exit(
         job_id: &str,
         pipeline_id: u32,
diff --git a/src/runtime/streaming/lib.rs b/src/runtime/streaming/lib.rs
deleted file mode 100644
index 4dd6316b..00000000
--- a/src/runtime/streaming/lib.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-//
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//! Streaming actor runtime (vendored from Arroyo `arroyo-actor-runtime`).
-
-pub mod api;
-pub mod error;
-pub mod execution;
-pub mod factory;
-pub mod job;
-pub mod memory;
-pub mod network;
-pub mod operators;
-pub mod protocol;
-
-pub use api::{
-    ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
-};
-pub use error::RunError;
-pub use execution::{
-    OperatorDrive, SourceRunner, SubtaskRunner, SOURCE_IDLE_SLEEP, WATERMARK_EMIT_INTERVAL,
-};
-pub use factory::{OperatorConstructor, OperatorFactory};
-pub use job::{JobManager, PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
-pub use memory::{MemoryPool, MemoryTicket};
-pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
-pub use protocol::{
-    CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput,
-    control_channel, merge_watermarks, watermark_strictly_advances,
-};
diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs
index 1da5f952..6fd45abd 100644
--- a/src/runtime/streaming/mod.rs
+++ b/src/runtime/streaming/mod.rs
@@ -25,7 +25,7 @@ pub mod operators;
 pub mod protocol;
 
 pub use api::{
-    ConstructedOperator, MessageOperator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
+    ConstructedOperator, Operator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
 };
 pub use error::RunError;
 pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
index 4b1af6b3..16d92fd1 100644
--- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -43,7 +43,7 @@ use protocol::grpc::api::UpdatingAggregateOperator;
 // =========================================================================
 // =========================================================================
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::factory::Registry;
 use crate::runtime::util::decode_aggregate;
 use crate::runtime::streaming::operators::{Key, UpdatingCache};
@@ -657,7 +657,7 @@ fn set_retract_metadata(metadata: ArrayRef, is_retract: Arc<BooleanArray>) -> Ar
 // =========================================================================
 
 #[async_trait::async_trait]
-impl MessageOperator for IncrementalAggregatingFunc {
+impl Operator for IncrementalAggregatingFunc {
     fn name(&self) -> &str {
         "UpdatingAggregatingFunc"
     }
diff --git a/src/runtime/streaming/operators/joins/join_instance.rs b/src/runtime/streaming/operators/joins/join_instance.rs
index ef49c323..18ed3599 100644
--- a/src/runtime/streaming/operators/joins/join_instance.rs
+++ b/src/runtime/streaming/operators/joins/join_instance.rs
@@ -29,7 +29,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::factory::Registry;
 use async_trait::async_trait;
 use protocol::grpc::api::JoinOperator;
@@ -197,7 +197,7 @@ impl InstantJoinOperator {
 }
 
 #[async_trait]
-impl MessageOperator for InstantJoinOperator {
+impl Operator for InstantJoinOperator {
     fn name(&self) -> &str {
         "InstantJoin"
     }
diff --git a/src/runtime/streaming/operators/joins/join_with_expiration.rs b/src/runtime/streaming/operators/joins/join_with_expiration.rs
index 91fd38a6..212cfaad 100644
--- a/src/runtime/streaming/operators/joins/join_with_expiration.rs
+++ b/src/runtime/streaming/operators/joins/join_with_expiration.rs
@@ -26,7 +26,7 @@ use std::time::{Duration, SystemTime};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::factory::Registry;
 use async_trait::async_trait;
 use protocol::grpc::api::JoinOperator;
@@ -180,7 +180,7 @@ impl JoinWithExpirationOperator {
 }
 
 #[async_trait]
-impl MessageOperator for JoinWithExpirationOperator {
+impl Operator for JoinWithExpirationOperator {
     fn name(&self) -> &str {
         "JoinWithExpiration"
     }
diff --git a/src/runtime/streaming/operators/key_by.rs b/src/runtime/streaming/operators/key_by.rs
index 8d0da441..edafc063 100644
--- a/src/runtime/streaming/operators/key_by.rs
+++ b/src/runtime/streaming/operators/key_by.rs
@@ -21,7 +21,7 @@ use datafusion_common::hash_utils::create_hashes;
 use std::sync::Arc;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, Watermark};
 
@@ -44,7 +44,7 @@ impl KeyByOperator {
 }
 
 #[async_trait]
-impl MessageOperator for KeyByOperator {
+impl Operator for KeyByOperator {
     fn name(&self) -> &str {
         &self.name
     }
diff --git a/src/runtime/streaming/operators/key_operator.rs b/src/runtime/streaming/operators/key_operator.rs
index 0202f924..4a3942e0 100644
--- a/src/runtime/streaming/operators/key_operator.rs
+++ b/src/runtime/streaming/operators/key_operator.rs
@@ -23,7 +23,7 @@ use futures::StreamExt;
 use std::sync::Arc;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::operators::StatelessPhysicalExecutor;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, Watermark};
@@ -47,7 +47,7 @@ impl KeyByOperator {
 }
 
 #[async_trait]
-impl MessageOperator for KeyByOperator {
+impl Operator for KeyByOperator {
     fn name(&self) -> &str {
         &self.name
     }
@@ -192,7 +192,7 @@ impl KeyExecutionOperator {
 }
 
 #[async_trait]
-impl MessageOperator for KeyExecutionOperator {
+impl Operator for KeyExecutionOperator {
     fn name(&self) -> &str {
         &self.name
     }
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
index b679f2bd..18a98830 100644
--- a/src/runtime/streaming/operators/mod.rs
+++ b/src/runtime/streaming/operators/mod.rs
@@ -19,7 +19,7 @@ pub mod source;
 pub mod watermark;
 pub mod windows;
 mod key_operator;
-mod projection;
+pub mod projection;
 mod stateless_physical_executor;
 mod value_execution;
 
diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs
index 3afb93ef..d2f54b8c 100644
--- a/src/runtime/streaming/operators/projection.rs
+++ b/src/runtime/streaming/operators/projection.rs
@@ -10,17 +10,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
-use anyhow::Result;
+use anyhow::{anyhow, Context, Result};
 use arrow_array::RecordBatch;
 use async_trait::async_trait;
 use datafusion::physical_expr::PhysicalExpr;
+use datafusion_proto::physical_plan::DefaultPhysicalExtensionCodec;
+use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
+use datafusion_proto::protobuf::PhysicalExprNode;
+use prost::Message;
 use std::sync::Arc;
 
+use protocol::grpc::api::ProjectionOperator as ProjectionOperatorProto;
+
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator};
+use crate::runtime::streaming::factory::global::Registry;
 use crate::runtime::streaming::StreamOutput;
-use crate::sql::common::{CheckpointBarrier, FsSchemaRef, Watermark};
+use crate::sql::common::{CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
+use crate::sql::logical_node::logical::OperatorName;
 
 pub struct ProjectionOperator {
     name: String,
@@ -40,10 +47,52 @@ impl ProjectionOperator {
             exprs,
         }
     }
+
+    pub fn from_proto(
+        config: ProjectionOperatorProto,
+        registry: Arc<Registry>,
+    ) -> Result<Self> {
+        let input_schema: FsSchema = config
+            .input_schema
+            .ok_or_else(|| anyhow!("missing projection input_schema"))?
+            .try_into()
+            .map_err(|e| anyhow!("projection input_schema: {e}"))?;
+
+        let output_schema: FsSchema = config
+            .output_schema
+            .ok_or_else(|| anyhow!("missing projection output_schema"))?
+            .try_into()
+            .map_err(|e| anyhow!("projection output_schema: {e}"))?;
+
+        let exprs = config
+            .exprs
+            .iter()
+            .map(|raw| {
+                let expr_node = PhysicalExprNode::decode(&mut raw.as_slice())
+                    .map_err(|e| anyhow!("decode projection expr: {e}"))?;
+                parse_physical_expr(
+                    &expr_node,
+                    registry.as_ref(),
+                    &input_schema.schema,
+                    &DefaultPhysicalExtensionCodec {},
+                )
+                    .map_err(|e| anyhow!("parse projection expr: {e}"))
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let name = if config.name.is_empty() {
+            OperatorName::Projection.as_registry_key().to_string()
+        } else {
+            config.name
+        };
+
+        Ok(Self::new(name, Arc::new(output_schema), exprs))
+
+    }
 }
 
 #[async_trait]
-impl MessageOperator for ProjectionOperator {
+impl Operator for ProjectionOperator {
     fn name(&self) -> &str {
         &self.name
     }
diff --git a/src/runtime/streaming/operators/sink/kafka/mod.rs b/src/runtime/streaming/operators/sink/kafka/mod.rs
index 9f82a4ce..4b6d48cb 100644
--- a/src/runtime/streaming/operators/sink/kafka/mod.rs
+++ b/src/runtime/streaming/operators/sink/kafka/mod.rs
@@ -27,7 +27,7 @@ use tokio::time::sleep;
 use tracing::{info, warn};
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::format::DataSerializer;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::constants::factory_operator_name;
@@ -205,7 +205,7 @@ fn row_key_bytes(batch: &RecordBatch, row: usize, col: usize) -> Option<Vec<u8>>
 // ============================================================================
 
 #[async_trait]
-impl MessageOperator for KafkaSinkOperator {
+impl Operator for KafkaSinkOperator {
     fn name(&self) -> &str {
         factory_operator_name::KAFKA_SINK
     }
diff --git a/src/runtime/streaming/operators/value_execution.rs b/src/runtime/streaming/operators/value_execution.rs
index 86596512..effdf5f6 100644
--- a/src/runtime/streaming/operators/value_execution.rs
+++ b/src/runtime/streaming/operators/value_execution.rs
@@ -17,7 +17,7 @@ use async_trait::async_trait;
 use futures::StreamExt;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::operators::StatelessPhysicalExecutor;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, Watermark};
@@ -34,7 +34,7 @@ impl ValueExecutionOperator {
 }
 
 #[async_trait]
-impl MessageOperator for ValueExecutionOperator {
+impl Operator for ValueExecutionOperator {
     fn name(&self) -> &str {
         &self.name
     }
diff --git a/src/runtime/streaming/operators/watermark/watermark_generator.rs b/src/runtime/streaming/operators/watermark/watermark_generator.rs
index b512f842..0fee4a38 100644
--- a/src/runtime/streaming/operators/watermark/watermark_generator.rs
+++ b/src/runtime/streaming/operators/watermark/watermark_generator.rs
@@ -27,7 +27,7 @@ use std::time::{Duration, SystemTime};
 use tracing::{debug, info};
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::factory::Registry;
 use async_trait::async_trait;
 use protocol::grpc::api::ExpressionWatermarkConfig;
@@ -103,7 +103,7 @@ impl WatermarkGeneratorOperator {
 }
 
 #[async_trait]
-impl MessageOperator for WatermarkGeneratorOperator {
+impl Operator for WatermarkGeneratorOperator {
     fn name(&self) -> &str {
         "ExpressionWatermarkGenerator"
     }
diff --git a/src/runtime/streaming/operators/windows/session_aggregating_window.rs b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
index d7257223..93376c4c 100644
--- a/src/runtime/streaming/operators/windows/session_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/session_aggregating_window.rs
@@ -35,7 +35,7 @@ use std::time::{Duration, SystemTime};
 use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use async_trait::async_trait;
 use crate::runtime::streaming::factory::Registry;
 use protocol::grpc::api::SessionWindowAggregateOperator;
@@ -603,7 +603,7 @@ impl SessionWindowOperator {
 }
 
 #[async_trait]
-impl MessageOperator for SessionWindowOperator {
+impl Operator for SessionWindowOperator {
     fn name(&self) -> &str {
         "SessionWindow"
     }
diff --git a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
index 7bad21bc..19a539f3 100644
--- a/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/sliding_aggregating_window.rs
@@ -34,7 +34,7 @@ use std::time::{Duration, SystemTime};
 use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use async_trait::async_trait;
 use crate::runtime::streaming::factory::Registry;
 use protocol::grpc::api::SlidingWindowAggregateOperator;
@@ -317,7 +317,7 @@ impl SlidingWindowOperator {
 }
 
 #[async_trait]
-impl MessageOperator for SlidingWindowOperator {
+impl Operator for SlidingWindowOperator {
     fn name(&self) -> &str {
         "SlidingWindow"
     }
diff --git a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
index 093823bb..c0342d66 100644
--- a/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
+++ b/src/runtime/streaming/operators/windows/tumbling_aggregating_window.rs
@@ -36,7 +36,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use async_trait::async_trait;
 use crate::runtime::streaming::factory::Registry;
 use protocol::grpc::api::TumblingWindowAggregateOperator;
@@ -145,7 +145,7 @@ impl TumblingWindowOperator {
 }
 
 #[async_trait]
-impl MessageOperator for TumblingWindowOperator {
+impl Operator for TumblingWindowOperator {
     fn name(&self) -> &str {
         "TumblingWindow"
     }
diff --git a/src/runtime/streaming/operators/windows/window_function.rs b/src/runtime/streaming/operators/windows/window_function.rs
index d067eccf..4ab68cfd 100644
--- a/src/runtime/streaming/operators/windows/window_function.rs
+++ b/src/runtime/streaming/operators/windows/window_function.rs
@@ -29,7 +29,7 @@ use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
 use tracing::warn;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::MessageOperator;
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::factory::Registry;
 use async_trait::async_trait;
 use crate::runtime::streaming::StreamOutput;
@@ -153,7 +153,7 @@ impl WindowFunctionOperator {
 }
 
 #[async_trait]
-impl MessageOperator for WindowFunctionOperator {
+impl Operator for WindowFunctionOperator {
     fn name(&self) -> &str {
         "WindowFunction"
     }
diff --git a/src/sql/common/connector_options.rs b/src/sql/common/connector_options.rs
index 98e3299e..6f82782e 100644
--- a/src/sql/common/connector_options.rs
+++ b/src/sql/common/connector_options.rs
@@ -48,6 +48,21 @@ fn sql_expr_to_catalog_string(e: &Expr) -> String {
 }
 
 impl ConnectorOptions {
+    /// Rebuilds options from a persisted catalog map, wrapping each value as a quoted SQL string.
+    pub fn from_flat_string_map(map: HashMap<String, String>) -> DFResult<Self> {
+        let options = map
+            .into_iter()
+            .map(|(key, raw)| {
+                let literal = SqlValue::SingleQuotedString(raw).with_empty_span();
+                (key, Expr::Value(literal))
+            })
+            .collect();
+        Ok(Self {
+            options,
+            partitions: Vec::new(),
+        })
+    }
+
     pub fn new(sql_opts: &[SqlOption], partition_by: &Option<Vec<Expr>>) -> DFResult<Self> {
         let mut options = HashMap::new();
 
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index 3302eb10..fa37a9fd 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -58,7 +58,7 @@ pub use kafka_catalog::{
 };
 pub use errors::{DataflowError, DataflowResult};
 pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat};
-pub use operator_config::{MetadataField, OperatorConfig, RateLimit};
+pub use operator_config::MetadataField;
 
 // ── Well-known column names ──
 pub use constants::sql_field::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
diff --git a/src/sql/common/operator_config.rs b/src/sql/common/operator_config.rs
index ba61b36a..b5360cd7 100644
--- a/src/sql/common/operator_config.rs
+++ b/src/sql/common/operator_config.rs
@@ -1,45 +1,12 @@
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
 
 use serde::{Deserialize, Serialize};
-use serde_json::Value;
-
-use super::formats::{BadData, Format, Framing};
-use super::fs_schema::FsSchema;
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct RateLimit {
-    pub messages_per_second: u32,
-}
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct MetadataField {
     pub field_name: String,
     pub key: String,
-    /// JSON-encoded Arrow DataType string, e.g. `"Utf8"`, `"Int64"`.
     #[serde(default)]
     pub data_type: Option<String>,
 }
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct OperatorConfig {
-    pub connection: Value,
-    pub table: Value,
-    pub format: Option<Format>,
-    pub bad_data: Option<BadData>,
-    pub framing: Option<Framing>,
-    pub rate_limit: Option<RateLimit>,
-    #[serde(default)]
-    pub metadata_fields: Vec<MetadataField>,
-    #[serde(default)]
-    pub input_schema: Option<FsSchema>,
-}
diff --git a/src/sql/extensions/lookup.rs b/src/sql/extensions/lookup.rs
index 684a8f97..8371efce 100644
--- a/src/sql/extensions/lookup.rs
+++ b/src/sql/extensions/lookup.rs
@@ -10,6 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::HashMap;
 use std::fmt::Formatter;
 use std::sync::Arc;
 
@@ -21,7 +22,7 @@ use datafusion_proto::physical_plan::to_proto::serialize_physical_expr;
 use prost::Message;
 
 use protocol::grpc::api;
-use protocol::grpc::api::{ConnectorOp, LookupJoinCondition, LookupJoinOperator};
+use protocol::grpc::api::{ConnectorOp, GenericConnectorConfig, LookupJoinCondition, LookupJoinOperator};
 
 use crate::multifield_partial_ord;
 use crate::sql::common::constants::extension_node;
@@ -156,13 +157,24 @@ impl StreamReferenceJoinNode {
         let lookup_fs_schema =
             FsSchema::from_schema_unkeyed(add_timestamp_field_arrow(dictionary_physical_schema))?;
 
+        let properties: HashMap<String, String> = self
+            .external_dictionary
+            .catalog_with_options
+            .iter()
+            .map(|(k, v)| (k.clone(), v.clone()))
+            .collect();
+
         Ok(LookupJoinOperator {
             input_schema: Some(internal_input_schema.into()),
-            lookup_schema: Some(lookup_fs_schema.into()),
+            lookup_schema: Some(lookup_fs_schema.clone().into()),
             connector: Some(ConnectorOp {
                 connector: self.external_dictionary.adapter_type.clone(),
-                config: self.external_dictionary.opaque_config.clone(),
+                fs_schema: Some(lookup_fs_schema.into()),
+                name: self.external_dictionary.table_identifier.clone(),
                 description: self.external_dictionary.description.clone(),
+                config: Some(protocol::grpc::api::connector_op::Config::Generic(
+                    GenericConnectorConfig { properties },
+                )),
             }),
             key_exprs: self.compile_join_conditions(planner)?,
             join_type: self.map_api_join_type()?,
diff --git a/src/sql/schema/connector_config.rs b/src/sql/schema/connector_config.rs
new file mode 100644
index 00000000..f47e05d9
--- /dev/null
+++ b/src/sql/schema/connector_config.rs
@@ -0,0 +1,103 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+//
+// Strongly-typed in-memory connector configuration for the SQL catalog layer.
+// Maps 1:1 to the `ConnectorOp.oneof config` proto variants.
+
+use std::collections::HashMap;
+
+use protocol::grpc::api::{
+    connector_op, GenericConnectorConfig, KafkaSinkConfig, KafkaSourceConfig,
+};
+
+/// Strongly-typed connector configuration stored in [`super::SourceTable`].
+///
+/// Each variant corresponds directly to a proto `ConnectorOp.oneof config` branch.
+/// Adding a new connector (e.g. MySQL CDC) means adding a variant here and a proto message —
+/// the Rust compiler will then guide you to every call-site that needs updating.
+#[derive(Debug, Clone)]
+pub enum ConnectorConfig {
+    /// Typed configuration for a Kafka source (`connector_op::Config::KafkaSource`).
+    KafkaSource(KafkaSourceConfig),
+    /// Typed configuration for a Kafka sink (`connector_op::Config::KafkaSink`).
+    KafkaSink(KafkaSinkConfig),
+    /// Fallback for connectors not yet strongly typed (e.g. future Redis, JDBC).
+    Generic(HashMap<String, String>),
+}
+
+impl ConnectorConfig {
+    /// Convert to the proto `ConnectorOp.oneof config` representation — zero JSON involved.
+    ///
+    /// Each variant's payload is cloned into the matching `connector_op::Config` variant.
+    pub fn to_proto_config(&self) -> connector_op::Config {
+        match self {
+            ConnectorConfig::KafkaSource(cfg) => {
+                connector_op::Config::KafkaSource(cfg.clone())
+            }
+            ConnectorConfig::KafkaSink(cfg) => {
+                connector_op::Config::KafkaSink(cfg.clone())
+            }
+            ConnectorConfig::Generic(props) => {
+                connector_op::Config::Generic(GenericConnectorConfig {
+                    properties: props.clone(),
+                })
+            }
+        }
+    }
+}
+
+// prost-generated messages derive `PartialEq` but not `Eq`/`Hash` here: the Kafka configs
+// carry a `HashMap` (`client_configs`), which has no `Hash` impl. Equality therefore
+// delegates to the derived field-wise comparison. Serialized bytes must not be used for
+// this: protobuf map entries are written in `HashMap` iteration order, which differs between
+// independently built maps, so logically equal configs could encode to different bytes.
+
+impl PartialEq for ConnectorConfig {
+    fn eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (ConnectorConfig::KafkaSource(a), ConnectorConfig::KafkaSource(b)) => a == b,
+            (ConnectorConfig::KafkaSink(a), ConnectorConfig::KafkaSink(b)) => a == b,
+            (ConnectorConfig::Generic(a), ConnectorConfig::Generic(b)) => a == b,
+            _ => false,
+        }
+    }
+}
+
+// NOTE(review): assumes the Kafka proto messages contain no float fields, which would
+// make `Eq` unsound — confirm against the .proto definitions.
+impl Eq for ConnectorConfig {}
+
+impl std::hash::Hash for ConnectorConfig {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        use prost::Message;
+        use std::hash::Hash;
+
+        std::mem::discriminant(self).hash(state);
+        match self {
+            ConnectorConfig::KafkaSource(cfg) => {
+                // Hash the map separately in sorted order; encode only the remaining fields.
+                let mut rest = cfg.clone();
+                rest.client_configs.clear();
+                rest.encode_to_vec().hash(state);
+                hash_string_map(&cfg.client_configs, state);
+            }
+            ConnectorConfig::KafkaSink(cfg) => {
+                let mut rest = cfg.clone();
+                rest.client_configs.clear();
+                rest.encode_to_vec().hash(state);
+                hash_string_map(&cfg.client_configs, state);
+            }
+            ConnectorConfig::Generic(props) => hash_string_map(props, state),
+        }
+    }
+}
+
+/// Feeds `map` into `state` in key-sorted order so the hash is independent of `HashMap`
+/// iteration order.
+fn hash_string_map<H: std::hash::Hasher>(map: &HashMap<String, String>, state: &mut H) {
+    use std::hash::Hash;
+
+    let mut pairs: Vec<(&String, &String)> = map.iter().collect();
+    pairs.sort_unstable();
+    pairs.hash(state);
+}
diff --git a/src/sql/schema/kafka_operator_config.rs b/src/sql/schema/kafka_operator_config.rs
new file mode 100644
index 00000000..4dd70906
--- /dev/null
+++ b/src/sql/schema/kafka_operator_config.rs
@@ -0,0 +1,285 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+//
+// Builds strongly-typed proto Kafka configs from SQL DDL WITH options.
+
+use std::collections::HashMap;
+
+use datafusion::arrow::datatypes::Schema;
+use datafusion::common::{Result as DFResult, plan_datafusion_err, plan_err};
+
+use protocol::grpc::api::connector_op::Config as ProtoConfig;
+use protocol::grpc::api::{
+    BadDataPolicy, DecimalEncodingProto, FormatConfig, JsonFormatConfig, KafkaAuthConfig,
+    KafkaAuthNone, KafkaOffsetMode, KafkaReadMode, KafkaSinkCommitMode, KafkaSinkConfig,
+    KafkaSourceConfig, RawBytesFormatConfig, RawStringFormatConfig, TimestampFormatProto,
+};
+
+use crate::sql::common::constants::{connection_table_role, kafka_with_value};
+use crate::sql::common::connector_options::ConnectorOptions;
+use crate::sql::common::formats::{
+    BadData, DecimalEncoding as SqlDecimalEncoding, Format as SqlFormat,
+    TimestampFormat as SqlTimestampFormat,
+};
+use crate::sql::common::with_option_keys as opt;
+use crate::sql::schema::table_role::TableRole;
+
+/// Translates the SQL-layer [`SqlFormat`] into the proto [`FormatConfig`].
+///
+/// JSON, raw-string and raw-bytes formats are mapped; any other format is a planning error.
+fn sql_format_to_proto(fmt: &SqlFormat) -> DFResult<FormatConfig> {
+    match fmt {
+        SqlFormat::Json(j) => Ok(FormatConfig {
+            format: Some(protocol::grpc::api::format_config::Format::Json(
+                JsonFormatConfig {
+                    timestamp_format: match j.timestamp_format {
+                        SqlTimestampFormat::RFC3339 => TimestampFormatProto::TimestampRfc3339 as i32,
+                        SqlTimestampFormat::UnixMillis => {
+                            TimestampFormatProto::TimestampUnixMillis as i32
+                        }
+                    },
+                    decimal_encoding: match j.decimal_encoding {
+                        SqlDecimalEncoding::Number => DecimalEncodingProto::DecimalNumber as i32,
+                        SqlDecimalEncoding::String => DecimalEncodingProto::DecimalString as i32,
+                        SqlDecimalEncoding::Bytes => DecimalEncodingProto::DecimalBytes as i32,
+                    },
+                    include_schema: j.include_schema,
+                    confluent_schema_registry: j.confluent_schema_registry,
+                    schema_id: j.schema_id,
+                    debezium: j.debezium,
+                    unstructured: j.unstructured,
+                },
+            )),
+        }),
+        SqlFormat::RawString(_) => Ok(FormatConfig {
+            format: Some(protocol::grpc::api::format_config::Format::RawString(
+                RawStringFormatConfig {},
+            )),
+        }),
+        SqlFormat::RawBytes(_) => Ok(FormatConfig {
+            format: Some(protocol::grpc::api::format_config::Format::RawBytes(
+                RawBytesFormatConfig {},
+            )),
+        }),
+        other => plan_err!(
+            "Kafka connector: format '{}' is not supported yet",
+            other.name()
+        ),
+    }
+}
+
+/// Maps the SQL-layer bad-data policy to its proto enum value.
+fn sql_bad_data_to_proto(bad: &BadData) -> i32 {
+    match bad {
+        BadData::Fail {} => BadDataPolicy::BadDataFail as i32,
+        BadData::Drop {} => BadDataPolicy::BadDataDrop as i32,
+    }
+}
+
+/// Pulls `key` and its legacy alias `legacy_key`, preferring `key` when both are set.
+///
+/// Both keys are always removed from `options`, so a redundant legacy alias is not swept
+/// into `client_configs` later by `drain_remaining_string_values`.
+fn pull_opt_str_with_legacy(
+    options: &mut ConnectorOptions,
+    key: &str,
+    legacy_key: &str,
+) -> DFResult<Option<String>> {
+    let primary = options.pull_opt_str(key)?;
+    let legacy = options.pull_opt_str(legacy_key)?;
+    Ok(primary.or(legacy))
+}
+
+/// Build Kafka proto config from a flat string map (catalog rebuild path).
+///
+/// Format, bad-data policy, framing and the table role (`opt::TYPE`, defaulting to ingestion
+/// when absent) are pulled first; the rest goes through [`build_kafka_proto_config`].
+/// The parsed framing and `_physical_schema` are currently unused.
+pub fn build_kafka_proto_config_from_string_map(
+    map: HashMap<String, String>,
+    _physical_schema: &Schema,
+) -> DFResult<ProtoConfig> {
+    let mut options = ConnectorOptions::from_flat_string_map(map)?;
+    let format = crate::sql::common::formats::Format::from_opts(&mut options)
+        .map_err(|e| datafusion::error::DataFusionError::Plan(format!("invalid format: {e}")))?;
+    let bad_data = BadData::from_opts(&mut options)
+        .map_err(|e| datafusion::error::DataFusionError::Plan(format!("Invalid bad_data: '{e}'")))?;
+    let _framing = crate::sql::common::formats::Framing::from_opts(&mut options)
+        .map_err(|e| datafusion::error::DataFusionError::Plan(format!("invalid framing: '{e}'")))?;
+
+    let role = match options.pull_opt_str(opt::TYPE)?.as_deref() {
+        None | Some(connection_table_role::SOURCE) => TableRole::Ingestion,
+        Some(connection_table_role::SINK) => TableRole::Egress,
+        Some(connection_table_role::LOOKUP) => TableRole::Reference,
+        Some(other) => {
+            return plan_err!("invalid connection type '{other}' in WITH options");
+        }
+    };
+
+    build_kafka_proto_config(&mut options, role, &format, bad_data)
+}
+
+/// Core builder shared by SQL DDL and catalog reload paths.
+///
+/// Consumes the Kafka-specific keys from `options` and returns a `KafkaSource` config for
+/// [`TableRole::Ingestion`] or a `KafkaSink` config for [`TableRole::Egress`]. Every option
+/// still left afterwards is forwarded as `client_configs`. `bad_data` and the rate limit
+/// are only carried by the source config.
+///
+/// # Errors
+/// Fails when the bootstrap servers, `topic` or `format` are missing, when an enumerated
+/// option has an unrecognized value, or when `role` is [`TableRole::Reference`].
+pub fn build_kafka_proto_config(
+    options: &mut ConnectorOptions,
+    role: TableRole,
+    format: &Option<SqlFormat>,
+    bad_data: BadData,
+) -> DFResult<ProtoConfig> {
+    let bootstrap_servers = pull_opt_str_with_legacy(
+        options,
+        opt::KAFKA_BOOTSTRAP_SERVERS,
+        opt::KAFKA_BOOTSTRAP_SERVERS_LEGACY,
+    )?
+    .ok_or_else(|| {
+        plan_datafusion_err!("Kafka connector requires 'bootstrap.servers' in the WITH clause")
+    })?;
+
+    let topic = options
+        .pull_opt_str(opt::KAFKA_TOPIC)?
+        .ok_or_else(|| plan_datafusion_err!("Kafka connector requires 'topic' in the WITH clause"))?;
+
+    let sql_format = format.clone().ok_or_else(|| {
+        plan_datafusion_err!(
+            "Kafka connector requires 'format' in the WITH clause (e.g. format = 'json')"
+        )
+    })?;
+    let proto_format = sql_format_to_proto(&sql_format)?;
+
+    // An explicit limit is clamped into 1..=u32::MAX; 0 is emitted only when none is given.
+    let rate_limit = options
+        .pull_opt_u64(opt::KAFKA_RATE_LIMIT_MESSAGES_PER_SECOND)?
+        .map(|v| v.clamp(1, u32::MAX as u64) as u32)
+        .unwrap_or(0);
+
+    let value_subject = options.pull_opt_str(opt::KAFKA_VALUE_SUBJECT)?;
+
+    // Only unauthenticated connections are configured here.
+    let auth = Some(KafkaAuthConfig {
+        auth: Some(protocol::grpc::api::kafka_auth_config::Auth::None(
+            KafkaAuthNone {},
+        )),
+    });
+
+    // Discard routing keys so they are not forwarded to the Kafka client as `client_configs`.
+    let _ = options.pull_opt_str(opt::TYPE)?;
+    let _ = options.pull_opt_str(opt::CONNECTOR)?;
+
+    match role {
+        TableRole::Ingestion => {
+            let offset_mode = match options.pull_opt_str(opt::KAFKA_SCAN_STARTUP_MODE)?.as_deref() {
+                Some(s) if s == kafka_with_value::SCAN_LATEST => {
+                    KafkaOffsetMode::KafkaOffsetLatest as i32
+                }
+                Some(s) if s == kafka_with_value::SCAN_EARLIEST => {
+                    KafkaOffsetMode::KafkaOffsetEarliest as i32
+                }
+                Some(s)
+                    if s == kafka_with_value::SCAN_GROUP_OFFSETS
+                        || s == kafka_with_value::SCAN_GROUP =>
+                {
+                    KafkaOffsetMode::KafkaOffsetGroup as i32
+                }
+                None => KafkaOffsetMode::KafkaOffsetGroup as i32,
+                Some(other) => {
+                    return plan_err!(
+                        "invalid scan.startup.mode '{other}'; expected latest, earliest, or group-offsets"
+                    );
+                }
+            };
+
+            let read_mode = match options.pull_opt_str(opt::KAFKA_ISOLATION_LEVEL)?.as_deref() {
+                Some(s) if s == kafka_with_value::ISOLATION_READ_COMMITTED => {
+                    KafkaReadMode::KafkaReadCommitted as i32
+                }
+                Some(s) if s == kafka_with_value::ISOLATION_READ_UNCOMMITTED => {
+                    KafkaReadMode::KafkaReadUncommitted as i32
+                }
+                None => KafkaReadMode::KafkaReadDefault as i32,
+                Some(other) => {
+                    return plan_err!("invalid isolation.level '{other}'");
+                }
+            };
+
+            let group_id = pull_opt_str_with_legacy(
+                options,
+                opt::KAFKA_GROUP_ID,
+                opt::KAFKA_GROUP_ID_LEGACY,
+            )?;
+            let group_id_prefix = options.pull_opt_str(opt::KAFKA_GROUP_ID_PREFIX)?;
+
+            let client_configs = options.drain_remaining_string_values()?;
+
+            Ok(ProtoConfig::KafkaSource(KafkaSourceConfig {
+                topic,
+                bootstrap_servers,
+                group_id,
+                group_id_prefix,
+                offset_mode,
+                read_mode,
+                auth,
+                client_configs,
+                format: Some(proto_format),
+                bad_data_policy: sql_bad_data_to_proto(&bad_data),
+                rate_limit_msgs_per_sec: rate_limit,
+                value_subject,
+            }))
+        }
+        TableRole::Egress => {
+            let commit_mode = match options.pull_opt_str(opt::KAFKA_SINK_COMMIT_MODE)?.as_deref() {
+                Some(s)
+                    if s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_HYPHEN
+                        || s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_UNDERSCORE =>
+                {
+                    KafkaSinkCommitMode::KafkaSinkExactlyOnce as i32
+                }
+                None => KafkaSinkCommitMode::KafkaSinkAtLeastOnce as i32,
+                Some(s)
+                    if s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_HYPHEN
+                        || s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_UNDERSCORE =>
+                {
+                    KafkaSinkCommitMode::KafkaSinkAtLeastOnce as i32
+                }
+                Some(other) => {
+                    return plan_err!("invalid sink.commit.mode '{other}'");
+                }
+            };
+            let key_field = pull_opt_str_with_legacy(
+                options,
+                opt::KAFKA_SINK_KEY_FIELD,
+                opt::KAFKA_KEY_FIELD_LEGACY,
+            )?;
+            let timestamp_field = pull_opt_str_with_legacy(
+                options,
+                opt::KAFKA_SINK_TIMESTAMP_FIELD,
+                opt::KAFKA_TIMESTAMP_FIELD_LEGACY,
+            )?;
+
+            let client_configs = options.drain_remaining_string_values()?;
+
+            Ok(ProtoConfig::KafkaSink(KafkaSinkConfig {
+                topic,
+                bootstrap_servers,
+                commit_mode,
+                key_field,
+                timestamp_field,
+                auth,
+                client_configs,
+                format: Some(proto_format),
+                value_subject,
+            }))
+        }
+        TableRole::Reference => {
+            plan_err!("Kafka connector cannot be used as a lookup table in this path")
+        }
+    }
+}
diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs
index b3ec5e09..b052aa68 100644
--- a/src/sql/schema/mod.rs
+++ b/src/sql/schema/mod.rs
@@ -13,6 +13,8 @@
 pub mod catalog_ddl;
 pub mod column_descriptor;
 pub mod connection_type;
+pub mod connector_config;
+pub mod kafka_operator_config;
 pub mod source_table;
 pub mod data_encoding_format;
 pub mod schema_context;
@@ -29,6 +31,7 @@ pub use catalog_ddl::{
 };
 pub use column_descriptor::ColumnDescriptor;
 pub use connection_type::ConnectionType;
+pub use connector_config::ConnectorConfig;
 pub use source_table::{SourceOperator, SourceTable};
 
 /// Back-compat alias for [`SourceTable`].
diff --git a/src/sql/schema/source_table.rs b/src/sql/schema/source_table.rs
index d10e39dc..fe4411dd 100644
--- a/src/sql/schema/source_table.rs
+++ b/src/sql/schema/source_table.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::{BTreeMap, HashMap, HashSet};
+use std::collections::{BTreeMap, HashMap};
 use std::sync::Arc;
 use std::time::Duration;
 
@@ -26,6 +26,7 @@ use protocol::grpc::api::ConnectorOp;
 use tracing::warn;
 
 use super::column_descriptor::ColumnDescriptor;
+use super::connector_config::ConnectorConfig;
 use super::data_encoding_format::DataEncodingFormat;
 use super::schema_context::SchemaContext;
 use super::table_execution_unit::{EngineDescriptor, SyncMode, TableExecutionUnit};
@@ -36,19 +37,16 @@ use super::table_role::{
 use super::temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineConfig, TemporalSpec};
 use super::StreamSchemaProvider;
 use crate::multifield_partial_ord;
-use crate::sql::api::{ConnectionProfile, ConnectionSchema, SourceField};
+use crate::sql::api::ConnectionProfile;
 use crate::sql::common::constants::{
-    connection_table_role, connector_type, kafka_with_value, sql_field,
+    connection_table_role, connector_type, sql_field,
 };
 use crate::sql::common::connector_options::ConnectorOptions;
-use crate::sql::common::kafka_catalog::{
-    KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode,
-    SinkCommitMode, TableType as KafkaTableType,
-};
 use crate::sql::common::with_option_keys as opt;
 use crate::sql::common::{
-    BadData, Format, Framing, FsSchema, JsonCompression, JsonFormat, OperatorConfig, RateLimit,
+    BadData, Format, Framing, FsSchema, JsonCompression, JsonFormat,
 };
+use crate::sql::schema::kafka_operator_config::build_kafka_proto_config;
 use crate::sql::schema::ConnectionType;
 use crate::sql::schema::table::SqlSource;
 use crate::sql::types::ProcessingMode;
@@ -61,8 +59,8 @@ pub struct SourceTable {
     pub table_identifier: String,
     pub role: TableRole,
     pub schema_specs: Vec<ColumnDescriptor>,
-    /// Serialized runtime payload (e.g. JSON: connector + `connection_schema` + options).
-    pub opaque_config: String,
+    /// Strongly-typed connector runtime configuration — replaces the legacy `opaque_config: String`.
+    pub connector_config: ConnectorConfig,
     pub temporal_config: TemporalPipelineConfig,
     pub key_constraints: Vec<String>,
     pub payload_format: Option<DataEncodingFormat>,
@@ -73,7 +71,7 @@ pub struct SourceTable {
     pub lookup_cache_max_bytes: Option<u64>,
     pub lookup_cache_ttl: Option<Duration>,
     pub inferred_fields: Option<Vec<FieldRef>>,
-    /// Original `WITH` options for catalog / `SHOW CREATE TABLE` (snapshot at DDL parse time).
+    /// Original `WITH` options for catalog persistence / `SHOW CREATE TABLE`.
     pub catalog_with_options: BTreeMap<String, String>,
 }
 
@@ -83,7 +81,6 @@ multifield_partial_ord!(
     adapter_type,
     table_identifier,
     role,
-    opaque_config,
     description,
     key_constraints,
     connection_format,
@@ -107,7 +104,7 @@ impl SourceTable {
             table_identifier: table_identifier.into(),
             role: connection_type.into(),
             schema_specs: Vec::new(),
-            opaque_config: String::new(),
+            connector_config: ConnectorConfig::Generic(HashMap::new()),
             temporal_config: TemporalPipelineConfig::default(),
             key_constraints: Vec::new(),
             payload_format: None,
@@ -181,7 +178,7 @@ impl SourceTable {
         refined_columns = encoding.apply_envelope(refined_columns)?;
 
         let temporal_settings = resolve_temporal_logic(&refined_columns, time_meta)?;
-        let finalized_config = serialize_backend_params(adapter, options)?;
+        let _finalized_config = serialize_backend_params(adapter, options)?;
         let role = deduce_role(options)?;
 
         if role == TableRole::Ingestion && encoding.supports_delta_updates() && pk_list.is_empty() {
@@ -194,7 +191,7 @@ impl SourceTable {
             table_identifier: id.to_string(),
             role,
             schema_specs: refined_columns,
-            opaque_config: finalized_config,
+            connector_config: ConnectorConfig::Generic(catalog_with_options.clone().into_iter().collect()),
             temporal_config: temporal_settings,
             key_constraints: pk_list,
             payload_format: Some(encoding),
@@ -242,7 +239,7 @@ impl SourceTable {
             label: self.table_identifier.clone(),
             engine_meta: EngineDescriptor {
                 engine_type: self.adapter_type.clone(),
-                raw_payload: self.opaque_config.clone(),
+                raw_payload: String::new(),
             },
             sync_mode: mode,
             temporal_offset: self.temporal_config.clone(),
@@ -288,7 +285,6 @@ impl SourceTable {
 
         validate_adapter_availability(connector_name)?;
 
-        let inferred_empty = fields.is_empty();
         let mut columns = fields;
         columns = apply_adapter_specific_rules(connector_name, columns);
 
@@ -302,7 +298,7 @@ impl SourceTable {
             return plan_err!("'json.compression' is only supported for the filesystem connector");
         }
 
-        let framing = Framing::from_opts(options)
+        let _framing = Framing::from_opts(options)
             .map_err(|e| DataFusionError::Plan(format!("invalid framing: '{e}'")))?;
 
         if temporary
@@ -318,38 +314,9 @@ impl SourceTable {
         let encoding = payload_format.unwrap_or(DataEncodingFormat::Raw);
         columns = encoding.apply_envelope(columns)?;
 
-        let schema_fields: Vec<SourceField> = columns
-            .iter()
-            .filter(|c| !c.is_computed())
-            .map(|c| {
-                let mut sf: SourceField = c.arrow_field().clone().try_into().map_err(|_| {
-                    DataFusionError::Plan(format!(
-                        "field '{}' has a type '{:?}' that cannot be used in a connection table",
-                        c.arrow_field().name(),
-                        c.arrow_field().data_type()
-                    ))
-                })?;
-                if let Some(key) = c.system_meta_key() {
-                    sf.metadata_key = Some(key.to_string());
-                }
-                Ok(sf)
-            })
-            .collect::<Result<_>>()?;
-
         let bad_data = BadData::from_opts(options)
             .map_err(|e| DataFusionError::Plan(format!("Invalid bad_data: '{e}'")))?;
 
-        let connection_schema = ConnectionSchema::try_new(
-            format.clone(),
-            Some(bad_data.clone()),
-            framing.clone(),
-            schema_fields,
-            None,
-            Some(inferred_empty),
-            primary_keys.iter().cloned().collect::<HashSet<_>>(),
-        )
-        .map_err(|e| DataFusionError::Plan(format!("could not create connection schema: {e}")))?;
-
         let role = if let Some(t) = connection_type_override {
             t.into()
         } else {
@@ -369,7 +336,7 @@ impl SourceTable {
             table_identifier: table_identifier.to_string(),
             role,
             schema_specs: columns,
-            opaque_config: String::new(),
+            connector_config: ConnectorConfig::Generic(HashMap::new()),
             temporal_config: TemporalPipelineConfig::default(),
             key_constraints: Vec::new(),
             payload_format,
@@ -466,36 +433,21 @@ impl SourceTable {
         table.lookup_cache_ttl = options.pull_opt_duration(opt::LOOKUP_CACHE_TTL)?;
 
         if connector_name.eq_ignore_ascii_case(connector_type::KAFKA) {
-            let physical = table.produce_physical_schema();
-            let op_cfg = wire_kafka_operator_config(
-                options,
-                role,
-                &physical,
-                &format,
-                bad_data,
-                framing,
-            )?;
-            table.opaque_config = serde_json::to_string(&op_cfg).map_err(|e| {
-                DataFusionError::Plan(format!("failed to serialize Kafka OperatorConfig: {e}"))
-            })?;
+            let proto_cfg = build_kafka_proto_config(options, role, &format, bad_data)?;
+            table.connector_config = match proto_cfg {
+                protocol::grpc::api::connector_op::Config::KafkaSource(cfg) => {
+                    ConnectorConfig::KafkaSource(cfg)
+                }
+                protocol::grpc::api::connector_op::Config::KafkaSink(cfg) => {
+                    ConnectorConfig::KafkaSink(cfg)
+                }
+                protocol::grpc::api::connector_op::Config::Generic(g) => {
+                    ConnectorConfig::Generic(g.properties)
+                }
+            };
         } else {
             let extra_opts = options.drain_remaining_string_values()?;
-            let mut map = serde_json::Map::new();
-            map.insert(
-                opt::CONNECTOR.to_string(),
-                serde_json::Value::String(connector_name.to_string()),
-            );
-            let schema_val = serde_json::to_value(&connection_schema).map_err(|e| {
-                DataFusionError::Plan(format!("failed to serialize connection schema: {e}"))
-            })?;
-            map.insert(opt::CONNECTION_SCHEMA.to_string(), schema_val);
-            for (k, v) in extra_opts {
-                map.insert(k, serde_json::Value::String(v));
-            }
-            let config_root = serde_json::Value::Object(map);
-            table.opaque_config = serde_json::to_string(&config_root).map_err(|e| {
-                DataFusionError::Plan(format!("failed to serialize connector config: {e}"))
-            })?;
+            table.connector_config = ConnectorConfig::Generic(extra_opts);
         }
 
         if role == TableRole::Ingestion && encoding.supports_delta_updates() && primary_keys.is_empty()
@@ -519,11 +471,25 @@ impl SourceTable {
             || self.payload_format == Some(DataEncodingFormat::DebeziumJson)
     }
 
+    /// Build strongly-typed `ConnectorOp` protobuf for runtime operator construction.
+    ///
+    /// Directly maps the in-memory [`ConnectorConfig`] to the proto `oneof config` — zero JSON,
+    /// zero re-parsing.
     pub fn connector_op(&self) -> ConnectorOp {
+        let physical = self.produce_physical_schema();
+        let fields: Vec<Field> = physical
+            .fields()
+            .iter()
+            .map(|f| f.as_ref().clone())
+            .collect();
+        let fs_schema = FsSchema::from_fields(fields);
+
         ConnectorOp {
             connector: self.adapter_type.clone(),
-            config: self.opaque_config.clone(),
+            fs_schema: Some(fs_schema.into()),
+            name: self.table_identifier.clone(),
             description: self.description.clone(),
+            config: Some(self.connector_config.to_proto_config()),
         }
     }
 
@@ -607,168 +573,6 @@ impl SourceTable {
     }
 }
 
-/// Kafka: runtime [`KafkaSourceDispatcher`] / [`KafkaSinkDispatcher`] expect [`OperatorConfig`] JSON,
-/// not the legacy `{ connector, connection_schema, ... }` blob used by other adapters.
-fn wire_kafka_operator_config(
-    options: &mut ConnectorOptions,
-    role: TableRole,
-    physical_schema: &Schema,
-    format: &Option<Format>,
-    bad_data: BadData,
-    framing: Option<Framing>,
-) -> Result<OperatorConfig> {
-    let bootstrap_servers = match options.pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS)? {
-        Some(s) => s,
-        None => options
-            .pull_opt_str(opt::KAFKA_BOOTSTRAP_SERVERS_LEGACY)?
-            .ok_or_else(|| {
-                plan_datafusion_err!(
-                    "Kafka connector requires 'bootstrap.servers' in the WITH clause"
-                )
-            })?,
-    };
-
-    let topic = options
-        .pull_opt_str(opt::KAFKA_TOPIC)?
-        .ok_or_else(|| plan_datafusion_err!("Kafka connector requires 'topic' in the WITH clause"))?;
-
-    let sql_format = format.clone().ok_or_else(|| {
-        plan_datafusion_err!(
-            "Kafka connector requires 'format' in the WITH clause (e.g. format = 'json')"
-        )
-    })?;
-
-    let rate_limit = options
-        .pull_opt_u64(opt::KAFKA_RATE_LIMIT_MESSAGES_PER_SECOND)?
-        .map(|v| RateLimit {
-            messages_per_second: v.clamp(1, u32::MAX as u64) as u32,
-        });
-
-    let value_subject = options.pull_opt_str(opt::KAFKA_VALUE_SUBJECT)?;
-
-    let kind = match role {
-        TableRole::Ingestion => {
-            let offset = match options.pull_opt_str(opt::KAFKA_SCAN_STARTUP_MODE)?.as_deref() {
-                Some(s) if s == kafka_with_value::SCAN_LATEST => KafkaTableSourceOffset::Latest,
-                Some(s) if s == kafka_with_value::SCAN_EARLIEST => KafkaTableSourceOffset::Earliest,
-                Some(s)
-                    if s == kafka_with_value::SCAN_GROUP_OFFSETS
-                        || s == kafka_with_value::SCAN_GROUP =>
-                {
-                    KafkaTableSourceOffset::Group
-                }
-                None => KafkaTableSourceOffset::Group,
-                Some(other) => {
-                    return plan_err!(
-                        "invalid scan.startup.mode '{other}'; expected latest, earliest, or group-offsets"
-                    );
-                }
-            };
-            let read_mode = match options.pull_opt_str(opt::KAFKA_ISOLATION_LEVEL)?.as_deref() {
-                Some(s) if s == kafka_with_value::ISOLATION_READ_COMMITTED => {
-                    Some(ReadMode::ReadCommitted)
-                }
-                Some(s) if s == kafka_with_value::ISOLATION_READ_UNCOMMITTED => {
-                    Some(ReadMode::ReadUncommitted)
-                }
-                None => None,
-                Some(other) => {
-                    return plan_err!("invalid isolation.level '{other}'");
-                }
-            };
-            let group_id = match options.pull_opt_str(opt::KAFKA_GROUP_ID)? {
-                Some(s) => Some(s),
-                None => options.pull_opt_str(opt::KAFKA_GROUP_ID_LEGACY)?,
-            };
-            let group_id_prefix = options.pull_opt_str(opt::KAFKA_GROUP_ID_PREFIX)?;
-            KafkaTableType::Source {
-                offset,
-                read_mode,
-                group_id,
-                group_id_prefix,
-            }
-        }
-        TableRole::Egress => {
-            let commit_mode = match options.pull_opt_str(opt::KAFKA_SINK_COMMIT_MODE)?.as_deref() {
-                Some(s)
-                    if s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_HYPHEN
-                        || s == kafka_with_value::SINK_COMMIT_EXACTLY_ONCE_UNDERSCORE =>
-                {
-                    SinkCommitMode::ExactlyOnce
-                }
-                None => SinkCommitMode::AtLeastOnce,
-                Some(s)
-                    if s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_HYPHEN
-                        || s == kafka_with_value::SINK_COMMIT_AT_LEAST_ONCE_UNDERSCORE =>
-                {
-                    SinkCommitMode::AtLeastOnce
-                }
-                Some(other) => {
-                    return plan_err!("invalid sink.commit.mode '{other}'");
-                }
-            };
-            let key_field = match options.pull_opt_str(opt::KAFKA_SINK_KEY_FIELD)? {
-                Some(s) => Some(s),
-                None => options.pull_opt_str(opt::KAFKA_KEY_FIELD_LEGACY)?,
-            };
-            let timestamp_field = match options.pull_opt_str(opt::KAFKA_SINK_TIMESTAMP_FIELD)? {
-                Some(s) => Some(s),
-                None => options.pull_opt_str(opt::KAFKA_TIMESTAMP_FIELD_LEGACY)?,
-            };
-            KafkaTableType::Sink {
-                commit_mode,
-                key_field,
-                timestamp_field,
-            }
-        }
-        TableRole::Reference => {
-            return plan_err!("Kafka connector cannot be used as a lookup table in this path");
-        }
-    };
-
-    // Role already decided; keep these out of librdkafka `connection_properties`.
-    let _ = options.pull_opt_str(opt::TYPE)?;
-    let _ = options.pull_opt_str(opt::CONNECTOR)?;
-
-    let connection_properties = options.drain_remaining_string_values()?;
-
-    let kafka_connection = KafkaConfig {
-        bootstrap_servers,
-        authentication: KafkaConfigAuthentication::None,
-        schema_registry_enum: None,
-        connection_properties,
-    };
-
-    let kafka_table = KafkaTable {
-        topic,
-        kind,
-        client_configs: HashMap::new(),
-        value_subject,
-    };
-
-    let fields: Vec<Field> = physical_schema
-        .fields()
-        .iter()
-        .map(|f| f.as_ref().clone())
-        .collect();
-    let input_schema = FsSchema::from_fields(fields);
-
-    Ok(OperatorConfig {
-        connection: serde_json::to_value(&kafka_connection).map_err(|e| {
-            DataFusionError::Plan(format!("Kafka connection serialization failed: {e}"))
-        })?,
-        table: serde_json::to_value(&kafka_table).map_err(|e| {
-            DataFusionError::Plan(format!("Kafka table serialization failed: {e}"))
-        })?,
-        format: Some(sql_format),
-        bad_data: Some(bad_data),
-        framing,
-        rate_limit,
-        metadata_fields: vec![],
-        input_schema: Some(input_schema),
-    })
-}
-
 /// Plan a SQL scalar expression against a table-qualified schema (e.g. watermark `AS` clause).
 fn plan_generating_expr(
     ast: &ast::Expr,
diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs
index fc6a16f8..086b206b 100644
--- a/src/storage/stream_catalog/manager.rs
+++ b/src/storage/stream_catalog/manager.rs
@@ -225,26 +225,20 @@ impl CatalogManager {
                 let mut opts = source.catalog_with_options().clone();
                 opts.entry("connector".to_string())
                     .or_insert_with(|| source.connector().to_string());
+                let catalog_row = pb::CatalogSourceTable {
+                    arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new(
+                        source.produce_physical_schema(),
+                    ))?,
+                    event_time_field: source.event_time_field().map(str::to_string),
+                    watermark_field: source.stream_catalog_watermark_field(),
+                    with_options: opts.into_iter().collect(),
+                    connector: source.connector().to_string(),
+                    description: source.description.clone(),
+                };
                 if matches!(table, CatalogTable::LookupTable(_)) {
-                    table_definition::TableType::LookupTable(pb::CatalogSourceTable {
-                        arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new(
-                            source.produce_physical_schema(),
-                        ))?,
-                        event_time_field: source.event_time_field().map(str::to_string),
-                        watermark_field: source.stream_catalog_watermark_field(),
-                        with_options: opts.into_iter().collect(),
-                        connector: source.connector().to_string(),
-                    })
+                    table_definition::TableType::LookupTable(catalog_row)
                 } else {
-                    table_definition::TableType::ConnectorTable(pb::CatalogSourceTable {
-                        arrow_schema_ipc: CatalogCodec::encode_schema(&Arc::new(
-                            source.produce_physical_schema(),
-                        ))?,
-                        event_time_field: source.event_time_field().map(str::to_string),
-                        watermark_field: source.stream_catalog_watermark_field(),
-                        with_options: opts.into_iter().collect(),
-                        connector: source.connector().to_string(),
-                    })
+                    table_definition::TableType::ConnectorTable(catalog_row)
                 }
             }
             CatalogTable::TableFromQuery { name, .. } => return plan_err!(
@@ -275,7 +269,15 @@ impl CatalogManager {
         } else {
             source_row.connector.clone()
         };
-        let mut source = SourceTable::new(table_name, connector, ConnectionType::Source);
+        let mut source = SourceTable::new(
+            table_name,
+            connector,
+            if as_lookup {
+                ConnectionType::Lookup
+            } else {
+                ConnectionType::Source
+            },
+        );
         let schema = CatalogCodec::decode_schema(&source_row.arrow_schema_ipc)?;
         source.schema_specs = schema
             .fields()
@@ -288,6 +290,34 @@ impl CatalogManager {
             .watermark_field
             .filter(|w| w != sql_field::COMPUTED_WATERMARK);
         source.catalog_with_options = source_row.with_options.into_iter().collect();
+        source.description = source_row.description;
+
+        // Rebuild strongly-typed ConnectorConfig from persisted WITH options.
+        if source.connector().eq_ignore_ascii_case("kafka") {
+            use crate::sql::schema::kafka_operator_config::build_kafka_proto_config_from_string_map;
+            use crate::sql::schema::ConnectorConfig;
+            let opts_map: std::collections::HashMap<String, String> =
+                source.catalog_with_options.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
+            let physical = source.produce_physical_schema();
+            if let Ok(proto_cfg) = build_kafka_proto_config_from_string_map(opts_map, &physical) {
+                source.connector_config = match proto_cfg {
+                    protocol::grpc::api::connector_op::Config::KafkaSource(cfg) => {
+                        ConnectorConfig::KafkaSource(cfg)
+                    }
+                    protocol::grpc::api::connector_op::Config::KafkaSink(cfg) => {
+                        ConnectorConfig::KafkaSink(cfg)
+                    }
+                    protocol::grpc::api::connector_op::Config::Generic(g) => {
+                        ConnectorConfig::Generic(g.properties)
+                    }
+                };
+            }
+        } else {
+            use crate::sql::schema::ConnectorConfig;
+            source.connector_config = ConnectorConfig::Generic(
+                source.catalog_with_options.iter().map(|(k, v)| (k.clone(), v.clone())).collect(),
+            );
+        }
 
         if as_lookup {
             Ok(CatalogTable::LookupTable(source))

From 249bd5dce4d9c2553b906595f40e811e7808d8e3 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 00:21:02 +0800
Subject: [PATCH 36/44] update

---
 src/coordinator/mod.rs                        |   1 -
 src/runtime/streaming/api/mod.rs              |   3 -
 src/runtime/streaming/execution/mod.rs        |   2 -
 .../streaming/execution/tracker/mod.rs        |   2 -
 src/runtime/streaming/factory/mod.rs          |   1 -
 src/runtime/streaming/format/mod.rs           |   1 -
 src/runtime/streaming/job/mod.rs              |   1 -
 src/runtime/streaming/mod.rs                  |  13 +-
 src/runtime/streaming/network/mod.rs          |   2 -
 .../grouping/incremental_aggregate.rs         | 122 +-----------------
 .../streaming/operators/grouping/mod.rs       |   2 +-
 src/runtime/streaming/operators/joins/mod.rs  |   4 +-
 src/runtime/streaming/operators/mod.rs        |  11 +-
 src/runtime/streaming/operators/projection.rs |   4 +-
 src/runtime/streaming/operators/sink/mod.rs   |   1 -
 src/runtime/streaming/operators/source/mod.rs |   1 -
 .../streaming/operators/watermark/mod.rs      |   2 +-
 .../streaming/operators/windows/mod.rs        |   8 +-
 src/runtime/streaming/protocol/mod.rs         |   6 -
 src/server/mod.rs                             |   2 +-
 src/sql/analysis/join_rewriter.rs             |   2 +-
 src/sql/analysis/mod.rs                       |  24 +---
 src/sql/analysis/sink_input_rewriter.rs       |   2 +-
 src/sql/analysis/source_rewriter.rs           |   1 -
 src/sql/analysis/window_function_rewriter.rs  |   2 +-
 src/sql/api/mod.rs                            |   9 +-
 src/sql/common/mod.rs                         |  22 +---
 src/sql/logical_node/logical/mod.rs           |   2 -
 src/sql/logical_node/logical/operator_name.rs |   2 +-
 src/sql/mod.rs                                |   2 -
 src/sql/physical/mod.rs                       |   2 +-
 src/sql/schema/mod.rs                         |  14 +-
 src/sql/types/mod.rs                          |   4 +-
 src/storage/stream_catalog/mod.rs             |   2 +-
 34 files changed, 35 insertions(+), 244 deletions(-)

diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs
index 23cd2925..ca384a90 100644
--- a/src/coordinator/mod.rs
+++ b/src/coordinator/mod.rs
@@ -22,7 +22,6 @@ mod statement;
 mod tool;
 
 pub use coordinator::Coordinator;
-pub use runtime_context::CoordinatorRuntimeContext;
 pub use dataset::{DataSet, ShowFunctionsResult};
 pub use statement::{
     CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
diff --git a/src/runtime/streaming/api/mod.rs b/src/runtime/streaming/api/mod.rs
index a525c883..e78ba371 100644
--- a/src/runtime/streaming/api/mod.rs
+++ b/src/runtime/streaming/api/mod.rs
@@ -15,6 +15,3 @@ pub mod context;
 pub mod operator;
 pub mod source;
 
-pub use context::TaskContext;
-pub use operator::{ConstructedOperator, Operator};
-pub use source::{SourceEvent, SourceOffset, SourceOperator};
diff --git a/src/runtime/streaming/execution/mod.rs b/src/runtime/streaming/execution/mod.rs
index 40beabe4..1a8401ef 100644
--- a/src/runtime/streaming/execution/mod.rs
+++ b/src/runtime/streaming/execution/mod.rs
@@ -15,5 +15,3 @@ pub mod runner;
 pub mod source;
 pub mod tracker;
 
-pub use runner::{OperatorDrive, SubtaskRunner};
-pub use source::{SourceRunner, SOURCE_IDLE_SLEEP, WATERMARK_EMIT_INTERVAL};
diff --git a/src/runtime/streaming/execution/tracker/mod.rs b/src/runtime/streaming/execution/tracker/mod.rs
index b00ee0a2..3206f352 100644
--- a/src/runtime/streaming/execution/tracker/mod.rs
+++ b/src/runtime/streaming/execution/tracker/mod.rs
@@ -14,5 +14,3 @@
 pub mod barrier_aligner;
 pub mod watermark_tracker;
 
-pub use barrier_aligner::{AlignmentStatus, BarrierAligner};
-pub use watermark_tracker::WatermarkTracker;
diff --git a/src/runtime/streaming/factory/mod.rs b/src/runtime/streaming/factory/mod.rs
index 442c9bf9..f02ec955 100644
--- a/src/runtime/streaming/factory/mod.rs
+++ b/src/runtime/streaming/factory/mod.rs
@@ -26,7 +26,6 @@ pub use connector::{
     ConnectorSinkDispatcher, ConnectorSourceDispatcher, KafkaSinkDispatcher, KafkaSourceDispatcher,
 };
 pub use global::Registry;
-pub use operator_constructor::OperatorConstructor;
 pub use operator_factory::OperatorFactory;
 
 fn register_builtin_connectors(factory: &mut OperatorFactory) {
diff --git a/src/runtime/streaming/format/mod.rs b/src/runtime/streaming/format/mod.rs
index b27935ba..d5e63a9d 100644
--- a/src/runtime/streaming/format/mod.rs
+++ b/src/runtime/streaming/format/mod.rs
@@ -17,5 +17,4 @@ pub mod serializer;
 
 pub use config::{BadDataPolicy, DecimalEncoding, Format, JsonFormat, TimestampFormat};
 pub use deserializer::DataDeserializer;
-pub use json_encoder::CustomEncoderFactory;
 pub use serializer::DataSerializer;
diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs
index 72f98d69..5b2bbd8c 100644
--- a/src/runtime/streaming/job/mod.rs
+++ b/src/runtime/streaming/job/mod.rs
@@ -15,4 +15,3 @@ pub mod job_manager;
 pub mod models;
 
 pub use job_manager::JobManager;
-pub use models::{PhysicalExecutionGraph, PhysicalPipeline, PipelineStatus};
diff --git a/src/runtime/streaming/mod.rs b/src/runtime/streaming/mod.rs
index 6fd45abd..7e0ba57a 100644
--- a/src/runtime/streaming/mod.rs
+++ b/src/runtime/streaming/mod.rs
@@ -24,15 +24,4 @@ pub mod network;
 pub mod operators;
 pub mod protocol;
 
-pub use api::{
-    ConstructedOperator, Operator, SourceEvent, SourceOffset, SourceOperator, TaskContext,
-};
-pub use error::RunError;
-pub use execution::{SOURCE_IDLE_SLEEP, SourceRunner, SubtaskRunner};
-pub use factory::{OperatorConstructor, OperatorFactory};
-pub use memory::{MemoryPool, MemoryTicket};
-pub use network::{BoxedEventStream, NetworkEnvironment, PhysicalSender, RemoteSenderStub};
-pub use protocol::{
-    CheckpointBarrierWire, ControlCommand, StopMode, StreamEvent, StreamOutput,
-    control_channel, merge_watermarks, watermark_strictly_advances,
-};
+pub use protocol::StreamOutput;
diff --git a/src/runtime/streaming/network/mod.rs b/src/runtime/streaming/network/mod.rs
index 4b120781..16100133 100644
--- a/src/runtime/streaming/network/mod.rs
+++ b/src/runtime/streaming/network/mod.rs
@@ -13,5 +13,3 @@
 pub mod endpoint;
 pub mod environment;
 
-pub use endpoint::{BoxedEventStream, PhysicalSender, RemoteSenderStub};
-pub use environment::NetworkEnvironment;
diff --git a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
index 16d92fd1..f895c173 100644
--- a/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
+++ b/src/runtime/streaming/operators/grouping/incremental_aggregate.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use anyhow::{anyhow, bail, Result};
+use anyhow::{bail, Result};
 use arrow::compute::max_array;
 use arrow::row::{RowConverter, SortField};
 use arrow_array::builder::{
@@ -19,7 +19,7 @@ use arrow_array::builder::{
 use arrow_array::cast::AsArray;
 use arrow_array::types::UInt64Type;
 use arrow_array::{
-    Array, ArrayRef, BinaryArray, BooleanArray, RecordBatch, StructArray, UInt32Array, UInt64Array,
+    Array, ArrayRef, BooleanArray, RecordBatch, StructArray,
 };
 use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaBuilder, TimeUnit};
 use datafusion::common::{Result as DFResult, ScalarValue};
@@ -31,7 +31,6 @@ use datafusion_proto::physical_plan::from_proto::parse_physical_expr;
 use datafusion_proto::protobuf::PhysicalExprNode;
 use datafusion_proto::protobuf::PhysicalPlanNode;
 use datafusion_proto::protobuf::physical_plan_node::PhysicalPlanType;
-use futures::StreamExt;
 use itertools::Itertools;
 use prost::Message;
 use std::collections::HashSet;
@@ -482,95 +481,6 @@ impl IncrementalAggregatingFunc {
     }
 
     async fn initialize(&mut self, _ctx: &mut TaskContext) -> Result<()> {
-        // let table = tm
-        //     .get_uncached_key_value_view("a")
-        //     .await
-        //     .map_err(|e| anyhow!("state table a: {e}"))?;
-        // let mut stream = Box::pin(table.get_all());
-        // let key_converter = RowConverter::new(self.sliding_state_schema.sort_fields(false))?;
-        //
-        // while let Some(batch) = stream.next().await {
-        //     let batch = batch?;
-        //     if batch.num_rows() == 0 { continue; }
-        //
-        //     let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect();
-        //     let aggregate_states = self.aggregates.iter().map(|agg| {
-        //         agg.state_cols.iter().map(|idx| batch.column(*idx).clone()).collect_vec()
-        //     }).collect_vec();
-        //     let generations = batch.columns().last().unwrap().as_primitive::<UInt64Type>();
-        //     let now = Instant::now();
-        //
-        //     if key_cols.is_empty() {
-        //         self.restore_sliding(
-        //             GLOBAL_KEY.as_ref().as_slice(),
-        //             now,
-        //             0,
-        //             &aggregate_states,
-        //             generations.value(0),
-        //         )?;
-        //     } else {
-        //         let key_rows = key_converter.convert_columns(&key_cols)?;
-        //         for (i, row) in key_rows.iter().enumerate() {
-        //             if generations.is_null(i) {
-        //                 bail!("generation is null at row {i}");
-        //             }
-        //             let generation = generations.value(i);
-        //             self.restore_sliding(
-        //                 row.as_ref(),
-        //                 now,
-        //                 i,
-        //                 &aggregate_states,
-        //                 generation,
-        //             )?;
-        //         }
-        //     }
-        // }
-        // drop(stream);
-
-        //
-        // if self.aggregates.iter().any(|agg| agg.accumulator_type == AccumulatorType::Batch) {
-        //     let table = tm
-        //         .get_uncached_key_value_view("b")
-        //         .await
-        //         .map_err(|e| anyhow!("state table b: {e}"))?;
-        //     let mut stream = Box::pin(table.get_all());
-        //     while let Some(batch) = stream.next().await {
-        //         let batch = batch?;
-        //         if batch.num_rows() == 0 { continue; }
-        //
-        //         let key_cols: Vec<_> = self.sliding_state_schema.sort_columns(&batch, false).into_iter().map(|c| c.values).collect();
-        //         let count_column = batch.column(self.batch_state_schema.schema.index_of("count").unwrap()).as_any().downcast_ref::<UInt64Array>().unwrap();
-        //         let accumulator_column = batch.column(self.batch_state_schema.schema.index_of("accumulator").unwrap()).as_any().downcast_ref::<UInt32Array>().unwrap();
-        //         let args_row_column = batch.column(self.batch_state_schema.schema.index_of("args_row").unwrap()).as_any().downcast_ref::<BinaryArray>().unwrap();
-        //         let generations = batch.columns().last().unwrap().as_primitive::<UInt64Type>();
-        //
-        //         let key_rows = if key_cols.is_empty() {
-        //             vec![GLOBAL_KEY.as_ref().clone()]
-        //         } else {
-        //             self.key_converter
-        //                 .convert_columns(&key_cols)?
-        //                 .iter()
-        //                 .map(|k| k.as_ref().to_vec())
-        //                 .collect()
-        //         };
-        //
-        //         for (i, row) in key_rows.iter().enumerate() {
-        //             let Some(accumulators) = self.accumulators.get_mut(row.as_ref()) else { continue; };
-        //             let count = count_column.value(i);
-        //             let accumulator_idx = accumulator_column.value(i) as usize;
-        //             let args_row = args_row_column.value(i);
-        //             let generation = generations.value(i);
-        //
-        //             let IncrementalState::Batch { data, .. } = &mut accumulators[accumulator_idx] else { bail!("expected batch accumulator"); };
-        //
-        //             if let Some(existing) = data.get_mut(args_row) {
-        //                 if existing.generation < generation { existing.count = count; existing.generation = generation; }
-        //             } else {
-        //                 data.insert(Key(Arc::new(args_row.to_vec())), BatchData { count, generation });
-        //             }
-        //         }
-        //     }
-        // }
 
         let mut deleted_keys = vec![];
         for (k, v) in self.accumulators.iter_mut() {
@@ -697,34 +607,8 @@ impl Operator for IncrementalAggregatingFunc {
     async fn snapshot_state(
         &mut self,
         _barrier: CheckpointBarrier,
-        ctx: &mut TaskContext,
+        _ctx: &mut TaskContext,
     ) -> Result<()> {
-        // let mut tm = ctx.table_manager_guard().await?;
-        //
-        // if let Some(sliding) = self.checkpoint_sliding()? {
-        //     let table = tm
-        //         .get_uncached_key_value_view("a")
-        //         .await
-        //         .map_err(|e| anyhow!("state table a: {e}"))?;
-        //     table
-        //         .insert_batch(sliding)
-        //         .await
-        //         .map_err(|e| anyhow!("insert_batch a: {e}"))?;
-        // }
-        //
-        // if let Some(batch) = self.checkpoint_batch()? {
-        //     let table = tm
-        //         .get_uncached_key_value_view("b")
-        //         .await
-        //         .map_err(|e| anyhow!("state table b: {e}"))?;
-        //     table
-        //         .insert_batch(batch)
-        //         .await
-        //         .map_err(|e| anyhow!("insert_batch b: {e}"))?;
-        // }
-        //
-        //
-        // self.updated_keys.clear();
          Ok(())
     }
 
diff --git a/src/runtime/streaming/operators/grouping/mod.rs b/src/runtime/streaming/operators/grouping/mod.rs
index ef672351..2a17a49d 100644
--- a/src/runtime/streaming/operators/grouping/mod.rs
+++ b/src/runtime/streaming/operators/grouping/mod.rs
@@ -13,5 +13,5 @@
 pub mod incremental_aggregate;
 pub mod updating_cache;
 
-pub use incremental_aggregate::{IncrementalAggregatingConstructor, IncrementalAggregatingFunc};
+pub use incremental_aggregate::IncrementalAggregatingConstructor;
 pub use updating_cache::{Key, UpdatingCache};
diff --git a/src/runtime/streaming/operators/joins/mod.rs b/src/runtime/streaming/operators/joins/mod.rs
index bc81f328..1cc83d36 100644
--- a/src/runtime/streaming/operators/joins/mod.rs
+++ b/src/runtime/streaming/operators/joins/mod.rs
@@ -13,5 +13,5 @@
 pub mod join_instance;
 pub mod join_with_expiration;
 
-pub use join_instance::{InstantJoinConstructor, InstantJoinOperator};
-pub use join_with_expiration::{JoinWithExpirationConstructor, JoinWithExpirationOperator};
+pub use join_instance::InstantJoinConstructor;
+pub use join_with_expiration::JoinWithExpirationConstructor;
diff --git a/src/runtime/streaming/operators/mod.rs b/src/runtime/streaming/operators/mod.rs
index 18a98830..ffe1c101 100644
--- a/src/runtime/streaming/operators/mod.rs
+++ b/src/runtime/streaming/operators/mod.rs
@@ -27,13 +27,4 @@ pub use stateless_physical_executor::StatelessPhysicalExecutor;
 pub use projection::ProjectionOperator;
 pub use value_execution::ValueExecutionOperator;
 
-pub use grouping::{IncrementalAggregatingFunc, Key, UpdatingCache};
-pub use joins::{InstantJoinOperator, JoinWithExpirationOperator};
-pub use key_by::KeyByOperator;
-pub use sink::{ConsistencyMode, KafkaSinkOperator};
-pub use source::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState};
-pub use watermark::{WatermarkGeneratorOperator, WatermarkGeneratorState};
-pub use windows::{
-    SessionWindowOperator, SlidingWindowOperator, TumblingWindowOperator,
-    WindowFunctionOperator,
-};
+pub use grouping::{Key, UpdatingCache};
diff --git a/src/runtime/streaming/operators/projection.rs b/src/runtime/streaming/operators/projection.rs
index d2f54b8c..0136e18e 100644
--- a/src/runtime/streaming/operators/projection.rs
+++ b/src/runtime/streaming/operators/projection.rs
@@ -10,7 +10,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use anyhow::{anyhow, Context, Result};
+use anyhow::{anyhow, Result};
 use arrow_array::RecordBatch;
 use async_trait::async_trait;
 use datafusion::physical_expr::PhysicalExpr;
@@ -23,7 +23,7 @@ use std::sync::Arc;
 use protocol::grpc::api::ProjectionOperator as ProjectionOperatorProto;
 
 use crate::runtime::streaming::api::context::TaskContext;
-use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator};
+use crate::runtime::streaming::api::operator::Operator;
 use crate::runtime::streaming::factory::global::Registry;
 use crate::runtime::streaming::StreamOutput;
 use crate::sql::common::{CheckpointBarrier, FsSchema, FsSchemaRef, Watermark};
diff --git a/src/runtime/streaming/operators/sink/mod.rs b/src/runtime/streaming/operators/sink/mod.rs
index 8abd2985..aa340614 100644
--- a/src/runtime/streaming/operators/sink/mod.rs
+++ b/src/runtime/streaming/operators/sink/mod.rs
@@ -13,4 +13,3 @@
 
 pub mod kafka;
 
-pub use kafka::{ConsistencyMode, KafkaSinkOperator};
diff --git a/src/runtime/streaming/operators/source/mod.rs b/src/runtime/streaming/operators/source/mod.rs
index 76f3639a..aa340614 100644
--- a/src/runtime/streaming/operators/source/mod.rs
+++ b/src/runtime/streaming/operators/source/mod.rs
@@ -13,4 +13,3 @@
 
 pub mod kafka;
 
-pub use kafka::{BatchDeserializer, BufferedDeserializer, KafkaSourceOperator, KafkaState};
diff --git a/src/runtime/streaming/operators/watermark/mod.rs b/src/runtime/streaming/operators/watermark/mod.rs
index 4486a0fd..3a0a1099 100644
--- a/src/runtime/streaming/operators/watermark/mod.rs
+++ b/src/runtime/streaming/operators/watermark/mod.rs
@@ -12,4 +12,4 @@
 
 pub mod watermark_generator;
 
-pub use watermark_generator::{WatermarkGeneratorConstructor, WatermarkGeneratorOperator, WatermarkGeneratorState};
+pub use watermark_generator::WatermarkGeneratorConstructor;
diff --git a/src/runtime/streaming/operators/windows/mod.rs b/src/runtime/streaming/operators/windows/mod.rs
index 02c9eccb..f1915f0d 100644
--- a/src/runtime/streaming/operators/windows/mod.rs
+++ b/src/runtime/streaming/operators/windows/mod.rs
@@ -15,7 +15,7 @@ pub mod sliding_aggregating_window;
 pub mod tumbling_aggregating_window;
 pub mod window_function;
 
-pub use session_aggregating_window::{SessionAggregatingWindowConstructor, SessionWindowOperator};
-pub use sliding_aggregating_window::{SlidingAggregatingWindowConstructor, SlidingWindowOperator};
-pub use tumbling_aggregating_window::{TumblingAggregateWindowConstructor, TumblingWindowOperator};
-pub use window_function::{WindowFunctionConstructor, WindowFunctionOperator};
+pub use session_aggregating_window::SessionAggregatingWindowConstructor;
+pub use sliding_aggregating_window::SlidingAggregatingWindowConstructor;
+pub use tumbling_aggregating_window::TumblingAggregateWindowConstructor;
+pub use window_function::WindowFunctionConstructor;
diff --git a/src/runtime/streaming/protocol/mod.rs b/src/runtime/streaming/protocol/mod.rs
index 63f7f0bf..fb20c59e 100644
--- a/src/runtime/streaming/protocol/mod.rs
+++ b/src/runtime/streaming/protocol/mod.rs
@@ -17,10 +17,4 @@ pub mod stream_out;
 pub mod tracked;
 pub mod watermark;
 
-pub use control::{
-    control_channel, CheckpointBarrierWire, ControlCommand, StopMode,
-};
-pub use event::StreamEvent;
 pub use stream_out::StreamOutput;
-pub use tracked::TrackedEvent;
-pub use watermark::{merge_watermarks, watermark_strictly_advances};
diff --git a/src/server/mod.rs b/src/server/mod.rs
index 7795f29b..cb7a4a85 100644
--- a/src/server/mod.rs
+++ b/src/server/mod.rs
@@ -17,5 +17,5 @@ mod initializer;
 mod service;
 
 pub use handler::FunctionStreamServiceImpl;
-pub use initializer::{bootstrap_system, build_core_registry};
+pub use initializer::bootstrap_system;
 pub use service::start_server_with_shutdown;
diff --git a/src/sql/analysis/join_rewriter.rs b/src/sql/analysis/join_rewriter.rs
index 4421aa99..058a5bd8 100644
--- a/src/sql/analysis/join_rewriter.rs
+++ b/src/sql/analysis/join_rewriter.rs
@@ -19,7 +19,7 @@ use crate::sql::common::constants::mem_exec_join_side;
 use crate::sql::common::TIMESTAMP_FIELD;
 use datafusion::common::tree_node::{Transformed, TreeNodeRewriter};
 use datafusion::common::{
-    Column, DataFusionError, JoinConstraint, JoinType, Result, ScalarValue, Spans, TableReference,
+    JoinConstraint, JoinType, Result, ScalarValue, TableReference,
     not_impl_err, plan_err,
 };
 use datafusion::logical_expr::{
diff --git a/src/sql/analysis/mod.rs b/src/sql/analysis/mod.rs
index 653cb601..cd26a4e6 100644
--- a/src/sql/analysis/mod.rs
+++ b/src/sql/analysis/mod.rs
@@ -26,41 +26,27 @@ pub mod source_rewriter;
 pub mod time_window;
 pub mod unnest_rewriter;
 
-pub use async_udf_rewriter::{AsyncOptions, AsyncUdfRewriter};
+pub use async_udf_rewriter::AsyncOptions;
 pub use sink_input_rewriter::SinkInputRewriter;
-pub use source_metadata_visitor::SourceMetadataVisitor;
-pub use source_rewriter::SourceRewriter;
-pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker, is_time_window};
-pub use unnest_rewriter::{UNNESTED_COL, UnnestRewriter};
+pub use time_window::{TimeWindowNullCheckRemover, TimeWindowUdfChecker};
+pub use unnest_rewriter::UNNESTED_COL;
 
-pub use crate::sql::schema::schema_provider::{
-    LogicalBatchInput, StreamSchemaProvider, StreamTable,
-};
+pub use crate::sql::schema::schema_provider::StreamSchemaProvider;
 
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 use std::sync::Arc;
 
 use datafusion::common::tree_node::{Transformed, TreeNode};
 use datafusion::common::{Result, plan_err};
 use datafusion::error::DataFusionError;
-use datafusion::execution::SessionStateBuilder;
 use datafusion::logical_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore};
-use datafusion::prelude::SessionConfig;
-use datafusion::sql::TableReference;
-use datafusion::sql::sqlparser::ast::{OneOrManyWithParens, Statement};
-use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
-use datafusion::sql::sqlparser::parser::Parser;
 use tracing::{debug, info, instrument};
 
-use crate::sql::logical_planner::optimizers::ChainingOptimizer;
-use crate::sql::schema::table::Table as CatalogTable;
-use crate::sql::functions::{is_json_union, serialize_outgoing_json};
 use crate::sql::extensions::key_calculation::{KeyExtractionNode, KeyExtractionStrategy};
 use crate::sql::extensions::projection::StreamProjectionNode;
 use crate::sql::extensions::sink::StreamEgressNode;
 use crate::sql::extensions::StreamingOperatorBlueprint;
 use crate::sql::logical_planner::planner::NamedNode;
-use crate::sql::types::SqlConfig;
 
 fn duration_from_sql_expr(
     expr: &datafusion::sql::sqlparser::ast::Expr,
diff --git a/src/sql/analysis/sink_input_rewriter.rs b/src/sql/analysis/sink_input_rewriter.rs
index 6b8b2de1..ad36046f 100644
--- a/src/sql/analysis/sink_input_rewriter.rs
+++ b/src/sql/analysis/sink_input_rewriter.rs
@@ -28,7 +28,7 @@ pub struct SinkInputRewriter<'a> {
 }
 
 impl<'a> SinkInputRewriter<'a> {
-    pub fn new(sink_inputs: &'a mut SinkInputs) -> Self {
+    pub(crate) fn new(sink_inputs: &'a mut SinkInputs) -> Self {
         Self {
             sink_inputs,
             was_removed: false,
diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs
index 35804c02..39df350e 100644
--- a/src/sql/analysis/source_rewriter.rs
+++ b/src/sql/analysis/source_rewriter.rs
@@ -25,7 +25,6 @@ use crate::sql::schema::source_table::SourceTable;
 use crate::sql::schema::ColumnDescriptor;
 use crate::sql::schema::table::Table;
 use crate::sql::schema::StreamSchemaProvider;
-use crate::sql::schema::StreamTable;
 use crate::sql::common::constants::sql_field;
 use crate::sql::common::UPDATING_META_FIELD;
 use crate::sql::extensions::debezium::UnrollDebeziumPayloadNode;
diff --git a/src/sql/analysis/window_function_rewriter.rs b/src/sql/analysis/window_function_rewriter.rs
index 7b94b841..63c502bf 100644
--- a/src/sql/analysis/window_function_rewriter.rs
+++ b/src/sql/analysis/window_function_rewriter.rs
@@ -11,7 +11,7 @@
 // limitations under the License.
 
 use datafusion::common::tree_node::Transformed;
-use datafusion::common::{Column, Result as DFResult, plan_err, tree_node::TreeNodeRewriter};
+use datafusion::common::{Result as DFResult, plan_err, tree_node::TreeNodeRewriter};
 use datafusion::logical_expr::{
     self, Expr, Extension, LogicalPlan, Projection, Sort, Window, expr::WindowFunction,
     expr::WindowFunctionParams,
diff --git a/src/sql/api/mod.rs b/src/sql/api/mod.rs
index 3969296a..cdc119b7 100644
--- a/src/sql/api/mod.rs
+++ b/src/sql/api/mod.rs
@@ -25,14 +25,7 @@ pub mod var_str;
 
 use serde::{Deserialize, Serialize};
 
-pub use checkpoints::*;
-pub use connections::{
-    ConnectionProfile, ConnectionSchema, ConnectionType, Connector, FieldType, SchemaDefinition,
-    SourceField,
-};
-pub use metrics::*;
-pub use pipelines::*;
-pub use udfs::*;
+pub use connections::ConnectionProfile;
 
 #[derive(Serialize, Deserialize, Clone, Debug)]
 #[serde(rename_all = "camelCase")]
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index fa37a9fd..03e9b4a6 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -36,27 +36,13 @@ pub mod worker;
 pub mod converter;
 
 // ── Re-exports from existing modules ──
-pub use arrow_ext::{DisplayAsSql, FsExtensionType, GetArrowSchema, GetArrowType};
-pub use date::{DatePart, DateTruncPrecision};
-pub use debezium::{Debezium, DebeziumOp, UpdatingData};
-pub use hash::{range_for_server, server_for_hash, HASH_SEEDS};
-pub use message::{ArrowMessage, CheckpointBarrier, SignalMessage, Watermark};
-pub use task_info::{ChainInfo, TaskInfo};
-pub use time_utils::{from_micros, from_millis, from_nanos, to_micros, to_millis, to_nanos};
-pub use worker::{MachineId, WorkerId};
+pub use arrow_ext::FsExtensionType;
+pub use message::{CheckpointBarrier, Watermark};
+pub use time_utils::{from_nanos, to_micros, to_millis, to_nanos};
 
 // ── Re-exports from new modules ──
-pub use control::{
-    CheckpointCompleted, CheckpointEvent, CompactionResult, ControlMessage, ControlResp,
-    ErrorDomain, RetryHint, StopMode, TableConfig, TaskCheckpointEventType, TaskError,
-};
 pub use fs_schema::{FsSchema, FsSchemaRef};
-pub use connector_options::{ConnectorOptions, FromOpts};
-pub use kafka_catalog::{
-    KafkaConfig, KafkaConfigAuthentication, KafkaTable, KafkaTableSourceOffset, ReadMode,
-    SchemaRegistryConfig, SinkCommitMode, TableType,
-};
-pub use errors::{DataflowError, DataflowResult};
+pub use connector_options::ConnectorOptions;
 pub use formats::{BadData, Format, Framing, JsonCompression, JsonFormat};
 pub use operator_config::MetadataField;
 
diff --git a/src/sql/logical_node/logical/mod.rs b/src/sql/logical_node/logical/mod.rs
index ab318804..d2e9a327 100644
--- a/src/sql/logical_node/logical/mod.rs
+++ b/src/sql/logical_node/logical/mod.rs
@@ -26,7 +26,5 @@ pub use logical_edge::{LogicalEdge, LogicalEdgeType};
 pub use logical_graph::{LogicalGraph, Optimizer};
 pub use logical_node::LogicalNode;
 pub use logical_program::LogicalProgram;
-pub use operator_chain::OperatorChain;
 pub use operator_name::OperatorName;
 pub use program_config::ProgramConfig;
-pub use python_udf_config::PythonUdfConfig;
diff --git a/src/sql/logical_node/logical/operator_name.rs b/src/sql/logical_node/logical/operator_name.rs
index 79fe9a05..57f53f90 100644
--- a/src/sql/logical_node/logical/operator_name.rs
+++ b/src/sql/logical_node/logical/operator_name.rs
@@ -45,7 +45,7 @@ impl OperatorName {
 
     pub fn feature_tag(self) -> Option<&'static str> {
         match self {
-            Self::ExpressionWatermark | Self::Value | Self::KeyBy | Self::Projection => None,
+            Self::ExpressionWatermark | Self::Value | Self::Projection => None,
             Self::AsyncUdf => Some(operator_feature::ASYNC_UDF),
             Self::Join => Some(operator_feature::JOIN_WITH_EXPIRATION),
             Self::InstantJoin => Some(operator_feature::WINDOWED_JOIN),
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
index c13f1c4a..dc98a4de 100644
--- a/src/sql/mod.rs
+++ b/src/sql/mod.rs
@@ -23,7 +23,5 @@ pub mod analysis;
 pub(crate) mod extensions;
 pub mod types;
 
-pub use schema::{StreamPlanningContext, StreamSchemaProvider};
-pub use parse::parse_sql;
 pub use analysis::rewrite_plan;
 
diff --git a/src/sql/physical/mod.rs b/src/sql/physical/mod.rs
index ee63a2be..7cbb3231 100644
--- a/src/sql/physical/mod.rs
+++ b/src/sql/physical/mod.rs
@@ -21,4 +21,4 @@ pub use cdc::{DebeziumUnrollingExec, ToDebeziumExec};
 pub use codec::{DecodingContext, FsPhysicalExtensionCodec};
 pub use meta::{updating_meta_field, updating_meta_fields};
 pub use readers::FsMemExec;
-pub use udfs::{WindowFunctionUdf, window};
+pub use udfs::window;
diff --git a/src/sql/schema/mod.rs b/src/sql/schema/mod.rs
index b052aa68..f3bf1946 100644
--- a/src/sql/schema/mod.rs
+++ b/src/sql/schema/mod.rs
@@ -27,25 +27,15 @@ pub mod utils;
 
 pub use catalog_ddl::{
     catalog_table_row_detail, schema_columns_one_line, show_create_catalog_table,
-    show_create_stream_table, stream_table_row_detail,
 };
 pub use column_descriptor::ColumnDescriptor;
 pub use connection_type::ConnectionType;
 pub use connector_config::ConnectorConfig;
-pub use source_table::{SourceOperator, SourceTable};
+pub use source_table::SourceTable;
 
 /// Back-compat alias for [`SourceTable`].
 pub type ConnectorTable = SourceTable;
-pub use data_encoding_format::DataEncodingFormat;
-pub use schema_context::{DfSchemaContext, SchemaContext};
 pub use schema_provider::{
-    FunctionCatalog, LogicalBatchInput, ObjectName, StreamPlanningContext,
-    StreamPlanningContextBuilder, StreamSchemaProvider, StreamTable, TableCatalog,
+    ObjectName, StreamPlanningContext, StreamSchemaProvider, StreamTable,
 };
 pub use table::Table;
-pub use table_execution_unit::{EngineDescriptor, SyncMode, TableExecutionUnit};
-pub use table_role::{
-    apply_adapter_specific_rules, deduce_role, serialize_backend_params, validate_adapter_availability,
-    TableRole,
-};
-pub use temporal_pipeline_config::{resolve_temporal_logic, TemporalPipelineConfig, TemporalSpec};
diff --git a/src/sql/types/mod.rs b/src/sql/types/mod.rs
index 41753e38..4c99d08f 100644
--- a/src/sql/types/mod.rs
+++ b/src/sql/types/mod.rs
@@ -20,14 +20,12 @@ use std::time::Duration;
 
 use crate::sql::common::constants::sql_planning_default;
 
-pub use data_type::convert_data_type;
 pub use df_field::{
     DFField, fields_with_qualifiers, schema_from_df_fields, schema_from_df_fields_with_metadata,
 };
 pub(crate) use placeholder_udf::PlaceholderUdf;
-pub use stream_schema::StreamSchema;
 pub(crate) use window::WindowBehavior;
-pub use window::{WindowType, find_window, get_duration};
+pub use window::{WindowType, find_window};
 
 pub use crate::sql::common::constants::sql_field::TIMESTAMP_FIELD;
 
diff --git a/src/storage/stream_catalog/mod.rs b/src/storage/stream_catalog/mod.rs
index fea2e39f..1b893cea 100644
--- a/src/storage/stream_catalog/mod.rs
+++ b/src/storage/stream_catalog/mod.rs
@@ -18,7 +18,7 @@ mod meta_store;
 mod rocksdb_meta_store;
 
 pub use manager::{
-    CatalogManager, initialize_stream_catalog, planning_schema_provider,
+    CatalogManager, initialize_stream_catalog,
     restore_global_catalog_from_store,
 };
 pub use meta_store::{InMemoryMetaStore, MetaStore};

From 52610ec34d4328704453f695a8e29e3eab92a949 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 20:30:11 +0800
Subject: [PATCH 37/44] update

---
 src/coordinator/analyze/analyzer.rs           |  33 ++++-
 src/coordinator/dataset/mod.rs                |   4 +
 .../show_create_streaming_table_result.rs     |  64 +++++++++
 .../dataset/show_streaming_tables_result.rs   |  75 ++++++++++
 src/coordinator/execution/executor.rs         | 134 +++++++++++++++++-
 src/coordinator/mod.rs                        |   7 +-
 .../plan/drop_streaming_table_plan.rs         |  34 +++++
 src/coordinator/plan/logical_plan_visitor.rs  |  44 +++++-
 src/coordinator/plan/mod.rs                   |   6 +
 .../plan/show_create_streaming_table_plan.rs  |  30 ++++
 .../plan/show_streaming_tables_plan.rs        |  28 ++++
 src/coordinator/plan/visitor.rs               |  26 +++-
 .../statement/drop_streaming_table.rs         |  40 ++++++
 src/coordinator/statement/mod.rs              |  10 ++
 .../statement/show_create_streaming_table.rs  |  36 +++++
 .../statement/show_streaming_tables.rs        |  33 +++++
 src/coordinator/statement/visitor.rs          |  25 +++-
 src/runtime/streaming/job/job_manager.rs      | 133 ++++++++++++++++-
 src/runtime/streaming/job/mod.rs              |   2 +-
 src/sql/parse.rs                              |  88 ++++++++++--
 20 files changed, 813 insertions(+), 39 deletions(-)
 create mode 100644 src/coordinator/dataset/show_create_streaming_table_result.rs
 create mode 100644 src/coordinator/dataset/show_streaming_tables_result.rs
 create mode 100644 src/coordinator/plan/drop_streaming_table_plan.rs
 create mode 100644 src/coordinator/plan/show_create_streaming_table_plan.rs
 create mode 100644 src/coordinator/plan/show_streaming_tables_plan.rs
 create mode 100644 src/coordinator/statement/drop_streaming_table.rs
 create mode 100644 src/coordinator/statement/show_create_streaming_table.rs
 create mode 100644 src/coordinator/statement/show_streaming_tables.rs

diff --git a/src/coordinator/analyze/analyzer.rs b/src/coordinator/analyze/analyzer.rs
index dbac78cf..878a9481 100644
--- a/src/coordinator/analyze/analyzer.rs
+++ b/src/coordinator/analyze/analyzer.rs
@@ -13,10 +13,11 @@
 use super::Analysis;
 use crate::coordinator::execution_context::ExecutionContext;
 use crate::coordinator::statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
-    ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, Statement,
-    StatementVisitor, StatementVisitorContext, StatementVisitorResult, StopFunction,
-    StreamingTableStatement,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction,
+    DropStreamingTableStatement, DropTableStatement, ShowCatalogTables,
+    ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, ShowStreamingTables,
+    StartFunction, Statement, StatementVisitor, StatementVisitorContext,
+    StatementVisitorResult, StopFunction, StreamingTableStatement,
 };
 use std::fmt;
 
@@ -159,4 +160,28 @@ impl StatementVisitor for Analyzer<'_> {
     ) -> StatementVisitorResult {
         StatementVisitorResult::Analyze(Box::new(DropTableStatement::new(stmt.statement.clone())))
     }
+
+    fn visit_show_streaming_tables(
+        &self,
+        stmt: &ShowStreamingTables,
+        _context: &StatementVisitorContext, // not consulted by this visitor method
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Analyze(Box::new(stmt.clone())) // pass-through: boxed clone of `stmt`
+    }
+
+    fn visit_show_create_streaming_table(
+        &self,
+        stmt: &ShowCreateStreamingTable,
+        _context: &StatementVisitorContext, // not consulted by this visitor method
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Analyze(Box::new(stmt.clone())) // pass-through: boxed clone of `stmt`
+    }
+
+    fn visit_drop_streaming_table(
+        &self,
+        stmt: &DropStreamingTableStatement,
+        _context: &StatementVisitorContext, // not consulted by this visitor method
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Analyze(Box::new(stmt.clone())) // pass-through: boxed clone of `stmt`
+    }
 }
diff --git a/src/coordinator/dataset/mod.rs b/src/coordinator/dataset/mod.rs
index f09c24ca..bbcac6f0 100644
--- a/src/coordinator/dataset/mod.rs
+++ b/src/coordinator/dataset/mod.rs
@@ -13,11 +13,15 @@
 mod data_set;
 mod execute_result;
 mod show_catalog_tables_result;
+mod show_create_streaming_table_result;
 mod show_create_table_result;
 mod show_functions_result;
+mod show_streaming_tables_result;
 
 pub use data_set::{DataSet, empty_record_batch};
 pub use execute_result::ExecuteResult;
 pub use show_catalog_tables_result::ShowCatalogTablesResult;
+pub use show_create_streaming_table_result::ShowCreateStreamingTableResult;
 pub use show_create_table_result::ShowCreateTableResult;
 pub use show_functions_result::ShowFunctionsResult;
+pub use show_streaming_tables_result::ShowStreamingTablesResult;
diff --git a/src/coordinator/dataset/show_create_streaming_table_result.rs b/src/coordinator/dataset/show_create_streaming_table_result.rs
new file mode 100644
index 00000000..2b9e0d0a
--- /dev/null
+++ b/src/coordinator/dataset/show_create_streaming_table_result.rs
@@ -0,0 +1,64 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use arrow_array::StringArray;
+use arrow_schema::{DataType, Field, Schema};
+
+use super::DataSet;
+
+#[derive(Clone, Debug)]
+pub struct ShowCreateStreamingTableResult {
+    table_name: String, // emitted in the "Streaming Table" column
+    status: String, // emitted in the "Status" column
+    pipeline_detail: String, // emitted in the "Pipelines" column
+    program_json: String, // emitted in the "Program" column
+}
+
+impl ShowCreateStreamingTableResult {
+    pub fn new(
+        table_name: String,
+        status: String,
+        pipeline_detail: String,
+        program_json: String,
+    ) -> Self { // plain constructor: stores the four values unchanged
+        Self {
+            table_name,
+            status,
+            pipeline_detail,
+            program_json,
+        }
+    }
+}
+
+impl DataSet for ShowCreateStreamingTableResult {
+    fn to_record_batch(&self) -> arrow_array::RecordBatch { // builds exactly one row
+        let schema = Arc::new(Schema::new(vec![ // four non-nullable UTF-8 columns
+            Field::new("Streaming Table", DataType::Utf8, false),
+            Field::new("Status", DataType::Utf8, false),
+            Field::new("Pipelines", DataType::Utf8, false),
+            Field::new("Program", DataType::Utf8, false),
+        ]));
+
+        arrow_array::RecordBatch::try_new(
+            schema,
+            vec![ // one single-element array per column, in schema order
+                Arc::new(StringArray::from(vec![self.table_name.as_str()])),
+                Arc::new(StringArray::from(vec![self.status.as_str()])),
+                Arc::new(StringArray::from(vec![self.pipeline_detail.as_str()])),
+                Arc::new(StringArray::from(vec![self.program_json.as_str()])),
+            ],
+        ) // a construction error falls back to an empty batch with an empty schema
+        .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty())))
+    }
+}
diff --git a/src/coordinator/dataset/show_streaming_tables_result.rs b/src/coordinator/dataset/show_streaming_tables_result.rs
new file mode 100644
index 00000000..a992d1b9
--- /dev/null
+++ b/src/coordinator/dataset/show_streaming_tables_result.rs
@@ -0,0 +1,75 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use arrow_array::{Int32Array, StringArray};
+use arrow_schema::{DataType, Field, Schema};
+
+use super::DataSet;
+use crate::runtime::streaming::job::StreamingJobSummary;
+
+#[derive(Clone, Debug)]
+pub struct ShowStreamingTablesResult {
+    jobs: Vec<StreamingJobSummary>, // one output row is produced per entry
+}
+
+impl ShowStreamingTablesResult {
+    pub fn new(jobs: Vec<StreamingJobSummary>) -> Self { // takes ownership of the summaries
+        Self { jobs }
+    }
+}
+
+impl DataSet for ShowStreamingTablesResult {
+    /// Renders one row per job: `job_id`, `status`, `pipeline_count`, `uptime`.
+    fn to_record_batch(&self) -> arrow_array::RecordBatch {
+        let summaries = &self.jobs;
+        let layout = Schema::new(vec![
+            Field::new("job_id", DataType::Utf8, false),
+            Field::new("status", DataType::Utf8, false),
+            Field::new("pipeline_count", DataType::Int32, false),
+            Field::new("uptime", DataType::Utf8, false),
+        ]);
+
+        // Columns are filled straight from the summaries, in schema order.
+        let ids = StringArray::from_iter_values(summaries.iter().map(|s| s.job_id.as_str()));
+        let states = StringArray::from_iter_values(summaries.iter().map(|s| s.status.as_str()));
+        let counts = Int32Array::from_iter_values(summaries.iter().map(|s| s.pipeline_count));
+        // Uptime is shown as human-readable text rather than a raw second count.
+        let uptimes =
+            StringArray::from_iter_values(summaries.iter().map(|s| format_duration(s.uptime_secs)));
+
+        // A construction failure degrades to an empty, schema-less batch instead of panicking.
+        arrow_array::RecordBatch::try_new(
+            Arc::new(layout),
+            vec![Arc::new(ids), Arc::new(states), Arc::new(counts), Arc::new(uptimes)],
+        )
+        .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty())))
+    }
+}
+
+/// Formats a second count as a compact uptime string.
+///
+/// Leading zero units are dropped: `59` -> `59s`, `3600` -> `1h 0m 0s`, `90061` -> `1d 1h 1m 1s`.
+fn format_duration(total_secs: u64) -> String {
+    let (days, rest) = (total_secs / 86400, total_secs % 86400);
+    let (hours, rest) = (rest / 3600, rest % 3600);
+    let (mins, secs) = (rest / 60, rest % 60);
+
+    // Units below the largest non-zero one are always printed, even when they are zero.
+    match (days, hours, mins) {
+        (0, 0, 0) => format!("{secs}s"),
+        (0, 0, _) => format!("{mins}m {secs}s"),
+        (0, _, _) => format!("{hours}h {mins}m {secs}s"),
+        _ => format!("{days}d {hours}h {mins}m {secs}s"),
+    }
+}
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 5372ed33..f52504e0 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -17,17 +17,21 @@ use thiserror::Error;
 use tracing::{debug, info};
 
 use crate::coordinator::dataset::{
-    empty_record_batch, ExecuteResult, ShowCatalogTablesResult, ShowCreateTableResult,
-    ShowFunctionsResult,
+    empty_record_batch, ExecuteResult, ShowCatalogTablesResult,
+    ShowCreateStreamingTableResult, ShowCreateTableResult, ShowFunctionsResult,
+    ShowStreamingTablesResult,
 };
 use crate::coordinator::plan::{
     CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, CreateTablePlanBody,
-    DropFunctionPlan, DropTablePlan, LookupTablePlan, PlanNode, PlanVisitor, PlanVisitorContext,
-    PlanVisitorResult, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan,
-    StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan,
+    DropFunctionPlan, DropStreamingTablePlan, DropTablePlan, LookupTablePlan, PlanNode,
+    PlanVisitor, PlanVisitorContext, PlanVisitorResult, ShowCatalogTablesPlan,
+    ShowCreateStreamingTablePlan, ShowCreateTablePlan, ShowFunctionsPlan,
+    ShowStreamingTablesPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
+    StreamingTableConnectorPlan,
 };
 use crate::coordinator::statement::{ConfigSource, FunctionSource};
 use crate::runtime::streaming::job::JobManager;
+use crate::runtime::streaming::protocol::control::StopMode;
 use crate::runtime::taskexecutor::TaskManager;
 use crate::sql::schema::table::Table as CatalogTable;
 use crate::sql::schema::show_create_catalog_table;
@@ -397,4 +401,124 @@ impl PlanVisitor for Executor {
 
         PlanVisitorResult::Execute(execute())
     }
+
+    /// Lists the jobs returned by the job manager, one result row per job.
+    fn visit_show_streaming_tables(
+        &self,
+        _plan: &ShowStreamingTablesPlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        // Count first: `jobs` is moved into the result set below.
+        let jobs = self.job_manager.list_jobs();
+        let n = jobs.len();
+        let outcome: Result<ExecuteResult, ExecuteError> = Ok(ExecuteResult::ok_with_data(
+            format!("{n} streaming table(s)"),
+            ShowStreamingTablesResult::new(jobs),
+        ));
+        PlanVisitorResult::Execute(outcome)
+    }
+
+    /// Describes one active streaming job: status, pipeline states and its program JSON.
+    fn visit_show_create_streaming_table(
+        &self,
+        plan: &ShowCreateStreamingTablePlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        let execute = || -> Result<ExecuteResult, ExecuteError> {
+            let detail = self.job_manager.get_job_detail(&plan.table_name).ok_or_else(|| {
+                ExecuteError::Validation(format!(
+                    "Streaming table '{}' not found in active jobs",
+                    plan.table_name
+                ))
+            })?;
+
+            let pipeline_lines: Vec<String> = detail
+                .pipelines
+                .iter()
+                .map(|p| format!("  pipeline[{}]: {}", p.pipeline_id, p.status))
+                .collect();
+            let pipeline_detail = if pipeline_lines.is_empty() {
+                "(no pipelines)".to_string()
+            } else {
+                pipeline_lines.join("\n")
+            };
+
+            // Parse first so the raw text is only cloned on the (unparsable) fallback path.
+            let program = match serde_json::from_str::<serde_json::Value>(&detail.program_json) {
+                Ok(mut parsed) => {
+                    strip_noisy_fields(&mut parsed);
+                    parsed
+                }
+                Err(_) => serde_json::Value::String(detail.program_json.clone()),
+            };
+            let program_display =
+                serde_json::to_string_pretty(&program).unwrap_or(detail.program_json);
+
+            Ok(ExecuteResult::ok_with_data(
+                format!("SHOW CREATE STREAMING TABLE {}", plan.table_name),
+                ShowCreateStreamingTableResult::new(
+                    plan.table_name.clone(),
+                    detail.status,
+                    pipeline_detail,
+                    program_display,
+                ),
+            ))
+        };
+        PlanVisitorResult::Execute(execute())
+    }
+
+    /// Stops and removes the streaming job named by `plan`, then drops its catalog entry.
+    ///
+    /// A missing job is a validation error unless `plan.if_exists` is set.
+    fn visit_drop_streaming_table(
+        &self,
+        plan: &DropStreamingTablePlan,
+        _context: &PlanVisitorContext,
+    ) -> PlanVisitorResult {
+        let execute = || -> Result<ExecuteResult, ExecuteError> {
+            let job_exists = self.job_manager.has_job(&plan.table_name);
+
+            if !job_exists && !plan.if_exists {
+                return Err(ExecuteError::Validation(format!(
+                    "Streaming table '{}' not found in active jobs",
+                    plan.table_name
+                )));
+            }
+
+            if job_exists {
+                let job_manager = Arc::clone(&self.job_manager);
+                let table_name = plan.table_name.clone();
+                // The visitor is synchronous, so the async removal is awaited by blocking here.
+                tokio::task::block_in_place(|| {
+                    tokio::runtime::Handle::current()
+                        .block_on(job_manager.remove_job(&table_name, StopMode::Graceful))
+                })
+                .map_err(|e| {
+                    ExecuteError::Internal(format!(
+                        "Failed to stop streaming job '{}': {}",
+                        plan.table_name, e
+                    ))
+                })?;
+
+                info!(
+                    table = %plan.table_name,
+                    "Streaming job stopped and removed"
+                );
+            }
+
+            // Catalog cleanup stays best-effort, but a failure is logged rather than discarded.
+            if let Err(e) = self.catalog_manager.drop_catalog_table(&plan.table_name, true) {
+                tracing::warn!(table = %plan.table_name, error = ?e, "Catalog drop failed");
+            }
+
+            let message = if job_exists {
+                format!("Dropped streaming table '{}'", plan.table_name)
+            } else {
+                format!("Streaming table '{}' does not exist (skipped)", plan.table_name)
+            };
+            Ok(ExecuteResult::ok(message))
+        };
+
+        PlanVisitorResult::Execute(execute())
+    }
 }
diff --git a/src/coordinator/mod.rs b/src/coordinator/mod.rs
index ca384a90..922b793f 100644
--- a/src/coordinator/mod.rs
+++ b/src/coordinator/mod.rs
@@ -24,7 +24,8 @@ mod tool;
 pub use coordinator::Coordinator;
 pub use dataset::{DataSet, ShowFunctionsResult};
 pub use statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
-    PythonModule, ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, Statement,
-    StopFunction, StreamingTableStatement,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction,
+    DropStreamingTableStatement, DropTableStatement, PythonModule, ShowCatalogTables,
+    ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, ShowStreamingTables,
+    StartFunction, Statement, StopFunction, StreamingTableStatement,
 };
diff --git a/src/coordinator/plan/drop_streaming_table_plan.rs b/src/coordinator/plan/drop_streaming_table_plan.rs
new file mode 100644
index 00000000..d06dc836
--- /dev/null
+++ b/src/coordinator/plan/drop_streaming_table_plan.rs
@@ -0,0 +1,34 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+#[derive(Debug, Clone)]
+pub struct DropStreamingTablePlan {
+    pub table_name: String, // name of the streaming table (job) to drop
+    pub if_exists: bool, // when true, the executor does not treat a missing job as an error
+}
+
+impl DropStreamingTablePlan {
+    pub fn new(table_name: String, if_exists: bool) -> Self { // stores both values unchanged
+        Self {
+            table_name,
+            if_exists,
+        }
+    }
+}
+
+impl PlanNode for DropStreamingTablePlan {
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_drop_streaming_table(self, context) // forwards to the visitor's drop handler
+    }
+}
diff --git a/src/coordinator/plan/logical_plan_visitor.rs b/src/coordinator/plan/logical_plan_visitor.rs
index b9cb4dfe..77fa9eb4 100644
--- a/src/coordinator/plan/logical_plan_visitor.rs
+++ b/src/coordinator/plan/logical_plan_visitor.rs
@@ -26,14 +26,17 @@ use tracing::debug;
 
 use crate::coordinator::analyze::analysis::Analysis;
 use crate::coordinator::plan::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan,
-    PlanNode, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan, StartFunctionPlan,
-    StopFunctionPlan, StreamingTable,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
+    DropStreamingTablePlan, DropTablePlan, PlanNode, ShowCatalogTablesPlan,
+    ShowCreateStreamingTablePlan, ShowCreateTablePlan, ShowFunctionsPlan,
+    ShowStreamingTablesPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
 };
 use crate::coordinator::statement::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
-    ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, StatementVisitor,
-    StatementVisitorContext, StatementVisitorResult, StopFunction, StreamingTableStatement,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction,
+    DropStreamingTableStatement, DropTableStatement, ShowCatalogTables,
+    ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, ShowStreamingTables,
+    StartFunction, StatementVisitor, StatementVisitorContext, StatementVisitorResult,
+    StopFunction, StreamingTableStatement,
 };
 use crate::coordinator::tool::ConnectorOptions;
 use crate::sql::analysis::{
@@ -459,4 +462,33 @@ impl StatementVisitor for LogicalPlanVisitor {
             *if_exists,
         )))
     }
+
+    fn visit_show_streaming_tables(
+        &self,
+        _stmt: &ShowStreamingTables,
+        _ctx: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Plan(Box::new(ShowStreamingTablesPlan::new()))
+    }
+
+    fn visit_show_create_streaming_table(
+        &self,
+        stmt: &ShowCreateStreamingTable,
+        _ctx: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Plan(Box::new(ShowCreateStreamingTablePlan::new(
+            stmt.table_name.clone(),
+        )))
+    }
+
+    fn visit_drop_streaming_table(
+        &self,
+        stmt: &DropStreamingTableStatement,
+        _ctx: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        StatementVisitorResult::Plan(Box::new(DropStreamingTablePlan::new(
+            stmt.table_name.clone(),
+            stmt.if_exists,
+        )))
+    }
 }
\ No newline at end of file
diff --git a/src/coordinator/plan/mod.rs b/src/coordinator/plan/mod.rs
index b04234d5..8166d444 100644
--- a/src/coordinator/plan/mod.rs
+++ b/src/coordinator/plan/mod.rs
@@ -14,13 +14,16 @@ mod create_function_plan;
 mod create_python_function_plan;
 mod create_table_plan;
 mod drop_function_plan;
+mod drop_streaming_table_plan;
 mod drop_table_plan;
 mod logical_plan_visitor;
 mod lookup_table_plan;
 mod optimizer;
 mod show_catalog_tables_plan;
+mod show_create_streaming_table_plan;
 mod show_create_table_plan;
 mod show_functions_plan;
+mod show_streaming_tables_plan;
 mod start_function_plan;
 mod stop_function_plan;
 mod streaming_table_connector_plan;
@@ -31,13 +34,16 @@ pub use create_function_plan::CreateFunctionPlan;
 pub use create_python_function_plan::CreatePythonFunctionPlan;
 pub use create_table_plan::{CreateTablePlan, CreateTablePlanBody};
 pub use drop_function_plan::DropFunctionPlan;
+pub use drop_streaming_table_plan::DropStreamingTablePlan;
 pub use drop_table_plan::DropTablePlan;
 pub use logical_plan_visitor::LogicalPlanVisitor;
 pub use lookup_table_plan::LookupTablePlan;
 pub use optimizer::LogicalPlanner;
 pub use show_catalog_tables_plan::ShowCatalogTablesPlan;
+pub use show_create_streaming_table_plan::ShowCreateStreamingTablePlan;
 pub use show_create_table_plan::ShowCreateTablePlan;
 pub use show_functions_plan::ShowFunctionsPlan;
+pub use show_streaming_tables_plan::ShowStreamingTablesPlan;
 pub use start_function_plan::StartFunctionPlan;
 pub use stop_function_plan::StopFunctionPlan;
 pub use streaming_table_connector_plan::StreamingTableConnectorPlan;
diff --git a/src/coordinator/plan/show_create_streaming_table_plan.rs b/src/coordinator/plan/show_create_streaming_table_plan.rs
new file mode 100644
index 00000000..8d63c0d5
--- /dev/null
+++ b/src/coordinator/plan/show_create_streaming_table_plan.rs
@@ -0,0 +1,30 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+#[derive(Debug, Clone)]
+pub struct ShowCreateStreamingTablePlan {
+    pub table_name: String,
+}
+
+impl ShowCreateStreamingTablePlan {
+    pub fn new(table_name: String) -> Self {
+        Self { table_name }
+    }
+}
+
+impl PlanNode for ShowCreateStreamingTablePlan {
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_show_create_streaming_table(self, context)
+    }
+}
diff --git a/src/coordinator/plan/show_streaming_tables_plan.rs b/src/coordinator/plan/show_streaming_tables_plan.rs
new file mode 100644
index 00000000..08410115
--- /dev/null
+++ b/src/coordinator/plan/show_streaming_tables_plan.rs
@@ -0,0 +1,28 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{PlanNode, PlanVisitor, PlanVisitorContext, PlanVisitorResult};
+
+#[derive(Debug, Default)]
+pub struct ShowStreamingTablesPlan;
+
+impl ShowStreamingTablesPlan {
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl PlanNode for ShowStreamingTablesPlan {
+    fn accept(&self, visitor: &dyn PlanVisitor, context: &PlanVisitorContext) -> PlanVisitorResult {
+        visitor.visit_show_streaming_tables(self, context)
+    }
+}
diff --git a/src/coordinator/plan/visitor.rs b/src/coordinator/plan/visitor.rs
index 28f11f53..bba44a1f 100644
--- a/src/coordinator/plan/visitor.rs
+++ b/src/coordinator/plan/visitor.rs
@@ -11,9 +11,11 @@
 // limitations under the License.
 
 use super::{
-    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan, DropTablePlan,
-    LookupTablePlan, ShowCatalogTablesPlan, ShowCreateTablePlan, ShowFunctionsPlan,
-    StartFunctionPlan, StopFunctionPlan, StreamingTable, StreamingTableConnectorPlan,
+    CreateFunctionPlan, CreatePythonFunctionPlan, CreateTablePlan, DropFunctionPlan,
+    DropStreamingTablePlan, DropTablePlan, LookupTablePlan, ShowCatalogTablesPlan,
+    ShowCreateStreamingTablePlan, ShowCreateTablePlan, ShowFunctionsPlan,
+    ShowStreamingTablesPlan, StartFunctionPlan, StopFunctionPlan, StreamingTable,
+    StreamingTableConnectorPlan,
 };
 
 /// Context passed to PlanVisitor methods
@@ -127,4 +129,22 @@ pub trait PlanVisitor {
         plan: &DropTablePlan,
         context: &PlanVisitorContext,
     ) -> PlanVisitorResult;
+
+    fn visit_show_streaming_tables(
+        &self,
+        plan: &ShowStreamingTablesPlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
+
+    fn visit_show_create_streaming_table(
+        &self,
+        plan: &ShowCreateStreamingTablePlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
+
+    fn visit_drop_streaming_table(
+        &self,
+        plan: &DropStreamingTablePlan,
+        context: &PlanVisitorContext,
+    ) -> PlanVisitorResult;
 }
diff --git a/src/coordinator/statement/drop_streaming_table.rs b/src/coordinator/statement/drop_streaming_table.rs
new file mode 100644
index 00000000..309abd97
--- /dev/null
+++ b/src/coordinator/statement/drop_streaming_table.rs
@@ -0,0 +1,40 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// `DROP STREAMING TABLE [IF EXISTS] <name>` — stops and removes the streaming
+/// job from `JobManager`, then drops the corresponding catalog entry if present.
+#[derive(Debug, Clone)]
+pub struct DropStreamingTableStatement {
+    pub table_name: String,
+    pub if_exists: bool,
+}
+
+impl DropStreamingTableStatement {
+    pub fn new(table_name: String, if_exists: bool) -> Self {
+        Self {
+            table_name,
+            if_exists,
+        }
+    }
+}
+
+impl Statement for DropStreamingTableStatement {
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_drop_streaming_table(self, context)
+    }
+}
diff --git a/src/coordinator/statement/mod.rs b/src/coordinator/statement/mod.rs
index 83f6ca0e..80d9c320 100644
--- a/src/coordinator/statement/mod.rs
+++ b/src/coordinator/statement/mod.rs
@@ -14,10 +14,13 @@ mod create_function;
 mod create_python_function;
 mod create_table;
 mod drop_function;
+mod drop_streaming_table;
 mod drop_table;
 mod show_catalog_tables;
+mod show_create_streaming_table;
 mod show_create_table;
 mod show_functions;
+mod show_streaming_tables;
 mod start_function;
 mod stop_function;
 mod streaming_table;
@@ -27,10 +30,13 @@ pub use create_function::{ConfigSource, CreateFunction, FunctionSource};
 pub use create_python_function::{CreatePythonFunction, PythonModule};
 pub use create_table::CreateTable;
 pub use drop_function::DropFunction;
+pub use drop_streaming_table::DropStreamingTableStatement;
 pub use drop_table::DropTableStatement;
 pub use show_catalog_tables::ShowCatalogTables;
+pub use show_create_streaming_table::ShowCreateStreamingTable;
 pub use show_create_table::ShowCreateTable;
 pub use show_functions::ShowFunctions;
+pub use show_streaming_tables::ShowStreamingTables;
 pub use start_function::StartFunction;
 pub use stop_function::StopFunction;
 pub use streaming_table::StreamingTableStatement;
@@ -56,4 +62,8 @@ pub trait Statement: fmt::Debug + Send + Sync {
     fn as_streaming_table_statement(&self) -> Option<&StreamingTableStatement> {
         None
     }
+
+    fn as_drop_streaming_table_statement(&self) -> Option<&DropStreamingTableStatement> {
+        None
+    }
 }
diff --git a/src/coordinator/statement/show_create_streaming_table.rs b/src/coordinator/statement/show_create_streaming_table.rs
new file mode 100644
index 00000000..73f16870
--- /dev/null
+++ b/src/coordinator/statement/show_create_streaming_table.rs
@@ -0,0 +1,36 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// `SHOW CREATE STREAMING TABLE <name>` — displays the pipeline topology and
+/// runtime metadata for the named streaming job.
+#[derive(Debug, Clone)]
+pub struct ShowCreateStreamingTable {
+    pub table_name: String,
+}
+
+impl ShowCreateStreamingTable {
+    pub fn new(table_name: String) -> Self {
+        Self { table_name }
+    }
+}
+
+impl Statement for ShowCreateStreamingTable {
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_show_create_streaming_table(self, context)
+    }
+}
diff --git a/src/coordinator/statement/show_streaming_tables.rs b/src/coordinator/statement/show_streaming_tables.rs
new file mode 100644
index 00000000..cedf3610
--- /dev/null
+++ b/src/coordinator/statement/show_streaming_tables.rs
@@ -0,0 +1,33 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::{Statement, StatementVisitor, StatementVisitorContext, StatementVisitorResult};
+
+/// `SHOW STREAMING TABLES` — lists all active streaming jobs managed by `JobManager`.
+#[derive(Debug, Clone, Default)]
+pub struct ShowStreamingTables;
+
+impl ShowStreamingTables {
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl Statement for ShowStreamingTables {
+    fn accept(
+        &self,
+        visitor: &dyn StatementVisitor,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult {
+        visitor.visit_show_streaming_tables(self, context)
+    }
+}
diff --git a/src/coordinator/statement/visitor.rs b/src/coordinator/statement/visitor.rs
index f24c85be..c3cf153a 100644
--- a/src/coordinator/statement/visitor.rs
+++ b/src/coordinator/statement/visitor.rs
@@ -11,9 +11,10 @@
 // limitations under the License.
 
 use super::{
-    CreateFunction, CreatePythonFunction, CreateTable, DropFunction, DropTableStatement,
-    ShowCatalogTables, ShowCreateTable, ShowFunctions, StartFunction, StopFunction,
-    StreamingTableStatement,
+    CreateFunction, CreatePythonFunction, CreateTable, DropFunction,
+    DropStreamingTableStatement, DropTableStatement, ShowCatalogTables,
+    ShowCreateStreamingTable, ShowCreateTable, ShowFunctions, ShowStreamingTables,
+    StartFunction, StopFunction, StreamingTableStatement,
 };
 use crate::coordinator::plan::PlanNode;
 use crate::coordinator::statement::Statement;
@@ -119,4 +120,22 @@ pub trait StatementVisitor {
         stmt: &DropTableStatement,
         context: &StatementVisitorContext,
     ) -> StatementVisitorResult;
+
+    fn visit_show_streaming_tables(
+        &self,
+        stmt: &ShowStreamingTables,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
+
+    fn visit_show_create_streaming_table(
+        &self,
+        stmt: &ShowCreateStreamingTable,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
+
+    fn visit_drop_streaming_table(
+        &self,
+        stmt: &DropStreamingTableStatement,
+        context: &StatementVisitorContext,
+    ) -> StatementVisitorResult;
 }
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
index 88df6457..5508ab70 100644
--- a/src/runtime/streaming/job/job_manager.rs
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -32,6 +32,30 @@ use crate::runtime::streaming::memory::MemoryPool;
 use crate::runtime::streaming::network::endpoint::{BoxedEventStream, PhysicalSender};
 use crate::runtime::streaming::protocol::control::{ControlCommand, StopMode};
 
+#[derive(Debug, Clone)]
+pub struct StreamingJobSummary {
+    pub job_id: String,
+    pub status: String,
+    pub pipeline_count: i32,
+    pub uptime_secs: u64,
+}
+
+#[derive(Debug, Clone)]
+pub struct PipelineDetail {
+    pub pipeline_id: u32,
+    pub status: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct StreamingJobDetail {
+    pub job_id: String,
+    pub status: String,
+    pub pipeline_count: i32,
+    pub uptime_secs: u64,
+    pub pipelines: Vec<PipelineDetail>,
+    pub program_json: String,
+}
+
 static GLOBAL_JOB_MANAGER: OnceLock<Arc<JobManager>> = OnceLock::new();
 
 pub struct JobManager {
@@ -178,7 +202,114 @@ impl JobManager {
         )
     }
 
-    // ========================================================================
+    /// Returns one [`StreamingJobSummary`] for each entry in `active_jobs`.
+    ///
+    /// The read lock on `active_jobs` is held while the summaries are built. Each
+    /// summary carries the job id, the aggregated pipeline status, the number of
+    /// pipelines and the whole seconds elapsed since the graph's `start_time`.
+    pub fn list_jobs(&self) -> Vec<StreamingJobSummary> {
+        let registry = self.active_jobs.read().unwrap();
+
+        // Lazy adapter: nothing is evaluated until `collect` below.
+        let summaries = registry.values().map(|job| StreamingJobSummary {
+            job_id: job.job_id.clone(),
+            status: Self::aggregate_pipeline_status(&job.pipelines),
+            pipeline_count: job.pipelines.len() as i32,
+            uptime_secs: job.start_time.elapsed().as_secs(),
+        });
+        summaries.collect()
+    }
+
+    pub fn get_job_detail(&self, job_id: &str) -> Option<StreamingJobDetail> {
+        let jobs_guard = self.active_jobs.read().unwrap();
+        let graph = jobs_guard.get(job_id)?;
+
+        let uptime_secs = graph.start_time.elapsed().as_secs();
+        let overall_status = Self::aggregate_pipeline_status(&graph.pipelines);
+
+        let pipeline_details: Vec<PipelineDetail> = graph
+            .pipelines
+            .iter()
+            .map(|(id, pipeline)| {
+                let status = pipeline.status.read().unwrap().clone();
+                PipelineDetail {
+                    pipeline_id: *id,
+                    status: format!("{status:?}"),
+                }
+            })
+            .collect();
+
+        let program_json = serde_json::to_string_pretty(&graph.program).unwrap_or_else(|e| {
+            format!("{{\"error\": \"Failed to serialize program: {e}\"}}")
+        });
+
+        Some(StreamingJobDetail {
+            job_id: graph.job_id.clone(),
+            status: overall_status,
+            pipeline_count: graph.pipelines.len() as i32,
+            uptime_secs,
+            pipelines: pipeline_details,
+            program_json,
+        })
+    }
+
+    pub fn has_job(&self, job_id: &str) -> bool {
+        self.active_jobs.read().unwrap().contains_key(job_id)
+    }
+
+    /// Sends `ControlCommand::Stop` to every pipeline of `job_id`, then removes the
+    /// job from `active_jobs`. Fails with "Job not found" if the id is not registered.
+    pub async fn remove_job(&self, job_id: &str, mode: StopMode) -> anyhow::Result<()> {
+        // Snapshot the senders in a scope that ends before the first `.await`, so the
+        // std `RwLock` read guard is never live across a suspension point.
+        let control_senders: Vec<_> = {
+            let jobs_guard = self.active_jobs.read().unwrap();
+            let Some(graph) = jobs_guard.get(job_id) else {
+                anyhow::bail!("Job not found: {job_id}");
+            };
+            graph.pipelines.values().map(|p| p.control_tx.clone()).collect()
+        };
+
+        for tx in control_senders {
+            // Best effort: a send error is ignored so the job is still removed below.
+            let _ = tx.send(ControlCommand::Stop { mode: mode.clone() }).await;
+        }
+        self.active_jobs.write().unwrap().remove(job_id);
+        info!(job_id = %job_id, "Job stopped and removed.");
+        Ok(())
+    }
+
+    /// Collapses the per-pipeline statuses of one job into a single label:
+    /// `DEGRADED`, `RUNNING`, `FINISHED`, `INITIALIZING` or `PARTIAL`.
+    fn aggregate_pipeline_status(pipelines: &HashMap<u32, PhysicalPipeline>) -> String {
+        let total = pipelines.len() as u32;
+        let (mut up, mut broken, mut done, mut booting) = (0u32, 0u32, 0u32, 0u32);
+
+        for entry in pipelines.values() {
+            let current = entry.status.read().unwrap();
+            match &*current {
+                PipelineStatus::Running => up += 1,
+                PipelineStatus::Failed { .. } => broken += 1,
+                PipelineStatus::Finished => done += 1,
+                PipelineStatus::Initializing => booting += 1,
+                PipelineStatus::Stopping => {} // not tallied
+            }
+        }
+        // Checked in priority order: any failed pipeline wins over every other state.
+        let label = if broken > 0 {
+            "DEGRADED"
+        } else if up > 0 && up == total {
+            "RUNNING"
+        } else if done == total {
+            "FINISHED"
+        } else if booting > 0 {
+            "INITIALIZING"
+        } else {
+            "PARTIAL"
+        };
+        label.to_string()
+    }
+
     // ========================================================================
 
     fn build_operator_chain(
diff --git a/src/runtime/streaming/job/mod.rs b/src/runtime/streaming/job/mod.rs
index 5b2bbd8c..02e0343c 100644
--- a/src/runtime/streaming/job/mod.rs
+++ b/src/runtime/streaming/job/mod.rs
@@ -14,4 +14,4 @@ pub mod edge_manager;
 pub mod job_manager;
 pub mod models;
 
-pub use job_manager::JobManager;
+pub use job_manager::{JobManager, StreamingJobSummary};
diff --git a/src/sql/parse.rs b/src/sql/parse.rs
index 33bb13ad..5fd4a59f 100644
--- a/src/sql/parse.rs
+++ b/src/sql/parse.rs
@@ -38,24 +38,65 @@ use datafusion::sql::sqlparser::dialect::FunctionStreamDialect;
 use datafusion::sql::sqlparser::parser::Parser;
 
 use crate::coordinator::{
-    CreateFunction, CreateTable, DropFunction, DropTableStatement, ShowCatalogTables,
-    ShowCreateTable, ShowFunctions, StartFunction, Statement as CoordinatorStatement, StopFunction,
+    CreateFunction, CreateTable, DropFunction, DropStreamingTableStatement, DropTableStatement,
+    ShowCatalogTables, ShowCreateStreamingTable, ShowCreateTable, ShowFunctions,
+    ShowStreamingTables, StartFunction, Statement as CoordinatorStatement, StopFunction,
     StreamingTableStatement,
 };
 
-/// `DROP STREAMING TABLE t` is accepted as sugar for `DROP TABLE t` against the same catalog.
-fn rewrite_drop_streaming_table(sql: &str) -> String {
-    let trimmed = sql.trim_start();
-    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
+/// Streaming-specific SQL that the sqlparser dialect does not natively handle.
+///
+/// Returns `Some(statement)` if the SQL was intercepted, `None` otherwise so
+/// the caller falls through to the normal sqlparser pipeline. Trailing `;` is
+/// ignored for every form; a missing name or surplus tokens are not intercepted.
+fn try_parse_streaming_statement(sql: &str) -> Option<Box<dyn CoordinatorStatement>> {
+    // Strip the terminator once up front so `SHOW STREAMING TABLES;` matches too.
+    let sql = sql.trim_end_matches(|c: char| c == ';' || c.is_whitespace());
+    let tokens: Vec<&str> = sql.split_whitespace().collect();
+    if tokens.is_empty() {
+        return None;
+    }
+
+    // SHOW STREAMING TABLES
+    if tokens.len() == 3
+        && tokens[0].eq_ignore_ascii_case("show")
+        && tokens[1].eq_ignore_ascii_case("streaming")
+        && tokens[2].eq_ignore_ascii_case("tables")
+    {
+        return Some(Box::new(ShowStreamingTables::new()));
+    }
+
+    // SHOW CREATE STREAMING TABLE <name>
+    if tokens.len() == 5
+        && tokens[0].eq_ignore_ascii_case("show")
+        && tokens[1].eq_ignore_ascii_case("create")
+        && tokens[2].eq_ignore_ascii_case("streaming")
+        && tokens[3].eq_ignore_ascii_case("table")
+    {
+        return Some(Box::new(ShowCreateStreamingTable::new(tokens[4].to_string())));
+    }
+
+    // DROP STREAMING TABLE [IF EXISTS] <name>
     if tokens.len() >= 4
         && tokens[0].eq_ignore_ascii_case("drop")
         && tokens[1].eq_ignore_ascii_case("streaming")
         && tokens[2].eq_ignore_ascii_case("table")
     {
-        let rest = tokens[3..].join(" ");
-        return format!("DROP TABLE {rest}");
+        // Only the exact shapes `<name>` and `IF EXISTS <name>` are accepted; anything
+        // else falls through so the regular parser reports the error.
+        let (if_exists, name) = match &tokens[3..] {
+            [name] => (false, *name),
+            [kw_if, kw_exists, name]
+                if kw_if.eq_ignore_ascii_case("if") && kw_exists.eq_ignore_ascii_case("exists") =>
+            {
+                (true, *name)
+            }
+            _ => return None,
+        };
+        return Some(Box::new(DropStreamingTableStatement::new(name.to_string(), if_exists)));
     }
-    sql.to_string()
+
+    None
 }
 
 pub fn parse_sql(query: &str) -> Result<Vec<Box<dyn CoordinatorStatement>>> {
@@ -64,9 +105,12 @@ pub fn parse_sql(query: &str) -> Result<Vec<Box<dyn CoordinatorStatement>>> {
         return plan_err!("Query is empty");
     }
 
+    if let Some(stmt) = try_parse_streaming_statement(trimmed) {
+        return Ok(vec![stmt]);
+    }
+
     let dialect = FunctionStreamDialect {};
-    let to_parse = rewrite_drop_streaming_table(trimmed);
-    let statements = Parser::parse_sql(&dialect, &to_parse)
+    let statements = Parser::parse_sql(&dialect, trimmed)
         .map_err(|e| DataFusionError::Plan(format!("SQL parse error: {e}")))?;
 
     if statements.is_empty() {
@@ -242,9 +286,27 @@ mod tests {
     }
 
     #[test]
-    fn test_parse_drop_streaming_table_rewritten() {
+    fn test_parse_drop_streaming_table() {
         let stmt = first_stmt("DROP STREAMING TABLE my_sink");
-        assert!(is_type(stmt.as_ref(), "DropTableStatement"));
+        assert!(is_type(stmt.as_ref(), "DropStreamingTableStatement"));
+    }
+
+    #[test]
+    fn test_parse_drop_streaming_table_if_exists() {
+        let stmt = first_stmt("DROP STREAMING TABLE IF EXISTS my_sink");
+        assert!(is_type(stmt.as_ref(), "DropStreamingTableStatement"));
+    }
+
+    #[test]
+    fn test_parse_show_streaming_tables() {
+        let stmt = first_stmt("SHOW STREAMING TABLES");
+        assert!(is_type(stmt.as_ref(), "ShowStreamingTables"));
+    }
+
+    #[test]
+    fn test_parse_show_create_streaming_table() {
+        let stmt = first_stmt("SHOW CREATE STREAMING TABLE my_sink");
+        assert!(is_type(stmt.as_ref(), "ShowCreateStreamingTable"));
     }
 
     /// `CREATE STREAMING TABLE` is the sink DDL supported by FunctionStream (not `CREATE STREAM TABLE`).

From d1bf1c70568ebd602881d7956c0bc718cda04b7f Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 21:01:09 +0800
Subject: [PATCH 38/44] update

---
 .../show_create_streaming_table_result.rs     |  10 +-
 src/coordinator/execution/executor.rs         |  11 +-
 src/runtime/streaming/job/job_manager.rs      |  10 +-
 src/sql/common/mod.rs                         |   2 +
 src/sql/common/topology.rs                    | 280 ++++++++++++++++++
 5 files changed, 293 insertions(+), 20 deletions(-)
 create mode 100644 src/sql/common/topology.rs

diff --git a/src/coordinator/dataset/show_create_streaming_table_result.rs b/src/coordinator/dataset/show_create_streaming_table_result.rs
index 2b9e0d0a..28f0069e 100644
--- a/src/coordinator/dataset/show_create_streaming_table_result.rs
+++ b/src/coordinator/dataset/show_create_streaming_table_result.rs
@@ -22,7 +22,7 @@ pub struct ShowCreateStreamingTableResult {
     table_name: String,
     status: String,
     pipeline_detail: String,
-    program_json: String,
+    topology: String,
 }
 
 impl ShowCreateStreamingTableResult {
@@ -30,13 +30,13 @@ impl ShowCreateStreamingTableResult {
         table_name: String,
         status: String,
         pipeline_detail: String,
-        program_json: String,
+        topology: String,
     ) -> Self {
         Self {
             table_name,
             status,
             pipeline_detail,
-            program_json,
+            topology,
         }
     }
 }
@@ -47,7 +47,7 @@ impl DataSet for ShowCreateStreamingTableResult {
             Field::new("Streaming Table", DataType::Utf8, false),
             Field::new("Status", DataType::Utf8, false),
             Field::new("Pipelines", DataType::Utf8, false),
-            Field::new("Program", DataType::Utf8, false),
+            Field::new("Topology", DataType::Utf8, false),
         ]));
 
         arrow_array::RecordBatch::try_new(
@@ -56,7 +56,7 @@ impl DataSet for ShowCreateStreamingTableResult {
                 Arc::new(StringArray::from(vec![self.table_name.as_str()])),
                 Arc::new(StringArray::from(vec![self.status.as_str()])),
                 Arc::new(StringArray::from(vec![self.pipeline_detail.as_str()])),
-                Arc::new(StringArray::from(vec![self.program_json.as_str()])),
+                Arc::new(StringArray::from(vec![self.topology.as_str()])),
             ],
         )
         .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty())))
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index f52504e0..9907dbe8 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -445,22 +445,13 @@ impl PlanVisitor for Executor {
                 pipeline_lines.join("\n")
             };
 
-            let mut program_json = serde_json::Value::String(detail.program_json.clone());
-            if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&detail.program_json) {
-                let mut cleaned = parsed;
-                strip_noisy_fields(&mut cleaned);
-                program_json = cleaned;
-            }
-            let program_display =
-                serde_json::to_string_pretty(&program_json).unwrap_or(detail.program_json);
-
             Ok(ExecuteResult::ok_with_data(
                 format!("SHOW CREATE STREAMING TABLE {}", plan.table_name),
                 ShowCreateStreamingTableResult::new(
                     plan.table_name.clone(),
                     detail.status,
                     pipeline_detail,
-                    program_display,
+                    detail.topology,
                 ),
             ))
         };
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
index 5508ab70..3a400b54 100644
--- a/src/runtime/streaming/job/job_manager.rs
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -20,6 +20,8 @@ use tracing::{error, info, warn};
 
 use protocol::grpc::api::{ChainedOperator, FsProgram};
 
+use crate::sql::common::render_program_topology;
+
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator};
 use crate::runtime::streaming::api::source::SourceOperator;
@@ -53,7 +55,7 @@ pub struct StreamingJobDetail {
     pub pipeline_count: i32,
     pub uptime_secs: u64,
     pub pipelines: Vec<PipelineDetail>,
-    pub program_json: String,
+    pub topology: String,
 }
 
 static GLOBAL_JOB_MANAGER: OnceLock<Arc<JobManager>> = OnceLock::new();
@@ -239,9 +241,7 @@ impl JobManager {
             })
             .collect();
 
-        let program_json = serde_json::to_string_pretty(&graph.program).unwrap_or_else(|e| {
-            format!("{{\"error\": \"Failed to serialize program: {e}\"}}")
-        });
+        let topology = render_program_topology(&graph.program);
 
         Some(StreamingJobDetail {
             job_id: graph.job_id.clone(),
@@ -249,7 +249,7 @@ impl JobManager {
             pipeline_count: graph.pipelines.len() as i32,
             uptime_secs,
             pipelines: pipeline_details,
-            program_json,
+            topology,
         })
     }
 
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index 03e9b4a6..4c0cc6d3 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -34,6 +34,7 @@ pub mod task_info;
 pub mod time_utils;
 pub mod worker;
 pub mod converter;
+pub mod topology;
 
 // ── Re-exports from existing modules ──
 pub use arrow_ext::FsExtensionType;
@@ -48,6 +49,7 @@ pub use operator_config::MetadataField;
 
 // ── Well-known column names ──
 pub use constants::sql_field::{TIMESTAMP_FIELD, UPDATING_META_FIELD};
+pub use topology::render_program_topology;
 
 // ── Environment variables ──
 pub const JOB_ID_ENV: &str = "JOB_ID";
diff --git a/src/sql/common/topology.rs b/src/sql/common/topology.rs
new file mode 100644
index 00000000..bc71d57f
--- /dev/null
+++ b/src/sql/common/topology.rs
@@ -0,0 +1,280 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! EXPLAIN-like DAG text renderer for [`FsProgram`].
+//!
+//! Renders a streaming pipeline topology as a human-readable ASCII graph using
+//! Kahn's topological sort.  Handles linear chains, fan-out, and fan-in (JOIN).
+
+use std::collections::{BTreeMap, VecDeque};
+use std::fmt::Write;
+
+use protocol::grpc::api::FsProgram;
+
+fn edge_type_label(edge_type: i32) -> &'static str {
+    // (edge_type value, label) pairs; any value not listed renders as "Unknown".
+    const LABELS: [(i32, &str); 4] =
+        [(1, "Forward"), (2, "Shuffle"), (3, "LeftJoin"), (4, "RightJoin")];
+    LABELS
+        .iter()
+        .find(|(code, _)| *code == edge_type)
+        .map_or("Unknown", |(_, label)| *label)
+}
+
+/// Render an [`FsProgram`] as an EXPLAIN-style topology string.
+///
+/// Nodes are listed in topological order (Kahn's algorithm, seeded in ascending
+/// `node_index` order). Nodes that never reach in-degree zero, such as members of
+/// a cycle, are appended afterwards in ascending index order.
+///
+/// Each node is labelled `Source` (no incoming edges), `Sink` (inputs but no
+/// outgoing edges) or `Operator`, followed by its parallelism, operator chain,
+/// optional description and edges. A connector arrow is drawn between two
+/// consecutively listed nodes only when the edge between them is the first node's
+/// sole output and the second's sole input. No nodes yields `"(empty topology)"`.
+pub fn render_program_topology(program: &FsProgram) -> String {
+    if program.nodes.is_empty() {
+        return "(empty topology)".to_string();
+    }
+
+    struct EdgeInfo { target: i32, edge_type: i32 }
+    struct InputInfo { source: i32, edge_type: i32 }
+
+    let node_map: BTreeMap<i32, &protocol::grpc::api::FsNode> =
+        program.nodes.iter().map(|n| (n.node_index, n)).collect();
+
+    let mut downstream: BTreeMap<i32, Vec<EdgeInfo>> = BTreeMap::new();
+    let mut upstream: BTreeMap<i32, Vec<InputInfo>> = BTreeMap::new();
+    // Seed every declared node with zero so nodes without inputs start the sort.
+    let mut in_degree: BTreeMap<i32, usize> = node_map.keys().map(|idx| (*idx, 0)).collect();
+
+    for edge in &program.edges {
+        downstream.entry(edge.source).or_default().push(EdgeInfo {
+            target: edge.target,
+            edge_type: edge.edge_type,
+        });
+        upstream.entry(edge.target).or_default().push(InputInfo {
+            source: edge.source,
+            edge_type: edge.edge_type,
+        });
+        *in_degree.entry(edge.target).or_insert(0) += 1;
+    }
+
+    // Kahn's topological sort. `in_degree` is consumed in place: after the loop an
+    // entry is zero exactly when that index has been emitted into `topo_order`.
+    let mut queue: VecDeque<i32> = in_degree
+        .iter()
+        .filter(|(_, deg)| **deg == 0)
+        .map(|(idx, _)| *idx)
+        .collect();
+    let mut topo_order: Vec<i32> = Vec::with_capacity(node_map.len());
+    while let Some(idx) = queue.pop_front() {
+        topo_order.push(idx);
+        for e in downstream.get(&idx).into_iter().flatten() {
+            if let Some(deg) = in_degree.get_mut(&e.target) {
+                *deg -= 1;
+                if *deg == 0 {
+                    queue.push_back(e.target);
+                }
+            }
+        }
+    }
+    // Nodes with unmet inputs (e.g. in a cycle) were not emitted above; append them
+    // in index order. The leftover in-degree finds them without rescanning the list.
+    let unresolved = |idx: &i32| in_degree.get(idx).map_or(false, |deg| *deg != 0);
+    topo_order.extend(node_map.keys().copied().filter(unresolved));
+
+    let is_source = |idx: &i32| upstream.get(idx).map_or(true, |v| v.is_empty());
+    let is_sink = |idx: &i32| downstream.get(idx).map_or(true, |v| v.is_empty());
+
+    let mut out = String::new();
+    let _ = writeln!(
+        out,
+        "Pipeline Topology  ({} nodes, {} edges)",
+        program.nodes.len(),
+        program.edges.len(),
+    );
+    let _ = writeln!(out, "{}", "=".repeat(50));
+
+    for (pos, &node_idx) in topo_order.iter().enumerate() {
+        let Some(node) = node_map.get(&node_idx) else {
+            continue; // edge target with no declared node: nothing to render
+        };
+
+        let op_chain: String = node
+            .operators
+            .iter()
+            .map(|op| op.operator_name.as_str())
+            .collect::<Vec<_>>()
+            .join(" -> ");
+
+        let role = if is_source(&node_idx) {
+            "Source"
+        } else if is_sink(&node_idx) {
+            "Sink"
+        } else {
+            "Operator"
+        };
+
+        let _ = writeln!(out);
+        let _ = writeln!(
+            out,
+            "[{role}] Node {node_idx}    parallelism = {}",
+            node.parallelism,
+        );
+        let _ = writeln!(out, "  operators:  {op_chain}");
+
+        if !node.description.is_empty() {
+            let _ = writeln!(out, "  desc:       {}", node.description);
+        }
+
+        if let Some(inputs) = upstream.get(&node_idx) {
+            if inputs.len() == 1 {
+                let i = &inputs[0];
+                let _ = writeln!(
+                    out,
+                    "  input:      <-- [{}] Node {}",
+                    edge_type_label(i.edge_type),
+                    i.source,
+                );
+            } else if inputs.len() > 1 {
+                let _ = writeln!(out, "  inputs:");
+                for i in inputs {
+                    let _ = writeln!(
+                        out,
+                        "              <-- [{}] Node {}",
+                        edge_type_label(i.edge_type),
+                        i.source,
+                    );
+                }
+            }
+        }
+
+        if let Some(outputs) = downstream.get(&node_idx) {
+            if outputs.len() == 1 {
+                let e = &outputs[0];
+                let _ = writeln!(
+                    out,
+                    "  output:     --> [{}] Node {}",
+                    edge_type_label(e.edge_type),
+                    e.target,
+                );
+            } else if outputs.len() > 1 {
+                let _ = writeln!(out, "  outputs:");
+                for e in outputs {
+                    let _ = writeln!(
+                        out,
+                        "              --> [{}] Node {}",
+                        edge_type_label(e.edge_type),
+                        e.target,
+                    );
+                }
+            }
+        }
+
+        // Draw a connector only for a direct 1:1 hand-off to the next listed node.
+        let sole_output = match downstream.get(&node_idx).map(Vec::as_slice) {
+            Some([only]) => Some(only),
+            _ => None,
+        };
+        if let (Some(edge), Some(&next)) = (sole_output, topo_order.get(pos + 1)) {
+            let next_single_in = upstream.get(&next).map_or(false, |v| v.len() == 1);
+            if edge.target == next && next_single_in {
+                let _ = writeln!(out, "        |");
+                let _ = writeln!(out, "        | {}", edge_type_label(edge.edge_type));
+                let _ = writeln!(out, "        v");
+            }
+        }
+    }
+
+    out.trim_end().to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use protocol::grpc::api::{ChainedOperator, FsEdge, FsNode, FsProgram};
+
+    /// Builds a node whose `node_id` mirrors `node_index`; `operators` holds `(id, name)` pairs.
+    fn make_node(node_index: i32, operators: Vec<(&str, &str)>, desc: &str, parallelism: u32) -> FsNode {
+        let chained = operators
+            .into_iter()
+            .map(|(id, name)| ChainedOperator {
+                operator_id: id.to_string(),
+                operator_name: name.to_string(),
+                operator_config: Vec::new(),
+            })
+            .collect();
+        FsNode {
+            node_index,
+            node_id: node_index as u32,
+            parallelism,
+            description: desc.to_string(),
+            operators: chained,
+            edges: Vec::new(),
+        }
+    }
+
+    /// Builds a schema-less edge with the given numeric edge type.
+    fn make_edge(source: i32, target: i32, edge_type: i32) -> FsEdge {
+        FsEdge { source, target, schema: None, edge_type }
+    }
+
+    #[test]
+    fn empty_program_renders_placeholder() {
+        let empty = FsProgram { nodes: Vec::new(), edges: Vec::new(), program_config: None };
+        assert_eq!(render_program_topology(&empty), "(empty topology)");
+    }
+
+    /// A three-node chain is labelled Source / Operator / Sink and shows its Forward edges.
+    #[test]
+    fn linear_pipeline_renders_correctly() {
+        let nodes = vec![
+            make_node(0, vec![("src_0", "ConnectorSource")], "", 1),
+            make_node(1, vec![("val_1", "Value"), ("wm_2", "ExpressionWatermark")], "source -> watermark", 1),
+            make_node(2, vec![("sink_3", "ConnectorSink")], "sink (kafka)", 1),
+        ];
+        let edges = vec![make_edge(0, 1, 1), make_edge(1, 2, 1)];
+        let rendered = render_program_topology(&FsProgram { nodes, edges, program_config: None });
+        let expected = [
+            "[Source] Node 0",
+            "[Operator] Node 1",
+            "[Sink] Node 2",
+            "ConnectorSource",
+            "Value -> ExpressionWatermark",
+            "Forward",
+        ];
+        for fragment in expected {
+            assert!(rendered.contains(fragment), "missing `{fragment}` in:\n{rendered}");
+        }
+    }
+
+    /// A node fed by two join edges lists both under `inputs:` and is an Operator.
+    #[test]
+    fn join_topology_shows_multiple_inputs() {
+        let nodes = vec![
+            make_node(0, vec![("src_a", "ConnectorSource")], "source A", 1),
+            make_node(1, vec![("src_b", "ConnectorSource")], "source B", 1),
+            make_node(2, vec![("join_0", "WindowJoin")], "join node", 2),
+            make_node(3, vec![("sink_0", "ConnectorSink")], "sink", 1),
+        ];
+        let edges = vec![
+            make_edge(0, 2, 3), // LeftJoin
+            make_edge(1, 2, 4), // RightJoin
+            make_edge(2, 3, 1), // Forward
+        ];
+        let rendered = render_program_topology(&FsProgram { nodes, edges, program_config: None });
+        for fragment in ["inputs:", "LeftJoin", "RightJoin", "[Operator] Node 2"] {
+            assert!(rendered.contains(fragment), "missing `{fragment}` in:\n{rendered}");
+        }
+    }
+}

From 871a1cf215270a49a420417718ec72359f5db8c2 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 21:06:28 +0800
Subject: [PATCH 39/44] update

---
 .../show_create_streaming_table_result.rs     | 13 +++++++----
 src/coordinator/execution/executor.rs         | 23 +------------------
 src/runtime/streaming/job/job_manager.rs      |  8 ++-----
 3 files changed, 12 insertions(+), 32 deletions(-)

diff --git a/src/coordinator/dataset/show_create_streaming_table_result.rs b/src/coordinator/dataset/show_create_streaming_table_result.rs
index 28f0069e..ed3ec600 100644
--- a/src/coordinator/dataset/show_create_streaming_table_result.rs
+++ b/src/coordinator/dataset/show_create_streaming_table_result.rs
@@ -14,6 +14,9 @@ use std::sync::Arc;
 
 use arrow_array::StringArray;
 use arrow_schema::{DataType, Field, Schema};
+use protocol::grpc::api::FsProgram;
+
+use crate::sql::common::render_program_topology;
 
 use super::DataSet;
 
@@ -22,7 +25,7 @@ pub struct ShowCreateStreamingTableResult {
     table_name: String,
     status: String,
     pipeline_detail: String,
-    topology: String,
+    program: FsProgram,
 }
 
 impl ShowCreateStreamingTableResult {
@@ -30,19 +33,21 @@ impl ShowCreateStreamingTableResult {
         table_name: String,
         status: String,
         pipeline_detail: String,
-        topology: String,
+        program: FsProgram,
     ) -> Self {
         Self {
             table_name,
             status,
             pipeline_detail,
-            topology,
+            program,
         }
     }
 }
 
 impl DataSet for ShowCreateStreamingTableResult {
     fn to_record_batch(&self) -> arrow_array::RecordBatch {
+        let topology = render_program_topology(&self.program);
+
         let schema = Arc::new(Schema::new(vec![
             Field::new("Streaming Table", DataType::Utf8, false),
             Field::new("Status", DataType::Utf8, false),
@@ -56,7 +61,7 @@ impl DataSet for ShowCreateStreamingTableResult {
                 Arc::new(StringArray::from(vec![self.table_name.as_str()])),
                 Arc::new(StringArray::from(vec![self.status.as_str()])),
                 Arc::new(StringArray::from(vec![self.pipeline_detail.as_str()])),
-                Arc::new(StringArray::from(vec![self.topology.as_str()])),
+                Arc::new(StringArray::from(vec![topology.as_str()])),
             ],
         )
         .unwrap_or_else(|_| arrow_array::RecordBatch::new_empty(Arc::new(Schema::empty())))
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index 9907dbe8..dcfbcb83 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -88,27 +88,6 @@ impl Executor {
 }
 
 
-fn strip_noisy_fields(value: &mut serde_json::Value) {
-    match value {
-        serde_json::Value::Object(map) => {
-            // 兼容 camelCase 和 snake_case，直接把配置项连根拔起
-            map.remove("operatorConfig");
-            map.remove("operator_config");
-
-            // 继续向子节点递归
-            for (_, v) in map.iter_mut() {
-                strip_noisy_fields(v);
-            }
-        }
-        serde_json::Value::Array(arr) => {
-            for v in arr.iter_mut() {
-                strip_noisy_fields(v);
-            }
-        }
-        _ => {}
-    }
-}
-
 impl PlanVisitor for Executor {
     fn visit_create_function(
         &self,
@@ -451,7 +430,7 @@ impl PlanVisitor for Executor {
                     plan.table_name.clone(),
                     detail.status,
                     pipeline_detail,
-                    detail.topology,
+                    detail.program,
                 ),
             ))
         };
diff --git a/src/runtime/streaming/job/job_manager.rs b/src/runtime/streaming/job/job_manager.rs
index 3a400b54..19a8a26e 100644
--- a/src/runtime/streaming/job/job_manager.rs
+++ b/src/runtime/streaming/job/job_manager.rs
@@ -20,8 +20,6 @@ use tracing::{error, info, warn};
 
 use protocol::grpc::api::{ChainedOperator, FsProgram};
 
-use crate::sql::common::render_program_topology;
-
 use crate::runtime::streaming::api::context::TaskContext;
 use crate::runtime::streaming::api::operator::{ConstructedOperator, Operator};
 use crate::runtime::streaming::api::source::SourceOperator;
@@ -55,7 +53,7 @@ pub struct StreamingJobDetail {
     pub pipeline_count: i32,
     pub uptime_secs: u64,
     pub pipelines: Vec<PipelineDetail>,
-    pub topology: String,
+    pub program: FsProgram,
 }
 
 static GLOBAL_JOB_MANAGER: OnceLock<Arc<JobManager>> = OnceLock::new();
@@ -241,15 +239,13 @@ impl JobManager {
             })
             .collect();
 
-        let topology = render_program_topology(&graph.program);
-
         Some(StreamingJobDetail {
             job_id: graph.job_id.clone(),
             status: overall_status,
             pipeline_count: graph.pipelines.len() as i32,
             uptime_secs,
             pipelines: pipeline_details,
-            topology,
+            program: graph.program.clone(),
         })
     }
 

From ff5ec464becaa851754dd5df1a5681a221760592 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 21:19:07 +0800
Subject: [PATCH 40/44] update

---
 src/sql/analysis/source_rewriter.rs | 31 -----------------------------
 1 file changed, 31 deletions(-)

diff --git a/src/sql/analysis/source_rewriter.rs b/src/sql/analysis/source_rewriter.rs
index 39df350e..0ade3ea1 100644
--- a/src/sql/analysis/source_rewriter.rs
+++ b/src/sql/analysis/source_rewriter.rs
@@ -10,7 +10,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::HashSet;
 use std::sync::Arc;
 use std::time::Duration;
 
@@ -25,7 +24,6 @@ use crate::sql::schema::source_table::SourceTable;
 use crate::sql::schema::ColumnDescriptor;
 use crate::sql::schema::table::Table;
 use crate::sql::schema::StreamSchemaProvider;
-use crate::sql::common::constants::sql_field;
 use crate::sql::common::UPDATING_META_FIELD;
 use crate::sql::extensions::debezium::UnrollDebeziumPayloadNode;
 use crate::sql::extensions::remote_table::RemoteTableBoundaryNode;
@@ -48,35 +46,6 @@ impl<'a> SourceRewriter<'a> {
 }
 
 impl SourceRewriter<'_> {
-    /// Output column names after stream-catalog source projection (physical fields plus optional
-    /// `_timestamp` alias when event time is renamed).
-    fn stream_source_projected_column_names(
-        schema: &datafusion::arrow::datatypes::Schema,
-        event_time_field: Option<&str>,
-    ) -> HashSet<String> {
-        let mut names: HashSet<String> =
-            schema.fields().iter().map(|f| f.name().clone()).collect();
-        if let Some(et) = event_time_field {
-            if et != TIMESTAMP_FIELD {
-                names.insert(TIMESTAMP_FIELD.to_string());
-            }
-        }
-        names
-    }
-
-    /// Resolves watermark column for [`StreamTable::Source`]: drop computed `__watermark` and any
-    /// name not present in the projected schema (defaults to `_timestamp` − delay).
-    fn stream_source_effective_watermark_field<'b>(
-        watermark_field: Option<&'b str>,
-        projected: &HashSet<String>,
-    ) -> Option<&'b str> {
-        let w = watermark_field?;
-        if w == sql_field::COMPUTED_WATERMARK {
-            return None;
-        }
-        projected.contains(w).then_some(w)
-    }
-
     fn projection_expr_for_column(col: &ColumnDescriptor, qualifier: &TableReference) -> Expr {
         if let Some(logic) = col.computation_logic() {
             logic

From d4387f9bbdf44170cd1082f143ad47b7c6cc8e6f Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 21:29:04 +0800
Subject: [PATCH 41/44] update

---
 src/sql/common/hash.rs      | 100 ------------------------------------
 src/sql/common/mod.rs       |   3 --
 src/sql/common/task_info.rs |  92 ---------------------------------
 src/sql/common/worker.rs    |  26 ----------
 4 files changed, 221 deletions(-)
 delete mode 100644 src/sql/common/hash.rs
 delete mode 100644 src/sql/common/task_info.rs
 delete mode 100644 src/sql/common/worker.rs

diff --git a/src/sql/common/hash.rs b/src/sql/common/hash.rs
deleted file mode 100644
index 6dce5b9a..00000000
--- a/src/sql/common/hash.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::ops::RangeInclusive;
-
-/// Randomly generated seeds for consistent hashing. Changing these breaks existing state.
-pub const HASH_SEEDS: [u64; 4] = [
-    5093852630788334730,
-    1843948808084437226,
-    8049205638242432149,
-    17942305062735447798,
-];
-
-/// Returns the server index (0-based) responsible for the given hash value
-/// when distributing across `n` servers.
-pub fn server_for_hash(x: u64, n: usize) -> usize {
-    if n == 1 {
-        0
-    } else {
-        let range_size = (u64::MAX / (n as u64)) + 1;
-        (x / range_size) as usize
-    }
-}
-
-/// Returns the key range assigned to server `i` out of `n` total servers.
-pub fn range_for_server(i: usize, n: usize) -> RangeInclusive<u64> {
-    if n == 1 {
-        return 0..=u64::MAX;
-    }
-    let range_size = (u64::MAX / (n as u64)) + 1;
-    let start = range_size * (i as u64);
-    let end = if i + 1 == n {
-        u64::MAX
-    } else {
-        start + range_size - 1
-    };
-    start..=end
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_range_for_server() {
-        let n = 6;
-
-        for i in 0..(n - 1) {
-            let range1 = range_for_server(i, n);
-            let range2 = range_for_server(i + 1, n);
-
-            assert_eq!(*range1.end() + 1, *range2.start(), "Ranges not adjacent");
-            assert_eq!(
-                i,
-                server_for_hash(*range1.start(), n),
-                "start not assigned to range"
-            );
-            assert_eq!(
-                i,
-                server_for_hash(*range1.end(), n),
-                "end not assigned to range"
-            );
-        }
-
-        let last_range = range_for_server(n - 1, n);
-        assert_eq!(
-            *last_range.end(),
-            u64::MAX,
-            "Last range does not contain u64::MAX"
-        );
-        assert_eq!(
-            n - 1,
-            server_for_hash(u64::MAX, n),
-            "u64::MAX not in last range"
-        );
-    }
-
-    #[test]
-    fn test_server_for_hash() {
-        let n = 2;
-        let x = u64::MAX;
-
-        let server_index = server_for_hash(x, n);
-        let server_range = range_for_server(server_index, n);
-
-        assert!(
-            server_range.contains(&x),
-            "u64::MAX is not in the correct range"
-        );
-    }
-}
diff --git a/src/sql/common/mod.rs b/src/sql/common/mod.rs
index 4c0cc6d3..e042aea6 100644
--- a/src/sql/common/mod.rs
+++ b/src/sql/common/mod.rs
@@ -26,13 +26,10 @@ pub mod fs_schema;
 pub mod errors;
 pub mod format_from_opts;
 pub mod formats;
-pub mod hash;
 pub mod kafka_catalog;
 pub mod message;
 pub mod operator_config;
-pub mod task_info;
 pub mod time_utils;
-pub mod worker;
 pub mod converter;
 pub mod topology;
 
diff --git a/src/sql/common/task_info.rs b/src/sql/common/task_info.rs
deleted file mode 100644
index 479ab082..00000000
--- a/src/sql/common/task_info.rs
+++ /dev/null
@@ -1,92 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use bincode::{Decode, Encode};
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::fmt::{Display, Formatter};
-use std::ops::RangeInclusive;
-
-#[derive(Eq, PartialEq, Hash, Debug, Clone, Encode, Decode, Serialize, Deserialize)]
-pub struct TaskInfo {
-    pub job_id: String,
-    pub node_id: u32,
-    pub operator_name: String,
-    pub operator_id: String,
-    pub task_index: u32,
-    pub parallelism: u32,
-    pub key_range: RangeInclusive<u64>,
-}
-
-impl Display for TaskInfo {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "Task_{}-{}/{}",
-            self.operator_id, self.task_index, self.parallelism
-        )
-    }
-}
-
-impl TaskInfo {
-    pub fn for_test(job_id: &str, operator_id: &str) -> Self {
-        Self {
-            job_id: job_id.to_string(),
-            node_id: 1,
-            operator_name: "op".to_string(),
-            operator_id: operator_id.to_string(),
-            task_index: 0,
-            parallelism: 1,
-            key_range: 0..=u64::MAX,
-        }
-    }
-}
-
-pub fn get_test_task_info() -> TaskInfo {
-    TaskInfo {
-        job_id: "instance-1".to_string(),
-        node_id: 1,
-        operator_name: "test-operator".to_string(),
-        operator_id: "test-operator-1".to_string(),
-        task_index: 0,
-        parallelism: 1,
-        key_range: 0..=u64::MAX,
-    }
-}
-
-#[derive(Eq, PartialEq, Hash, Debug, Clone, Encode, Decode, Serialize, Deserialize)]
-pub struct ChainInfo {
-    pub job_id: String,
-    pub node_id: u32,
-    pub description: String,
-    pub task_index: u32,
-}
-
-impl Display for ChainInfo {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(
-            f,
-            "TaskChain{}-{} ({})",
-            self.node_id, self.task_index, self.description
-        )
-    }
-}
-
-impl ChainInfo {
-    pub fn metric_label_map(&self) -> HashMap<String, String> {
-        let mut labels = HashMap::new();
-        labels.insert("node_id".to_string(), self.node_id.to_string());
-        labels.insert("subtask_idx".to_string(), self.task_index.to_string());
-        labels.insert("node_description".to_string(), self.description.to_string());
-        labels
-    }
-}
diff --git a/src/sql/common/worker.rs b/src/sql/common/worker.rs
deleted file mode 100644
index 48c218fb..00000000
--- a/src/sql/common/worker.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::fmt::{Display, Formatter};
-use std::sync::Arc;
-
-#[derive(Debug, Hash, Eq, PartialEq, Copy, Clone)]
-pub struct WorkerId(pub u64);
-
-#[derive(Debug, Hash, Eq, PartialEq, Clone)]
-pub struct MachineId(pub Arc<String>);
-
-impl Display for MachineId {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}

From c842c0bee0df4339aaa6495b9f8b89085ab846f1 Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 22:31:37 +0800
Subject: [PATCH 42/44] update

---
 README-zh.md                      |  21 +--
 README.md                         |  21 +--
 docs/connectors-and-formats-zh.md | 197 ++++++++++++++++++++++
 docs/connectors-and-formats.md    | 197 ++++++++++++++++++++++
 docs/sql-cli-guide-zh.md          |  66 +++++++-
 docs/sql-cli-guide.md             |  66 +++++++-
 docs/streaming-sql-guide-zh.md    | 261 ++++++++++++++++++++++++++++++
 docs/streaming-sql-guide.md       | 260 +++++++++++++++++++++++++++++
 8 files changed, 1067 insertions(+), 22 deletions(-)
 create mode 100644 docs/connectors-and-formats-zh.md
 create mode 100644 docs/connectors-and-formats.md
 create mode 100644 docs/streaming-sql-guide-zh.md
 create mode 100644 docs/streaming-sql-guide.md

diff --git a/README-zh.md b/README-zh.md
index b1d68eac..a15bfcc5 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -23,7 +23,7 @@
 
 [中文](README-zh.md) | [English](README.md)
 
-**Function Stream** 是一个基于 Rust 构建的高性能、事件驱动的流处理框架。它提供了一个模块化的运行时，用于编排编译为 **WebAssembly (WASM)** 的 Serverless 风格处理函数，支持使用 **Go、Python 和 Rust** 编写函数。
+**Function Stream** 是一个基于 Rust 构建的高性能、事件驱动的流处理框架。它提供了一个模块化的运行时，用于编排编译为 **WebAssembly (WASM)** 的 Serverless 风格处理函数，支持使用 **Go、Python 和 Rust** 编写函数。同时内置 **Streaming SQL** 引擎，可通过纯声明式 SQL 构建实时数据管道 — 包括时间窗口聚合、多流关联和持续 ETL。
 
 ## 目录
 
@@ -46,6 +46,7 @@
 
 ## 核心特性
 
+- **Streaming SQL 引擎**：使用纯 SQL 构建实时管道 — 注册数据源（`CREATE TABLE`）、启动持续计算（`CREATE STREAMING TABLE ... AS SELECT`）、管理生命周期（`SHOW` / `DROP`）。支持滚动窗口、滑动窗口、窗口关联等丰富语义。
 - **事件驱动的 WASM 运行时**：以接近原生的性能和沙箱隔离的方式执行多语言函数（Go、Python、Rust）。
 - **持久化状态管理**：内置支持基于 RocksDB 的状态存储，用于有状态流处理。
 - **SQL 驱动的 CLI**：使用类 SQL 命令进行作业管理和流检测的交互式 REPL。
@@ -200,14 +201,16 @@ function-stream-<version>/
 
 ## 文档
 
-| 文档                                                   | 描述            |
-|------------------------------------------------------|---------------|
-| [服务端配置与运维指南](docs/server-configuration-zh.md)        | 服务端配置与运维操作    |
-| [Function 任务配置规范](docs/function-configuration-zh.md) | 任务定义规范        |
-| [SQL CLI 交互式管理指南](docs/sql-cli-guide-zh.md)          | 交互式管理指南       |
-| [Function 管理与开发指南](docs/function-development-zh.md)  | 管理与开发指南       |
-| [Go SDK 开发与交互指南](docs/Go-SDK/go-sdk-guide-zh.md)     | Go SDK 指南        |
-| [Python SDK 开发与交互指南](docs/Python-SDK/python-sdk-guide-zh.md) | Python SDK 指南 |
+| 文档                                                                     | 描述                       |
+|------------------------------------------------------------------------|--------------------------|
+| [Streaming SQL 使用指南](docs/streaming-sql-guide-zh.md)                   | 声明式 SQL 实时流处理指南         |
+| [连接器、格式与类型参考](docs/connectors-and-formats-zh.md)                       | 支持的 Source/Sink、格式与数据类型  |
+| [服务端配置与运维指南](docs/server-configuration-zh.md)                          | 服务端配置与运维操作              |
+| [Function 任务配置规范](docs/function-configuration-zh.md)        | 任务定义规范           |
+| [SQL CLI 交互式管理指南](docs/sql-cli-guide-zh.md)                 | 交互式管理指南          |
+| [Function 管理与开发指南](docs/function-development-zh.md)         | 管理与开发指南          |
+| [Go SDK 开发与交互指南](docs/Go-SDK/go-sdk-guide-zh.md)            | Go SDK 指南         |
+| [Python SDK 开发与交互指南](docs/Python-SDK/python-sdk-guide-zh.md) | Python SDK 指南     |
 
 ## 配置
 
diff --git a/README.md b/README.md
index 51a69de1..f74bee33 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@
 
 [中文](README-zh.md) | [English](README.md)
 
-**Function Stream** is a high-performance, event-driven stream processing framework built in Rust. It provides a modular runtime to orchestrate serverless-style processing functions compiled to **WebAssembly (WASM)**, supporting functions written in **Go, Python, and Rust**.
+**Function Stream** is a high-performance, event-driven stream processing framework built in Rust. It provides a modular runtime to orchestrate serverless-style processing functions compiled to **WebAssembly (WASM)**, supporting functions written in **Go, Python, and Rust**. It also features a **Streaming SQL** engine that lets you build real-time data pipelines — including time-windowed aggregations, multi-stream joins, and continuous ETL — using pure declarative SQL.
 
 ## Table of Contents
 
@@ -46,6 +46,7 @@
 
 ## Key Features
 
+* **Streaming SQL Engine**: Build real-time pipelines with pure SQL — register sources (`CREATE TABLE`), launch continuous computations (`CREATE STREAMING TABLE ... AS SELECT`), and manage lifecycle (`SHOW` / `DROP`). Supports tumbling windows, hopping windows, window joins, and more.
 * **Event-Driven WASM Runtime**: Executes polyglot functions (Go, Python, Rust) with near-native performance and sandboxed isolation.
 * **Durable State Management**: Built-in support for RocksDB-backed state stores for stateful stream processing.
 * **SQL-Powered CLI**: Interactive REPL for job management and stream inspection using SQL-like commands.
@@ -199,14 +200,16 @@ We provide a robust shell script to manage the server process, capable of handli
 
 ## Documentation
 
-| Document                                                 | Description                       |
-|----------------------------------------------------------|-----------------------------------|
-| [Server Configuration](docs/server-configuration.md)     | Server Configuration & Operations |
-| [Function Configuration](docs/function-configuration.md) | Task Definition Specification     |
-| [SQL CLI Guide](docs/sql-cli-guide.md)                   | Interactive Management Guide      |
-| [Function Development](docs/function-development.md)     | Management & Development Guide    |
-| [Go SDK Guide](docs/Go-SDK/go-sdk-guide.md)              | Go SDK Guide                      |
-| [Python SDK Guide](docs/Python-SDK/python-sdk-guide.md)   | Python SDK Guide                  |
+| Document                                                       | Description                                     |
+|----------------------------------------------------------------|-------------------------------------------------|
+| [Streaming SQL Guide](docs/streaming-sql-guide.md)             | Declarative SQL for Real-Time Stream Processing |
+| [Connectors, Formats & Types](docs/connectors-and-formats.md) | Supported Sources, Sinks, Formats & Data Types  |
+| [Server Configuration](docs/server-configuration.md)           | Server Configuration & Operations               |
+| [Function Configuration](docs/function-configuration.md) | Task Definition Specification                   |
+| [SQL CLI Guide](docs/sql-cli-guide.md)                   | Interactive Management Guide                    |
+| [Function Development](docs/function-development.md)     | Management & Development Guide                  |
+| [Go SDK Guide](docs/Go-SDK/go-sdk-guide.md)              | Go SDK Guide                                    |
+| [Python SDK Guide](docs/Python-SDK/python-sdk-guide.md)  | Python SDK Guide                                |
 
 ## Configuration
 
diff --git a/docs/connectors-and-formats-zh.md b/docs/connectors-and-formats-zh.md
new file mode 100644
index 00000000..8f25a7dc
--- /dev/null
+++ b/docs/connectors-and-formats-zh.md
@@ -0,0 +1,197 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+
+# 连接器、数据格式与 SQL 类型参考
+
+[中文](connectors-and-formats-zh.md) | [English](connectors-and-formats.md)
+
+本文档是 Function Stream Streaming SQL 引擎所支持的连接器（Source / Sink）、序列化格式以及 SQL 数据类型的权威参考。
+
+---
+
+## 目录
+
+- [1. 连接器 (Connector)](#1-连接器-connector)
+  - [1.1 Kafka Source（数据源）](#11-kafka-source数据源)
+  - [1.2 Kafka Sink（数据汇）](#12-kafka-sink数据汇)
+- [2. 数据格式 (Format)](#2-数据格式-format)
+- [3. SQL 数据类型](#3-sql-数据类型)
+- [4. 完整示例](#4-完整示例)
+
+---
+
+## 1. 连接器 (Connector)
+
+当前 Function Stream 支持 **Kafka** 作为生产可用的连接器，同时可作为数据源（Source）和数据汇（Sink）。
+
+### 1.1 Kafka Source（数据源）
+
+Kafka Source 从一个或多个 Kafka Topic 分区读取消息。在 `CREATE TABLE` 中使用以注册输入流。
+
+**必填属性：**
+
+| 属性 | 说明 | 示例 |
+|------|------|------|
+| `connector` | 必须为 `kafka`。 | `'kafka'` |
+| `topic` | 要消费的 Kafka Topic。 | `'raw_events'` |
+| `format` | 消息的序列化格式。 | `'json'` |
+| `bootstrap.servers` | Kafka Broker 地址列表，逗号分隔。 | `'broker1:9092,broker2:9092'` |
+
+**示例：**
+
+```sql
+CREATE TABLE page_views (
+    user_id VARCHAR,
+    page_url VARCHAR,
+    view_time TIMESTAMP NOT NULL,
+    WATERMARK FOR view_time AS view_time - INTERVAL '3' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'page_views',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+```
+
+### 1.2 Kafka Sink（数据汇）
+
+Kafka Sink 将计算结果写入 Kafka Topic。在 `CREATE STREAMING TABLE` 的 `WITH` 子句中配置。
+
+**必填属性：**
+
+| 属性 | 说明 | 示例 |
+|------|------|------|
+| `connector` | 必须为 `kafka`。 | `'kafka'` |
+| `topic` | 要写入的 Kafka Topic。 | `'sink_results'` |
+| `format` | 输出消息的序列化格式。 | `'json'` |
+| `bootstrap.servers` | Kafka Broker 地址列表。 | `'broker1:9092'` |
+
+**示例：**
+
+```sql
+CREATE STREAMING TABLE enriched_clicks WITH (
+    'connector' = 'kafka',
+    'topic' = 'enriched_clicks',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT click_id, user_id, click_time
+FROM ad_clicks;
+```
+
+---
+
+## 2. 数据格式 (Format)
+
+当前唯一支持的序列化格式是 **JSON**。每条 Kafka 消息应为一个自描述的 JSON 对象，其字段直接映射到 `CREATE TABLE` 中定义的列。
+
+在 `WITH` 子句中设置 `'format' = 'json'`（省略时也默认为 JSON）。
+
+---
+
+## 3. SQL 数据类型
+
+以下是 `CREATE TABLE` 列定义中支持的 SQL 数据类型：
+
+### 数值类型
+
+| SQL 类型 | 别名 | Arrow 类型 | 说明 |
+|----------|------|-----------|------|
+| `BOOLEAN` | `BOOL` | Boolean | 布尔值。 |
+| `TINYINT` | — | Int8 | 8 位有符号整数。 |
+| `SMALLINT` | `INT2` | Int16 | 16 位有符号整数。 |
+| `INT` | `INTEGER`、`INT4` | Int32 | 32 位有符号整数。 |
+| `BIGINT` | `INT8` | Int64 | 64 位有符号整数。 |
+| `TINYINT UNSIGNED` | — | UInt8 | 8 位无符号整数。 |
+| `SMALLINT UNSIGNED` | `INT2 UNSIGNED` | UInt16 | 16 位无符号整数。 |
+| `INT UNSIGNED` | `INT4 UNSIGNED` | UInt32 | 32 位无符号整数。 |
+| `BIGINT UNSIGNED` | `INT8 UNSIGNED` | UInt64 | 64 位无符号整数。 |
+| `FLOAT` | `REAL`、`FLOAT4` | Float32 | 32 位 IEEE 754 浮点数。 |
+| `DOUBLE` | `DOUBLE PRECISION`、`FLOAT8` | Float64 | 64 位 IEEE 754 浮点数。 |
+| `DECIMAL(p, s)` | `NUMERIC(p, s)` | Decimal128 | 定点小数。精度 1–38，标度 <= 精度。 |
+
+### 字符串与二进制类型
+
+| SQL 类型 | 别名 | Arrow 类型 | 说明 |
+|----------|------|-----------|------|
+| `VARCHAR` | `TEXT`、`STRING`、`CHAR` | Utf8 | 可变长度 UTF-8 字符串。 |
+| `BYTEA` | — | Binary | 可变长度字节数组。 |
+| `JSON` | — | Utf8（JSON 扩展） | 带有 FunctionStream 扩展元数据的 JSON 类型字符串。 |
+
+### 日期与时间类型
+
+| SQL 类型 | Arrow 类型 | 说明 |
+|----------|-----------|------|
+| `TIMESTAMP` | Timestamp(Nanosecond) | 不含时区的日期时间（纳秒精度）。 |
+| `TIMESTAMP(0)` | Timestamp(Second) | 秒精度。 |
+| `TIMESTAMP(3)` | Timestamp(Millisecond) | 毫秒精度。 |
+| `TIMESTAMP(6)` | Timestamp(Microsecond) | 微秒精度。 |
+| `TIMESTAMP(9)` | Timestamp(Nanosecond) | 纳秒精度（与 `TIMESTAMP` 相同）。 |
+| `DATE` | Date32 | 日历日期（年、月、日）。 |
+| `DATETIME` | Timestamp(Nanosecond) | `TIMESTAMP` 的别名。 |
+| `TIME` | Time64(Nanosecond) | 不含时区的时刻。 |
+| `INTERVAL` | Interval(MonthDayNano) | 时间间隔 / 持续时间。 |
+
+### 复合类型
+
+| SQL 类型 | Arrow 类型 | 说明 |
+|----------|-----------|------|
+| `STRUCT<name type, ...>` | Struct | 命名组合字段。 |
+| `ARRAY<element_type>` | List | 相同类型元素的有序列表。也支持 `element_type[]` 语法。 |
+
+---
+
+## 4. 完整示例
+
+以下是一个结合 Kafka Source、Kafka Sink、JSON 格式和多种 SQL 数据类型的完整示例：
+
+```sql
+-- Source：从 Kafka 读取用户活动事件
+CREATE TABLE user_activity (
+    event_id VARCHAR,
+    user_id BIGINT,
+    action VARCHAR,
+    amount DECIMAL(10, 2),
+    tags ARRAY<VARCHAR>,
+    event_time TIMESTAMP NOT NULL,
+    WATERMARK FOR event_time AS event_time - INTERVAL '5' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'user_activity',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+
+-- Sink：1 分钟滚动窗口聚合
+CREATE STREAMING TABLE activity_stats_1m WITH (
+    'connector' = 'kafka',
+    'topic' = 'activity_stats_1m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    TUMBLE(INTERVAL '1' MINUTE) AS time_window,
+    action,
+    COUNT(*) AS event_count,
+    SUM(amount) AS total_amount
+FROM user_activity
+GROUP BY 1, action;
+```
diff --git a/docs/connectors-and-formats.md b/docs/connectors-and-formats.md
new file mode 100644
index 00000000..46d0d964
--- /dev/null
+++ b/docs/connectors-and-formats.md
@@ -0,0 +1,197 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+
+# Connectors, Formats & Data Types
+
+[中文](connectors-and-formats-zh.md) | [English](connectors-and-formats.md)
+
+This document is the authoritative reference for connectors (sources & sinks), serialization formats, and SQL data types supported by Function Stream's Streaming SQL engine.
+
+---
+
+## Table of Contents
+
+- [1. Connectors](#1-connectors)
+  - [1.1 Kafka (Source)](#11-kafka-source)
+  - [1.2 Kafka (Sink)](#12-kafka-sink)
+- [2. Data Format](#2-data-format)
+- [3. SQL Data Types](#3-sql-data-types)
+- [4. Full Example](#4-full-example)
+
+---
+
+## 1. Connectors
+
+Currently, Function Stream supports **Kafka** as its production-ready connector, usable as both a source (ingestion) and a sink (egress).
+
+### 1.1 Kafka (Source)
+
+A Kafka source reads records from one or more Kafka topic partitions. Use it in `CREATE TABLE` to register an input stream.
+
+**Required Properties:**
+
+| Property | Description | Example |
+|----------|-------------|---------|
+| `connector` | Must be `kafka`. | `'kafka'` |
+| `topic` | Kafka topic to consume from. | `'raw_events'` |
+| `format` | Serialization format of messages. | `'json'` |
+| `bootstrap.servers` | Comma-separated list of Kafka broker addresses. | `'broker1:9092,broker2:9092'` |
+
+**Example:**
+
+```sql
+CREATE TABLE page_views (
+    user_id VARCHAR,
+    page_url VARCHAR,
+    view_time TIMESTAMP NOT NULL,
+    WATERMARK FOR view_time AS view_time - INTERVAL '3' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'page_views',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+```
+
+### 1.2 Kafka (Sink)
+
+A Kafka sink writes records into a Kafka topic. It is configured in the `WITH` clause of a `CREATE STREAMING TABLE` statement.
+
+**Required Properties:**
+
+| Property | Description | Example |
+|----------|-------------|---------|
+| `connector` | Must be `kafka`. | `'kafka'` |
+| `topic` | Kafka topic to write to. | `'sink_results'` |
+| `format` | Serialization format of output messages. | `'json'` |
+| `bootstrap.servers` | Comma-separated Kafka broker addresses. | `'broker1:9092'` |
+
+**Example:**
+
+```sql
+CREATE STREAMING TABLE enriched_clicks WITH (
+    'connector' = 'kafka',
+    'topic' = 'enriched_clicks',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT click_id, user_id, click_time
+FROM ad_clicks;
+```
+
+---
+
+## 2. Data Format
+
+Currently the only supported serialization format is **JSON**. Each Kafka message is expected to be a self-describing JSON object whose fields map directly to the columns defined in `CREATE TABLE`.
+
+Set `'format' = 'json'` in the `WITH` clause (this is also the default when omitted).
+
+---
+
+## 3. SQL Data Types
+
+The following SQL data types are supported in `CREATE TABLE` column definitions:
+
+### Numeric Types
+
+| SQL Type | Aliases | Arrow Type | Description |
+|----------|---------|------------|-------------|
+| `BOOLEAN` | `BOOL` | Boolean | True / false. |
+| `TINYINT` | — | Int8 | 8-bit signed integer. |
+| `SMALLINT` | `INT2` | Int16 | 16-bit signed integer. |
+| `INT` | `INTEGER`, `INT4` | Int32 | 32-bit signed integer. |
+| `BIGINT` | `INT8` | Int64 | 64-bit signed integer. |
+| `TINYINT UNSIGNED` | — | UInt8 | 8-bit unsigned integer. |
+| `SMALLINT UNSIGNED` | `INT2 UNSIGNED` | UInt16 | 16-bit unsigned integer. |
+| `INT UNSIGNED` | `INT4 UNSIGNED` | UInt32 | 32-bit unsigned integer. |
+| `BIGINT UNSIGNED` | `INT8 UNSIGNED` | UInt64 | 64-bit unsigned integer. |
+| `FLOAT` | `REAL`, `FLOAT4` | Float32 | 32-bit IEEE 754 floating point. |
+| `DOUBLE` | `DOUBLE PRECISION`, `FLOAT8` | Float64 | 64-bit IEEE 754 floating point. |
+| `DECIMAL(p, s)` | `NUMERIC(p, s)` | Decimal128 | Fixed-point decimal. Precision 1–38, scale <= precision. |
+
+### String & Binary Types
+
+| SQL Type | Aliases | Arrow Type | Description |
+|----------|---------|------------|-------------|
+| `VARCHAR` | `TEXT`, `STRING`, `CHAR` | Utf8 | Variable-length UTF-8 string. |
+| `BYTEA` | — | Binary | Variable-length byte array. |
+| `JSON` | — | Utf8 (JSON extension) | JSON-typed string with FunctionStream extension metadata. |
+
+### Date & Time Types
+
+| SQL Type | Arrow Type | Description |
+|----------|------------|-------------|
+| `TIMESTAMP` | Timestamp(Nanosecond) | Date and time without timezone (nanosecond precision). |
+| `TIMESTAMP(0)` | Timestamp(Second) | Second precision. |
+| `TIMESTAMP(3)` | Timestamp(Millisecond) | Millisecond precision. |
+| `TIMESTAMP(6)` | Timestamp(Microsecond) | Microsecond precision. |
+| `TIMESTAMP(9)` | Timestamp(Nanosecond) | Nanosecond precision (same as `TIMESTAMP`). |
+| `DATE` | Date32 | Calendar date (year, month, day). |
+| `DATETIME` | Timestamp(Nanosecond) | Alias for `TIMESTAMP`. |
+| `TIME` | Time64(Nanosecond) | Time of day without timezone. |
+| `INTERVAL` | Interval(MonthDayNano) | Time duration / interval. |
+
+### Composite Types
+
+| SQL Type | Arrow Type | Description |
+|----------|------------|-------------|
+| `STRUCT<name type, ...>` | Struct | Named composite fields. |
+| `ARRAY<element_type>` | List | Ordered list of elements of the same type. Also supports `element_type[]` syntax. |
+
+---
+
+## 4. Full Example
+
+Below is a complete example combining a Kafka source, a Kafka sink, JSON format, and various SQL data types:
+
+```sql
+-- Source: user activity events from Kafka
+CREATE TABLE user_activity (
+    event_id VARCHAR,
+    user_id BIGINT,
+    action VARCHAR,
+    amount DECIMAL(10, 2),
+    tags ARRAY<VARCHAR>,
+    event_time TIMESTAMP NOT NULL,
+    WATERMARK FOR event_time AS event_time - INTERVAL '5' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'user_activity',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+
+-- Sink: 1-minute tumbling window aggregation
+CREATE STREAMING TABLE activity_stats_1m WITH (
+    'connector' = 'kafka',
+    'topic' = 'activity_stats_1m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    TUMBLE(INTERVAL '1' MINUTE) AS time_window,
+    action,
+    COUNT(*) AS event_count,
+    SUM(amount) AS total_amount
+FROM user_activity
+GROUP BY 1, action;
+```
diff --git a/docs/sql-cli-guide-zh.md b/docs/sql-cli-guide-zh.md
index 8352dea1..bff05932 100644
--- a/docs/sql-cli-guide-zh.md
+++ b/docs/sql-cli-guide-zh.md
@@ -129,7 +129,69 @@ DROP FUNCTION go_processor_demo;
 
 ---
 
-## 三、REPL 内建辅助指令
+## 三、Streaming SQL：TABLE 与 STREAMING TABLE
+
+除了 Function 管理之外，CLI 还支持一整套 **Streaming SQL** 命令，用于声明数据源和构建实时管道。完整示例请参阅 [Streaming SQL 使用指南](streaming-sql-guide-zh.md)。
+
+### 3.1 注册数据源：CREATE TABLE
+
+声明外部数据源（如 Kafka），包含 Schema、事件时间和水位线策略。此操作仅创建**静态目录条目**，不消耗计算资源。
+
+```sql
+CREATE TABLE ad_impressions (
+    impression_id VARCHAR,
+    ad_id BIGINT,
+    campaign_id BIGINT,
+    user_id VARCHAR,
+    impression_time TIMESTAMP NOT NULL,
+    WATERMARK FOR impression_time AS impression_time - INTERVAL '2' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'raw_ad_impressions',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+```
+
+### 3.2 创建流计算管道：CREATE STREAMING TABLE
+
+使用 CTAS 语法启动持续运行的分布式计算管道。结果以纯追加模式写入目标连接器。
+
+```sql
+CREATE STREAMING TABLE metric_tumble_impressions_1m WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_impressions_1m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    TUMBLE(INTERVAL '1' MINUTE) AS time_window,
+    campaign_id,
+    COUNT(*) AS total_impressions
+FROM ad_impressions
+GROUP BY 1, campaign_id;
+```
+
+### 3.3 查看与监控
+
+| 命令 | 说明 |
+|------|------|
+| `SHOW TABLES` | 列出所有已注册的数据源表。 |
+| `SHOW CREATE TABLE <name>` | 显示某张表的建表 DDL。 |
+| `SHOW STREAMING TABLES` | 列出所有正在运行的流计算管道及其状态。 |
+| `SHOW CREATE STREAMING TABLE <name>` | 查看某条管道的物理执行拓扑图（ASCII 格式）。 |
+
+### 3.4 销毁流计算管道：DROP STREAMING TABLE
+
+停止并释放某条流计算管道的所有资源：
+
+```sql
+DROP STREAMING TABLE metric_tumble_impressions_1m;
+```
+
+---
+
+## 四、REPL 内建辅助指令
 
 在 `function-stream>` 提示符下，支持以下便捷指令：
 
@@ -141,7 +203,7 @@ DROP FUNCTION go_processor_demo;
 
 ---
 
-## 四、技术约束与注意事项
+## 五、技术约束与注意事项
 
 - **路径隔离**：SQL CLI 本身不负责上传文件。function_path 指向的文件必须预先存在于**服务端机器**的磁盘上。若需远程上传打包，请使用 Python SDK。
 - **Python 函数限制**：由于 Python 函数涉及动态依赖分析与代码打包，目前**不支持**通过 SQL CLI 创建，仅能通过 CLI 进行 START / STOP / SHOW 等生命周期管理。
diff --git a/docs/sql-cli-guide.md b/docs/sql-cli-guide.md
index be42a37e..a7f36a88 100644
--- a/docs/sql-cli-guide.md
+++ b/docs/sql-cli-guide.md
@@ -129,7 +129,69 @@ DROP FUNCTION go_processor_demo;
 
 ---
 
-## 3. REPL Built-in Auxiliary Commands
+## 3. Streaming SQL: TABLE & STREAMING TABLE
+
+In addition to Function management, the CLI supports a full set of **Streaming SQL** commands for declaring data sources and building real-time pipelines. For a comprehensive guide with examples, see the [Streaming SQL Guide](streaming-sql-guide.md).
+
+### 3.1 Register Data Source: CREATE TABLE
+
+Declare an external data source (e.g. Kafka) with schema, event time, and watermark strategy. This creates a **static catalog entry** that consumes no compute resources.
+
+```sql
+CREATE TABLE ad_impressions (
+    impression_id VARCHAR,
+    ad_id BIGINT,
+    campaign_id BIGINT,
+    user_id VARCHAR,
+    impression_time TIMESTAMP NOT NULL,
+    WATERMARK FOR impression_time AS impression_time - INTERVAL '2' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'raw_ad_impressions',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+```
+
+### 3.2 Create Streaming Pipeline: CREATE STREAMING TABLE
+
+Launch a continuous, distributed compute pipeline using CTAS syntax. Results are written to the target connector in append-only mode.
+
+```sql
+CREATE STREAMING TABLE metric_tumble_impressions_1m WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_impressions_1m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    TUMBLE(INTERVAL '1' MINUTE) AS time_window,
+    campaign_id,
+    COUNT(*) AS total_impressions
+FROM ad_impressions
+GROUP BY 1, campaign_id;
+```
+
+### 3.3 Inspect & Monitor
+
+| Command | Description |
+|---------|-------------|
+| `SHOW TABLES` | List all registered source tables. |
+| `SHOW CREATE TABLE <name>` | Display the DDL of a registered table. |
+| `SHOW STREAMING TABLES` | List all running streaming pipelines with status. |
+| `SHOW CREATE STREAMING TABLE <name>` | Inspect the physical execution graph (ASCII topology). |
+
+### 3.4 Destroy Streaming Pipeline: DROP STREAMING TABLE
+
+Stop and release all resources for a streaming pipeline:
+
+```sql
+DROP STREAMING TABLE metric_tumble_impressions_1m;
+```
+
+---
+
+## 4. REPL Built-in Auxiliary Commands
 
 At the `function-stream>` prompt, the following convenient commands are supported:
 
@@ -141,7 +203,7 @@ At the `function-stream>` prompt, the following convenient commands are supporte
 
 ---
 
-## 4. Technical Constraints and Notes
+## 5. Technical Constraints and Notes
 
 - **Path Isolation**: The SQL CLI itself is not responsible for uploading files. The file pointed to by function_path must pre-exist on the **Server machine's** disk. If remote upload packaging is required, please use the Python SDK.
 - **Python Function Limitations**: Since Python functions involve dynamic dependency analysis and code packaging, they are currently **not supported** for creation via SQL CLI; only lifecycle management such as START / STOP / SHOW via CLI is supported.
diff --git a/docs/streaming-sql-guide-zh.md b/docs/streaming-sql-guide-zh.md
new file mode 100644
index 00000000..ee18ab2a
--- /dev/null
+++ b/docs/streaming-sql-guide-zh.md
@@ -0,0 +1,261 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+
+# Streaming SQL 使用指南
+
+[中文](streaming-sql-guide-zh.md) | [English](streaming-sql-guide.md)
+
+Function Stream 提供了声明式 SQL 接口来构建实时流处理管道。通过 Streaming SQL，您可以轻松应对无界数据流（Unbounded Data）的摄取、时间窗口聚合、流式关联以及任务生命周期管理 — 无需编写任何命令式代码。
+
+---
+
+## 目录
+
+- [核心概念](#核心概念)
+- [第一部分：注册数据源 (TABLE)](#第一部分注册数据源-table)
+- [第二部分：构建实时 Pipeline (STREAMING TABLE)](#第二部分构建实时-pipeline-streaming-table)
+  - [滚动窗口 (Tumbling Window)](#场景-1滚动窗口-tumbling-window)
+  - [滑动窗口 (Hopping Window)](#场景-2滑动窗口-hopping-window)
+  - [窗口双流关联 (Window Join)](#场景-3窗口双流关联-window-join)
+- [第三部分：生命周期与流任务管理](#第三部分生命周期与流任务管理)
+  - [数据源管理](#1-数据源与元数据管理)
+  - [Pipeline 监控](#2-实时-pipeline-监控与排障)
+  - [停止与释放](#3-安全停止与释放资源)
+- [SQL 语法速查表](#sql-语法速查表)
+
+---
+
+## 核心概念
+
+| 概念 | SQL 关键字 | 说明 |
+|------|-----------|------|
+| **TABLE** | `CREATE TABLE` | 系统目录（Catalog）中的静态逻辑定义。只记录外部数据源的连接信息、格式和 Schema，不消耗任何计算资源。 |
+| **STREAMING TABLE** | `CREATE STREAMING TABLE ... AS SELECT` | 持续运行的物理数据管道。引擎会在后台拉起真实的分布式计算任务，并将结果以纯追加（Append-only）方式持续写入外部系统。 |
+| **事件时间 (Event Time)** | `WATERMARK FOR <column>` | 引擎内部用于推进时间进度的时间戳列。 |
+| **水位线 (Watermark)** | `AS <column> - INTERVAL ...` | 对迟到乱序数据的容忍度。事件时间落后于当前水位线的迟到事件将被丢弃。 |
+
+> 支持的连接器、数据格式和 SQL 数据类型的完整参考，请参阅 [连接器、格式与类型参考](connectors-and-formats-zh.md)。
+
+---
+
+## 第一部分：注册数据源 (TABLE)
+
+`TABLE` 是系统目录（Catalog）中的静态逻辑定义。它只记录外部数据源（如 Kafka）的连接信息、格式和 Schema，**不消耗任何计算资源**。
+
+在流计算中，我们必须为输入流指定**事件时间**（Event Time）和**水位线**（Watermark），以此作为引擎内部推进时间、触发计算的唯一依据。
+
+### 示例：注册广告曝光流与点击流
+
+```sql
+-- 1. 注册广告曝光流
+CREATE TABLE ad_impressions (
+    impression_id VARCHAR,
+    ad_id BIGINT,
+    campaign_id BIGINT,
+    user_id VARCHAR,
+    impression_time TIMESTAMP NOT NULL,
+    -- 核心：将 impression_time 设为事件时间，并容忍最多 2 秒的数据迟到乱序
+    WATERMARK FOR impression_time AS impression_time - INTERVAL '2' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'raw_ad_impressions',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+
+-- 2. 注册广告点击流
+CREATE TABLE ad_clicks (
+    click_id VARCHAR,
+    impression_id VARCHAR,
+    ad_id BIGINT,
+    click_time TIMESTAMP NOT NULL,
+    WATERMARK FOR click_time AS click_time - INTERVAL '5' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'raw_ad_clicks',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+```
+
+**关键要素：**
+
+- `WATERMARK FOR <列> AS <列> - INTERVAL '<n>' SECOND`：声明事件时间列以及允许的最大乱序延迟。
+- `WITH (...)`：连接器属性 — 类型、Topic、格式、Broker 地址。
+
+---
+
+## 第二部分：构建实时 Pipeline (STREAMING TABLE)
+
+`STREAMING TABLE` 是持续运行的物理数据管道。使用 `CREATE STREAMING TABLE ... AS SELECT`（CTAS）语法，引擎会在后台拉起真实的分布式计算任务，并将结果以**纯追加**（Append-only）的方式持续写入外部系统。
+
+### 场景 1：滚动窗口 (Tumbling Window)
+
+将时间切分为互不重叠的固定窗口。
+
+```sql
+-- 需求：每 1 分钟统计一次各广告计划的曝光总量
+CREATE STREAMING TABLE metric_tumble_impressions_1m WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_impressions_1m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    TUMBLE(INTERVAL '1' MINUTE) AS time_window,
+    campaign_id,
+    COUNT(*) AS total_impressions
+FROM ad_impressions
+GROUP BY
+    1, -- 指代 SELECT 中的第一个字段 (time_window)
+    campaign_id;
+```
+
+### 场景 2：滑动窗口 (Hopping Window)
+
+窗口之间存在重叠，用于平滑趋势监控。
+
+```sql
+-- 需求：统计过去 10 分钟内各广告的独立访客数(UV)，每 1 分钟刷新一次
+CREATE STREAMING TABLE metric_hop_uv_10m WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_uv_10m_step_1m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    HOP(INTERVAL '1' MINUTE, INTERVAL '10' MINUTE) AS time_window,
+    ad_id,
+    COUNT(DISTINCT user_id) AS unique_users
+FROM ad_impressions
+GROUP BY
+    1,
+    ad_id;
+```
+
+### 场景 3：窗口双流关联 (Window Join)
+
+将两条流在完全相同的时间窗口内进行等值关联。由于状态被限定在窗口内，水位线越过窗口后状态会被自动清理，因此不会因状态无限增长而导致内存溢出（OOM）。
+
+```sql
+-- 需求：精确计算 5 分钟级别的点击率 (CTR)
+CREATE STREAMING TABLE metric_window_join_ctr_5m WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_ctr_5m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    imp.time_window,
+    imp.ad_id,
+    imp.impressions,
+    COALESCE(clk.clicks, 0) AS clicks
+FROM (
+    SELECT TUMBLE(INTERVAL '5' MINUTE) AS time_window, ad_id, COUNT(*) AS impressions
+    FROM ad_impressions
+    GROUP BY 1, ad_id
+) imp
+LEFT JOIN (
+    SELECT TUMBLE(INTERVAL '5' MINUTE) AS time_window, ad_id, COUNT(*) AS clicks
+    FROM ad_clicks
+    GROUP BY 1, ad_id
+) clk
+ON imp.time_window = clk.time_window AND imp.ad_id = clk.ad_id;
+```
+
+> **要求**：关联条件**必须**包含相同的时间窗口列，以确保状态有界。
+
+---
+
+## 第三部分：生命周期与流任务管理
+
+Function Stream 提供了一套完整的运维指令，帮助您管理元数据目录、排查物理执行图以及销毁流计算任务。
+
+### 1. 数据源与元数据管理
+
+**查看所有已注册的数据源表：**
+
+```sql
+SHOW TABLES;
+```
+
+列出当前 Catalog 中的所有静态表定义及其对应的 Event Time 与 Watermark 策略。
+
+**查看原始建表语句（DDL）：**
+
+```sql
+SHOW CREATE TABLE ad_clicks;
+```
+
+用于导出或排查某张表的底层连接参数（如 Kafka Topic、Format 等）。
+
+### 2. 实时 Pipeline 监控与排障
+
+**查看当前运行的计算流：**
+
+```sql
+SHOW STREAMING TABLES;
+```
+
+输出字段说明：
+
+| 字段 | 说明 |
+|------|------|
+| `job_id` | 计算流的名称（如 `metric_tumble_impressions_1m`）。 |
+| `status` | 当前生命周期状态（如 `RUNNING`、`FAILED`）。 |
+| `pipeline_count` | 该任务在底层被拆分成的并行算子链数量。 |
+| `uptime` | 任务已持续运行的时长。 |
+
+**洞察物理执行拓扑 (Execution Graph)：**
+
+```sql
+SHOW CREATE STREAMING TABLE metric_tumble_impressions_1m;
+```
+
+这是 Function Stream 极其强大的排障指令。它会以 ASCII 格式打印出一条 SQL 是如何在底层被转化为真实分布式计算图的：
+
+- `[Source]` — 从连接器读取数据。
+- `[Operator] ExpressionWatermark` — 注入水位线。
+- `[Shuffle]` — 重分布网络数据。
+- `[Operator] TumblingWindowAggregate` — 执行真正的窗口聚合。
+- `[Sink] ConnectorSink` — 将结果发往目标连接器（如 Kafka）。
+
+### 3. 安全停止与释放资源
+
+当某个实时大屏活动结束，或者您需要更新计算逻辑时，必须显式销毁旧的流任务：
+
+```sql
+DROP STREAMING TABLE metric_tumble_impressions_1m;
+```
+
+---
+
+## SQL 语法速查表
+
+| 语句 | 说明 |
+|------|------|
+| `CREATE TABLE ... WITH (...)` | 注册外部数据源，声明 Schema、事件时间和水位线。 |
+| `CREATE STREAMING TABLE ... WITH (...) AS SELECT ...` | 创建并启动持续运行的流计算管道。 |
+| `SHOW TABLES` | 列出所有已注册的数据源表。 |
+| `SHOW CREATE TABLE <name>` | 显示某张表的建表 DDL。 |
+| `SHOW STREAMING TABLES` | 列出所有正在运行的流计算管道及其状态。 |
+| `SHOW CREATE STREAMING TABLE <name>` | 查看某条管道的物理执行拓扑图。 |
+| `DROP STREAMING TABLE <name>` | 销毁流计算管道并释放所有资源。 |
diff --git a/docs/streaming-sql-guide.md b/docs/streaming-sql-guide.md
new file mode 100644
index 00000000..8250e794
--- /dev/null
+++ b/docs/streaming-sql-guide.md
@@ -0,0 +1,260 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+
+# Streaming SQL Guide
+
+[中文](streaming-sql-guide-zh.md) | [English](streaming-sql-guide.md)
+
+Function Stream provides a declarative SQL interface for building real-time stream processing pipelines. With Streaming SQL you can ingest unbounded data streams, perform time-windowed aggregations, join multiple streams, and manage pipeline lifecycles — all without writing imperative code.
+
+---
+
+## Table of Contents
+
+- [Core Concepts](#core-concepts)
+- [Part 1: Registering Data Sources (TABLE)](#part-1-registering-data-sources-table)
+- [Part 2: Building Real-Time Pipelines (STREAMING TABLE)](#part-2-building-real-time-pipelines-streaming-table)
+  - [Tumbling Window](#scenario-1-tumbling-window)
+  - [Hopping Window](#scenario-2-hopping-window)
+  - [Window Join](#scenario-3-window-join)
+- [Part 3: Lifecycle & Pipeline Management](#part-3-lifecycle--pipeline-management)
+  - [Data Source Management](#1-data-source--metadata-management)
+  - [Pipeline Monitoring](#2-real-time-pipeline-monitoring--troubleshooting)
+  - [Stopping & Cleanup](#3-safe-shutdown--resource-release)
+- [SQL Reference Summary](#sql-reference-summary)
+
+---
+
+## Core Concepts
+
+| Concept | SQL Keyword | Description |
+|---------|-------------|-------------|
+| **TABLE** | `CREATE TABLE` | A static logical definition in the catalog. Records external source connection info, format, and schema. Consumes no compute resources. |
+| **STREAMING TABLE** | `CREATE STREAMING TABLE ... AS SELECT` | A physically running data pipeline. The engine allocates distributed compute tasks and continuously writes results to external systems in append-only mode. |
+| **Event Time** | `WATERMARK FOR <column>` | The timestamp column used by the engine to track the progression of time within a stream. |
+| **Watermark** | `AS <column> - INTERVAL ...` | A tolerance for late-arriving, out-of-order data. Events whose event time is already behind the current watermark are treated as late and dropped. |
+
+> For the full reference on supported connectors, data formats, and SQL data types, see [Connectors, Formats & Data Types](connectors-and-formats.md).
+
+---
+
+## Part 1: Registering Data Sources (TABLE)
+
+A `TABLE` is a static logical definition in the system catalog. It only records the connection information (e.g. Kafka broker, topic), data format, and schema of an external data source. **It does not consume any compute resources.**
+
+In stream processing, you must specify an **Event Time** column and a **Watermark** strategy for each input stream. The engine uses these as the sole basis for advancing time and triggering computations.
+
+### Example: Register an Ad-Impressions Stream and a Clicks Stream
+
+```sql
+-- 1. Register the ad-impressions stream
+CREATE TABLE ad_impressions (
+    impression_id VARCHAR,
+    ad_id BIGINT,
+    campaign_id BIGINT,
+    user_id VARCHAR,
+    impression_time TIMESTAMP NOT NULL,
+    WATERMARK FOR impression_time AS impression_time - INTERVAL '2' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'raw_ad_impressions',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+
+-- 2. Register the ad-clicks stream
+CREATE TABLE ad_clicks (
+    click_id VARCHAR,
+    impression_id VARCHAR,
+    ad_id BIGINT,
+    click_time TIMESTAMP NOT NULL,
+    WATERMARK FOR click_time AS click_time - INTERVAL '5' SECOND
+) WITH (
+    'connector' = 'kafka',
+    'topic' = 'raw_ad_clicks',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+);
+```
+
+**Key elements:**
+
+- `WATERMARK FOR <column> AS <column> - INTERVAL '<n>' SECOND`: declares the event-time column and the maximum tolerated out-of-order delay.
+- `WITH (...)`: connector properties — type, topic, format, and broker address.
+
+---
+
+## Part 2: Building Real-Time Pipelines (STREAMING TABLE)
+
+A `STREAMING TABLE` is a continuously running physical data pipeline. Using the `CREATE STREAMING TABLE ... AS SELECT` (CTAS) syntax, the engine launches real distributed compute tasks in the background and continuously writes results to an external system in **append-only** mode.
+
+### Scenario 1: Tumbling Window
+
+Divides time into fixed, non-overlapping windows.
+
+```sql
+-- Count total impressions per campaign every 1 minute
+CREATE STREAMING TABLE metric_tumble_impressions_1m WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_impressions_1m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    TUMBLE(INTERVAL '1' MINUTE) AS time_window,
+    campaign_id,
+    COUNT(*) AS total_impressions
+FROM ad_impressions
+GROUP BY
+    1,
+    campaign_id;
+```
+
+### Scenario 2: Hopping Window
+
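+Windows have a fixed size but start at a regular slide interval, so consecutive windows overlap — useful for smoothed trend monitoring. In `HOP(slide, size)`, the first argument is the slide interval and the second is the window size.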
+
+```sql
+-- Count distinct visitors (UV) per ad over the last 10 minutes, refreshed every 1 minute
+CREATE STREAMING TABLE metric_hop_uv_10m WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_uv_10m_step_1m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    HOP(INTERVAL '1' MINUTE, INTERVAL '10' MINUTE) AS time_window,
+    ad_id,
+    COUNT(DISTINCT user_id) AS unique_users
+FROM ad_impressions
+GROUP BY
+    1,
+    ad_id;
+```
+
+### Scenario 3: Window Join
+
+Join two streams within exactly the same time window. Because state is bounded by the window, memory is automatically reclaimed once the watermark advances past the window boundary — eliminating the risk of OOM.
+
+```sql
+-- Calculate 5-minute click-through rate (CTR)
+CREATE STREAMING TABLE metric_window_join_ctr_5m WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_ctr_5m',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    imp.time_window,
+    imp.ad_id,
+    imp.impressions,
+    COALESCE(clk.clicks, 0) AS clicks
+FROM (
+    SELECT TUMBLE(INTERVAL '5' MINUTE) AS time_window, ad_id, COUNT(*) AS impressions
+    FROM ad_impressions
+    GROUP BY 1, ad_id
+) imp
+LEFT JOIN (
+    SELECT TUMBLE(INTERVAL '5' MINUTE) AS time_window, ad_id, COUNT(*) AS clicks
+    FROM ad_clicks
+    GROUP BY 1, ad_id
+) clk
+ON imp.time_window = clk.time_window AND imp.ad_id = clk.ad_id;
+```
+
+> **Requirement:** The join condition **must** include an equality on the time-window column of both inputs (as in `imp.time_window = clk.time_window`) so that the join state stays bounded.
+
+---
+
+## Part 3: Lifecycle & Pipeline Management
+
+Function Stream provides a complete set of operational commands for managing the metadata catalog, inspecting physical execution graphs, and destroying streaming pipelines.
+
+### 1. Data Source & Metadata Management
+
+**List all registered source tables:**
+
+```sql
+SHOW TABLES;
+```
+
+Lists all static table definitions in the current catalog along with their Event Time and Watermark strategies.
+
+**Show the original DDL of a table:**
+
+```sql
+SHOW CREATE TABLE ad_clicks;
+```
+
+Useful for exporting or auditing the underlying connection parameters (Kafka topic, format, etc.).
+
+### 2. Real-Time Pipeline Monitoring & Troubleshooting
+
+**List all running streaming pipelines:**
+
+```sql
+SHOW STREAMING TABLES;
+```
+
+Output columns:
+
+| Column | Description |
+|--------|-------------|
+| `job_id` | Pipeline name (e.g. `metric_tumble_impressions_1m`). |
+| `status` | Lifecycle state (`RUNNING`, `FAILED`, etc.). |
+| `pipeline_count` | Number of parallel operator chains the engine split the job into. |
+| `uptime` | How long the pipeline has been running. |
+
+**Inspect the physical execution topology:**
+
+```sql
+SHOW CREATE STREAMING TABLE metric_tumble_impressions_1m;
+```
+
+This prints an ASCII representation of how the SQL was translated into a distributed execution graph:
+
+- `[Source]` — reads from the connector.
+- `[Operator] ExpressionWatermark` — injects watermarks.
+- `[Shuffle]` — redistributes data across the network.
+- `[Operator] TumblingWindowAggregate` — performs the actual windowed aggregation.
+- `[Sink] ConnectorSink` — writes results to the target connector (e.g. Kafka).
+
+### 3. Safe Shutdown & Resource Release
+
+When a campaign ends or you need to update the pipeline logic, explicitly destroy the old streaming pipeline:
+
+```sql
+DROP STREAMING TABLE metric_tumble_impressions_1m;
+```
+
+---
+
+## SQL Reference Summary
+
+| Statement | Description |
+|-----------|-------------|
+| `CREATE TABLE ... WITH (...)` | Register an external data source with schema, event time, and watermark. |
+| `CREATE STREAMING TABLE ... WITH (...) AS SELECT ...` | Create and launch a continuous streaming pipeline. |
+| `SHOW TABLES` | List all registered source tables. |
+| `SHOW CREATE TABLE <name>` | Display the DDL of a registered table. |
+| `SHOW STREAMING TABLES` | List all running streaming pipelines with status. |
+| `SHOW CREATE STREAMING TABLE <name>` | Inspect the physical execution graph of a pipeline. |
+| `DROP STREAMING TABLE <name>` | Destroy a streaming pipeline and release all resources. |

From 29d7a4b8c53b8714000f670730876e6a4578a61f Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 22:40:55 +0800
Subject: [PATCH 43/44] update

---
 docs/streaming-sql-guide-zh.md | 27 +++++++++++++++++++++++++--
 docs/streaming-sql-guide.md    | 27 +++++++++++++++++++++++++--
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/docs/streaming-sql-guide-zh.md b/docs/streaming-sql-guide-zh.md
index ee18ab2a..98842614 100644
--- a/docs/streaming-sql-guide-zh.md
+++ b/docs/streaming-sql-guide-zh.md
@@ -34,7 +34,8 @@ Function Stream 提供了声明式 SQL 接口来构建实时流处理管道。
 - [第二部分：构建实时 Pipeline (STREAMING TABLE)](#第二部分构建实时-pipeline-streaming-table)
   - [滚动窗口 (Tumbling Window)](#场景-1滚动窗口-tumbling-window)
   - [滑动窗口 (Hopping Window)](#场景-2滑动窗口-hopping-window)
-  - [窗口双流关联 (Window Join)](#场景-3窗口双流关联-window-join)
+  - [会话窗口 (Session Window)](#场景-3会话窗口-session-window)
+  - [窗口双流关联 (Window Join)](#场景-4窗口双流关联-window-join)
 - [第三部分：生命周期与流任务管理](#第三部分生命周期与流任务管理)
   - [数据源管理](#1-数据源与元数据管理)
   - [Pipeline 监控](#2-实时-pipeline-监控与排障)
@@ -151,7 +152,29 @@ GROUP BY
     ad_id;
 ```
 
-### 场景 3：窗口双流关联 (Window Join)
+### 场景 3：会话窗口 (Session Window)
+
+会话窗口根据指定的不活跃间隔（Gap）对事件进行分组。如果在 Gap 时间内没有新事件到达，窗口关闭并输出结果。会话窗口非常适合用户行为会话分析。
+
+```sql
+-- 需求：按用户检测广告曝光会话，30 秒无活动则会话结束
+CREATE STREAMING TABLE metric_session_impressions WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_session_impressions',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    SESSION(INTERVAL '30' SECOND) AS time_window,
+    user_id,
+    COUNT(*) AS impressions_in_session
+FROM ad_impressions
+GROUP BY
+    1,
+    user_id;
+```
+
+### 场景 4：窗口双流关联 (Window Join)
 
 将两条流在完全相同的时间窗口内进行等值关联。因为状态限定在窗口内，水位线越过窗口后状态会自动清理，绝不发生内存泄漏（OOM）。
 
diff --git a/docs/streaming-sql-guide.md b/docs/streaming-sql-guide.md
index 8250e794..cafaf887 100644
--- a/docs/streaming-sql-guide.md
+++ b/docs/streaming-sql-guide.md
@@ -34,7 +34,8 @@ Function Stream provides a declarative SQL interface for building real-time stre
 - [Part 2: Building Real-Time Pipelines (STREAMING TABLE)](#part-2-building-real-time-pipelines-streaming-table)
   - [Tumbling Window](#scenario-1-tumbling-window)
   - [Hopping Window](#scenario-2-hopping-window)
-  - [Window Join](#scenario-3-window-join)
+  - [Session Window](#scenario-3-session-window)
+  - [Window Join](#scenario-4-window-join)
 - [Part 3: Lifecycle & Pipeline Management](#part-3-lifecycle--pipeline-management)
   - [Data Source Management](#1-data-source--metadata-management)
   - [Pipeline Monitoring](#2-real-time-pipeline-monitoring--troubleshooting)
@@ -150,7 +151,29 @@ GROUP BY
     ad_id;
 ```
 
-### Scenario 3: Window Join
+### Scenario 3: Session Window
+
+A session window groups consecutive events that are separated by less than a specified inactivity gap. If no new event arrives within the gap, the window closes and emits its results. Session windows are ideal for user-session analysis.
+
+```sql
+-- Detect ad-impression sessions per user; a session ends after 30 seconds of inactivity
+CREATE STREAMING TABLE metric_session_impressions WITH (
+    'connector' = 'kafka',
+    'topic' = 'sink_session_impressions',
+    'format' = 'json',
+    'bootstrap.servers' = 'localhost:9092'
+) AS
+SELECT
+    SESSION(INTERVAL '30' SECOND) AS time_window,
+    user_id,
+    COUNT(*) AS impressions_in_session
+FROM ad_impressions
+GROUP BY
+    1,
+    user_id;
+```
+
+### Scenario 4: Window Join
 
 Join two streams within exactly the same time window. Because state is bounded by the window, memory is automatically reclaimed once the watermark advances past the window boundary — eliminating the risk of OOM.
 

From 7b1f95960b7a74537e5dfc7f93037381805bba4d Mon Sep 17 00:00:00 2001
From: luoluoyuyu <zhenyu@apache.org>
Date: Wed, 1 Apr 2026 23:28:52 +0800
Subject: [PATCH 44/44] update

---
 protocol/proto/storage.proto          |  15 +++
 src/coordinator/execution/executor.rs |  28 +++++-
 src/server/initializer.rs             |   1 +
 src/storage/stream_catalog/manager.rs | 135 +++++++++++++++++++++++++-
 src/storage/stream_catalog/mod.rs     |   1 +
 5 files changed, 176 insertions(+), 4 deletions(-)

diff --git a/protocol/proto/storage.proto b/protocol/proto/storage.proto
index 9ab0995d..f107d472 100644
--- a/protocol/proto/storage.proto
+++ b/protocol/proto/storage.proto
@@ -39,6 +39,21 @@ message CatalogSourceTable {
   string description = 7;
 }
 
+// =============================================================================
+// Streaming table storage (CREATE STREAMING TABLE persistence)
+// =============================================================================
+
+// Persisted record for one streaming table (CREATE STREAMING TABLE).
+// On restart, the engine re-submits each record to JobManager to resume the pipeline.
+message StreamingTableDefinition {
+  string table_name = 1;  // Table name as given at creation; reused as the job id when the record is re-submitted.
+  int64  created_at_millis = 2;  // UTC time at which the record was persisted, in milliseconds since the Unix epoch.
+  // Serialized function_stream.api.FsProgram — the full execution graph.
+  // Stored as opaque bytes to avoid coupling storage schema with runtime API protos.
+  bytes  fs_program_bytes = 3;
+  string comment = 4;  // User-supplied comment; empty string when none was given.
+}
+
 // =============================================================================
 // Task storage (RocksDB metadata + module payload)
 // =============================================================================
diff --git a/src/coordinator/execution/executor.rs b/src/coordinator/execution/executor.rs
index dcfbcb83..c24a4cda 100644
--- a/src/coordinator/execution/executor.rs
+++ b/src/coordinator/execution/executor.rs
@@ -14,7 +14,7 @@ use std::sync::Arc;
 
 use protocol::grpc::api::FsProgram;
 use thiserror::Error;
-use tracing::{debug, info};
+use tracing::{debug, info, warn};
 
 use crate::coordinator::dataset::{
     empty_record_batch, ExecuteResult, ShowCatalogTablesResult,
@@ -323,14 +323,28 @@ impl PlanVisitor for Executor {
 
             let job_id = plan.name.clone();
             let job_id = tokio::task::block_in_place(|| {
-                tokio::runtime::Handle::current().block_on(job_manager.submit_job(job_id, fs_program))
+                tokio::runtime::Handle::current()
+                    .block_on(job_manager.submit_job(job_id, fs_program.clone()))
             })
             .map_err(|e| ExecuteError::Internal(format!("Failed to submit streaming job: {e}")))?;
 
+            self.catalog_manager
+                .persist_streaming_job(
+                    &plan.name,
+                    &fs_program,
+                    plan.comment.as_deref().unwrap_or(""),
+                )
+                .map_err(|e| {
+                    ExecuteError::Internal(format!(
+                        "Streaming job '{}' submitted but persistence failed: {e}",
+                        plan.name
+                    ))
+                })?;
+
             info!(
                 job_id = %job_id,
                 table = %plan.name,
-                "Streaming job submitted"
+                "Streaming job submitted and persisted"
             );
 
             Ok(ExecuteResult::ok_with_data(
@@ -472,6 +486,14 @@ impl PlanVisitor for Executor {
                 );
             }
 
+            if let Err(e) = self.catalog_manager.remove_streaming_job(&plan.table_name) {
+                warn!(
+                    table = %plan.table_name,
+                    error = %e,
+                    "Failed to remove streaming job persisted definition (non-fatal)"
+                );
+            }
+
             let _ = self
                 .catalog_manager
                 .drop_catalog_table(&plan.table_name, true);
diff --git a/src/server/initializer.rs b/src/server/initializer.rs
index a73ec14a..70c19685 100644
--- a/src/server/initializer.rs
+++ b/src/server/initializer.rs
@@ -114,6 +114,7 @@ pub fn bootstrap_system(config: &GlobalConfig) -> Result<()> {
     registry.initialize_all(config)?;
 
     crate::storage::stream_catalog::restore_global_catalog_from_store();
+    crate::storage::stream_catalog::restore_streaming_jobs_from_store();
 
     info!("System bootstrap finished. Node is ready to accept traffic.");
     Ok(())
diff --git a/src/storage/stream_catalog/manager.rs b/src/storage/stream_catalog/manager.rs
index 086b206b..a0eb9b49 100644
--- a/src/storage/stream_catalog/manager.rs
+++ b/src/storage/stream_catalog/manager.rs
@@ -15,6 +15,7 @@ use std::sync::{Arc, OnceLock};
 use anyhow::{anyhow, bail, Context};
 use datafusion::common::{internal_err, plan_err, Result as DFResult};
 use prost::Message;
+use protocol::grpc::api::FsProgram;
 use protocol::storage::{self as pb, table_definition};
 use tracing::{info, warn};
 use unicase::UniCase;
@@ -30,6 +31,7 @@ use super::codec::CatalogCodec;
 use super::meta_store::MetaStore;
 
 const CATALOG_KEY_PREFIX: &str = "catalog:stream_table:";
+const STREAMING_JOB_KEY_PREFIX: &str = "streaming_job:";
 
 pub struct CatalogManager {
     store: Arc<dyn MetaStore>,
@@ -72,6 +74,79 @@ impl CatalogManager {
         format!("{CATALOG_KEY_PREFIX}{}", table_name.to_lowercase())
     }
 
+    #[inline]
+    fn build_streaming_job_key(table_name: &str) -> String { // Store key: STREAMING_JOB_KEY_PREFIX followed by the lowercased table name.
+        format!("{STREAMING_JOB_KEY_PREFIX}{}", table_name.to_lowercase())
+    }
+
+    // ========================================================================
+    // Streaming job persistence (CREATE STREAMING TABLE / DROP STREAMING TABLE)
+    // ========================================================================
+    /// Wraps the encoded `fs_program` and `comment` in a `StreamingTableDefinition` and writes it to the store under the table's streaming-job key.
+    pub fn persist_streaming_job(
+        &self,
+        table_name: &str,
+        fs_program: &FsProgram,
+        comment: &str,
+    ) -> DFResult<()> {
+        let program_bytes = fs_program.encode_to_vec();
+        let def = pb::StreamingTableDefinition {
+            table_name: table_name.to_string(), // original casing is kept in the record; only the store key is lowercased
+            created_at_millis: chrono::Utc::now().timestamp_millis(),
+            fs_program_bytes: program_bytes,
+            comment: comment.to_string(),
+        };
+        let payload = def.encode_to_vec();
+        let key = Self::build_streaming_job_key(table_name);
+        self.store.put(&key, payload)?; // a store failure is returned to the caller via `?`
+        info!(table = %table_name, "Streaming job definition persisted");
+        Ok(())
+    }
+    /// Deletes the record stored under the table's streaming-job key; a store error is returned to the caller.
+    pub fn remove_streaming_job(&self, table_name: &str) -> DFResult<()> {
+        let key = Self::build_streaming_job_key(table_name);
+        self.store.delete(&key)?;
+        info!(table = %table_name, "Streaming job definition removed from store"); // NOTE(review): logged whenever delete returns Ok; whether a missing key is Ok depends on the MetaStore impl — confirm
+        Ok(())
+    }
+    /// Scans every record under `STREAMING_JOB_KEY_PREFIX` and returns `(table_name, FsProgram)` pairs; records that fail to decode are logged and skipped.
+    pub fn load_streaming_job_definitions(
+        &self,
+    ) -> DFResult<Vec<(String, FsProgram)>> {
+        let records = self.store.scan_prefix(STREAMING_JOB_KEY_PREFIX)?; // only a failed scan makes this function return Err
+        let mut out = Vec::with_capacity(records.len());
+        for (key, payload) in records {
+            let def = match pb::StreamingTableDefinition::decode(payload.as_slice()) { // first decode the outer record
+                Ok(v) => v,
+                Err(e) => {
+                    warn!(
+                        key = %key,
+                        error = %e,
+                        "Skipping corrupted streaming job record"
+                    );
+                    continue;
+                }
+            };
+            let program = match FsProgram::decode(def.fs_program_bytes.as_slice()) { // then decode the embedded program bytes
+                Ok(v) => v,
+                Err(e) => {
+                    warn!(
+                        table = %def.table_name,
+                        error = %e,
+                        "Skipping streaming job with corrupted FsProgram"
+                    );
+                    continue;
+                }
+            };
+            out.push((def.table_name, program)); // the name comes from the record body, not from the store key
+        }
+        Ok(out)
+    }
+
+    // ========================================================================
+    // Catalog table persistence (CREATE TABLE / DROP TABLE)
+    // ========================================================================
+
     pub fn add_catalog_table(&self, table: CatalogTable) -> DFResult<()> {
         let proto_def = self.encode_catalog_table(&table)?;
         let payload = proto_def.encode_to_vec();
@@ -392,6 +467,64 @@ pub fn restore_global_catalog_from_store() {
     }
 }
 
+pub fn restore_streaming_jobs_from_store() {
+    use crate::runtime::streaming::job::JobManager;
+    // Re-submits every persisted streaming job to the global JobManager; every failure is logged as a warning and nothing is returned to the caller.
+    let Some(catalog) = CatalogManager::try_global() else {
+        warn!("CatalogManager not available; skipping streaming job restore");
+        return;
+    };
+    let job_manager = match JobManager::global() {
+        Ok(jm) => jm,
+        Err(e) => {
+            warn!(error = %e, "JobManager not available; skipping streaming job restore");
+            return;
+        }
+    };
+
+    let definitions = match catalog.load_streaming_job_definitions() {
+        Ok(defs) => defs,
+        Err(e) => {
+            warn!(error = %e, "Failed to load streaming job definitions from store");
+            return;
+        }
+    };
+
+    if definitions.is_empty() {
+        info!("No persisted streaming jobs to restore");
+        return;
+    }
+
+    let total = definitions.len();
+    info!(count = total, "Restoring persisted streaming jobs");
+
+    let rt = tokio::runtime::Handle::current(); // NOTE(review): panics when no Tokio runtime context is active — confirm the caller always runs inside one
+    let mut restored = 0usize;
+    let mut failed = 0usize;
+
+    for (table_name, fs_program) in definitions { // jobs are submitted one at a time, each awaited before the next
+        let jm = job_manager.clone();
+        let name = table_name.clone();
+        match rt.block_on(jm.submit_job(name.clone(), fs_program)) { // NOTE(review): Handle::block_on panics if called from an async context — confirm this is never reached from a runtime worker task
+            Ok(job_id) => {
+                info!(table = %table_name, job_id = %job_id, "Streaming job restored");
+                restored += 1;
+            }
+            Err(e) => {
+                warn!(table = %table_name, error = %e, "Failed to restore streaming job"); // the persisted record is not removed here
+                failed += 1;
+            }
+        }
+    }
+
+    info!(
+        restored = restored,
+        failed = failed,
+        total = total,
+        "Streaming job restore complete"
+    );
+}
+
 pub fn initialize_stream_catalog(config: &crate::config::GlobalConfig) -> anyhow::Result<()> {
     if !config.stream_catalog.persist {
         return CatalogManager::init_global_in_memory()
@@ -440,7 +573,7 @@ mod tests {
     use crate::sql::schema::connection_type::ConnectionType;
     use crate::sql::schema::source_table::SourceTable;
     use crate::sql::schema::table::Table as CatalogTable;
-    use crate::storage::stream_catalog::{InMemoryMetaStore, MetaStore};
+    use crate::storage::stream_catalog::InMemoryMetaStore;
 
     use super::CatalogManager;
 
diff --git a/src/storage/stream_catalog/mod.rs b/src/storage/stream_catalog/mod.rs
index 1b893cea..b99f3080 100644
--- a/src/storage/stream_catalog/mod.rs
+++ b/src/storage/stream_catalog/mod.rs
@@ -20,6 +20,7 @@ mod rocksdb_meta_store;
 pub use manager::{
     CatalogManager, initialize_stream_catalog,
     restore_global_catalog_from_store,
+    restore_streaming_jobs_from_store,
 };
 pub use meta_store::{InMemoryMetaStore, MetaStore};
 pub use rocksdb_meta_store::RocksDbMetaStore;