Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ All notable changes to this project will be documented in this file. It uses the
`levenshtein()` (2-arg, mapped to `editDistance`). Thanks to
Philip Dubé for the PR ([#210]).

### 🐞 Bug Fixes

* Fixed `EXPLAIN (VERBOSE)` failing with "could not find window clause for
winref N" when window functions are pushed down to ClickHouse. Thanks to
Philip Dubé for the PR ([#223]).

### 📚 Documentation

* Added "Extensions Pushdown" section to the [reference
Expand All @@ -43,6 +49,8 @@ All notable changes to this project will be documented in this file. It uses the
"PostgreSQL Docs: fuzzystrmatch"
[#210]: https://github.com/ClickHouse/pg_clickhouse/pull/210
"pg_clickhouse#210 Support pushing down soundex & levenshtein from fuzzystrmatch"
[#223]: https://github.com/ClickHouse/pg_clickhouse/pull/223
"pg_clickhouse#223 Fix EXPLAIN (VERBOSE) for window functions"

## [v0.2.0] — 2026-04-13

Expand Down
97 changes: 97 additions & 0 deletions src/fdw.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
Expand Down Expand Up @@ -590,6 +591,82 @@ clickhouseGetForeignPaths(PlannerInfo * root,
add_paths_with_pathkeys_for_rel(root, baserel, NULL);
}

/*
* Substitute WindowFunc nodes with FuncExpr placeholders in both
* fdw_scan_tlist and the outer targetlist of a ForeignScan. Needed because
* EXPLAIN VERBOSE deparses WindowFunc via a WindowClause/WindowAgg lookup
* that is not available when windowing is pushed down to ClickHouse
* (ruleutils.c:get_windowfunc_expr_helper).
*
* Placeholder must be non-Const/Var/Param so setrefs' fix_upper_expr will
* rewrite outer tlist entries to INDEX_VAR references against fdw_scan_tlist
* (search_indexed_tlist_for_non_var explicitly skips Consts). An un-rewritten
* Const would emit its literal at execution time instead of reading the
* computed column from the scan tuple.
*
* FuncExpr on wf->winfnoid deparses as "funcname(args)", close to the
* original minus the OVER clause (which the Remote SQL line already prints).
*
* Two WindowFuncs with identical args but different winref refer to distinct
* windows, so their placeholders must stay distinguishable to equal(),
* otherwise setrefs collapses both outer refs to the first fdw_scan_tlist
* slot and execution reads the wrong column. inputcollid is abused as a
* counter: equal() compares it, while the executor only consults it when
* the function is actually invoked. That never happens here, because the
* placeholder lives only as schema in fdw_scan_tlist or is rewritten to
* INDEX_VAR in the outer tlist.
*/
/*
 * Memo carried through replace_windowfuncs_mutator(). The same state is
 * passed to every call so that a WindowFunc equal() to one already replaced
 * receives a copy of the same placeholder instead of a fresh one.
 *
 * originals and placeholders are parallel lists: they are appended to
 * together and walked together, so entry i of placeholders is the node
 * substituted for entry i of originals.
 */
typedef struct WindowFuncSubstState
{
List *originals; /* copies of the distinct WindowFunc nodes seen so far */
List *placeholders; /* matching placeholder nodes (same index) */
int counter; /* pre-incremented for each new placeholder and stored in its inputcollid */
} WindowFuncSubstState;

/* Declared up front: the callback adapter below calls it before its definition. */
static Node *replace_windowfuncs_mutator(Node *node, WindowFuncSubstState *state);

/*
 * Adapter with an untyped context pointer, in the form handed to
 * expression_tree_mutator(). It recovers the typed substitution state and
 * forwards to replace_windowfuncs_mutator().
 */
static Node *
replace_windowfuncs_mutator_callback(Node *node, void *state)
{
	WindowFuncSubstState *subst = (WindowFuncSubstState *) state;

	return replace_windowfuncs_mutator(node, subst);
}

static Node *
replace_windowfuncs_mutator(Node * node, WindowFuncSubstState * state)
{
if (node == NULL)
return NULL;
if (IsA(node, WindowFunc))
{
WindowFunc *wf = (WindowFunc *) node;
ListCell *lo,
*lp;
FuncExpr *ph;

forboth(lo, state->originals, lp, state->placeholders)
{
if (equal(lfirst(lo), wf))
return (Node *) copyObject(lfirst(lp));
}

ph = makeNode(FuncExpr);
ph->funcid = wf->winfnoid;
ph->funcresulttype = wf->wintype;
ph->funcretset = false;
ph->funcvariadic = false;
ph->funcformat = COERCE_EXPLICIT_CALL;
ph->funccollid = wf->wincollid;
ph->inputcollid = ++state->counter;
ph->args = (List *) copyObject(wf->args);
ph->location = -1;

state->originals = lappend(state->originals, copyObject(wf));
state->placeholders = lappend(state->placeholders, ph);
return (Node *) ph;
}
return expression_tree_mutator(node, replace_windowfuncs_mutator_callback, state);
}

/*
* clickhouseGetForeignPlan
* Create ForeignScan plan node which implements selected best path
Expand Down Expand Up @@ -770,6 +847,26 @@ clickhouseGetForeignPlan(PlannerInfo * root,
/* Remember remote_exprs for possible use by clickhousePlanDirectModify */
fpinfo->final_remote_exprs = remote_exprs;

/*
* When window functions are pushed down, ForeignScan takes the place of
* what would otherwise be a WindowAgg plan node. Core EXPLAIN VERBOSE
* deparses WindowFunc via a WindowClause (query context) or WindowAgg in
* the plan; neither exists here, so deparse would error with "could not
* find window clause for winref N". Remote SQL has already been deparsed
* above, so swap WindowFunc for FuncExpr placeholders in both
* fdw_scan_tlist and the outer tlist. setrefs' equal() match rewrites the
* outer tlist to INDEX_VAR references against fdw_scan_tlist so execution
* reads real values from the scan tuple; VERBOSE deparse sees the
* placeholder instead of a WindowFunc.
*/
if (contain_window_function((Node *) fdw_scan_tlist))
{
WindowFuncSubstState state = {NIL, NIL, 0};

fdw_scan_tlist = (List *) replace_windowfuncs_mutator((Node *) fdw_scan_tlist, &state);
tlist = (List *) replace_windowfuncs_mutator((Node *) tlist, &state);
}

/*
* Build the fdw_private list that will be available to the executor.
* Items in the list must match order in enum FdwScanPrivateIndex.
Expand Down
72 changes: 72 additions & 0 deletions test/expected/window_functions.out
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,78 @@
lead_200 | 2026-03-10 09:00:00+00 | 150 | 2
(3 rows)

-- VERBOSE ROW_NUMBER pushdown (binary)
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (row_number())
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, row_number() OVER (PARTITION BY entity_id ORDER BY ts_event DESC NULLS FIRST) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

-- VERBOSE ROW_NUMBER pushdown (http)
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (row_number())
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, row_number() OVER (PARTITION BY entity_id ORDER BY ts_event DESC NULLS FIRST) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

-- VERBOSE MIN/MAX OVER pushdown (binary)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, amount, (min(amount)), (max(amount))
Relations: Window on (events)
Remote SQL: SELECT entity_id, amount, min(amount) OVER (PARTITION BY entity_id), max(amount) OVER (PARTITION BY entity_id) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

-- VERBOSE LEAD pushdown (binary)
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (lead(ts_event))
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

-- VERBOSE ntile pushdown (binary)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (ntile(2))
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, ntile(2) OVER (ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY ts_event ASC NULLS LAST
(4 rows)

-- VERBOSE window + ORDER BY + LIMIT pushdown (binary)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, amount, (row_number())
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, amount, row_number() OVER (PARTITION BY entity_id ORDER BY ts_event DESC NULLS FIRST) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY entity_id ASC NULLS LAST, ts_event ASC NULLS LAST LIMIT 3
(4 rows)

-- VERBOSE duplicate window call (binary)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (row_number()), (row_number())
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, row_number() OVER (PARTITION BY entity_id ORDER BY ts_event ASC), row_number() OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

entity_id | ts_event | rn1 | rn2
-----------+------------------------+-----+-----
lead_100 | 2026-03-01 10:00:00+00 | 1 | 1
lead_100 | 2026-03-15 14:00:00+00 | 2 | 2
lead_200 | 2026-03-10 09:00:00+00 | 1 | 1
lead_200 | 2026-03-20 11:00:00+00 | 2 | 2
lead_300 | 2026-03-05 08:00:00+00 | 1 | 1
(5 rows)

clickhouse_raw_query
----------------------

Expand Down
72 changes: 72 additions & 0 deletions test/expected/window_functions_1.out
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,78 @@ DETAIL: Remote Query: SELECT entity_id, amount, cume_dist() OVER (ORDER BY amou
lead_200 | 2026-03-10 09:00:00+00 | 150 | 2
(3 rows)

-- VERBOSE ROW_NUMBER pushdown (binary)
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (row_number())
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, row_number() OVER (PARTITION BY entity_id ORDER BY ts_event DESC NULLS FIRST) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

-- VERBOSE ROW_NUMBER pushdown (http)
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (row_number())
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, row_number() OVER (PARTITION BY entity_id ORDER BY ts_event DESC NULLS FIRST) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

-- VERBOSE MIN/MAX OVER pushdown (binary)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, amount, (min(amount)), (max(amount))
Relations: Window on (events)
Remote SQL: SELECT entity_id, amount, min(amount) OVER (PARTITION BY entity_id), max(amount) OVER (PARTITION BY entity_id) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

-- VERBOSE LEAD pushdown (binary)
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (lead(ts_event))
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, lead(ts_event) OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

-- VERBOSE ntile pushdown (binary)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (ntile(2))
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, ntile(2) OVER (ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY ts_event ASC NULLS LAST
(4 rows)

-- VERBOSE window + ORDER BY + LIMIT pushdown (binary)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, amount, (row_number())
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, amount, row_number() OVER (PARTITION BY entity_id ORDER BY ts_event DESC NULLS FIRST) FROM wf_test.events WHERE ((event_name = 'lead_created')) ORDER BY entity_id ASC NULLS LAST, ts_event ASC NULLS LAST LIMIT 3
(4 rows)

-- VERBOSE duplicate window call (binary)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan
Output: entity_id, ts_event, (row_number()), (row_number())
Relations: Window on (events)
Remote SQL: SELECT entity_id, ts_event, row_number() OVER (PARTITION BY entity_id ORDER BY ts_event ASC), row_number() OVER (PARTITION BY entity_id ORDER BY ts_event ASC) FROM wf_test.events WHERE ((event_name = 'lead_created'))
(4 rows)

entity_id | ts_event | rn1 | rn2
-----------+------------------------+-----+-----
lead_100 | 2026-03-01 10:00:00+00 | 1 | 1
lead_100 | 2026-03-15 14:00:00+00 | 2 | 2
lead_200 | 2026-03-10 09:00:00+00 | 1 | 1
lead_200 | 2026-03-20 11:00:00+00 | 2 | 2
lead_300 | 2026-03-05 08:00:00+00 | 1 | 1
(5 rows)

clickhouse_raw_query
----------------------

Expand Down
Loading
Loading