mirror of
https://github.com/postgres/postgres.git
synced 2025-12-02 00:02:42 -05:00
Create a new header optimizer/optimizer.h, which exposes just the planner functions that can be used "at arm's length", without need to access Paths or the other planner-internal data structures defined in nodes/relation.h. This is intended to provide the whole planner API seen by most of the rest of the system; although FDWs still need to use additional stuff, and more thought is also needed about just what selfuncs.c should rely on. The main point of doing this now is to limit the amount of new #include baggage that will be needed by "planner support functions", which I expect to introduce later, and which will be in relevant datatype modules rather than anywhere near the planner. This commit just moves relevant declarations into optimizer.h from other header files (a couple of which go away because everything got moved), and adjusts #include lists to match. There's further cleanup that could be done if we want to decide that some stuff being exposed by optimizer.h doesn't belong in the planner at all, but I'll leave that for another day. Discussion: https://postgr.es/m/11460.1548706639@sss.pgh.pa.us
537 lines
17 KiB
C
537 lines
17 KiB
C
/*-------------------------------------------------------------------------
 *
 * planagg.c
 *	  Special planning for aggregate queries.
 *
 * This module tries to replace MIN/MAX aggregate functions by subqueries
 * of the form
 *		(SELECT col FROM tab
 *		 WHERE col IS NOT NULL AND existing-quals
 *		 ORDER BY col ASC/DESC
 *		 LIMIT 1)
 * Given a suitable index on tab.col, this can be much faster than the
 * generic scan-all-the-rows aggregation plan.  We can handle multiple
 * MIN/MAX aggregates by generating multiple subqueries, and their
 * orderings can be different.  However, if the query contains any
 * non-optimizable aggregates, there's no point since we'll have to
 * scan all the rows anyway.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/plan/planagg.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include "access/htup_details.h"
|
|
#include "catalog/pg_aggregate.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "nodes/makefuncs.h"
|
|
#include "nodes/nodeFuncs.h"
|
|
#include "optimizer/clauses.h"
|
|
#include "optimizer/cost.h"
|
|
#include "optimizer/optimizer.h"
|
|
#include "optimizer/pathnode.h"
|
|
#include "optimizer/paths.h"
|
|
#include "optimizer/planmain.h"
|
|
#include "optimizer/subselect.h"
|
|
#include "optimizer/tlist.h"
|
|
#include "parser/parsetree.h"
|
|
#include "parser/parse_clause.h"
|
|
#include "rewrite/rewriteManip.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/syscache.h"
|
|
|
|
|
|
static bool find_minmax_aggs_walker(Node *node, List **context);
|
|
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
|
|
Oid eqop, Oid sortop, bool nulls_first);
|
|
static void minmax_qp_callback(PlannerInfo *root, void *extra);
|
|
static Oid fetch_agg_sort_op(Oid aggfnoid);
|
|
|
|
|
|
/*
|
|
* preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
|
|
*
|
|
* Check to see whether the query contains MIN/MAX aggregate functions that
|
|
* might be optimizable via indexscans. If it does, and all the aggregates
|
|
* are potentially optimizable, then create a MinMaxAggPath and add it to
|
|
* the (UPPERREL_GROUP_AGG, NULL) upperrel.
|
|
*
|
|
* This should be called by grouping_planner() just before it's ready to call
|
|
* query_planner(), because we generate indexscan paths by cloning the
|
|
* planner's state and invoking query_planner() on a modified version of
|
|
* the query parsetree. Thus, all preprocessing needed before query_planner()
|
|
* must already be done.
|
|
*
|
|
* Note: we are passed the preprocessed targetlist separately, because it's
|
|
* not necessarily equal to root->parse->targetList.
|
|
*/
|
|
void
|
|
preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
|
|
{
|
|
Query *parse = root->parse;
|
|
FromExpr *jtnode;
|
|
RangeTblRef *rtr;
|
|
RangeTblEntry *rte;
|
|
List *aggs_list;
|
|
RelOptInfo *grouped_rel;
|
|
ListCell *lc;
|
|
|
|
/* minmax_aggs list should be empty at this point */
|
|
Assert(root->minmax_aggs == NIL);
|
|
|
|
/* Nothing to do if query has no aggregates */
|
|
if (!parse->hasAggs)
|
|
return;
|
|
|
|
Assert(!parse->setOperations); /* shouldn't get here if a setop */
|
|
Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
|
|
|
|
/*
|
|
* Reject unoptimizable cases.
|
|
*
|
|
* We don't handle GROUP BY or windowing, because our current
|
|
* implementations of grouping require looking at all the rows anyway, and
|
|
* so there's not much point in optimizing MIN/MAX.
|
|
*/
|
|
if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
|
|
parse->hasWindowFuncs)
|
|
return;
|
|
|
|
/*
|
|
* Reject if query contains any CTEs; there's no way to build an indexscan
|
|
* on one so we couldn't succeed here. (If the CTEs are unreferenced,
|
|
* that's not true, but it doesn't seem worth expending cycles to check.)
|
|
*/
|
|
if (parse->cteList)
|
|
return;
|
|
|
|
/*
|
|
* We also restrict the query to reference exactly one table, since join
|
|
* conditions can't be handled reasonably. (We could perhaps handle a
|
|
* query containing cartesian-product joins, but it hardly seems worth the
|
|
* trouble.) However, the single table could be buried in several levels
|
|
* of FromExpr due to subqueries. Note the "single" table could be an
|
|
* inheritance parent, too, including the case of a UNION ALL subquery
|
|
* that's been flattened to an appendrel.
|
|
*/
|
|
jtnode = parse->jointree;
|
|
while (IsA(jtnode, FromExpr))
|
|
{
|
|
if (list_length(jtnode->fromlist) != 1)
|
|
return;
|
|
jtnode = linitial(jtnode->fromlist);
|
|
}
|
|
if (!IsA(jtnode, RangeTblRef))
|
|
return;
|
|
rtr = (RangeTblRef *) jtnode;
|
|
rte = planner_rt_fetch(rtr->rtindex, root);
|
|
if (rte->rtekind == RTE_RELATION)
|
|
/* ordinary relation, ok */ ;
|
|
else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
|
|
/* flattened UNION ALL subquery, ok */ ;
|
|
else
|
|
return;
|
|
|
|
/*
|
|
* Scan the tlist and HAVING qual to find all the aggregates and verify
|
|
* all are MIN/MAX aggregates. Stop as soon as we find one that isn't.
|
|
*/
|
|
aggs_list = NIL;
|
|
if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
|
|
return;
|
|
if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
|
|
return;
|
|
|
|
/*
|
|
* OK, there is at least the possibility of performing the optimization.
|
|
* Build an access path for each aggregate. If any of the aggregates
|
|
* prove to be non-indexable, give up; there is no point in optimizing
|
|
* just some of them.
|
|
*/
|
|
foreach(lc, aggs_list)
|
|
{
|
|
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
|
|
Oid eqop;
|
|
bool reverse;
|
|
|
|
/*
|
|
* We'll need the equality operator that goes with the aggregate's
|
|
* ordering operator.
|
|
*/
|
|
eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
|
|
if (!OidIsValid(eqop)) /* shouldn't happen */
|
|
elog(ERROR, "could not find equality operator for ordering operator %u",
|
|
mminfo->aggsortop);
|
|
|
|
/*
|
|
* We can use either an ordering that gives NULLS FIRST or one that
|
|
* gives NULLS LAST; furthermore there's unlikely to be much
|
|
* performance difference between them, so it doesn't seem worth
|
|
* costing out both ways if we get a hit on the first one. NULLS
|
|
* FIRST is more likely to be available if the operator is a
|
|
* reverse-sort operator, so try that first if reverse.
|
|
*/
|
|
if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
|
|
continue;
|
|
if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
|
|
continue;
|
|
|
|
/* No indexable path for this aggregate, so fail */
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* OK, we can do the query this way. Prepare to create a MinMaxAggPath
|
|
* node.
|
|
*
|
|
* First, create an output Param node for each agg. (If we end up not
|
|
* using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
|
|
* which is not worth worrying about. We can't wait till create_plan time
|
|
* to decide whether to make the Param, unfortunately.)
|
|
*/
|
|
foreach(lc, aggs_list)
|
|
{
|
|
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
|
|
|
|
mminfo->param =
|
|
SS_make_initplan_output_param(root,
|
|
exprType((Node *) mminfo->target),
|
|
-1,
|
|
exprCollation((Node *) mminfo->target));
|
|
}
|
|
|
|
/*
|
|
* Create a MinMaxAggPath node with the appropriate estimated costs and
|
|
* other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
|
|
* it will compete against the standard aggregate implementation. (It
|
|
* will likely always win, but we need not assume that here.)
|
|
*
|
|
* Note: grouping_planner won't have created this upperrel yet, but it's
|
|
* fine for us to create it first. We will not have inserted the correct
|
|
* consider_parallel value in it, but MinMaxAggPath paths are currently
|
|
* never parallel-safe anyway, so that doesn't matter. Likewise, it
|
|
* doesn't matter that we haven't filled FDW-related fields in the rel.
|
|
*/
|
|
grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
|
|
add_path(grouped_rel, (Path *)
|
|
create_minmaxagg_path(root, grouped_rel,
|
|
create_pathtarget(root, tlist),
|
|
aggs_list,
|
|
(List *) parse->havingQual));
|
|
}
|
|
|
|
/*
|
|
* find_minmax_aggs_walker
|
|
* Recursively scan the Aggref nodes in an expression tree, and check
|
|
* that each one is a MIN/MAX aggregate. If so, build a list of the
|
|
* distinct aggregate calls in the tree.
|
|
*
|
|
* Returns true if a non-MIN/MAX aggregate is found, false otherwise.
|
|
* (This seemingly-backward definition is used because expression_tree_walker
|
|
* aborts the scan on true return, which is what we want.)
|
|
*
|
|
* Found aggregates are added to the list at *context; it's up to the caller
|
|
* to initialize the list to NIL.
|
|
*
|
|
* This does not descend into subqueries, and so should be used only after
|
|
* reduction of sublinks to subplans. There mustn't be outer-aggregate
|
|
* references either.
|
|
*/
|
|
static bool
|
|
find_minmax_aggs_walker(Node *node, List **context)
|
|
{
|
|
if (node == NULL)
|
|
return false;
|
|
if (IsA(node, Aggref))
|
|
{
|
|
Aggref *aggref = (Aggref *) node;
|
|
Oid aggsortop;
|
|
TargetEntry *curTarget;
|
|
MinMaxAggInfo *mminfo;
|
|
ListCell *l;
|
|
|
|
Assert(aggref->agglevelsup == 0);
|
|
if (list_length(aggref->args) != 1)
|
|
return true; /* it couldn't be MIN/MAX */
|
|
|
|
/*
|
|
* ORDER BY is usually irrelevant for MIN/MAX, but it can change the
|
|
* outcome if the aggsortop's operator class recognizes non-identical
|
|
* values as equal. For example, 4.0 and 4.00 are equal according to
|
|
* numeric_ops, yet distinguishable. If MIN() receives more than one
|
|
* value equal to 4.0 and no value less than 4.0, it is unspecified
|
|
* which of those equal values MIN() returns. An ORDER BY expression
|
|
* that differs for each of those equal values of the argument
|
|
* expression makes the result predictable once again. This is a
|
|
* niche requirement, and we do not implement it with subquery paths.
|
|
* In any case, this test lets us reject ordered-set aggregates
|
|
* quickly.
|
|
*/
|
|
if (aggref->aggorder != NIL)
|
|
return true;
|
|
/* note: we do not care if DISTINCT is mentioned ... */
|
|
|
|
/*
|
|
* We might implement the optimization when a FILTER clause is present
|
|
* by adding the filter to the quals of the generated subquery. For
|
|
* now, just punt.
|
|
*/
|
|
if (aggref->aggfilter != NULL)
|
|
return true;
|
|
|
|
aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
|
|
if (!OidIsValid(aggsortop))
|
|
return true; /* not a MIN/MAX aggregate */
|
|
|
|
curTarget = (TargetEntry *) linitial(aggref->args);
|
|
|
|
if (contain_mutable_functions((Node *) curTarget->expr))
|
|
return true; /* not potentially indexable */
|
|
|
|
if (type_is_rowtype(exprType((Node *) curTarget->expr)))
|
|
return true; /* IS NOT NULL would have weird semantics */
|
|
|
|
/*
|
|
* Check whether it's already in the list, and add it if not.
|
|
*/
|
|
foreach(l, *context)
|
|
{
|
|
mminfo = (MinMaxAggInfo *) lfirst(l);
|
|
if (mminfo->aggfnoid == aggref->aggfnoid &&
|
|
equal(mminfo->target, curTarget->expr))
|
|
return false;
|
|
}
|
|
|
|
mminfo = makeNode(MinMaxAggInfo);
|
|
mminfo->aggfnoid = aggref->aggfnoid;
|
|
mminfo->aggsortop = aggsortop;
|
|
mminfo->target = curTarget->expr;
|
|
mminfo->subroot = NULL; /* don't compute path yet */
|
|
mminfo->path = NULL;
|
|
mminfo->pathcost = 0;
|
|
mminfo->param = NULL;
|
|
|
|
*context = lappend(*context, mminfo);
|
|
|
|
/*
|
|
* We need not recurse into the argument, since it can't contain any
|
|
* aggregates.
|
|
*/
|
|
return false;
|
|
}
|
|
Assert(!IsA(node, SubLink));
|
|
return expression_tree_walker(node, find_minmax_aggs_walker,
|
|
(void *) context);
|
|
}
|
|
|
|
/*
|
|
* build_minmax_path
|
|
* Given a MIN/MAX aggregate, try to build an indexscan Path it can be
|
|
* optimized with.
|
|
*
|
|
* If successful, stash the best path in *mminfo and return true.
|
|
* Otherwise, return false.
|
|
*/
|
|
static bool
|
|
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
|
|
Oid eqop, Oid sortop, bool nulls_first)
|
|
{
|
|
PlannerInfo *subroot;
|
|
Query *parse;
|
|
TargetEntry *tle;
|
|
List *tlist;
|
|
NullTest *ntest;
|
|
SortGroupClause *sortcl;
|
|
RelOptInfo *final_rel;
|
|
Path *sorted_path;
|
|
Cost path_cost;
|
|
double path_fraction;
|
|
|
|
/*
|
|
* We are going to construct what is effectively a sub-SELECT query, so
|
|
* clone the current query level's state and adjust it to make it look
|
|
* like a subquery. Any outer references will now be one level higher
|
|
* than before. (This means that when we are done, there will be no Vars
|
|
* of level 1, which is why the subquery can become an initplan.)
|
|
*/
|
|
subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
|
|
memcpy(subroot, root, sizeof(PlannerInfo));
|
|
subroot->query_level++;
|
|
subroot->parent_root = root;
|
|
/* reset subplan-related stuff */
|
|
subroot->plan_params = NIL;
|
|
subroot->outer_params = NULL;
|
|
subroot->init_plans = NIL;
|
|
|
|
subroot->parse = parse = copyObject(root->parse);
|
|
IncrementVarSublevelsUp((Node *) parse, 1, 1);
|
|
|
|
/* append_rel_list might contain outer Vars? */
|
|
subroot->append_rel_list = copyObject(root->append_rel_list);
|
|
IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
|
|
/* There shouldn't be any OJ info to translate, as yet */
|
|
Assert(subroot->join_info_list == NIL);
|
|
/* and we haven't made equivalence classes, either */
|
|
Assert(subroot->eq_classes == NIL);
|
|
/* and we haven't created PlaceHolderInfos, either */
|
|
Assert(subroot->placeholder_list == NIL);
|
|
|
|
/*----------
|
|
* Generate modified query of the form
|
|
* (SELECT col FROM tab
|
|
* WHERE col IS NOT NULL AND existing-quals
|
|
* ORDER BY col ASC/DESC
|
|
* LIMIT 1)
|
|
*----------
|
|
*/
|
|
/* single tlist entry that is the aggregate target */
|
|
tle = makeTargetEntry(copyObject(mminfo->target),
|
|
(AttrNumber) 1,
|
|
pstrdup("agg_target"),
|
|
false);
|
|
tlist = list_make1(tle);
|
|
subroot->processed_tlist = parse->targetList = tlist;
|
|
|
|
/* No HAVING, no DISTINCT, no aggregates anymore */
|
|
parse->havingQual = NULL;
|
|
subroot->hasHavingQual = false;
|
|
parse->distinctClause = NIL;
|
|
parse->hasDistinctOn = false;
|
|
parse->hasAggs = false;
|
|
|
|
/* Build "target IS NOT NULL" expression */
|
|
ntest = makeNode(NullTest);
|
|
ntest->nulltesttype = IS_NOT_NULL;
|
|
ntest->arg = copyObject(mminfo->target);
|
|
/* we checked it wasn't a rowtype in find_minmax_aggs_walker */
|
|
ntest->argisrow = false;
|
|
ntest->location = -1;
|
|
|
|
/* User might have had that in WHERE already */
|
|
if (!list_member((List *) parse->jointree->quals, ntest))
|
|
parse->jointree->quals = (Node *)
|
|
lcons(ntest, (List *) parse->jointree->quals);
|
|
|
|
/* Build suitable ORDER BY clause */
|
|
sortcl = makeNode(SortGroupClause);
|
|
sortcl->tleSortGroupRef = assignSortGroupRef(tle, tlist);
|
|
sortcl->eqop = eqop;
|
|
sortcl->sortop = sortop;
|
|
sortcl->nulls_first = nulls_first;
|
|
sortcl->hashable = false; /* no need to make this accurate */
|
|
parse->sortClause = list_make1(sortcl);
|
|
|
|
/* set up expressions for LIMIT 1 */
|
|
parse->limitOffset = NULL;
|
|
parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
|
|
sizeof(int64),
|
|
Int64GetDatum(1), false,
|
|
FLOAT8PASSBYVAL);
|
|
|
|
/*
|
|
* Generate the best paths for this query, telling query_planner that we
|
|
* have LIMIT 1.
|
|
*/
|
|
subroot->tuple_fraction = 1.0;
|
|
subroot->limit_tuples = 1.0;
|
|
|
|
final_rel = query_planner(subroot, tlist, minmax_qp_callback, NULL);
|
|
|
|
/*
|
|
* Since we didn't go through subquery_planner() to handle the subquery,
|
|
* we have to do some of the same cleanup it would do, in particular cope
|
|
* with params and initplans used within this subquery. (This won't
|
|
* matter if we end up not using the subplan.)
|
|
*/
|
|
SS_identify_outer_params(subroot);
|
|
SS_charge_for_initplans(subroot, final_rel);
|
|
|
|
/*
|
|
* Get the best presorted path, that being the one that's cheapest for
|
|
* fetching just one row. If there's no such path, fail.
|
|
*/
|
|
if (final_rel->rows > 1.0)
|
|
path_fraction = 1.0 / final_rel->rows;
|
|
else
|
|
path_fraction = 1.0;
|
|
|
|
sorted_path =
|
|
get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
|
|
subroot->query_pathkeys,
|
|
NULL,
|
|
path_fraction);
|
|
if (!sorted_path)
|
|
return false;
|
|
|
|
/*
|
|
* The path might not return exactly what we want, so fix that. (We
|
|
* assume that this won't change any conclusions about which was the
|
|
* cheapest path.)
|
|
*/
|
|
sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
|
|
create_pathtarget(subroot, tlist));
|
|
|
|
/*
|
|
* Determine cost to get just the first row of the presorted path.
|
|
*
|
|
* Note: cost calculation here should match
|
|
* compare_fractional_path_costs().
|
|
*/
|
|
path_cost = sorted_path->startup_cost +
|
|
path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
|
|
|
|
/* Save state for further processing */
|
|
mminfo->subroot = subroot;
|
|
mminfo->path = sorted_path;
|
|
mminfo->pathcost = path_cost;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Compute query_pathkeys and other pathkeys during query_planner()
|
|
*/
|
|
static void
|
|
minmax_qp_callback(PlannerInfo *root, void *extra)
|
|
{
|
|
root->group_pathkeys = NIL;
|
|
root->window_pathkeys = NIL;
|
|
root->distinct_pathkeys = NIL;
|
|
|
|
root->sort_pathkeys =
|
|
make_pathkeys_for_sortclauses(root,
|
|
root->parse->sortClause,
|
|
root->parse->targetList);
|
|
|
|
root->query_pathkeys = root->sort_pathkeys;
|
|
}
|
|
|
|
/*
|
|
* Get the OID of the sort operator, if any, associated with an aggregate.
|
|
* Returns InvalidOid if there is no such operator.
|
|
*/
|
|
static Oid
|
|
fetch_agg_sort_op(Oid aggfnoid)
|
|
{
|
|
HeapTuple aggTuple;
|
|
Form_pg_aggregate aggform;
|
|
Oid aggsortop;
|
|
|
|
/* fetch aggregate entry from pg_aggregate */
|
|
aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
|
|
if (!HeapTupleIsValid(aggTuple))
|
|
return InvalidOid;
|
|
aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
|
|
aggsortop = aggform->aggsortop;
|
|
ReleaseSysCache(aggTuple);
|
|
|
|
return aggsortop;
|
|
}
|