From 6254465d0621f724cdc9a1e99da24fa8a29f579d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 18 Oct 2001 16:11:42 +0000 Subject: [PATCH] Extend code that deduces implied equality clauses to detect whether a clause being added to a particular restriction-clause list is redundant with those already in the list. This avoids useless work at runtime, and (perhaps more importantly) keeps the selectivity estimation routines from generating too-small estimates of numbers of output rows. Also some minor improvements in OPTIMIZER_DEBUG displays. --- src/backend/nodes/print.c | 36 +++++- src/backend/optimizer/README | 17 +++ src/backend/optimizer/path/allpaths.c | 154 ++++++++++++++----------- src/backend/optimizer/path/joinrels.c | 6 +- src/backend/optimizer/path/pathkeys.c | 4 +- src/backend/optimizer/plan/initsplan.c | 151 +++++++++++++++++++++--- src/backend/optimizer/plan/planner.c | 5 +- src/backend/optimizer/util/relnode.c | 102 +++++++++++++--- src/include/optimizer/paths.h | 8 +- 9 files changed, 373 insertions(+), 110 deletions(-) diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c index 932f55ab885..98347695622 100644 --- a/src/backend/nodes/print.c +++ b/src/backend/nodes/print.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.47 2001/03/22 03:59:32 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.48 2001/10/18 16:11:41 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -20,10 +20,12 @@ #include "postgres.h" #include "access/printtup.h" +#include "catalog/pg_type.h" #include "nodes/print.h" #include "optimizer/clauses.h" #include "parser/parsetree.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" static char *plannode_type(Plan *p); @@ -188,6 +190,36 @@ print_expr(Node *expr, List *rtable) } printf("%s.%s", relname, attname); } + else if (IsA(expr, Const)) + { + Const *c = (Const *) expr; + HeapTuple typeTup; + Oid typoutput; + Oid typelem; + char *outputstr; + + if (c->constisnull) + { + printf("NULL"); + return; + } + + typeTup = SearchSysCache(TYPEOID, + ObjectIdGetDatum(c->consttype), + 0, 0, 0); + if (!HeapTupleIsValid(typeTup)) + elog(ERROR, "Cache lookup for type %u failed", c->consttype); + typoutput = ((Form_pg_type) GETSTRUCT(typeTup))->typoutput; + typelem = ((Form_pg_type) GETSTRUCT(typeTup))->typelem; + ReleaseSysCache(typeTup); + + outputstr = DatumGetCString(OidFunctionCall3(typoutput, + c->constvalue, + ObjectIdGetDatum(typelem), + Int32GetDatum(-1))); + printf("%s", outputstr); + pfree(outputstr); + } else if (IsA(expr, Expr)) { Expr *e = (Expr *) expr; @@ -232,7 +264,7 @@ print_pathkeys(List *pathkeys, List *rtable) if (lnext(k)) printf(", "); } - printf(") "); + printf(")"); if (lnext(i)) printf(", "); } diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 520ab1b2421..472efbcd9cc 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -461,6 +461,23 @@ code than it would otherwise have, and reduces the number of tuples processed in join stages, so it's a win to make these deductions even if we weren't forced to. +When we generate implied equality constraints, we may find ourselves +adding redundant clauses to specific relations. For example, consider + SELECT * FROM t1, t2, t3 WHERE t1.a = t2.b AND t2.b = t3.c; +We will generate the implied clause t1.a = t3.c and add it to the tree. +This is good since it allows us to consider joining t1 and t3 directly, +which we otherwise wouldn't do. But when we reach the stage of joining +all three relations, we will have redundant join clauses --- eg, if we +join t1 and t2 first, then the path that joins (t1 t2) to t3 will have +both t2.b = t3.c and t1.a = t3.c as restriction clauses. This is bad; +not only is evaluation of the extra clause useless work at runtime, +but the selectivity estimator routines will underestimate the number +of tuples produced since they won't know that the two clauses are +perfectly redundant. We fix this by detecting and removing redundant +clauses as the restriction clause list is built for each join. (We +can't do it sooner, since which clauses are redundant will vary depending +on the join order.) + Yet another implication of all this is that mergejoinable operators must form closed equivalence sets. For example, if "int2 = int4" and "int4 = int8" are both marked mergejoinable, then there had better diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 8a6e9b9374a..ebc303d4654 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,13 +8,16 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.78 2001/07/31 17:56:30 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.79 2001/10/18 16:11:41 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#ifdef OPTIMIZER_DEBUG +#include "nodes/print.h" +#endif #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/geqo.h" @@ -42,11 +45,6 @@ static void set_subquery_pathlist(Query *root, RelOptInfo *rel, static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels); -#ifdef OPTIMIZER_DEBUG -static void debug_print_rel(Query *root, RelOptInfo *rel); - -#endif - /* * make_one_rel @@ -116,6 +114,10 @@ set_base_rel_pathlists(Query *root) /* Plain relation */ set_plain_rel_pathlist(root, rel, rte); } + +#ifdef OPTIMIZER_DEBUG + debug_print_rel(root, rel); +#endif } } @@ -520,10 +522,22 @@ make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels) #ifdef OPTIMIZER_DEBUG static void -print_joinclauses(Query *root, List *clauses) +print_relids(Relids relids) +{ + List *l; + + foreach(l, relids) + { + printf("%d", lfirsti(l)); + if (lnext(l)) + printf(" "); + } +} + +static void +print_restrictclauses(Query *root, List *clauses) { List *l; - extern void print_expr(Node *expr, List *rtable); /* in print.c */ foreach(l, clauses) { @@ -531,21 +545,17 @@ print_joinclauses(Query *root, List *clauses) print_expr((Node *) c->clause, root->rtable); if (lnext(l)) - printf(" "); + printf(", "); } } static void print_path(Query *root, Path *path, int indent) { - char *ptype = NULL; - JoinPath *jp; - bool join = false; + const char *ptype; + bool join; int i; - for (i = 0; i < indent; i++) - printf("\t"); - switch (nodeTag(path)) { case T_Path: @@ -556,6 +566,10 @@ print_path(Query *root, Path *path, int indent) ptype = "IdxScan"; join = false; break; + case T_TidPath: + ptype = "TidScan"; + join = false; + break; case T_NestPath: ptype = "Nestloop"; join = true; @@ -569,82 +583,82 @@ print_path(Query *root, Path *path, int indent) join = true; break; default: + ptype = "???Path"; + join = false; break; } + + for (i = 0; i < indent; i++) + printf("\t"); + printf("%s(", ptype); + print_relids(path->parent->relids); + printf(") rows=%.0f cost=%.2f..%.2f\n", + path->parent->rows, path->startup_cost, path->total_cost); + + if (path->pathkeys) + { + for (i = 0; i < indent; i++) + printf("\t"); + printf(" pathkeys: "); + print_pathkeys(path->pathkeys, root->rtable); + } + if (join) { - jp = (JoinPath *) path; + JoinPath *jp = (JoinPath *) path; - printf("%s rows=%.0f cost=%.2f..%.2f\n", - ptype, path->parent->rows, - path->startup_cost, path->total_cost); + for (i = 0; i < indent; i++) + printf("\t"); + printf(" clauses: "); + print_restrictclauses(root, jp->joinrestrictinfo); + printf("\n"); - if (path->pathkeys) + if (nodeTag(path) == T_MergePath) { - for (i = 0; i < indent; i++) - printf("\t"); - printf(" pathkeys="); - print_pathkeys(path->pathkeys, root->rtable); - } + MergePath *mp = (MergePath *) path; - switch (nodeTag(path)) - { - case T_MergePath: - case T_HashPath: + if (mp->outersortkeys || mp->innersortkeys) + { for (i = 0; i < indent; i++) printf("\t"); - printf(" clauses=("); - print_joinclauses(root, jp->joinrestrictinfo); - printf(")\n"); - - if (nodeTag(path) == T_MergePath) - { - MergePath *mp = (MergePath *) path; - - if (mp->outersortkeys || mp->innersortkeys) - { - for (i = 0; i < indent; i++) - printf("\t"); - printf(" sortouter=%d sortinner=%d\n", - ((mp->outersortkeys) ? 1 : 0), - ((mp->innersortkeys) ? 1 : 0)); - } - } - break; - default: - break; + printf(" sortouter=%d sortinner=%d\n", + ((mp->outersortkeys) ? 1 : 0), + ((mp->innersortkeys) ? 1 : 0)); + } } + print_path(root, jp->outerjoinpath, indent + 1); print_path(root, jp->innerjoinpath, indent + 1); } - else - { - int relid = lfirsti(path->parent->relids); - - printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n", - ptype, relid, path->parent->rows, - path->startup_cost, path->total_cost); - - if (path->pathkeys) - { - for (i = 0; i < indent; i++) - printf("\t"); - printf(" pathkeys="); - print_pathkeys(path->pathkeys, root->rtable); - } - } } -static void +void debug_print_rel(Query *root, RelOptInfo *rel) { List *l; - printf("("); - foreach(l, rel->relids) - printf("%d ", lfirsti(l)); + printf("RELOPTINFO ("); + print_relids(rel->relids); printf("): rows=%.0f width=%d\n", rel->rows, rel->width); + if (rel->baserestrictinfo) + { + printf("\tbaserestrictinfo: "); + print_restrictclauses(root, rel->baserestrictinfo); + printf("\n"); + } + + foreach(l, rel->joininfo) + { + JoinInfo *j = (JoinInfo *) lfirst(l); + + printf("\tjoininfo ("); + print_relids(j->unjoined_relids); + printf("): "); + print_restrictclauses(root, j->jinfo_restrictinfo); + printf("\n"); + } + printf("\tpath list:\n"); foreach(l, rel->pathlist) print_path(root, lfirst(l), 1); @@ -652,6 +666,8 @@ debug_print_rel(Query *root, RelOptInfo *rel) print_path(root, rel->cheapest_startup_path, 1); printf("\n\tcheapest total path:\n"); print_path(root, rel->cheapest_total_path, 1); + printf("\n"); + fflush(stdout); } #endif /* OPTIMIZER_DEBUG */ diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 3bde257a37e..8c805e0e3ed 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.53 2001/05/20 20:28:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.54 2001/10/18 16:11:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -374,6 +374,10 @@ make_jointree_rel(Query *root, Node *jtnode) */ set_cheapest(rel); +#ifdef OPTIMIZER_DEBUG + debug_print_rel(root, rel); +#endif + return rel; } else diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 4f4b6720e1e..9e964290e6d 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.32 2001/03/22 06:16:14 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.33 2001/10/18 16:11:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -705,7 +705,7 @@ make_pathkeys_for_sortclauses(List *sortclauses, * This is a worthwhile savings because these routines will be invoked * many times when dealing with a many-relation query. */ -static void +void cache_mergeclause_pathkeys(Query *root, RestrictInfo *restrictinfo) { Node *key; diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index cf2f798954b..ff2e13ec977 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.63 2001/06/05 05:26:04 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.64 2001/10/18 16:11:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,10 +40,13 @@ static void mark_baserels_for_outer_join(Query *root, Relids rels, static void distribute_qual_to_rels(Query *root, Node *clause, bool ispusheddown, bool isouterjoin, + bool isdeduced, Relids qualscope); static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, Relids join_relids); static void add_vars_to_targetlist(Query *root, List *vars); +static bool qual_is_redundant(Query *root, RestrictInfo *restrictinfo, + List *restrictlist); static void check_mergejoinable(RestrictInfo *restrictinfo); static void check_hashjoinable(RestrictInfo *restrictinfo); @@ -219,7 +222,7 @@ distribute_quals_to_rels(Query *root, Node *jtnode) */ foreach(qual, (List *) f->quals) distribute_qual_to_rels(root, (Node *) lfirst(qual), - true, false, result); + true, false, false, result); } else if (IsA(jtnode, JoinExpr)) { @@ -279,7 +282,7 @@ distribute_quals_to_rels(Query *root, Node *jtnode) foreach(qual, (List *) j->quals) distribute_qual_to_rels(root, (Node *) lfirst(qual), - false, isouterjoin, result); + false, isouterjoin, false, result); } else elog(ERROR, "distribute_quals_to_rels: unexpected node type %d", @@ -339,6 +342,7 @@ mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels) * 'ispusheddown': if TRUE, force the clause to be marked 'ispusheddown' * (this indicates the clause came from a FromExpr, not a JoinExpr) * 'isouterjoin': TRUE if the qual came from an OUTER JOIN's ON-clause + * 'isdeduced': TRUE if the qual came from implied-equality deduction * 'qualscope': list of baserels the qual's syntactic scope covers * * 'qualscope' identifies what level of JOIN the qual came from. For a top @@ -349,6 +353,7 @@ static void distribute_qual_to_rels(Query *root, Node *clause, bool ispusheddown, bool isouterjoin, + bool isdeduced, Relids qualscope) { RestrictInfo *restrictinfo = makeNode(RestrictInfo); @@ -405,8 +410,17 @@ distribute_qual_to_rels(Query *root, Node *clause, * clause's own reference list. At the time we are called, the * outerjoinset list of each baserel will show exactly those outer * joins that are below the qual in the join tree. + * + * If the qual came from implied-equality deduction, we can evaluate + * the qual at its natural semantic level. + * */ - if (isouterjoin) + if (isdeduced) + { + Assert(sameseti(relids, qualscope)); + can_be_equijoin = true; + } + else if (isouterjoin) { relids = qualscope; can_be_equijoin = false; @@ -461,9 +475,6 @@ distribute_qual_to_rels(Query *root, Node *clause, */ RelOptInfo *rel = build_base_rel(root, lfirsti(relids)); - rel->baserestrictinfo = lappend(rel->baserestrictinfo, - restrictinfo); - /* * Check for a "mergejoinable" clause even though it's not a join * clause. This is so that we can recognize that "a.x = a.y" @@ -474,10 +485,26 @@ distribute_qual_to_rels(Query *root, Node *clause, */ if (can_be_equijoin) check_mergejoinable(restrictinfo); + + /* + * If the clause was deduced from implied equality, check to see + * whether it is redundant with restriction clauses we already have + * for this rel. Note we cannot apply this check to user-written + * clauses, since we haven't found the canonical pathkey sets yet + * while processing user clauses. (NB: no comparable check is done + * in the join-clause case; redundancy will be detected when the + * join clause is moved into a join rel's restriction list.) + */ + if (!isdeduced || + !qual_is_redundant(root, restrictinfo, rel->baserestrictinfo)) + { + /* Add clause to rel's restriction list */ + rel->baserestrictinfo = lappend(rel->baserestrictinfo, + restrictinfo); + } } else if (relids != NIL) { - /* * 'clause' is a join clause, since there is more than one rel in * the relid list. Set additional RestrictInfo fields for @@ -524,9 +551,11 @@ distribute_qual_to_rels(Query *root, Node *clause, * the two sides represent equivalent PathKeyItems for path keys: any * path that is sorted by one side will also be sorted by the other * (as soon as the two rels are joined, that is). Record the key - * equivalence for future use. + * equivalence for future use. (We can skip this for a deduced clause, + * since the keys are already known equivalent in that case.) */ - if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid) + if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid && + !isdeduced) add_equijoined_keys(root, restrictinfo); } @@ -684,18 +713,106 @@ process_implied_equality(Query *root, Node *item1, Node *item2, /* * Note: we mark the qual "pushed down" to ensure that it can never be - * taken for an original JOIN/ON clause. We also claim it is an - * outer- join clause, which it isn't, but that keeps - * distribute_qual_to_rels from examining the outerjoinsets of the - * relevant rels (which are no longer of interest, but could keep the - * qual from being pushed down to where it should be). It'll also - * save a useless call to add_equijoined keys... + * taken for an original JOIN/ON clause. */ distribute_qual_to_rels(root, (Node *) clause, - true, true, + true, false, true, pull_varnos((Node *) clause)); } +/* + * qual_is_redundant + * Detect whether an implied-equality qual that turns out to be a + * restriction clause for a single base relation is redundant with + * already-known restriction clauses for that rel. This occurs with, + * for example, + * SELECT * FROM tab WHERE f1 = f2 AND f2 = f3; + * We need to suppress the redundant condition to avoid computing + * too-small selectivity, not to mention wasting time at execution. + */ +static bool +qual_is_redundant(Query *root, + RestrictInfo *restrictinfo, + List *restrictlist) +{ + List *oldquals; + List *olditem; + Node *newleft; + Node *newright; + List *equalvars; + bool someadded; + + /* + * Set cached pathkeys. NB: it is okay to do this now because this + * routine is only invoked while we are generating implied equalities. + * Therefore, the equi_key_list is already complete and so we can + * correctly determine canonical pathkeys. + */ + cache_mergeclause_pathkeys(root, restrictinfo); + /* If different, say "not redundant" (should never happen) */ + if (restrictinfo->left_pathkey != restrictinfo->right_pathkey) + return false; + /* + * Scan existing quals to find those referencing same pathkeys. + * Usually there will be few, if any, so build a list of just the + * interesting ones. + */ + oldquals = NIL; + foreach(olditem, restrictlist) + { + RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem); + + if (oldrinfo->mergejoinoperator != InvalidOid) + { + cache_mergeclause_pathkeys(root, oldrinfo); + if (restrictinfo->left_pathkey == oldrinfo->left_pathkey && + restrictinfo->right_pathkey == oldrinfo->right_pathkey) + oldquals = lcons(oldrinfo, oldquals); + } + } + if (oldquals == NIL) + return false; + /* + * Now, we want to develop a list of Vars that are known equal to the + * left side of the new qual. We traverse the old-quals list repeatedly + * to transitively expand the Vars list. If at any point we find we + * can reach the right-side Var of the new qual, we are done. We give + * up when we can't expand the equalvars list any more. + */ + newleft = (Node *) get_leftop(restrictinfo->clause); + newright = (Node *) get_rightop(restrictinfo->clause); + equalvars = makeList1(newleft); + do { + someadded = false; + foreach(olditem, oldquals) + { + RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem); + Node *oldleft = (Node *) get_leftop(oldrinfo->clause); + Node *oldright = (Node *) get_rightop(oldrinfo->clause); + Node *newguy = NULL; + + if (member(oldleft, equalvars)) + newguy = oldright; + else if (member(oldright, equalvars)) + newguy = oldleft; + else + continue; + if (equal(newguy, newright)) + return true; /* we proved new clause is redundant */ + equalvars = lcons(newguy, equalvars); + someadded = true; + /* + * Remove this qual from list, since we don't need it anymore. + * Note this doesn't break the foreach() loop, since lremove + * doesn't touch the next-link of the removed cons cell. + */ + oldquals = lremove(oldrinfo, oldquals); + } + } while (someadded); + + return false; /* it's not redundant */ +} + /***************************************************************************** * diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a2fa8832058..48e41686e9b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.108 2001/06/05 05:26:04 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.109 2001/10/18 16:11:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,6 +17,9 @@ #include "catalog/pg_type.h" #include "nodes/makefuncs.h" +#ifdef OPTIMIZER_DEBUG +#include "nodes/print.h" +#endif #include "optimizer/clauses.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 86d923116a4..3b85f321dfb 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.33 2001/05/20 20:28:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.34 2001/10/18 16:11:42 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,6 +17,7 @@ #include "optimizer/cost.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" +#include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" @@ -24,9 +25,10 @@ static RelOptInfo *make_base_rel(Query *root, int relid); static List *new_join_tlist(List *tlist, int first_resdomno); -static List *build_joinrel_restrictlist(RelOptInfo *joinrel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel); +static List *build_joinrel_restrictlist(Query *root, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); static void build_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); @@ -274,7 +276,8 @@ build_join_rel(Query *root, * particular pair of component relations. */ if (restrictlist_ptr) - *restrictlist_ptr = build_joinrel_restrictlist(joinrel, + *restrictlist_ptr = build_joinrel_restrictlist(root, + joinrel, outer_rel, inner_rel); return joinrel; @@ -326,7 +329,10 @@ build_join_rel(Query *root, * caller might or might not need the restrictlist, but I need it * anyway for set_joinrel_size_estimates().) */ - restrictlist = build_joinrel_restrictlist(joinrel, outer_rel, inner_rel); + restrictlist = build_joinrel_restrictlist(root, + joinrel, + outer_rel, + inner_rel); if (restrictlist_ptr) *restrictlist_ptr = restrictlist; build_joinrel_joinlist(joinrel, outer_rel, inner_rel); @@ -407,6 +413,11 @@ new_join_tlist(List *tlist, * join paths made from this pair of sub-relations. (It will not need to * be considered further up the join tree.) * + * When building a restriction list, we eliminate redundant clauses. + * We don't try to do that for join clause lists, since the join clauses + * aren't really doing anything, just waiting to become part of higher + * levels' restriction lists. + * * 'joinrel' is a join relation node * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined * to form joinrel. @@ -421,19 +432,80 @@ new_join_tlist(List *tlist, * the original nodes in the lists made for the join relation. */ static List * -build_joinrel_restrictlist(RelOptInfo *joinrel, +build_joinrel_restrictlist(Query *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel) { + List *result = NIL; + List *rlist; + List *item; /* - * We must eliminate duplicates, since we will see the same clauses - * arriving from both input relations... + * Collect all the clauses that syntactically belong at this level. */ - return set_union(subbuild_joinrel_restrictlist(joinrel, - outer_rel->joininfo), - subbuild_joinrel_restrictlist(joinrel, - inner_rel->joininfo)); + rlist = nconc(subbuild_joinrel_restrictlist(joinrel, + outer_rel->joininfo), + subbuild_joinrel_restrictlist(joinrel, + inner_rel->joininfo)); + + /* + * Eliminate duplicate and redundant clauses. + * + * We must eliminate duplicates, since we will see many of the same + * clauses arriving from both input relations. Also, if a clause is + * a mergejoinable clause, it's possible that it is redundant with + * previous clauses (see optimizer/README for discussion). We detect + * that case and omit the redundant clause from the result list. + * + * We can detect redundant mergejoinable clauses very cheaply by using + * their left and right pathkeys, which uniquely identify the sets of + * equijoined variables in question. All the members of a pathkey set + * that are in the left relation have already been forced to be equal; + * likewise for those in the right relation. So, we need to have only + * one clause that checks equality between any set member on the left and + * any member on the right; by transitivity, all the rest are then equal. + */ + foreach(item, rlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(item); + + /* eliminate duplicates */ + if (member(rinfo, result)) + continue; + + /* check for redundant merge clauses */ + if (rinfo->mergejoinoperator != InvalidOid) + { + bool redundant = false; + List *olditem; + + cache_mergeclause_pathkeys(root, rinfo); + + foreach(olditem, result) + { + RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem); + + if (oldrinfo->mergejoinoperator != InvalidOid && + rinfo->left_pathkey == oldrinfo->left_pathkey && + rinfo->right_pathkey == oldrinfo->right_pathkey) + { + redundant = true; + break; + } + } + + if (redundant) + continue; + } + + /* otherwise, add it to result list */ + result = lappend(result, rinfo); + } + + freeList(rlist); + + return result; } static void @@ -458,7 +530,6 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel, if (is_subseti(joininfo->unjoined_relids, joinrel->relids)) { - /* * Clauses in this JoinInfo list become restriction clauses * for the joinrel, since they refer to no outside rels. @@ -471,7 +542,6 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel, } else { - /* * These clauses are still join clauses at this level, so we * ignore them in this routine. @@ -497,7 +567,6 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel, joinrel->relids); if (new_unjoined_relids == NIL) { - /* * Clauses in this JoinInfo list become restriction clauses * for the joinrel, since they refer to no outside rels. So we @@ -506,7 +575,6 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel, } else { - /* * These clauses are still join clauses at this level, so find * or make the appropriate JoinInfo item for the joinrel, and diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 08fe2ccd776..1dcb2cc2730 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: paths.h,v 1.55 2001/08/21 16:36:06 tgl Exp $ + * $Id: paths.h,v 1.56 2001/10/18 16:11:42 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,6 +31,10 @@ extern int geqo_rels; extern RelOptInfo *make_one_rel(Query *root); extern RelOptInfo *make_fromexpr_rel(Query *root, FromExpr *from); +#ifdef OPTIMIZER_DEBUG +extern void debug_print_rel(Query *root, RelOptInfo *rel); +#endif + /* * indxpath.c * routines to generate index paths @@ -111,6 +115,8 @@ extern List *build_join_pathkeys(Query *root, List *outer_pathkeys); extern List *make_pathkeys_for_sortclauses(List *sortclauses, List *tlist); +extern void cache_mergeclause_pathkeys(Query *root, + RestrictInfo *restrictinfo); extern List *find_mergeclauses_for_pathkeys(Query *root, List *pathkeys, List *restrictinfos);