From 88b0898fe35a5a0325fca21bd4f3ed6dffb364c1 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Tue, 8 Dec 2020 19:39:24 +0000 Subject: [PATCH] Improve estimation of OR clauses using multiple extended statistics. When estimating an OR clause using multiple extended statistics objects, treat the estimates for each set of clauses for each statistics object as independent of one another. The overlap estimates produced for each statistics object do not apply to clauses covered by other statistics objects. Dean Rasheed, reviewed by Tomas Vondra. Discussion: https://postgr.es/m/CAEZATCW=J65GUFm50RcPv-iASnS2mTXQbr=CfBvWRVhFLJ_fWA@mail.gmail.com --- src/backend/statistics/extended_stats.c | 25 +++++++++++++++++-------- src/test/regress/expected/stats_ext.out | 2 +- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 8d3cd091ada..555bc325619 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1356,17 +1356,19 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli if (is_or) { bool *or_matches = NULL; - Selectivity simple_or_sel = 0.0; + Selectivity simple_or_sel = 0.0, + stat_sel = 0.0; MCVList *mcv_list; /* Load the MCV list stored in the statistics object */ mcv_list = statext_mcv_load(stat->statOid); /* - * Compute the selectivity of the ORed list of clauses by - * estimating each in turn and combining them using the formula - * P(A OR B) = P(A) + P(B) - P(A AND B). This allows us to use - * the multivariate MCV stats to better estimate each term. + * Compute the selectivity of the ORed list of clauses covered by + * this statistics object by estimating each in turn and combining + * them using the formula P(A OR B) = P(A) + P(B) - P(A AND B). + * This allows us to use the multivariate MCV stats to better + * estimate the individual terms and their overlap. * * Each time we iterate this formula, the clause "A" above is * equal to all the clauses processed so far, combined with "OR". @@ -1437,12 +1439,19 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli overlap_basesel, mcv_totalsel); - /* Factor these into the overall result */ - sel += clause_sel - overlap_sel; - CLAMP_PROBABILITY(sel); + /* Factor these into the result for this statistics object */ + stat_sel += clause_sel - overlap_sel; + CLAMP_PROBABILITY(stat_sel); listidx++; } + + /* + * Factor the result for this statistics object into the overall + * result. We treat the results from each separate statistics + * object as independent of one another. + */ + sel = sel + stat_sel - sel * stat_sel; } else /* Implicitly-ANDed list of clauses */ { diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index dbbe9844b2e..6e1c4f3edd1 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -1706,7 +1706,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 A SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0'); estimated | actual -----------+-------- - 1714 | 1572 + 1571 | 1572 (1 row) DROP TABLE mcv_lists_multi;