mirror of
https://github.com/postgres/postgres.git
synced 2025-06-01 00:01:20 -04:00
Improve estimation of OR clauses using multiple extended statistics.
When estimating an OR clause using multiple extended statistics objects, treat the estimate for the set of clauses covered by each statistics object as independent of the estimates for the others. The overlap estimates produced for one statistics object do not apply to clauses covered by other statistics objects. Dean Rasheed, reviewed by Tomas Vondra. Discussion: https://postgr.es/m/CAEZATCW=J65GUFm50RcPv-iASnS2mTXQbr=CfBvWRVhFLJ_fWA@mail.gmail.com
This commit is contained in:
parent
f2a69b352d
commit
88b0898fe3
@@ -1356,17 +1356,19 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
|
||||
if (is_or)
|
||||
{
|
||||
bool *or_matches = NULL;
|
||||
Selectivity simple_or_sel = 0.0;
|
||||
Selectivity simple_or_sel = 0.0,
|
||||
stat_sel = 0.0;
|
||||
MCVList *mcv_list;
|
||||
|
||||
/* Load the MCV list stored in the statistics object */
|
||||
mcv_list = statext_mcv_load(stat->statOid);
|
||||
|
||||
/*
|
||||
* Compute the selectivity of the ORed list of clauses by
|
||||
* estimating each in turn and combining them using the formula
|
||||
* P(A OR B) = P(A) + P(B) - P(A AND B). This allows us to use
|
||||
* the multivariate MCV stats to better estimate each term.
|
||||
* Compute the selectivity of the ORed list of clauses covered by
|
||||
* this statistics object by estimating each in turn and combining
|
||||
* them using the formula P(A OR B) = P(A) + P(B) - P(A AND B).
|
||||
* This allows us to use the multivariate MCV stats to better
|
||||
* estimate the individual terms and their overlap.
|
||||
*
|
||||
* Each time we iterate this formula, the clause "A" above is
|
||||
* equal to all the clauses processed so far, combined with "OR".
|
||||
@@ -1437,12 +1439,19 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
|
||||
overlap_basesel,
|
||||
mcv_totalsel);
|
||||
|
||||
/* Factor these into the overall result */
|
||||
sel += clause_sel - overlap_sel;
|
||||
CLAMP_PROBABILITY(sel);
|
||||
/* Factor these into the result for this statistics object */
|
||||
stat_sel += clause_sel - overlap_sel;
|
||||
CLAMP_PROBABILITY(stat_sel);
|
||||
|
||||
listidx++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Factor the result for this statistics object into the overall
|
||||
* result. We treat the results from each separate statistics
|
||||
* object as independent of one another.
|
||||
*/
|
||||
sel = sel + stat_sel - sel * stat_sel;
|
||||
}
|
||||
else /* Implicitly-ANDed list of clauses */
|
||||
{
|
||||
|
@@ -1706,7 +1706,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 A
|
||||
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
|
||||
estimated | actual
|
||||
-----------+--------
|
||||
1714 | 1572
|
||||
1571 | 1572
|
||||
(1 row)
|
||||
|
||||
DROP TABLE mcv_lists_multi;
|
||||
|
Loading…
x
Reference in New Issue
Block a user