mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-24 00:03:18 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			334 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			334 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-------------------------------------------------------------------------
 | |
|  *
 | |
|  * _int_selfuncs.c
 | |
|  *	  Functions for selectivity estimation of intarray operators
 | |
|  *
 | |
|  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 | |
|  * Portions Copyright (c) 1994, Regents of the University of California
 | |
|  *
 | |
|  *
 | |
|  * IDENTIFICATION
 | |
|  *	  contrib/intarray/_int_selfuncs.c
 | |
|  *
 | |
|  *-------------------------------------------------------------------------
 | |
|  */
 | |
| #include "postgres.h"
 | |
| 
 | |
| #include "_int.h"
 | |
| #include "access/htup_details.h"
 | |
| #include "catalog/pg_operator.h"
 | |
| #include "catalog/pg_statistic.h"
 | |
| #include "catalog/pg_type.h"
 | |
| #include "miscadmin.h"
 | |
| #include "utils/builtins.h"
 | |
| #include "utils/lsyscache.h"
 | |
| #include "utils/selfuncs.h"
 | |
| #include "utils/syscache.h"
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(_int_overlap_sel);
 | |
| PG_FUNCTION_INFO_V1(_int_contains_sel);
 | |
| PG_FUNCTION_INFO_V1(_int_contained_sel);
 | |
| PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
 | |
| PG_FUNCTION_INFO_V1(_int_contains_joinsel);
 | |
| PG_FUNCTION_INFO_V1(_int_contained_joinsel);
 | |
| PG_FUNCTION_INFO_V1(_int_matchsel);
 | |
| 
 | |
| 
 | |
| static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
 | |
| 									   int nmcelems, float4 minfreq);
 | |
| static int	compare_val_int4(const void *a, const void *b);
 | |
| 
 | |
| /*
 | |
|  * Wrappers around the default array selectivity estimation functions.
 | |
|  *
 | |
|  * The default array selectivity operators for the @>, && and @< operators
 | |
|  * work fine for integer arrays. However, if we tried to just use arraycontsel
 | |
|  * and arraycontjoinsel directly as the cost estimator functions for our
 | |
|  * operators, they would not work as intended, because they look at the
 | |
|  * operator's OID. Our operators behave exactly like the built-in anyarray
 | |
|  * versions, but we must tell the cost estimator functions which built-in
 | |
|  * operators they correspond to. These wrappers just replace the operator
 | |
|  * OID with the corresponding built-in operator's OID, and call the built-in
 | |
|  * function.
 | |
|  */
 | |
| 
 | |
| Datum
 | |
| _int_overlap_sel(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
 | |
| 										PG_GETARG_DATUM(0),
 | |
| 										ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
 | |
| 										PG_GETARG_DATUM(2),
 | |
| 										PG_GETARG_DATUM(3)));
 | |
| }
 | |
| 
 | |
| Datum
 | |
| _int_contains_sel(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
 | |
| 										PG_GETARG_DATUM(0),
 | |
| 										ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
 | |
| 										PG_GETARG_DATUM(2),
 | |
| 										PG_GETARG_DATUM(3)));
 | |
| }
 | |
| 
 | |
| Datum
 | |
| _int_contained_sel(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
 | |
| 										PG_GETARG_DATUM(0),
 | |
| 										ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
 | |
| 										PG_GETARG_DATUM(2),
 | |
| 										PG_GETARG_DATUM(3)));
 | |
| }
 | |
| 
 | |
| Datum
 | |
| _int_overlap_joinsel(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
 | |
| 										PG_GETARG_DATUM(0),
 | |
| 										ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
 | |
| 										PG_GETARG_DATUM(2),
 | |
| 										PG_GETARG_DATUM(3),
 | |
| 										PG_GETARG_DATUM(4)));
 | |
| }
 | |
| 
 | |
| Datum
 | |
| _int_contains_joinsel(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
 | |
| 										PG_GETARG_DATUM(0),
 | |
| 										ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
 | |
| 										PG_GETARG_DATUM(2),
 | |
| 										PG_GETARG_DATUM(3),
 | |
| 										PG_GETARG_DATUM(4)));
 | |
| }
 | |
| 
 | |
| Datum
 | |
| _int_contained_joinsel(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
 | |
| 										PG_GETARG_DATUM(0),
 | |
| 										ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
 | |
| 										PG_GETARG_DATUM(2),
 | |
| 										PG_GETARG_DATUM(3),
 | |
| 										PG_GETARG_DATUM(4)));
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
 | |
|  */
 | |
| Datum
 | |
| _int_matchsel(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 | |
| 
 | |
| 	List	   *args = (List *) PG_GETARG_POINTER(2);
 | |
| 	int			varRelid = PG_GETARG_INT32(3);
 | |
| 	VariableStatData vardata;
 | |
| 	Node	   *other;
 | |
| 	bool		varonleft;
 | |
| 	Selectivity selec;
 | |
| 	QUERYTYPE  *query;
 | |
| 	Datum	   *mcelems = NULL;
 | |
| 	float4	   *mcefreqs = NULL;
 | |
| 	int			nmcelems = 0;
 | |
| 	float4		minfreq = 0.0;
 | |
| 	float4		nullfrac = 0.0;
 | |
| 	AttStatsSlot sslot;
 | |
| 
 | |
| 	/*
 | |
| 	 * If expression is not "variable @@ something" or "something @@ variable"
 | |
| 	 * then punt and return a default estimate.
 | |
| 	 */
 | |
| 	if (!get_restriction_variable(root, args, varRelid,
 | |
| 								  &vardata, &other, &varonleft))
 | |
| 		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
 | |
| 
 | |
| 	/*
 | |
| 	 * Variable should be int[]. We don't support cases where variable is
 | |
| 	 * query_int.
 | |
| 	 */
 | |
| 	if (vardata.vartype != INT4ARRAYOID)
 | |
| 		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
 | |
| 
 | |
| 	/*
 | |
| 	 * Can't do anything useful if the something is not a constant, either.
 | |
| 	 */
 | |
| 	if (!IsA(other, Const))
 | |
| 	{
 | |
| 		ReleaseVariableStats(vardata);
 | |
| 		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * The "@@" operator is strict, so we can cope with NULL right away.
 | |
| 	 */
 | |
| 	if (((Const *) other)->constisnull)
 | |
| 	{
 | |
| 		ReleaseVariableStats(vardata);
 | |
| 		PG_RETURN_FLOAT8(0.0);
 | |
| 	}
 | |
| 
 | |
| 	/* The caller made sure the const is a query, so get it now */
 | |
| 	query = DatumGetQueryTypeP(((Const *) other)->constvalue);
 | |
| 
 | |
| 	/* Empty query matches nothing */
 | |
| 	if (query->size == 0)
 | |
| 	{
 | |
| 		ReleaseVariableStats(vardata);
 | |
| 		return (Selectivity) 0.0;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * Get the statistics for the intarray column.
 | |
| 	 *
 | |
| 	 * We're interested in the Most-Common-Elements list, and the NULL
 | |
| 	 * fraction.
 | |
| 	 */
 | |
| 	if (HeapTupleIsValid(vardata.statsTuple))
 | |
| 	{
 | |
| 		Form_pg_statistic stats;
 | |
| 
 | |
| 		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
 | |
| 		nullfrac = stats->stanullfrac;
 | |
| 
 | |
| 		/*
 | |
| 		 * For an int4 array, the default array type analyze function will
 | |
| 		 * collect a Most Common Elements list, which is an array of int4s.
 | |
| 		 */
 | |
| 		if (get_attstatsslot(&sslot, vardata.statsTuple,
 | |
| 							 STATISTIC_KIND_MCELEM, InvalidOid,
 | |
| 							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
 | |
| 		{
 | |
| 			Assert(sslot.valuetype == INT4OID);
 | |
| 
 | |
| 			/*
 | |
| 			 * There should be three more Numbers than Values, because the
 | |
| 			 * last three (for intarray) cells are taken for minimal, maximal
 | |
| 			 * and nulls frequency. Punt if not.
 | |
| 			 */
 | |
| 			if (sslot.nnumbers == sslot.nvalues + 3)
 | |
| 			{
 | |
| 				/* Grab the lowest frequency. */
 | |
| 				minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
 | |
| 
 | |
| 				mcelems = sslot.values;
 | |
| 				mcefreqs = sslot.numbers;
 | |
| 				nmcelems = sslot.nvalues;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	else
 | |
| 		memset(&sslot, 0, sizeof(sslot));
 | |
| 
 | |
| 	/* Process the logical expression in the query, using the stats */
 | |
| 	selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
 | |
| 								mcelems, mcefreqs, nmcelems, minfreq);
 | |
| 
 | |
| 	/* MCE stats count only non-null rows, so adjust for null rows. */
 | |
| 	selec *= (1.0 - nullfrac);
 | |
| 
 | |
| 	free_attstatsslot(&sslot);
 | |
| 	ReleaseVariableStats(vardata);
 | |
| 
 | |
| 	CLAMP_PROBABILITY(selec);
 | |
| 
 | |
| 	PG_RETURN_FLOAT8((float8) selec);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Estimate selectivity of single intquery operator
 | |
|  */
 | |
| static Selectivity
 | |
| int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
 | |
| 					int nmcelems, float4 minfreq)
 | |
| {
 | |
| 	Selectivity selec;
 | |
| 
 | |
| 	/* since this function recurses, it could be driven to stack overflow */
 | |
| 	check_stack_depth();
 | |
| 
 | |
| 	if (item->type == VAL)
 | |
| 	{
 | |
| 		Datum	   *searchres;
 | |
| 
 | |
| 		if (mcelems == NULL)
 | |
| 			return (Selectivity) DEFAULT_EQ_SEL;
 | |
| 
 | |
| 		searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
 | |
| 									  sizeof(Datum), compare_val_int4);
 | |
| 		if (searchres)
 | |
| 		{
 | |
| 			/*
 | |
| 			 * The element is in MCELEM.  Return precise selectivity (or at
 | |
| 			 * least as precise as ANALYZE could find out).
 | |
| 			 */
 | |
| 			selec = mcefreqs[searchres - mcelems];
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 			/*
 | |
| 			 * The element is not in MCELEM.  Punt, but assume that the
 | |
| 			 * selectivity cannot be more than minfreq / 2.
 | |
| 			 */
 | |
| 			selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
 | |
| 		}
 | |
| 	}
 | |
| 	else if (item->type == OPR)
 | |
| 	{
 | |
| 		/* Current query node is an operator */
 | |
| 		Selectivity s1,
 | |
| 					s2;
 | |
| 
 | |
| 		s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
 | |
| 								 minfreq);
 | |
| 		switch (item->val)
 | |
| 		{
 | |
| 			case (int32) '!':
 | |
| 				selec = 1.0 - s1;
 | |
| 				break;
 | |
| 
 | |
| 			case (int32) '&':
 | |
| 				s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
 | |
| 										 nmcelems, minfreq);
 | |
| 				selec = s1 * s2;
 | |
| 				break;
 | |
| 
 | |
| 			case (int32) '|':
 | |
| 				s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
 | |
| 										 nmcelems, minfreq);
 | |
| 				selec = s1 + s2 - s1 * s2;
 | |
| 				break;
 | |
| 
 | |
| 			default:
 | |
| 				elog(ERROR, "unrecognized operator: %d", item->val);
 | |
| 				selec = 0;		/* keep compiler quiet */
 | |
| 				break;
 | |
| 		}
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		elog(ERROR, "unrecognized int query item type: %u", item->type);
 | |
| 		selec = 0;				/* keep compiler quiet */
 | |
| 	}
 | |
| 
 | |
| 	/* Clamp intermediate results to stay sane despite roundoff error */
 | |
| 	CLAMP_PROBABILITY(selec);
 | |
| 
 | |
| 	return selec;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Comparison function for binary search in mcelem array.
 | |
|  */
 | |
| static int
 | |
| compare_val_int4(const void *a, const void *b)
 | |
| {
 | |
| 	int32		key = *(int32 *) a;
 | |
| 	const Datum *t = (const Datum *) b;
 | |
| 
 | |
| 	return key - DatumGetInt32(*t);
 | |
| }
 |