/*-------------------------------------------------------------------------
 *
 * partbounds.c
 *		Support routines for manipulating partition bounds
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *		  src/backend/partitioning/partbounds.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/relation.h"
#include "access/table.h"
#include "access/tableam.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "commands/tablecmds.h"
#include "common/hashfn.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/pathnodes.h"
#include "parser/parse_coerce.h"
#include "partitioning/partbounds.h"
#include "partitioning/partdesc.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"

/*
 * When qsort'ing partition bounds after reading from the catalog, each bound
 * is represented with one of the following structs.
 */

/* One bound of a hash partition */
typedef struct PartitionHashBound
{
	int			modulus;
	int			remainder;
	int			index;
} PartitionHashBound;

/* One value coming from some (index'th) list partition */
typedef struct PartitionListValue
{
	int			index;
	Datum		value;
} PartitionListValue;

/* One bound of a range partition */
typedef struct PartitionRangeBound
{
	int			index;
	Datum	   *datums;			/* range bound datums */
	PartitionRangeDatumKind *kind;	/* the kind of each datum */
	bool		lower;			/* this is the lower (vs upper) bound */
} PartitionRangeBound;

/*
 * Mapping from partitions of a joining relation to partitions of a join
 * relation being computed (a.k.a merged partitions)
 */
typedef struct PartitionMap
{
	int			nparts;			/* number of partitions */
	int		   *merged_indexes; /* indexes of merged partitions */
	bool	   *merged;			/* flags to indicate whether partitions are
								 * merged with non-dummy partitions */
	bool		did_remapping;	/* did we re-map partitions? */
	int		   *old_indexes;	/* old indexes of merged partitions if
								 * did_remapping */
} PartitionMap;

/* Macro for comparing two range bounds */
#define compare_range_bounds(partnatts, partsupfunc, partcollations, \
							 bound1, bound2) \
	(partition_rbound_cmp(partnatts, partsupfunc, partcollations, \
						  (bound1)->datums, (bound1)->kind, (bound1)->lower, \
						  bound2))

static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
											void *arg);
static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
										void *arg);
static PartitionBoundInfo create_hash_bounds(PartitionBoundSpec **boundspecs,
											 int nparts, PartitionKey key, int **mapping);
static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs,
											 int nparts, PartitionKey key, int **mapping);
static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs,
											  int nparts, PartitionKey key, int **mapping);
static PartitionBoundInfo merge_list_bounds(FmgrInfo *partsupfunc,
											Oid *partcollation,
											RelOptInfo *outer_rel,
											RelOptInfo *inner_rel,
											JoinType jointype,
											List **outer_parts,
											List **inner_parts);
static PartitionBoundInfo merge_range_bounds(int partnatts,
											 FmgrInfo *partsupfuncs,
											 Oid *partcollations,
											 RelOptInfo *outer_rel,
											 RelOptInfo *inner_rel,
											 JoinType jointype,
											 List **outer_parts,
											 List **inner_parts);
static void init_partition_map(RelOptInfo *rel, PartitionMap *map);
static void free_partition_map(PartitionMap *map);
static bool is_dummy_partition(RelOptInfo *rel, int part_index);
static int	merge_matching_partitions(PartitionMap *outer_map,
									  PartitionMap *inner_map,
									  int outer_index,
									  int inner_index,
									  int *next_index);
static int	process_outer_partition(PartitionMap *outer_map,
									PartitionMap *inner_map,
									bool outer_has_default,
									bool inner_has_default,
									int outer_index,
									int inner_default,
									JoinType jointype,
									int *next_index,
									int *default_index);
static int	process_inner_partition(PartitionMap *outer_map,
									PartitionMap *inner_map,
									bool outer_has_default,
									bool inner_has_default,
									int inner_index,
									int outer_default,
									JoinType jointype,
									int *next_index,
									int *default_index);
static void merge_null_partitions(PartitionMap *outer_map,
								  PartitionMap *inner_map,
								  bool outer_has_null,
								  bool inner_has_null,
								  int outer_null,
								  int inner_null,
								  JoinType jointype,
								  int *next_index,
								  int *null_index);
static void merge_default_partitions(PartitionMap *outer_map,
									 PartitionMap *inner_map,
									 bool outer_has_default,
									 bool inner_has_default,
									 int outer_default,
									 int inner_default,
									 JoinType jointype,
									 int *next_index,
									 int *default_index);
static int	merge_partition_with_dummy(PartitionMap *map, int index,
									   int *next_index);
static void fix_merged_indexes(PartitionMap *outer_map,
							   PartitionMap *inner_map,
							   int nmerged, List *merged_indexes);
static void generate_matching_part_pairs(RelOptInfo *outer_rel,
										 RelOptInfo *inner_rel,
										 PartitionMap *outer_map,
										 PartitionMap *inner_map,
										 int nmerged,
										 List **outer_parts,
										 List **inner_parts);
static PartitionBoundInfo build_merged_partition_bounds(char strategy,
														List *merged_datums,
														List *merged_kinds,
														List *merged_indexes,
														int null_index,
														int default_index);
static int	get_range_partition(RelOptInfo *rel,
								PartitionBoundInfo bi,
								int *lb_pos,
								PartitionRangeBound *lb,
								PartitionRangeBound *ub);
static int	get_range_partition_internal(PartitionBoundInfo bi,
										 int *lb_pos,
										 PartitionRangeBound *lb,
										 PartitionRangeBound *ub);
static bool compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
									 Oid *partcollations,
									 PartitionRangeBound *outer_lb,
									 PartitionRangeBound *outer_ub,
									 PartitionRangeBound *inner_lb,
									 PartitionRangeBound *inner_ub,
									 int *lb_cmpval, int *ub_cmpval);
static void get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
									Oid *partcollations, JoinType jointype,
									PartitionRangeBound *outer_lb,
									PartitionRangeBound *outer_ub,
									PartitionRangeBound *inner_lb,
									PartitionRangeBound *inner_ub,
									int lb_cmpval, int ub_cmpval,
									PartitionRangeBound *merged_lb,
									PartitionRangeBound *merged_ub);
static void add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
									Oid *partcollations,
									PartitionRangeBound *merged_lb,
									PartitionRangeBound *merged_ub,
									int merged_index,
									List **merged_datums,
									List **merged_kinds,
									List **merged_indexes);
static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index,
													   List *datums, bool lower);
static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
								  int remainder2);
static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
								  Oid *partcollation, Datum *datums1,
								  PartitionRangeDatumKind *kind1, bool lower1,
								  PartitionRangeBound *b2);
static int	partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
									Oid *partcollation,
									PartitionBoundInfo boundinfo,
									PartitionRangeBound *probe, int32 *cmpval);
static Expr *make_partition_op_expr(PartitionKey key, int keynum,
									uint16 strategy, Expr *arg1, Expr *arg2);
static Oid	get_partition_operator(PartitionKey key, int col,
								   StrategyNumber strategy, bool *need_relabel);
static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
								bool for_default);
static void get_range_key_properties(PartitionKey key, int keynum,
									 PartitionRangeDatum *ldatum,
									 PartitionRangeDatum *udatum,
									 ListCell **partexprs_item,
									 Expr **keyCol,
									 Const **lower_val, Const **upper_val);
static List *get_range_nulltest(PartitionKey key);

/*
 * get_qual_from_partbound
 *		Given a parser node for partition bound, return the list of executable
 *		expressions as partition constraint
 */
List *
get_qual_from_partbound(Relation parent, PartitionBoundSpec *spec)
{
	PartitionKey key = RelationGetPartitionKey(parent);
	List	   *my_qual = NIL;

	Assert(key != NULL);

	switch (key->strategy)
	{
		case PARTITION_STRATEGY_HASH:
			Assert(spec->strategy == PARTITION_STRATEGY_HASH);
			my_qual = get_qual_for_hash(parent, spec);
			break;

		case PARTITION_STRATEGY_LIST:
			Assert(spec->strategy == PARTITION_STRATEGY_LIST);
			my_qual = get_qual_for_list(parent, spec);
			break;

		case PARTITION_STRATEGY_RANGE:
			Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
			my_qual = get_qual_for_range(parent, spec, false);
			break;
	}

	return my_qual;
}

/*
 * partition_bounds_create
 *		Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
 *		nodes
 *
 * This function creates a PartitionBoundInfo and fills the values of its
 * various members based on the input list.  Importantly, 'datums' array will
 * contain Datum representation of individual bounds (possibly after
 * de-duplication as in case of range bounds), sorted in a canonical order
 * defined by qsort_partition_* functions of respective partitioning methods.
 * 'indexes' array will contain as many elements as there are bounds (specific
 * exceptions to this rule are listed in the function body), which represent
 * the 0-based canonical positions of partitions.
 *
 * Upon return from this function, *mapping is set to an array of
 * list_length(boundspecs) elements, each of which maps the original index of
 * a partition to its canonical index.
 *
 * Note: The objects returned by this function are wholly allocated in the
 * current memory context.
 */
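/*
 * Illustrative example (added for exposition; not from the upstream sources,
 * and the partition names are hypothetical): if the boundspecs for a
 * range-partitioned table arrive in the order p1 FOR VALUES FROM (100) TO
 * (200), p0 FOR VALUES FROM (0) TO (10), the canonical (sorted) order is
 * p0, p1, so the function returns *mapping = {1, 0}: original position 0
 * (p1) maps to canonical index 1, and original position 1 (p0) maps to
 * canonical index 0.
 */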
PartitionBoundInfo
partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts,
						PartitionKey key, int **mapping)
{
	int			i;

	Assert(nparts > 0);

	/*
	 * For each partitioning method, we first convert the partition bounds
	 * from their parser node representation to the internal representation,
	 * along with any additional preprocessing (such as de-duplicating range
	 * bounds).  Resulting bound datums are then added to the 'datums' array
	 * in PartitionBoundInfo.  For each datum added, an integer indicating the
	 * canonical partition index is added to the 'indexes' array.
	 *
	 * For each bound, we remember its partition's position (0-based) in the
	 * original list to later map it to the canonical index.
	 */

	/*
	 * Initialize mapping array with invalid values, this is filled within
	 * each sub-routine below depending on the bound type.
	 */
	*mapping = (int *) palloc(sizeof(int) * nparts);
	for (i = 0; i < nparts; i++)
		(*mapping)[i] = -1;

	switch (key->strategy)
	{
		case PARTITION_STRATEGY_HASH:
			return create_hash_bounds(boundspecs, nparts, key, mapping);

		case PARTITION_STRATEGY_LIST:
			return create_list_bounds(boundspecs, nparts, key, mapping);

		case PARTITION_STRATEGY_RANGE:
			return create_range_bounds(boundspecs, nparts, key, mapping);
	}

	Assert(false);
	return NULL;				/* keep compiler quiet */
}

/*
 * create_hash_bounds
 *		Create a PartitionBoundInfo for a hash partitioned table
 */
static PartitionBoundInfo
create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
				   PartitionKey key, int **mapping)
{
	PartitionBoundInfo boundinfo;
	PartitionHashBound *hbounds;
	int			i;
	int			greatest_modulus;
	Datum	   *boundDatums;

	boundinfo = (PartitionBoundInfoData *)
		palloc0(sizeof(PartitionBoundInfoData));
	boundinfo->strategy = key->strategy;
	/* No special hash partitions. */
	boundinfo->null_index = -1;
	boundinfo->default_index = -1;

	hbounds = (PartitionHashBound *)
		palloc(nparts * sizeof(PartitionHashBound));

	/* Convert from node to the internal representation */
	for (i = 0; i < nparts; i++)
	{
		PartitionBoundSpec *spec = boundspecs[i];

		if (spec->strategy != PARTITION_STRATEGY_HASH)
			elog(ERROR, "invalid strategy in partition bound spec");

		hbounds[i].modulus = spec->modulus;
		hbounds[i].remainder = spec->remainder;
		hbounds[i].index = i;
	}

	/* Sort all the bounds in ascending order */
	qsort(hbounds, nparts, sizeof(PartitionHashBound),
		  qsort_partition_hbound_cmp);

	/* After sorting, moduli are now stored in ascending order. */
	greatest_modulus = hbounds[nparts - 1].modulus;

	boundinfo->ndatums = nparts;
	boundinfo->datums = (Datum **) palloc0(nparts * sizeof(Datum *));
	boundinfo->kind = NULL;
	boundinfo->interleaved_parts = NULL;
	boundinfo->nindexes = greatest_modulus;
	boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
	for (i = 0; i < greatest_modulus; i++)
		boundinfo->indexes[i] = -1;

	/*
	 * In the loop below, to save from allocating a series of small datum
	 * arrays, here we just allocate a single array and below we'll just
	 * assign a portion of this array per partition.
	 */
	boundDatums = (Datum *) palloc(nparts * 2 * sizeof(Datum));

	/*
	 * For hash partitioning, there are as many datums (modulus and remainder
	 * pairs) as there are partitions.  Indexes are simply values ranging from
	 * 0 to (nparts - 1).
	 */
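	/*
	 * Worked example (illustrative only, not from the upstream sources):
	 * with three partitions whose (modulus, remainder) pairs sort to (2, 0),
	 * (4, 1) and (4, 3), greatest_modulus is 4 and the loop below fills
	 * datums = {{2, 0}, {4, 1}, {4, 3}} and indexes = {0, 1, 0, 2};
	 * remainders 0 and 2 both route to the (2, 0) partition because 2
	 * divides 4.
	 */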
	for (i = 0; i < nparts; i++)
	{
		int			modulus = hbounds[i].modulus;
		int			remainder = hbounds[i].remainder;

		boundinfo->datums[i] = &boundDatums[i * 2];
		boundinfo->datums[i][0] = Int32GetDatum(modulus);
		boundinfo->datums[i][1] = Int32GetDatum(remainder);

		while (remainder < greatest_modulus)
		{
			/* overlap? */
			Assert(boundinfo->indexes[remainder] == -1);
			boundinfo->indexes[remainder] = i;
			remainder += modulus;
		}

		(*mapping)[hbounds[i].index] = i;
	}
	pfree(hbounds);

	return boundinfo;
}

/*
 * get_non_null_list_datum_count
 *		Counts the number of non-null Datums in each partition.
 */
static int
get_non_null_list_datum_count(PartitionBoundSpec **boundspecs, int nparts)
{
	int			i;
	int			count = 0;

	for (i = 0; i < nparts; i++)
	{
		ListCell   *lc;

		foreach(lc, boundspecs[i]->listdatums)
		{
			Const	   *val = lfirst_node(Const, lc);

			if (!val->constisnull)
				count++;
		}
	}

	return count;
}

/*
 * create_list_bounds
 *		Create a PartitionBoundInfo for a list partitioned table
 */
static PartitionBoundInfo
create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
				   PartitionKey key, int **mapping)
{
	PartitionBoundInfo boundinfo;
	PartitionListValue *all_values;
	int			i;
	int			j;
	int			ndatums;
	int			next_index = 0;
	int			default_index = -1;
	int			null_index = -1;
	Datum	   *boundDatums;

	boundinfo = (PartitionBoundInfoData *)
		palloc0(sizeof(PartitionBoundInfoData));
	boundinfo->strategy = key->strategy;
	/* Will be set correctly below. */
	boundinfo->null_index = -1;
	boundinfo->default_index = -1;

	ndatums = get_non_null_list_datum_count(boundspecs, nparts);
	all_values = (PartitionListValue *)
		palloc(ndatums * sizeof(PartitionListValue));

	/* Create a unified list of non-null values across all partitions. */
	for (j = 0, i = 0; i < nparts; i++)
	{
		PartitionBoundSpec *spec = boundspecs[i];
		ListCell   *c;

		if (spec->strategy != PARTITION_STRATEGY_LIST)
			elog(ERROR, "invalid strategy in partition bound spec");

		/*
		 * Note the index of the partition bound spec for the default
		 * partition.  There's no datum to add to the list of non-null datums
		 * for this partition.
		 */
		if (spec->is_default)
		{
			default_index = i;
			continue;
		}

		foreach(c, spec->listdatums)
		{
			Const	   *val = lfirst_node(Const, c);

			if (!val->constisnull)
			{
				all_values[j].index = i;
				all_values[j].value = val->constvalue;
				j++;
			}
			else
			{
				/*
				 * Never put a null into the values array; save the index of
				 * the partition that stores nulls, instead.
				 */
				if (null_index != -1)
					elog(ERROR, "found null more than once");
				null_index = i;
			}
		}
	}

	/* ensure we found a Datum for every slot in the all_values array */
	Assert(j == ndatums);

	qsort_arg(all_values, ndatums, sizeof(PartitionListValue),
			  qsort_partition_list_value_cmp, key);

	boundinfo->ndatums = ndatums;
	boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
	boundinfo->kind = NULL;
	boundinfo->interleaved_parts = NULL;
	boundinfo->nindexes = ndatums;
	boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));

	/*
	 * In the loop below, to save from allocating a series of small datum
	 * arrays, here we just allocate a single array and below we'll just
	 * assign a portion of this array per datum.
	 */
	boundDatums = (Datum *) palloc(ndatums * sizeof(Datum));

	/*
	 * Copy values.  Canonical indexes are values ranging from 0 to (nparts -
	 * 1) assigned to each partition such that all datums of a given partition
	 * receive the same value.  The value for a given partition is the index
	 * of that partition's smallest datum in the all_values[] array.
	 */
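	/*
	 * Worked example (illustrative only, not from the upstream sources):
	 * given p0 FOR VALUES IN (5, 2) and p1 FOR VALUES IN (1, 10), all_values
	 * sorts to 1 (p1), 2 (p0), 5 (p0), 10 (p1), so p1 gets canonical index 0
	 * and p0 gets canonical index 1, yielding indexes = {0, 1, 1, 0} and
	 * *mapping = {1, 0}.
	 */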
	for (i = 0; i < ndatums; i++)
	{
		int			orig_index = all_values[i].index;

		boundinfo->datums[i] = &boundDatums[i];
		boundinfo->datums[i][0] = datumCopy(all_values[i].value,
											key->parttypbyval[0],
											key->parttyplen[0]);

		/* If the old index has no mapping, assign one */
		if ((*mapping)[orig_index] == -1)
			(*mapping)[orig_index] = next_index++;

		boundinfo->indexes[i] = (*mapping)[orig_index];
	}

	pfree(all_values);

	/*
	 * Set the canonical value for null_index, if any.
	 *
	 * It is possible that the null-accepting partition has not been assigned
	 * an index yet, which could happen if such partition accepts only null
	 * and hence not handled in the above loop which only looked at non-null
	 * values.
	 */
	if (null_index != -1)
	{
		Assert(null_index >= 0);
		if ((*mapping)[null_index] == -1)
			(*mapping)[null_index] = next_index++;
		boundinfo->null_index = (*mapping)[null_index];
	}

	/* Set the canonical value for default_index, if any. */
	if (default_index != -1)
	{
		/*
		 * The default partition accepts any value not specified in the lists
		 * of other partitions, hence it should not get mapped index while
		 * assigning those for non-null datums.
		 */
		Assert(default_index >= 0);
		Assert((*mapping)[default_index] == -1);
		(*mapping)[default_index] = next_index++;
		boundinfo->default_index = (*mapping)[default_index];
	}

	/*
	 * Calculate interleaved partitions.  Here we look for partitions which
	 * might be interleaved with other partitions and set a bit in
	 * interleaved_parts for any partitions which may be interleaved with
	 * another partition.
	 */

	/*
	 * There must be multiple partitions to have any interleaved partitions,
	 * otherwise there's nothing to interleave with.
	 */
	if (nparts > 1)
	{
		/*
		 * Short-circuit check to see if only 1 Datum is allowed per
		 * partition.  When this is true there's no need to do the more
		 * expensive checks to look for interleaved values.
		 */
		if (boundinfo->ndatums +
			partition_bound_accepts_nulls(boundinfo) +
			partition_bound_has_default(boundinfo) != nparts)
		{
			int			last_index = -1;

			/*
			 * Since the indexes array is sorted in Datum order, if any
			 * partitions are interleaved then it will show up by the
			 * partition indexes not being in ascending order.  Here we check
			 * for that and record all partitions that are out of order.
			 */
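			/*
			 * For instance (illustrative only, not from the upstream
			 * sources): with p0 FOR VALUES IN (1, 3) and p1 FOR VALUES IN
			 * (2), the sorted indexes array is {0, 1, 0}; the second
			 * occurrence of 0 follows the larger index 1, so p0 is recorded
			 * as interleaved.
			 */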
			for (i = 0; i < boundinfo->nindexes; i++)
			{
				int			index = boundinfo->indexes[i];

				if (index < last_index)
					boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
																  index);

				/*
				 * Otherwise, if the null_index exists in the indexes array,
				 * then the NULL partition must also allow some other Datum,
				 * therefore it's "interleaved".
				 */
				else if (partition_bound_accepts_nulls(boundinfo) &&
						 index == boundinfo->null_index)
					boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
																  index);

				last_index = index;
			}
		}

		/*
		 * The DEFAULT partition is the "catch-all" partition that can contain
		 * anything that does not belong to any other partition.  If there are
		 * any other partitions then the DEFAULT partition must be marked as
		 * interleaved.
		 */
		if (partition_bound_has_default(boundinfo))
			boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts,
														  boundinfo->default_index);
	}

	/* All partitions must now have been assigned canonical indexes. */
	Assert(next_index == nparts);
	return boundinfo;
}

/*
 * create_range_bounds
 *		Create a PartitionBoundInfo for a range partitioned table
 */
static PartitionBoundInfo
create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
					PartitionKey key, int **mapping)
{
	PartitionBoundInfo boundinfo;
	PartitionRangeBound **rbounds = NULL;
	PartitionRangeBound **all_bounds,
			   *prev;
	int			i,
				k,
				partnatts;
	int			ndatums = 0;
	int			default_index = -1;
	int			next_index = 0;
	Datum	   *boundDatums;
	PartitionRangeDatumKind *boundKinds;

	boundinfo = (PartitionBoundInfoData *)
		palloc0(sizeof(PartitionBoundInfoData));
	boundinfo->strategy = key->strategy;
	/* There is no special null-accepting range partition. */
	boundinfo->null_index = -1;
	/* Will be set correctly below. */
	boundinfo->default_index = -1;

	all_bounds = (PartitionRangeBound **)
		palloc0(2 * nparts * sizeof(PartitionRangeBound *));

	/* Create a unified list of range bounds across all the partitions. */
	ndatums = 0;
	for (i = 0; i < nparts; i++)
	{
		PartitionBoundSpec *spec = boundspecs[i];
		PartitionRangeBound *lower,
				   *upper;

		if (spec->strategy != PARTITION_STRATEGY_RANGE)
			elog(ERROR, "invalid strategy in partition bound spec");

		/*
		 * Note the index of the partition bound spec for the default
		 * partition.  There's no datum to add to the all_bounds array for
		 * this partition.
		 */
		if (spec->is_default)
		{
			default_index = i;
			continue;
		}

		lower = make_one_partition_rbound(key, i, spec->lowerdatums, true);
		upper = make_one_partition_rbound(key, i, spec->upperdatums, false);
		all_bounds[ndatums++] = lower;
		all_bounds[ndatums++] = upper;
	}

	Assert(ndatums == nparts * 2 ||
		   (default_index != -1 && ndatums == (nparts - 1) * 2));

	/* Sort all the bounds in ascending order */
	qsort_arg(all_bounds, ndatums,
			  sizeof(PartitionRangeBound *),
			  qsort_partition_rbound_cmp,
			  key);

	/* Save distinct bounds from all_bounds into rbounds. */
	rbounds = (PartitionRangeBound **)
		palloc(ndatums * sizeof(PartitionRangeBound *));
	k = 0;
	prev = NULL;
	for (i = 0; i < ndatums; i++)
	{
		PartitionRangeBound *cur = all_bounds[i];
		bool		is_distinct = false;
		int			j;

		/* Is the current bound distinct from the previous one? */
		for (j = 0; j < key->partnatts; j++)
		{
			Datum		cmpval;

			if (prev == NULL || cur->kind[j] != prev->kind[j])
			{
				is_distinct = true;
				break;
			}

			/*
			 * If the bounds are both MINVALUE or MAXVALUE, stop now and treat
			 * them as equal, since any values after this point must be
			 * ignored.
			 */
			if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
				break;

			cmpval = FunctionCall2Coll(&key->partsupfunc[j],
									   key->partcollation[j],
									   cur->datums[j],
									   prev->datums[j]);
			if (DatumGetInt32(cmpval) != 0)
			{
				is_distinct = true;
				break;
			}
		}

		/*
		 * Only if the bound is distinct save it into a temporary array, i.e,
		 * rbounds which is later copied into boundinfo datums array.
		 */
		if (is_distinct)
			rbounds[k++] = all_bounds[i];

		prev = cur;
	}

	pfree(all_bounds);

	/* Update ndatums to hold the count of distinct datums. */
	ndatums = k;

	/*
	 * Add datums to boundinfo.  Canonical indexes are values ranging from 0
	 * to nparts - 1, assigned in that order to each partition's upper bound.
	 * For 'datums' elements that are lower bounds, there is -1 in the
	 * 'indexes' array to signify that no partition exists for the values less
	 * than such a bound and greater than or equal to the previous upper
	 * bound.
	 */
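	/*
	 * Worked example (illustrative only, not from the upstream sources): for
	 * p0 FOR VALUES FROM (1) TO (10) and p1 FOR VALUES FROM (20) TO (30),
	 * the distinct bounds are 1, 10, 20 and 30, giving
	 * indexes = {-1, 0, -1, 1, -1}; the -1 entries cover values below 1, the
	 * gap from 10 up to 20, and values at or above 30, where no partition
	 * exists.
	 */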
	boundinfo->ndatums = ndatums;
	boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
	boundinfo->kind = (PartitionRangeDatumKind **)
		palloc(ndatums *
			   sizeof(PartitionRangeDatumKind *));
	boundinfo->interleaved_parts = NULL;

	/*
	 * For range partitioning, an additional value of -1 is stored as the last
	 * element of the indexes[] array.
	 */
	boundinfo->nindexes = ndatums + 1;
	boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));

	/*
	 * In the loop below, to save from allocating a series of small arrays,
	 * here we just allocate a single array for Datums and another for
	 * PartitionRangeDatumKinds, below we'll just assign a portion of these
	 * arrays in each loop.
	 */
	partnatts = key->partnatts;
	boundDatums = (Datum *) palloc(ndatums * partnatts * sizeof(Datum));
	boundKinds = (PartitionRangeDatumKind *) palloc(ndatums * partnatts *
													sizeof(PartitionRangeDatumKind));

	for (i = 0; i < ndatums; i++)
	{
		int			j;

		boundinfo->datums[i] = &boundDatums[i * partnatts];
		boundinfo->kind[i] = &boundKinds[i * partnatts];
		for (j = 0; j < partnatts; j++)
		{
			if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
				boundinfo->datums[i][j] =
					datumCopy(rbounds[i]->datums[j],
							  key->parttypbyval[j],
							  key->parttyplen[j]);
			boundinfo->kind[i][j] = rbounds[i]->kind[j];
		}

		/*
		 * There is no mapping for invalid indexes.
		 *
		 * Any lower bounds in the rbounds array have invalid indexes
		 * assigned, because the values between the previous bound (if there
		 * is one) and this (lower) bound are not part of the range of any
		 * existing partition.
		 */
		if (rbounds[i]->lower)
			boundinfo->indexes[i] = -1;
		else
		{
			int			orig_index = rbounds[i]->index;

			/* If the old index has no mapping, assign one */
			if ((*mapping)[orig_index] == -1)
				(*mapping)[orig_index] = next_index++;

			boundinfo->indexes[i] = (*mapping)[orig_index];
		}
	}

	pfree(rbounds);

	/* Set the canonical value for default_index, if any. */
	if (default_index != -1)
	{
		Assert(default_index >= 0 && (*mapping)[default_index] == -1);
		(*mapping)[default_index] = next_index++;
		boundinfo->default_index = (*mapping)[default_index];
	}

	/* The extra -1 element. */
	Assert(i == ndatums);
	boundinfo->indexes[i] = -1;

	/* All partitions must now have been assigned canonical indexes. */
	Assert(next_index == nparts);
	return boundinfo;
}

/*
 * Are two partition bound collections logically equal?
 *
 * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
 * This is also useful when b1 and b2 are bound collections of two separate
 * relations, respectively, because PartitionBoundInfo is a canonical
 * representation of partition bounds.
 */
bool
partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
					   PartitionBoundInfo b1, PartitionBoundInfo b2)
{
	int			i;

	if (b1->strategy != b2->strategy)
		return false;

	if (b1->ndatums != b2->ndatums)
		return false;

	if (b1->nindexes != b2->nindexes)
		return false;

	if (b1->null_index != b2->null_index)
		return false;

	if (b1->default_index != b2->default_index)
		return false;

	/* For all partition strategies, the indexes[] arrays have to match */
	for (i = 0; i < b1->nindexes; i++)
	{
		if (b1->indexes[i] != b2->indexes[i])
			return false;
	}

	/* Finally, compare the datums[] arrays */
	if (b1->strategy == PARTITION_STRATEGY_HASH)
	{
		/*
		 * We arrange the partitions in the ascending order of their moduli
		 * and remainders.  Also every modulus is factor of next larger
		 * modulus.  Therefore we can safely store index of a given partition
		 * in indexes array at remainder of that partition.  Also entries at
		 * (remainder + N * modulus) positions in indexes array are all same
		 * for (modulus, remainder) specification for any partition.  Thus the
		 * datums arrays from the given bounds are the same, if and only if
		 * their indexes arrays are the same.  So, it suffices to compare the
		 * indexes arrays.
		 *
		 * Nonetheless make sure that the bounds are indeed the same when the
		 * indexes match.  Hash partition bound stores modulus and remainder
		 * at b1->datums[i][0] and b1->datums[i][1] position respectively.
		 */
#ifdef USE_ASSERT_CHECKING
		for (i = 0; i < b1->ndatums; i++)
			Assert((b1->datums[i][0] == b2->datums[i][0] &&
					b1->datums[i][1] == b2->datums[i][1]));
#endif
	}
	else
	{
		for (i = 0; i < b1->ndatums; i++)
		{
			int			j;

			for (j = 0; j < partnatts; j++)
			{
				/* For range partitions, the bounds might not be finite. */
				if (b1->kind != NULL)
				{
					/* The different kinds of bound all differ from each other */
					if (b1->kind[i][j] != b2->kind[i][j])
						return false;

					/*
					 * Non-finite bounds are equal without further
					 * examination.
					 */
					if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
						continue;
				}

				/*
				 * Compare the actual values.  Note that it would be both
				 * incorrect and unsafe to invoke the comparison operator
				 * derived from the partitioning specification here.  It would
				 * be incorrect because we want the relcache entry to be
				 * updated for ANY change to the partition bounds, not just
				 * those that the partitioning operator thinks are
				 * significant.  It would be unsafe because we might reach
				 * this code in the context of an aborted transaction, and an
				 * arbitrary partitioning operator might not be safe in that
				 * context.  datumIsEqual() should be simple enough to be
				 * safe.
				 */
				if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
								  parttypbyval[j], parttyplen[j]))
					return false;
			}
		}
	}
	return true;
}

/*
 * Return a copy of given PartitionBoundInfo structure.  The data types of
 * bounds are described by given partition key specification.
 *
 * Note: it's important that this function and its callees not do any catalog
 * access, nor anything else that would result in allocating memory other than
 * the returned data structure.  Since this is called in a long-lived context,
 * that would result in unwanted memory leaks.
 */
PartitionBoundInfo
partition_bounds_copy(PartitionBoundInfo src,
					  PartitionKey key)
{
	PartitionBoundInfo dest;
	int			i;
	int			ndatums;
	int			nindexes;
	int			partnatts;
	bool		hash_part;
	int			natts;
	Datum	   *boundDatums;

	dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));

	dest->strategy = src->strategy;
	ndatums = dest->ndatums = src->ndatums;
	nindexes = dest->nindexes = src->nindexes;
	partnatts = key->partnatts;

	/* List partitioned tables have only a single partition key. */
	Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);

	dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);

	if (src->kind != NULL)
	{
		PartitionRangeDatumKind *boundKinds;

		/* only RANGE partition should have a non-NULL kind */
		Assert(key->strategy == PARTITION_STRATEGY_RANGE);

		dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
														 sizeof(PartitionRangeDatumKind *));

		/*
		 * In the loop below, to save from allocating a series of small arrays
		 * for storing the PartitionRangeDatumKind, we allocate a single chunk
		 * here and use a smaller portion of it for each datum.
		 */
		boundKinds = (PartitionRangeDatumKind *) palloc(ndatums * partnatts *
														sizeof(PartitionRangeDatumKind));

		for (i = 0; i < ndatums; i++)
		{
			dest->kind[i] = &boundKinds[i * partnatts];
			memcpy(dest->kind[i], src->kind[i],
				   sizeof(PartitionRangeDatumKind) * partnatts);
		}
	}
	else
		dest->kind = NULL;

	/* copy interleaved partitions for LIST partitioned tables */
	dest->interleaved_parts = bms_copy(src->interleaved_parts);

	/*
	 * For hash partitioning, datums array will have two elements - modulus
	 * and remainder.
	 */
	hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
	natts = hash_part ? 2 : partnatts;
	boundDatums = palloc(ndatums * natts * sizeof(Datum));

	for (i = 0; i < ndatums; i++)
	{
		int			j;

		dest->datums[i] = &boundDatums[i * natts];

		for (j = 0; j < natts; j++)
		{
			bool		byval;
			int			typlen;

			if (hash_part)
			{
				typlen = sizeof(int32); /* Always int4 */
				byval = true;	/* int4 is pass-by-value */
			}
			else
			{
				byval = key->parttypbyval[j];
				typlen = key->parttyplen[j];
			}

			if (dest->kind == NULL ||
				dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
				dest->datums[i][j] = datumCopy(src->datums[i][j],
											   byval, typlen);
		}
	}

	dest->indexes = (int *) palloc(sizeof(int) * nindexes);
	memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes);

	dest->null_index = src->null_index;
	dest->default_index = src->default_index;

	return dest;
}

/*
 * partition_bounds_merge
 *		Check to see whether every partition of 'outer_rel' matches/overlaps
 *		one partition of 'inner_rel' at most, and vice versa; and if so, build
 *		and return the partition bounds for a join relation between the rels,
 *		generating two lists of the matching/overlapping partitions, which are
 *		returned to *outer_parts and *inner_parts respectively.
 *
 * The lists contain the same number of partitions, and the partitions at the
 * same positions in the lists indicate join pairs used for partitioned join.
 * If a partition on one side matches/overlaps multiple partitions on the
 * other side, this function returns NULL, setting *outer_parts and
 * *inner_parts to NIL.
 */
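/*
 * For example (illustrative only, not from the upstream sources; the
 * relations are hypothetical): joining an outer rel with range partitions
 * over [0, 100) and [100, 200) to an inner rel partitioned over [0, 100) and
 * [150, 200) yields the join pairs (outer 1, inner 1) and (outer 2, inner 2),
 * since each partition overlaps at most one partition on the other side.  If
 * the inner rel were instead partitioned over [0, 150) and [150, 200), its
 * first partition would overlap both outer partitions, and this function
 * would return NULL.
 */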
|
|
PartitionBoundInfo
|
|
partition_bounds_merge(int partnatts,
|
|
FmgrInfo *partsupfunc, Oid *partcollation,
|
|
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
|
|
JoinType jointype,
|
|
List **outer_parts, List **inner_parts)
|
|
{
|
|
/*
|
|
* Currently, this function is called only from try_partitionwise_join(),
|
|
* so the join type should be INNER, LEFT, FULL, SEMI, or ANTI.
|
|
*/
|
|
Assert(jointype == JOIN_INNER || jointype == JOIN_LEFT ||
|
|
jointype == JOIN_FULL || jointype == JOIN_SEMI ||
|
|
jointype == JOIN_ANTI);
|
|
|
|
/* The partitioning strategies should be the same. */
|
|
Assert(outer_rel->boundinfo->strategy == inner_rel->boundinfo->strategy);
|
|
|
|
*outer_parts = *inner_parts = NIL;
|
|
switch (outer_rel->boundinfo->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_HASH:
|
|
|
|
/*
|
|
* For hash partitioned tables, we currently support partitioned
|
|
* join only when they have exactly the same partition bounds.
|
|
*
|
|
* XXX: it might be possible to relax the restriction to support
|
|
* cases where hash partitioned tables have missing partitions
|
|
* and/or different moduli, but it's not clear if it would be
|
|
* useful to support the former case since it's unusual to have
|
|
* missing partitions. On the other hand, it would be useful to
|
|
* support the latter case, but in that case, there is a high
|
|
* probability that a partition on one side will match multiple
|
|
* partitions on the other side, which is the scenario the current
|
|
* implementation of partitioned join can't handle.
|
|
*/
|
|
return NULL;
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
return merge_list_bounds(partsupfunc,
|
|
partcollation,
|
|
outer_rel,
|
|
inner_rel,
|
|
jointype,
|
|
outer_parts,
|
|
inner_parts);
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
return merge_range_bounds(partnatts,
|
|
partsupfunc,
|
|
partcollation,
|
|
outer_rel,
|
|
inner_rel,
|
|
jointype,
|
|
outer_parts,
|
|
inner_parts);
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* merge_list_bounds
|
|
* Create the partition bounds for a join relation between list
|
|
* partitioned tables, if possible
|
|
*
|
|
* In this function we try to find sets of matching partitions from both sides
|
|
* by comparing list values stored in their partition bounds. Since the list
|
|
* values appear in the ascending order, an algorithm similar to merge join is
|
|
* used for that. If a partition on one side doesn't have a matching
|
|
* partition on the other side, the algorithm tries to match it with the
|
|
* default partition on the other side if any; if not, the algorithm tries to
|
|
* match it with a dummy partition on the other side if it's on the
|
|
* non-nullable side of an outer join. Also, if both sides have the default
|
|
* partitions, the algorithm tries to match them with each other. We give up
|
|
* if the algorithm finds a partition matching multiple partitions on the
|
|
* other side, which is the scenario the current implementation of partitioned
|
|
* join can't handle.
|
|
*/
|
|
static PartitionBoundInfo
|
|
merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation,
|
|
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
|
|
JoinType jointype,
|
|
List **outer_parts, List **inner_parts)
|
|
{
|
|
PartitionBoundInfo merged_bounds = NULL;
|
|
PartitionBoundInfo outer_bi = outer_rel->boundinfo;
|
|
PartitionBoundInfo inner_bi = inner_rel->boundinfo;
|
|
bool outer_has_default = partition_bound_has_default(outer_bi);
|
|
bool inner_has_default = partition_bound_has_default(inner_bi);
|
|
int outer_default = outer_bi->default_index;
|
|
int inner_default = inner_bi->default_index;
|
|
bool outer_has_null = partition_bound_accepts_nulls(outer_bi);
|
|
bool inner_has_null = partition_bound_accepts_nulls(inner_bi);
|
|
PartitionMap outer_map;
|
|
PartitionMap inner_map;
|
|
int outer_pos;
|
|
int inner_pos;
|
|
int next_index = 0;
|
|
int null_index = -1;
|
|
int default_index = -1;
|
|
List *merged_datums = NIL;
|
|
List *merged_indexes = NIL;
|
|
|
|
Assert(*outer_parts == NIL);
|
|
Assert(*inner_parts == NIL);
|
|
Assert(outer_bi->strategy == inner_bi->strategy &&
|
|
outer_bi->strategy == PARTITION_STRATEGY_LIST);
|
|
/* List partitioning doesn't require kinds. */
|
|
Assert(!outer_bi->kind && !inner_bi->kind);
|
|
|
|
init_partition_map(outer_rel, &outer_map);
|
|
init_partition_map(inner_rel, &inner_map);
|
|
|
|
/*
|
|
* If the default partitions (if any) have been proven empty, deem them
|
|
* non-existent.
|
|
*/
|
|
if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
|
|
outer_has_default = false;
|
|
if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
|
|
inner_has_default = false;
|
|
|
|
/*
|
|
* Merge partitions from both sides. In each iteration we compare a pair
|
|
* of list values, one from each side, and decide whether the
|
|
* corresponding partitions match or not. If the two values match
|
|
* exactly, move to the next pair of list values, otherwise move to the
|
|
* next list value on the side with a smaller list value.
|
|
*/
|
|
outer_pos = inner_pos = 0;
|
|
while (outer_pos < outer_bi->ndatums || inner_pos < inner_bi->ndatums)
|
|
{
|
|
int outer_index = -1;
|
|
int inner_index = -1;
|
|
Datum *outer_datums;
|
|
Datum *inner_datums;
|
|
int cmpval;
|
|
Datum *merged_datum = NULL;
|
|
int merged_index = -1;
|
|
|
|
if (outer_pos < outer_bi->ndatums)
|
|
{
|
|
/*
|
|
* If the partition on the outer side has been proven empty,
|
|
* ignore it and move to the next datum on the outer side.
|
|
*/
|
|
outer_index = outer_bi->indexes[outer_pos];
|
|
if (is_dummy_partition(outer_rel, outer_index))
|
|
{
|
|
outer_pos++;
|
|
continue;
|
|
}
|
|
}
|
|
if (inner_pos < inner_bi->ndatums)
|
|
{
|
|
/*
|
|
* If the partition on the inner side has been proven empty,
|
|
* ignore it and move to the next datum on the inner side.
|
|
*/
|
|
inner_index = inner_bi->indexes[inner_pos];
|
|
if (is_dummy_partition(inner_rel, inner_index))
|
|
{
|
|
inner_pos++;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* Get the list values. */
|
|
outer_datums = outer_pos < outer_bi->ndatums ?
|
|
outer_bi->datums[outer_pos] : NULL;
|
|
inner_datums = inner_pos < inner_bi->ndatums ?
|
|
inner_bi->datums[inner_pos] : NULL;
|
|
|
|
/*
|
|
* We run this loop till both sides finish. This allows us to avoid
|
|
* duplicating code to handle the remaining values on the side which
|
|
* finishes later. For that we set the comparison parameter cmpval in
|
|
* such a way that it appears as if the side which finishes earlier
|
|
* has an extra value higher than any other value on the unfinished
|
|
* side. That way we advance the values on the unfinished side till
|
|
* all of its values are exhausted.
|
|
*/
|
|
if (outer_pos >= outer_bi->ndatums)
|
|
cmpval = 1;
|
|
else if (inner_pos >= inner_bi->ndatums)
|
|
cmpval = -1;
|
|
else
|
|
{
|
|
Assert(outer_datums != NULL && inner_datums != NULL);
|
|
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
|
|
partcollation[0],
|
|
outer_datums[0],
|
|
inner_datums[0]));
|
|
}
|
|
|
|
if (cmpval == 0)
|
|
{
|
|
/* Two list values match exactly. */
|
|
Assert(outer_pos < outer_bi->ndatums);
|
|
Assert(inner_pos < inner_bi->ndatums);
|
|
Assert(outer_index >= 0);
|
|
Assert(inner_index >= 0);
|
|
|
|
/*
|
|
* Try merging both partitions. If successful, add the list value
|
|
* and index of the merged partition below.
|
|
*/
|
|
merged_index = merge_matching_partitions(&outer_map, &inner_map,
|
|
outer_index, inner_index,
|
|
&next_index);
|
|
if (merged_index == -1)
|
|
goto cleanup;
|
|
|
|
merged_datum = outer_datums;
|
|
|
|
/* Move to the next pair of list values. */
|
|
outer_pos++;
|
|
inner_pos++;
|
|
}
|
|
else if (cmpval < 0)
|
|
{
|
|
/* A list value missing from the inner side. */
|
|
Assert(outer_pos < outer_bi->ndatums);
|
|
|
|
/*
|
|
* If the inner side has the default partition, or this is an
|
|
* outer join, try to assign a merged partition to the outer
|
|
* partition (see process_outer_partition()). Otherwise, the
|
|
* outer partition will not contribute to the result.
|
|
*/
|
|
if (inner_has_default || IS_OUTER_JOIN(jointype))
|
|
{
|
|
/* Get the outer partition. */
|
|
outer_index = outer_bi->indexes[outer_pos];
|
|
Assert(outer_index >= 0);
|
|
merged_index = process_outer_partition(&outer_map,
|
|
&inner_map,
|
|
outer_has_default,
|
|
inner_has_default,
|
|
outer_index,
|
|
inner_default,
|
|
jointype,
|
|
&next_index,
|
|
&default_index);
|
|
if (merged_index == -1)
|
|
goto cleanup;
|
|
merged_datum = outer_datums;
|
|
}
|
|
|
|
/* Move to the next list value on the outer side. */
|
|
outer_pos++;
|
|
}
|
|
else
|
|
{
|
|
/* A list value missing from the outer side. */
|
|
Assert(cmpval > 0);
|
|
Assert(inner_pos < inner_bi->ndatums);
|
|
|
|
/*
|
|
* If the outer side has the default partition, or this is a FULL
|
|
* join, try to assign a merged partition to the inner partition
|
|
* (see process_inner_partition()). Otherwise, the inner
|
|
* partition will not contribute to the result.
|
|
*/
|
|
if (outer_has_default || jointype == JOIN_FULL)
|
|
{
|
|
/* Get the inner partition. */
|
|
inner_index = inner_bi->indexes[inner_pos];
|
|
Assert(inner_index >= 0);
|
|
merged_index = process_inner_partition(&outer_map,
|
|
&inner_map,
|
|
outer_has_default,
|
|
inner_has_default,
|
|
inner_index,
|
|
outer_default,
|
|
jointype,
|
|
&next_index,
|
|
&default_index);
|
|
if (merged_index == -1)
|
|
goto cleanup;
|
|
merged_datum = inner_datums;
|
|
}
|
|
|
|
/* Move to the next list value on the inner side. */
|
|
inner_pos++;
|
|
}
|
|
|
|
/*
|
|
* If we assigned a merged partition, add the list value and index of
|
|
* the merged partition if appropriate.
|
|
*/
|
|
if (merged_index >= 0 && merged_index != default_index)
|
|
{
|
|
merged_datums = lappend(merged_datums, merged_datum);
|
|
merged_indexes = lappend_int(merged_indexes, merged_index);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If the NULL partitions (if any) have been proven empty, deem them
|
|
* non-existent.
|
|
*/
|
|
if (outer_has_null &&
|
|
is_dummy_partition(outer_rel, outer_bi->null_index))
|
|
outer_has_null = false;
|
|
if (inner_has_null &&
|
|
is_dummy_partition(inner_rel, inner_bi->null_index))
|
|
inner_has_null = false;
|
|
|
|
/* Merge the NULL partitions if any. */
|
|
if (outer_has_null || inner_has_null)
|
|
merge_null_partitions(&outer_map, &inner_map,
|
|
outer_has_null, inner_has_null,
|
|
outer_bi->null_index, inner_bi->null_index,
|
|
jointype, &next_index, &null_index);
|
|
else
|
|
Assert(null_index == -1);
|
|
|
|
/* Merge the default partitions if any. */
|
|
if (outer_has_default || inner_has_default)
|
|
merge_default_partitions(&outer_map, &inner_map,
|
|
outer_has_default, inner_has_default,
|
|
outer_default, inner_default,
|
|
jointype, &next_index, &default_index);
|
|
else
|
|
Assert(default_index == -1);
|
|
|
|
/* If we have merged partitions, create the partition bounds. */
|
|
if (next_index > 0)
|
|
{
|
|
/* Fix the merged_indexes list if necessary. */
|
|
if (outer_map.did_remapping || inner_map.did_remapping)
|
|
{
|
|
Assert(jointype == JOIN_FULL);
|
|
fix_merged_indexes(&outer_map, &inner_map,
|
|
next_index, merged_indexes);
|
|
}
|
|
|
|
/* Use maps to match partitions from inputs. */
|
|
generate_matching_part_pairs(outer_rel, inner_rel,
|
|
&outer_map, &inner_map,
|
|
next_index,
|
|
outer_parts, inner_parts);
|
|
Assert(*outer_parts != NIL);
|
|
Assert(*inner_parts != NIL);
|
|
Assert(list_length(*outer_parts) == list_length(*inner_parts));
|
|
Assert(list_length(*outer_parts) <= next_index);
|
|
|
|
/* Make a PartitionBoundInfo struct to return. */
|
|
merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
|
|
merged_datums,
|
|
NIL,
|
|
merged_indexes,
|
|
null_index,
|
|
default_index);
|
|
Assert(merged_bounds);
|
|
}
|
|
|
|
cleanup:
|
|
/* Free local memory before returning. */
|
|
list_free(merged_datums);
|
|
list_free(merged_indexes);
|
|
free_partition_map(&outer_map);
|
|
free_partition_map(&inner_map);
|
|
|
|
return merged_bounds;
|
|
}
|
|
|
|
/*
|
|
* merge_range_bounds
|
|
* Create the partition bounds for a join relation between range
|
|
* partitioned tables, if possible
|
|
*
|
|
* In this function we try to find sets of overlapping partitions from both
|
|
* sides by comparing ranges stored in their partition bounds. Since the
|
|
* ranges appear in the ascending order, an algorithm similar to merge join is
|
|
* used for that. If a partition on one side doesn't have an overlapping
|
|
* partition on the other side, the algorithm tries to match it with the
|
|
* default partition on the other side if any; if not, the algorithm tries to
|
|
* match it with a dummy partition on the other side if it's on the
|
|
* non-nullable side of an outer join. Also, if both sides have the default
|
|
* partitions, the algorithm tries to match them with each other. We give up
|
|
* if the algorithm finds a partition overlapping multiple partitions on the
|
|
* other side, which is the scenario the current implementation of partitioned
|
|
* join can't handle.
|
|
*/
|
|
static PartitionBoundInfo
|
|
merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
|
|
Oid *partcollations,
|
|
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
|
|
JoinType jointype,
|
|
List **outer_parts, List **inner_parts)
|
|
{
|
|
PartitionBoundInfo merged_bounds = NULL;
|
|
PartitionBoundInfo outer_bi = outer_rel->boundinfo;
|
|
PartitionBoundInfo inner_bi = inner_rel->boundinfo;
|
|
bool outer_has_default = partition_bound_has_default(outer_bi);
|
|
bool inner_has_default = partition_bound_has_default(inner_bi);
|
|
int outer_default = outer_bi->default_index;
|
|
int inner_default = inner_bi->default_index;
|
|
PartitionMap outer_map;
|
|
PartitionMap inner_map;
|
|
int outer_index;
|
|
int inner_index;
|
|
int outer_lb_pos;
|
|
int inner_lb_pos;
|
|
PartitionRangeBound outer_lb;
|
|
PartitionRangeBound outer_ub;
|
|
PartitionRangeBound inner_lb;
|
|
PartitionRangeBound inner_ub;
|
|
int next_index = 0;
|
|
int default_index = -1;
|
|
List *merged_datums = NIL;
|
|
List *merged_kinds = NIL;
|
|
List *merged_indexes = NIL;
|
|
|
|
Assert(*outer_parts == NIL);
|
|
Assert(*inner_parts == NIL);
|
|
Assert(outer_bi->strategy == inner_bi->strategy &&
|
|
outer_bi->strategy == PARTITION_STRATEGY_RANGE);
|
|
|
|
init_partition_map(outer_rel, &outer_map);
|
|
init_partition_map(inner_rel, &inner_map);
|
|
|
|
/*
|
|
* If the default partitions (if any) have been proven empty, deem them
|
|
* non-existent.
|
|
*/
|
|
if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
|
|
outer_has_default = false;
|
|
if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
|
|
inner_has_default = false;
|
|
|
|
/*
|
|
* Merge partitions from both sides. In each iteration we compare a pair
|
|
* of ranges, one from each side, and decide whether the corresponding
|
|
* partitions match or not. If the two ranges overlap, move to the next
|
|
* pair of ranges, otherwise move to the next range on the side with a
|
|
* lower range. outer_lb_pos/inner_lb_pos keep track of the positions of
|
|
* lower bounds in the datums arrays in the outer/inner
|
|
* PartitionBoundInfos respectively.
|
|
*/
|
|
outer_lb_pos = inner_lb_pos = 0;
|
|
outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
|
|
&outer_lb, &outer_ub);
|
|
inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
|
|
&inner_lb, &inner_ub);
|
|
while (outer_index >= 0 || inner_index >= 0)
|
|
{
|
|
bool overlap;
|
|
int ub_cmpval;
|
|
int lb_cmpval;
|
|
PartitionRangeBound merged_lb = {-1, NULL, NULL, true};
|
|
PartitionRangeBound merged_ub = {-1, NULL, NULL, false};
|
|
int merged_index = -1;
|
|
|
|
/*
|
|
* We run this loop till both sides finish. This allows us to avoid
|
|
* duplicating code to handle the remaining ranges on the side which
|
|
* finishes later. For that we set the comparison parameter cmpval in
|
|
* such a way that it appears as if the side which finishes earlier
|
|
* has an extra range higher than any other range on the unfinished
|
|
* side. That way we advance the ranges on the unfinished side till
|
|
* all of its ranges are exhausted.
|
|
*/
|
|
if (outer_index == -1)
|
|
{
|
|
overlap = false;
|
|
lb_cmpval = 1;
|
|
ub_cmpval = 1;
|
|
}
|
|
else if (inner_index == -1)
|
|
{
|
|
overlap = false;
|
|
lb_cmpval = -1;
|
|
ub_cmpval = -1;
|
|
}
|
|
else
|
|
overlap = compare_range_partitions(partnatts, partsupfuncs,
|
|
partcollations,
|
|
&outer_lb, &outer_ub,
|
|
&inner_lb, &inner_ub,
|
|
&lb_cmpval, &ub_cmpval);
|
|
|
|
if (overlap)
|
|
{
|
|
/* Two ranges overlap; form a join pair. */
|
|
|
|
PartitionRangeBound save_outer_ub;
|
|
PartitionRangeBound save_inner_ub;
|
|
|
|
/* Both partitions should not have been merged yet. */
|
|
Assert(outer_index >= 0);
|
|
Assert(outer_map.merged_indexes[outer_index] == -1 &&
|
|
outer_map.merged[outer_index] == false);
|
|
            Assert(inner_index >= 0);
            Assert(inner_map.merged_indexes[inner_index] == -1 &&
                   inner_map.merged[inner_index] == false);

            /*
             * Get the index of the merged partition.  Both partitions aren't
             * merged yet, so the partitions should be merged successfully.
             */
            merged_index = merge_matching_partitions(&outer_map, &inner_map,
                                                     outer_index, inner_index,
                                                     &next_index);
            Assert(merged_index >= 0);

            /* Get the range bounds of the merged partition. */
            get_merged_range_bounds(partnatts, partsupfuncs,
                                    partcollations, jointype,
                                    &outer_lb, &outer_ub,
                                    &inner_lb, &inner_ub,
                                    lb_cmpval, ub_cmpval,
                                    &merged_lb, &merged_ub);

            /* Save the upper bounds of both partitions for use below. */
            save_outer_ub = outer_ub;
            save_inner_ub = inner_ub;

            /* Move to the next pair of ranges. */
            outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
                                              &outer_lb, &outer_ub);
            inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
                                              &inner_lb, &inner_ub);

            /*
             * If the range of a partition on one side overlaps the range of
             * the next partition on the other side, that will cause the
             * partition on one side to match at least two partitions on the
             * other side, which is the case that we currently don't support
             * partitioned join for; give up.
             */
            if (ub_cmpval > 0 && inner_index >= 0 &&
                compare_range_bounds(partnatts, partsupfuncs, partcollations,
                                     &save_outer_ub, &inner_lb) > 0)
                goto cleanup;
            if (ub_cmpval < 0 && outer_index >= 0 &&
                compare_range_bounds(partnatts, partsupfuncs, partcollations,
                                     &outer_lb, &save_inner_ub) < 0)
                goto cleanup;

            /*
             * A row from a non-overlapping portion (if any) of a partition on
             * one side might find its join partner in the default partition
             * (if any) on the other side, causing the same situation as
             * above; give up in that case.
             */
            if ((outer_has_default && (lb_cmpval > 0 || ub_cmpval < 0)) ||
                (inner_has_default && (lb_cmpval < 0 || ub_cmpval > 0)))
                goto cleanup;
        }
        else if (ub_cmpval < 0)
        {
            /* A non-overlapping outer range. */

            /* The outer partition should not have been merged yet. */
            Assert(outer_index >= 0);
            Assert(outer_map.merged_indexes[outer_index] == -1 &&
                   outer_map.merged[outer_index] == false);

            /*
             * If the inner side has the default partition, or this is an
             * outer join, try to assign a merged partition to the outer
             * partition (see process_outer_partition()).  Otherwise, the
             * outer partition will not contribute to the result.
             */
            if (inner_has_default || IS_OUTER_JOIN(jointype))
            {
                merged_index = process_outer_partition(&outer_map,
                                                       &inner_map,
                                                       outer_has_default,
                                                       inner_has_default,
                                                       outer_index,
                                                       inner_default,
                                                       jointype,
                                                       &next_index,
                                                       &default_index);
                if (merged_index == -1)
                    goto cleanup;
                merged_lb = outer_lb;
                merged_ub = outer_ub;
            }

            /* Move to the next range on the outer side. */
            outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
                                              &outer_lb, &outer_ub);
        }
        else
        {
            /* A non-overlapping inner range. */
            Assert(ub_cmpval > 0);

            /* The inner partition should not have been merged yet. */
            Assert(inner_index >= 0);
            Assert(inner_map.merged_indexes[inner_index] == -1 &&
                   inner_map.merged[inner_index] == false);

            /*
             * If the outer side has the default partition, or this is a FULL
             * join, try to assign a merged partition to the inner partition
             * (see process_inner_partition()).  Otherwise, the inner
             * partition will not contribute to the result.
             */
            if (outer_has_default || jointype == JOIN_FULL)
            {
                merged_index = process_inner_partition(&outer_map,
                                                       &inner_map,
                                                       outer_has_default,
                                                       inner_has_default,
                                                       inner_index,
                                                       outer_default,
                                                       jointype,
                                                       &next_index,
                                                       &default_index);
                if (merged_index == -1)
                    goto cleanup;
                merged_lb = inner_lb;
                merged_ub = inner_ub;
            }

            /* Move to the next range on the inner side. */
            inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
                                              &inner_lb, &inner_ub);
        }

        /*
         * If we assigned a merged partition, add the range bounds and index
         * of the merged partition if appropriate.
         */
        if (merged_index >= 0 && merged_index != default_index)
            add_merged_range_bounds(partnatts, partsupfuncs, partcollations,
                                    &merged_lb, &merged_ub, merged_index,
                                    &merged_datums, &merged_kinds,
                                    &merged_indexes);
    }

    /* Merge the default partitions if any. */
    if (outer_has_default || inner_has_default)
        merge_default_partitions(&outer_map, &inner_map,
                                 outer_has_default, inner_has_default,
                                 outer_default, inner_default,
                                 jointype, &next_index, &default_index);
    else
        Assert(default_index == -1);

    /* If we have merged partitions, create the partition bounds. */
    if (next_index > 0)
    {
        /*
         * Unlike the case of list partitioning, we wouldn't have re-merged
         * partitions, so did_remapping should be left alone.
         */
        Assert(!outer_map.did_remapping);
        Assert(!inner_map.did_remapping);

        /* Use maps to match partitions from inputs. */
        generate_matching_part_pairs(outer_rel, inner_rel,
                                     &outer_map, &inner_map,
                                     next_index,
                                     outer_parts, inner_parts);
        Assert(*outer_parts != NIL);
        Assert(*inner_parts != NIL);
        Assert(list_length(*outer_parts) == list_length(*inner_parts));
        Assert(list_length(*outer_parts) == next_index);

        /* Make a PartitionBoundInfo struct to return. */
        merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
                                                      merged_datums,
                                                      merged_kinds,
                                                      merged_indexes,
                                                      -1,
                                                      default_index);
        Assert(merged_bounds);
    }

cleanup:
    /* Free local memory before returning. */
    list_free(merged_datums);
    list_free(merged_kinds);
    list_free(merged_indexes);
    free_partition_map(&outer_map);
    free_partition_map(&inner_map);

    return merged_bounds;
}
|
|
|
|
/*
 * init_partition_map
 *      Initialize a PartitionMap struct for given relation
 */
static void
init_partition_map(RelOptInfo *rel, PartitionMap *map)
{
    int         nparts = rel->nparts;
    int         i;

    map->nparts = nparts;
    map->merged_indexes = (int *) palloc(sizeof(int) * nparts);
    map->merged = (bool *) palloc(sizeof(bool) * nparts);
    map->did_remapping = false;
    map->old_indexes = (int *) palloc(sizeof(int) * nparts);
    for (i = 0; i < nparts; i++)
    {
        map->merged_indexes[i] = map->old_indexes[i] = -1;
        map->merged[i] = false;
    }
}
|
|
|
|
/*
 * free_partition_map
 */
static void
free_partition_map(PartitionMap *map)
{
    pfree(map->merged_indexes);
    pfree(map->merged);
    pfree(map->old_indexes);
}
|
|
|
|
/*
 * is_dummy_partition --- has partition been proven empty?
 */
static bool
is_dummy_partition(RelOptInfo *rel, int part_index)
{
    RelOptInfo *part_rel;

    Assert(part_index >= 0);
    part_rel = rel->part_rels[part_index];
    if (part_rel == NULL || IS_DUMMY_REL(part_rel))
        return true;
    return false;
}
|
|
|
|
/*
 * merge_matching_partitions
 *      Try to merge given outer/inner partitions, and return the index of a
 *      merged partition produced from them if successful, -1 otherwise
 *
 * If the merged partition is newly created, *next_index is incremented.
 */
static int
merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map,
                          int outer_index, int inner_index, int *next_index)
{
    int         outer_merged_index;
    int         inner_merged_index;
    bool        outer_merged;
    bool        inner_merged;

    Assert(outer_index >= 0 && outer_index < outer_map->nparts);
    outer_merged_index = outer_map->merged_indexes[outer_index];
    outer_merged = outer_map->merged[outer_index];
    Assert(inner_index >= 0 && inner_index < inner_map->nparts);
    inner_merged_index = inner_map->merged_indexes[inner_index];
    inner_merged = inner_map->merged[inner_index];

    /*
     * Handle cases where we have already assigned a merged partition to each
     * of the given partitions.
     */
    if (outer_merged_index >= 0 && inner_merged_index >= 0)
    {
        /*
         * If the merged partitions are the same, no need to do anything;
         * return the index of the merged partitions.  Otherwise, if each of
         * the given partitions has been merged with a dummy partition on the
         * other side, re-map them to either of the two merged partitions.
         * Otherwise, they can't be merged, so return -1.
         */
        if (outer_merged_index == inner_merged_index)
        {
            Assert(outer_merged);
            Assert(inner_merged);
            return outer_merged_index;
        }
        if (!outer_merged && !inner_merged)
        {
            /*
             * This can only happen for a list-partitioning case.  We re-map
             * them to the merged partition with the smaller of the two merged
             * indexes to preserve the property that the canonical order of
             * list partitions is determined by the indexes assigned to the
             * smallest list value of each partition.
             */
            if (outer_merged_index < inner_merged_index)
            {
                outer_map->merged[outer_index] = true;
                inner_map->merged_indexes[inner_index] = outer_merged_index;
                inner_map->merged[inner_index] = true;
                inner_map->did_remapping = true;
                inner_map->old_indexes[inner_index] = inner_merged_index;
                return outer_merged_index;
            }
            else
            {
                inner_map->merged[inner_index] = true;
                outer_map->merged_indexes[outer_index] = inner_merged_index;
                outer_map->merged[outer_index] = true;
                outer_map->did_remapping = true;
                outer_map->old_indexes[outer_index] = outer_merged_index;
                return inner_merged_index;
            }
        }
        return -1;
    }

    /* At least one of the given partitions should not have yet been merged. */
    Assert(outer_merged_index == -1 || inner_merged_index == -1);

    /*
     * If neither of them has been merged, merge them.  Otherwise, if one has
     * been merged with a dummy partition on the other side (and the other
     * hasn't yet been merged with anything), re-merge them.  Otherwise, they
     * can't be merged, so return -1.
     */
    if (outer_merged_index == -1 && inner_merged_index == -1)
    {
        int         merged_index = *next_index;

        Assert(!outer_merged);
        Assert(!inner_merged);
        outer_map->merged_indexes[outer_index] = merged_index;
        outer_map->merged[outer_index] = true;
        inner_map->merged_indexes[inner_index] = merged_index;
        inner_map->merged[inner_index] = true;
        *next_index = *next_index + 1;
        return merged_index;
    }
    if (outer_merged_index >= 0 && !outer_map->merged[outer_index])
    {
        Assert(inner_merged_index == -1);
        Assert(!inner_merged);
        inner_map->merged_indexes[inner_index] = outer_merged_index;
        inner_map->merged[inner_index] = true;
        outer_map->merged[outer_index] = true;
        return outer_merged_index;
    }
    if (inner_merged_index >= 0 && !inner_map->merged[inner_index])
    {
        Assert(outer_merged_index == -1);
        Assert(!outer_merged);
        outer_map->merged_indexes[outer_index] = inner_merged_index;
        outer_map->merged[outer_index] = true;
        inner_map->merged[inner_index] = true;
        return inner_merged_index;
    }
    return -1;
}
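
/*
 * Worked example (added for illustration; the indexes are hypothetical, not
 * taken from any regression test): suppose outer partition 2 and inner
 * partition 0 are both still unmerged and *next_index is 3.  The first block
 * above assigns merged index 3 to both map entries, sets both merged flags,
 * and bumps *next_index to 4.  If instead the outer partition had previously
 * been paired with a dummy partition (merged_indexes[2] >= 0 but merged[2]
 * still false), the later blocks reuse that merged index for the inner
 * partition rather than allocating a new one.
 */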
|
|
|
|
/*
|
|
* process_outer_partition
|
|
* Try to assign given outer partition a merged partition, and return the
|
|
* index of the merged partition if successful, -1 otherwise
|
|
*
|
|
* If the partition is newly created, *next_index is incremented. Also, if it
|
|
* is the default partition of the join relation, *default_index is set to the
|
|
* index if not already done.
|
|
*/
|
|
static int
|
|
process_outer_partition(PartitionMap *outer_map,
|
|
PartitionMap *inner_map,
|
|
bool outer_has_default,
|
|
bool inner_has_default,
|
|
int outer_index,
|
|
int inner_default,
|
|
JoinType jointype,
|
|
int *next_index,
|
|
int *default_index)
|
|
{
|
|
int merged_index = -1;
|
|
|
|
Assert(outer_index >= 0);
|
|
|
|
/*
|
|
* If the inner side has the default partition, a row from the outer
|
|
* partition might find its join partner in the default partition; try
|
|
* merging the outer partition with the default partition. Otherwise,
|
|
* this should be an outer join, in which case the outer partition has to
|
|
* be scanned all the way anyway; merge the outer partition with a dummy
|
|
* partition on the other side.
|
|
*/
|
|
if (inner_has_default)
|
|
{
|
|
Assert(inner_default >= 0);
|
|
|
|
/*
|
|
* If the outer side has the default partition as well, the default
|
|
* partition on the inner side will have two matching partitions on
|
|
* the other side: the outer partition and the default partition on
|
|
* the outer side. Partitionwise join doesn't handle this scenario
|
|
* yet.
|
|
*/
|
|
if (outer_has_default)
|
|
return -1;
|
|
|
|
merged_index = merge_matching_partitions(outer_map, inner_map,
|
|
outer_index, inner_default,
|
|
next_index);
|
|
if (merged_index == -1)
|
|
return -1;
|
|
|
|
/*
|
|
* If this is a FULL join, the default partition on the inner side has
|
|
* to be scanned all the way anyway, so the resulting partition will
|
|
* contain all key values from the default partition, which any other
|
|
* partition of the join relation will not contain. Thus the
|
|
* resulting partition will act as the default partition of the join
|
|
* relation; record the index in *default_index if not already done.
|
|
*/
|
|
if (jointype == JOIN_FULL)
|
|
{
|
|
if (*default_index == -1)
|
|
*default_index = merged_index;
|
|
else
|
|
Assert(*default_index == merged_index);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
Assert(IS_OUTER_JOIN(jointype));
|
|
Assert(jointype != JOIN_RIGHT);
|
|
|
|
/* If we have already assigned a partition, no need to do anything. */
|
|
merged_index = outer_map->merged_indexes[outer_index];
|
|
if (merged_index == -1)
|
|
merged_index = merge_partition_with_dummy(outer_map, outer_index,
|
|
next_index);
|
|
}
|
|
return merged_index;
|
|
}
|
|
|
|
/*
|
|
* process_inner_partition
|
|
* Try to assign given inner partition a merged partition, and return the
|
|
* index of the merged partition if successful, -1 otherwise
|
|
*
|
|
* If the partition is newly created, *next_index is incremented. Also, if it
|
|
* is the default partition of the join relation, *default_index is set to the
|
|
* index if not already done.
|
|
*/
|
|
static int
|
|
process_inner_partition(PartitionMap *outer_map,
|
|
PartitionMap *inner_map,
|
|
bool outer_has_default,
|
|
bool inner_has_default,
|
|
int inner_index,
|
|
int outer_default,
|
|
JoinType jointype,
|
|
int *next_index,
|
|
int *default_index)
|
|
{
|
|
int merged_index = -1;
|
|
|
|
Assert(inner_index >= 0);
|
|
|
|
/*
|
|
* If the outer side has the default partition, a row from the inner
|
|
* partition might find its join partner in the default partition; try
|
|
* merging the inner partition with the default partition. Otherwise,
|
|
* this should be a FULL join, in which case the inner partition has to be
|
|
* scanned all the way anyway; merge the inner partition with a dummy
|
|
* partition on the other side.
|
|
*/
|
|
if (outer_has_default)
|
|
{
|
|
Assert(outer_default >= 0);
|
|
|
|
/*
|
|
* If the inner side has the default partition as well, the default
|
|
* partition on the outer side will have two matching partitions on
|
|
* the other side: the inner partition and the default partition on
|
|
* the inner side. Partitionwise join doesn't handle this scenario
|
|
* yet.
|
|
*/
|
|
if (inner_has_default)
|
|
return -1;
|
|
|
|
merged_index = merge_matching_partitions(outer_map, inner_map,
|
|
outer_default, inner_index,
|
|
next_index);
|
|
if (merged_index == -1)
|
|
return -1;
|
|
|
|
/*
|
|
* If this is an outer join, the default partition on the outer side
|
|
* has to be scanned all the way anyway, so the resulting partition
|
|
* will contain all key values from the default partition, which any
|
|
* other partition of the join relation will not contain. Thus the
|
|
* resulting partition will act as the default partition of the join
|
|
* relation; record the index in *default_index if not already done.
|
|
*/
|
|
if (IS_OUTER_JOIN(jointype))
|
|
{
|
|
Assert(jointype != JOIN_RIGHT);
|
|
if (*default_index == -1)
|
|
*default_index = merged_index;
|
|
else
|
|
Assert(*default_index == merged_index);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
Assert(jointype == JOIN_FULL);
|
|
|
|
/* If we have already assigned a partition, no need to do anything. */
|
|
merged_index = inner_map->merged_indexes[inner_index];
|
|
if (merged_index == -1)
|
|
merged_index = merge_partition_with_dummy(inner_map, inner_index,
|
|
next_index);
|
|
}
|
|
return merged_index;
|
|
}
|
|
|
|
/*
|
|
* merge_null_partitions
|
|
* Merge the NULL partitions from a join's outer and inner sides.
|
|
*
|
|
* If the merged partition produced from them is the NULL partition of the join
|
|
* relation, *null_index is set to the index of the merged partition.
|
|
*
|
|
* Note: We assume here that the join clause for a partitioned join is strict
|
|
* because have_partkey_equi_join() requires that the corresponding operator
|
|
* be mergejoinable, and we currently assume that mergejoinable operators are
|
|
* strict (see MJEvalOuterValues()/MJEvalInnerValues()).
|
|
*/
|
|
static void
|
|
merge_null_partitions(PartitionMap *outer_map,
|
|
PartitionMap *inner_map,
|
|
bool outer_has_null,
|
|
bool inner_has_null,
|
|
int outer_null,
|
|
int inner_null,
|
|
JoinType jointype,
|
|
int *next_index,
|
|
int *null_index)
|
|
{
|
|
bool consider_outer_null = false;
|
|
bool consider_inner_null = false;
|
|
|
|
Assert(outer_has_null || inner_has_null);
|
|
Assert(*null_index == -1);
|
|
|
|
/*
|
|
* Check whether the NULL partitions have already been merged and if so,
|
|
* set the consider_outer_null/consider_inner_null flags.
|
|
*/
|
|
if (outer_has_null)
|
|
{
|
|
Assert(outer_null >= 0 && outer_null < outer_map->nparts);
|
|
if (outer_map->merged_indexes[outer_null] == -1)
|
|
consider_outer_null = true;
|
|
}
|
|
if (inner_has_null)
|
|
{
|
|
Assert(inner_null >= 0 && inner_null < inner_map->nparts);
|
|
if (inner_map->merged_indexes[inner_null] == -1)
|
|
consider_inner_null = true;
|
|
}
|
|
|
|
/* If both flags are set false, we don't need to do anything. */
|
|
if (!consider_outer_null && !consider_inner_null)
|
|
return;
|
|
|
|
if (consider_outer_null && !consider_inner_null)
|
|
{
|
|
Assert(outer_has_null);
|
|
|
|
/*
|
|
* If this is an outer join, the NULL partition on the outer side has
|
|
* to be scanned all the way anyway; merge the NULL partition with a
|
|
* dummy partition on the other side. In that case
|
|
* consider_outer_null means that the NULL partition only contains
|
|
* NULL values as the key values, so the merged partition will do so;
|
|
* treat it as the NULL partition of the join relation.
|
|
*/
|
|
if (IS_OUTER_JOIN(jointype))
|
|
{
|
|
Assert(jointype != JOIN_RIGHT);
|
|
*null_index = merge_partition_with_dummy(outer_map, outer_null,
|
|
next_index);
|
|
}
|
|
}
|
|
else if (!consider_outer_null && consider_inner_null)
|
|
{
|
|
Assert(inner_has_null);
|
|
|
|
/*
|
|
* If this is a FULL join, the NULL partition on the inner side has to
|
|
* be scanned all the way anyway; merge the NULL partition with a
|
|
* dummy partition on the other side. In that case
|
|
* consider_inner_null means that the NULL partition only contains
|
|
* NULL values as the key values, so the merged partition will do so;
|
|
* treat it as the NULL partition of the join relation.
|
|
*/
|
|
if (jointype == JOIN_FULL)
|
|
*null_index = merge_partition_with_dummy(inner_map, inner_null,
|
|
next_index);
|
|
}
|
|
else
|
|
{
|
|
Assert(consider_outer_null && consider_inner_null);
|
|
Assert(outer_has_null);
|
|
Assert(inner_has_null);
|
|
|
|
/*
|
|
* If this is an outer join, the NULL partition on the outer side (and
|
|
* that on the inner side if this is a FULL join) have to be scanned
|
|
* all the way anyway, so merge them. Note that each of the NULL
|
|
* partitions isn't merged yet, so they should be merged successfully.
|
|
* Like the above, each of the NULL partitions only contains NULL
|
|
* values as the key values, so the merged partition will do so; treat
|
|
* it as the NULL partition of the join relation.
|
|
*
|
|
* Note: if this an INNER/SEMI join, the join clause will never be
|
|
* satisfied by two NULL values (see comments above), so both the NULL
|
|
* partitions can be eliminated.
|
|
*/
|
|
if (IS_OUTER_JOIN(jointype))
|
|
{
|
|
Assert(jointype != JOIN_RIGHT);
|
|
*null_index = merge_matching_partitions(outer_map, inner_map,
|
|
outer_null, inner_null,
|
|
next_index);
|
|
Assert(*null_index >= 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* merge_default_partitions
|
|
* Merge the default partitions from a join's outer and inner sides.
|
|
*
|
|
* If the merged partition produced from them is the default partition of the
|
|
* join relation, *default_index is set to the index of the merged partition.
|
|
*/
|
|
static void
|
|
merge_default_partitions(PartitionMap *outer_map,
|
|
PartitionMap *inner_map,
|
|
bool outer_has_default,
|
|
bool inner_has_default,
|
|
int outer_default,
|
|
int inner_default,
|
|
JoinType jointype,
|
|
int *next_index,
|
|
int *default_index)
|
|
{
|
|
int outer_merged_index = -1;
|
|
int inner_merged_index = -1;
|
|
|
|
Assert(outer_has_default || inner_has_default);
|
|
|
|
/* Get the merged partition indexes for the default partitions. */
|
|
if (outer_has_default)
|
|
{
|
|
Assert(outer_default >= 0 && outer_default < outer_map->nparts);
|
|
outer_merged_index = outer_map->merged_indexes[outer_default];
|
|
}
|
|
if (inner_has_default)
|
|
{
|
|
Assert(inner_default >= 0 && inner_default < inner_map->nparts);
|
|
inner_merged_index = inner_map->merged_indexes[inner_default];
|
|
}
|
|
|
|
if (outer_has_default && !inner_has_default)
|
|
{
|
|
/*
|
|
* If this is an outer join, the default partition on the outer side
|
|
* has to be scanned all the way anyway; if we have not yet assigned a
|
|
* partition, merge the default partition with a dummy partition on
|
|
* the other side. The merged partition will act as the default
|
|
* partition of the join relation (see comments in
|
|
* process_inner_partition()).
|
|
*/
|
|
if (IS_OUTER_JOIN(jointype))
|
|
{
|
|
Assert(jointype != JOIN_RIGHT);
|
|
if (outer_merged_index == -1)
|
|
{
|
|
Assert(*default_index == -1);
|
|
*default_index = merge_partition_with_dummy(outer_map,
|
|
outer_default,
|
|
next_index);
|
|
}
|
|
else
|
|
Assert(*default_index == outer_merged_index);
|
|
}
|
|
else
|
|
Assert(*default_index == -1);
|
|
}
|
|
else if (!outer_has_default && inner_has_default)
|
|
{
|
|
/*
|
|
* If this is a FULL join, the default partition on the inner side has
|
|
* to be scanned all the way anyway; if we have not yet assigned a
|
|
* partition, merge the default partition with a dummy partition on
|
|
* the other side. The merged partition will act as the default
|
|
* partition of the join relation (see comments in
|
|
* process_outer_partition()).
|
|
*/
|
|
if (jointype == JOIN_FULL)
|
|
{
|
|
if (inner_merged_index == -1)
|
|
{
|
|
Assert(*default_index == -1);
|
|
*default_index = merge_partition_with_dummy(inner_map,
|
|
inner_default,
|
|
next_index);
|
|
}
|
|
else
|
|
Assert(*default_index == inner_merged_index);
|
|
}
|
|
else
|
|
Assert(*default_index == -1);
|
|
}
|
|
else
|
|
{
|
|
Assert(outer_has_default && inner_has_default);
|
|
|
|
/*
|
|
* The default partitions have to be joined with each other, so merge
|
|
* them. Note that each of the default partitions isn't merged yet
|
|
* (see, process_outer_partition()/process_inner_partition()), so they
|
|
* should be merged successfully. The merged partition will act as
|
|
* the default partition of the join relation.
|
|
*/
|
|
Assert(outer_merged_index == -1);
|
|
Assert(inner_merged_index == -1);
|
|
Assert(*default_index == -1);
|
|
*default_index = merge_matching_partitions(outer_map,
|
|
inner_map,
|
|
outer_default,
|
|
inner_default,
|
|
next_index);
|
|
Assert(*default_index >= 0);
|
|
}
|
|
}
|
|
|
|
/*
 * merge_partition_with_dummy
 *      Assign given partition a new partition of a join relation
 *
 * Note: The caller assumes that the given partition doesn't have a non-dummy
 * matching partition on the other side, but if the given partition finds the
 * matching partition later, we will adjust the assignment.
 */
static int
merge_partition_with_dummy(PartitionMap *map, int index, int *next_index)
{
    int         merged_index = *next_index;

    Assert(index >= 0 && index < map->nparts);
    Assert(map->merged_indexes[index] == -1);
    Assert(!map->merged[index]);
    map->merged_indexes[index] = merged_index;
    /* Leave the merged flag alone! */
    *next_index = *next_index + 1;
    return merged_index;
}
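
/*
 * Usage note (illustrative, not from the original comments): in a LEFT join
 * where an outer partition has no overlapping inner partition and the inner
 * side has no default partition, process_outer_partition() ends up here, so
 * the outer partition gets a brand-new merged index while the inner map is
 * left untouched.  Because the merged flag is deliberately not set, a later
 * call to merge_matching_partitions() can still re-merge this partition with
 * a real partition from the other side if one turns up.
 */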
|
|
|
|
/*
|
|
* fix_merged_indexes
|
|
* Adjust merged indexes of re-merged partitions
|
|
*/
|
|
static void
|
|
fix_merged_indexes(PartitionMap *outer_map, PartitionMap *inner_map,
|
|
int nmerged, List *merged_indexes)
|
|
{
|
|
int *new_indexes;
|
|
int merged_index;
|
|
int i;
|
|
ListCell *lc;
|
|
|
|
Assert(nmerged > 0);
|
|
|
|
new_indexes = (int *) palloc(sizeof(int) * nmerged);
|
|
for (i = 0; i < nmerged; i++)
|
|
new_indexes[i] = -1;
|
|
|
|
/* Build the mapping of old merged indexes to new merged indexes. */
|
|
if (outer_map->did_remapping)
|
|
{
|
|
for (i = 0; i < outer_map->nparts; i++)
|
|
{
|
|
merged_index = outer_map->old_indexes[i];
|
|
if (merged_index >= 0)
|
|
new_indexes[merged_index] = outer_map->merged_indexes[i];
|
|
}
|
|
}
|
|
if (inner_map->did_remapping)
|
|
{
|
|
for (i = 0; i < inner_map->nparts; i++)
|
|
{
|
|
merged_index = inner_map->old_indexes[i];
|
|
if (merged_index >= 0)
|
|
new_indexes[merged_index] = inner_map->merged_indexes[i];
|
|
}
|
|
}
|
|
|
|
/* Fix the merged_indexes list using the mapping. */
|
|
foreach(lc, merged_indexes)
|
|
{
|
|
merged_index = lfirst_int(lc);
|
|
Assert(merged_index >= 0);
|
|
if (new_indexes[merged_index] >= 0)
|
|
lfirst_int(lc) = new_indexes[merged_index];
|
|
}
|
|
|
|
pfree(new_indexes);
|
|
}
|
|
|
|
/*
|
|
* generate_matching_part_pairs
|
|
* Generate a pair of lists of partitions that produce merged partitions
|
|
*
|
|
* The lists of partitions are built in the order of merged partition indexes,
|
|
* and returned in *outer_parts and *inner_parts.
|
|
*/
|
|
static void
|
|
generate_matching_part_pairs(RelOptInfo *outer_rel, RelOptInfo *inner_rel,
|
|
PartitionMap *outer_map, PartitionMap *inner_map,
|
|
int nmerged,
|
|
List **outer_parts, List **inner_parts)
|
|
{
|
|
int outer_nparts = outer_map->nparts;
|
|
int inner_nparts = inner_map->nparts;
|
|
int *outer_indexes;
|
|
int *inner_indexes;
|
|
int max_nparts;
|
|
int i;
|
|
|
|
Assert(nmerged > 0);
|
|
Assert(*outer_parts == NIL);
|
|
Assert(*inner_parts == NIL);
|
|
|
|
outer_indexes = (int *) palloc(sizeof(int) * nmerged);
|
|
inner_indexes = (int *) palloc(sizeof(int) * nmerged);
|
|
for (i = 0; i < nmerged; i++)
|
|
outer_indexes[i] = inner_indexes[i] = -1;
|
|
|
|
/* Set pairs of matching partitions. */
|
|
Assert(outer_nparts == outer_rel->nparts);
|
|
Assert(inner_nparts == inner_rel->nparts);
|
|
max_nparts = Max(outer_nparts, inner_nparts);
|
|
for (i = 0; i < max_nparts; i++)
|
|
{
|
|
if (i < outer_nparts)
|
|
{
|
|
int merged_index = outer_map->merged_indexes[i];
|
|
|
|
if (merged_index >= 0)
|
|
{
|
|
Assert(merged_index < nmerged);
|
|
outer_indexes[merged_index] = i;
|
|
}
|
|
}
|
|
if (i < inner_nparts)
|
|
{
|
|
int merged_index = inner_map->merged_indexes[i];
|
|
|
|
if (merged_index >= 0)
|
|
{
|
|
Assert(merged_index < nmerged);
|
|
inner_indexes[merged_index] = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Build the list pairs. */
|
|
for (i = 0; i < nmerged; i++)
|
|
{
|
|
int outer_index = outer_indexes[i];
|
|
int inner_index = inner_indexes[i];
|
|
|
|
/*
|
|
* If both partitions are dummy, it means the merged partition that
|
|
* had been assigned to the outer/inner partition was removed when
|
|
* re-merging the outer/inner partition in
|
|
* merge_matching_partitions(); ignore the merged partition.
|
|
*/
|
|
if (outer_index == -1 && inner_index == -1)
|
|
continue;
|
|
|
|
*outer_parts = lappend(*outer_parts, outer_index >= 0 ?
|
|
outer_rel->part_rels[outer_index] : NULL);
|
|
*inner_parts = lappend(*inner_parts, inner_index >= 0 ?
|
|
inner_rel->part_rels[inner_index] : NULL);
|
|
}
|
|
|
|
pfree(outer_indexes);
|
|
pfree(inner_indexes);
|
|
}
|
|
|
|
/*
|
|
* build_merged_partition_bounds
|
|
* Create a PartitionBoundInfo struct from merged partition bounds
|
|
*/
|
|
static PartitionBoundInfo
|
|
build_merged_partition_bounds(char strategy, List *merged_datums,
|
|
List *merged_kinds, List *merged_indexes,
|
|
int null_index, int default_index)
|
|
{
|
|
PartitionBoundInfo merged_bounds;
|
|
int ndatums = list_length(merged_datums);
|
|
int pos;
|
|
ListCell *lc;
|
|
|
|
merged_bounds = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
|
|
merged_bounds->strategy = strategy;
|
|
merged_bounds->ndatums = ndatums;
|
|
|
|
merged_bounds->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
|
|
pos = 0;
|
|
foreach(lc, merged_datums)
|
|
merged_bounds->datums[pos++] = (Datum *) lfirst(lc);
|
|
|
|
if (strategy == PARTITION_STRATEGY_RANGE)
|
|
{
|
|
Assert(list_length(merged_kinds) == ndatums);
|
|
merged_bounds->kind = (PartitionRangeDatumKind **)
|
|
palloc(sizeof(PartitionRangeDatumKind *) * ndatums);
|
|
pos = 0;
|
|
foreach(lc, merged_kinds)
|
|
merged_bounds->kind[pos++] = (PartitionRangeDatumKind *) lfirst(lc);
|
|
|
|
/* There are ndatums+1 indexes in the case of range partitioning. */
|
|
merged_indexes = lappend_int(merged_indexes, -1);
|
|
ndatums++;
|
|
}
|
|
else
|
|
{
|
|
Assert(strategy == PARTITION_STRATEGY_LIST);
|
|
Assert(merged_kinds == NIL);
|
|
merged_bounds->kind = NULL;
|
|
}
|
|
|
|
/* interleaved_parts is always NULL for join relations. */
|
|
merged_bounds->interleaved_parts = NULL;
|
|
|
|
Assert(list_length(merged_indexes) == ndatums);
|
|
merged_bounds->nindexes = ndatums;
|
|
merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums);
|
|
pos = 0;
|
|
foreach(lc, merged_indexes)
|
|
merged_bounds->indexes[pos++] = lfirst_int(lc);
|
|
|
|
merged_bounds->null_index = null_index;
|
|
merged_bounds->default_index = default_index;
|
|
|
|
return merged_bounds;
|
|
}
|
|
|
|
/*
 * get_range_partition
 *      Get the next non-dummy partition of a range-partitioned relation,
 *      returning the index of that partition
 *
 * *lb and *ub are set to the lower and upper bounds of that partition
 * respectively, and *lb_pos is advanced to the next lower bound, if any.
 */
static int
get_range_partition(RelOptInfo *rel,
                    PartitionBoundInfo bi,
                    int *lb_pos,
                    PartitionRangeBound *lb,
                    PartitionRangeBound *ub)
{
    int         part_index;

    Assert(bi->strategy == PARTITION_STRATEGY_RANGE);

    do
    {
        part_index = get_range_partition_internal(bi, lb_pos, lb, ub);
        if (part_index == -1)
            return -1;
    } while (is_dummy_partition(rel, part_index));

    return part_index;
}
|
|
|
|
static int
get_range_partition_internal(PartitionBoundInfo bi,
                             int *lb_pos,
                             PartitionRangeBound *lb,
                             PartitionRangeBound *ub)
{
    /* Return the index as -1 if we've exhausted all lower bounds. */
    if (*lb_pos >= bi->ndatums)
        return -1;

    /* A lower bound should have at least one more bound after it. */
    Assert(*lb_pos + 1 < bi->ndatums);

    /* Set the lower bound. */
    lb->index = bi->indexes[*lb_pos];
    lb->datums = bi->datums[*lb_pos];
    lb->kind = bi->kind[*lb_pos];
    lb->lower = true;
    /* Set the upper bound. */
    ub->index = bi->indexes[*lb_pos + 1];
    ub->datums = bi->datums[*lb_pos + 1];
    ub->kind = bi->kind[*lb_pos + 1];
    ub->lower = false;

    /* The index assigned to an upper bound should be valid. */
    Assert(ub->index >= 0);

    /*
     * Advance the position to the next lower bound.  If there are no bounds
     * left beyond the upper bound, we have reached the last lower bound.
     */
    if (*lb_pos + 2 >= bi->ndatums)
        *lb_pos = bi->ndatums;
    else
    {
        /*
         * If the index assigned to the bound next to the upper bound isn't
         * valid, that is the next lower bound; else, the upper bound is also
         * the lower bound of the next range partition.
         */
        if (bi->indexes[*lb_pos + 2] < 0)
            *lb_pos = *lb_pos + 2;
        else
            *lb_pos = *lb_pos + 1;
    }

    return ub->index;
}
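
/*
 * Illustration (hypothetical bounds, added for clarity): for partitions
 * p0 = [1, 10) and p1 = [20, 30), boundinfo->datums holds {1, 10, 20, 30} and
 * boundinfo->indexes holds {-1, 0, -1, 1, -1}.  Starting with *lb_pos = 0,
 * the first call returns partition 0 with bounds datums[0]/datums[1] and
 * advances *lb_pos to 2, since the gap means indexes[2] is -1; the second
 * call returns partition 1 and leaves *lb_pos past the end, so a third call
 * returns -1.
 */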
|
|
|
|
/*
 * compare_range_partitions
 *      Compare the bounds of two range partitions, and return true if the
 *      two partitions overlap, false otherwise
 *
 * *lb_cmpval is set to -1, 0, or 1 if the outer partition's lower bound is
 * lower than, equal to, or higher than the inner partition's lower bound
 * respectively.  Likewise, *ub_cmpval is set to -1, 0, or 1 if the outer
 * partition's upper bound is lower than, equal to, or higher than the inner
 * partition's upper bound respectively.
 */
static bool
compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
                         Oid *partcollations,
                         PartitionRangeBound *outer_lb,
                         PartitionRangeBound *outer_ub,
                         PartitionRangeBound *inner_lb,
                         PartitionRangeBound *inner_ub,
                         int *lb_cmpval, int *ub_cmpval)
{
    /*
     * Check if the outer partition's upper bound is lower than the inner
     * partition's lower bound; if so the partitions aren't overlapping.
     */
    if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
                             outer_ub, inner_lb) < 0)
    {
        *lb_cmpval = -1;
        *ub_cmpval = -1;
        return false;
    }

    /*
     * Check if the outer partition's lower bound is higher than the inner
     * partition's upper bound; if so the partitions aren't overlapping.
     */
    if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
                             outer_lb, inner_ub) > 0)
    {
        *lb_cmpval = 1;
        *ub_cmpval = 1;
        return false;
    }

    /* All other cases indicate overlapping partitions. */
    *lb_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
                                      outer_lb, inner_lb);
    *ub_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
                                      outer_ub, inner_ub);
    return true;
}
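
/*
 * Example (hypothetical single-column ranges, added for illustration): an
 * outer partition [10, 20) and an inner partition [15, 25) overlap, and the
 * function reports *lb_cmpval = -1 and *ub_cmpval = -1.  An outer partition
 * [10, 20) and an inner partition [20, 30) do not overlap: the outer upper
 * bound and the inner lower bound have equal datums, but partition_rbound_cmp
 * treats an upper bound as smaller than a lower bound, so the first test
 * above already returns false.
 */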
|
|
|
|
/*
 * get_merged_range_bounds
 *      Given the bounds of range partitions to be joined, determine the
 *      bounds of a merged partition produced from the range partitions
 *
 * *merged_lb and *merged_ub are set to the lower and upper bounds of the
 * merged partition.
 */
static void
get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
                        Oid *partcollations, JoinType jointype,
                        PartitionRangeBound *outer_lb,
                        PartitionRangeBound *outer_ub,
                        PartitionRangeBound *inner_lb,
                        PartitionRangeBound *inner_ub,
                        int lb_cmpval, int ub_cmpval,
                        PartitionRangeBound *merged_lb,
                        PartitionRangeBound *merged_ub)
{
    Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
                                outer_lb, inner_lb) == lb_cmpval);
    Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
                                outer_ub, inner_ub) == ub_cmpval);

    switch (jointype)
    {
        case JOIN_INNER:
        case JOIN_SEMI:

            /*
             * An INNER/SEMI join will have the rows that fit both sides, so
             * the lower bound of the merged partition will be the higher of
             * the two lower bounds, and the upper bound of the merged
             * partition will be the lower of the two upper bounds.
             */
            *merged_lb = (lb_cmpval > 0) ? *outer_lb : *inner_lb;
            *merged_ub = (ub_cmpval < 0) ? *outer_ub : *inner_ub;
            break;

        case JOIN_LEFT:
        case JOIN_ANTI:

            /*
             * A LEFT/ANTI join will have all the rows from the outer side, so
             * the bounds of the merged partition will be the same as the
             * outer bounds.
             */
            *merged_lb = *outer_lb;
            *merged_ub = *outer_ub;
            break;

        case JOIN_FULL:

            /*
             * A FULL join will have all the rows from both sides, so the
             * lower bound of the merged partition will be the lower of the
             * two lower bounds, and the upper bound of the merged partition
             * will be the higher of the two upper bounds.
             */
            *merged_lb = (lb_cmpval < 0) ? *outer_lb : *inner_lb;
            *merged_ub = (ub_cmpval > 0) ? *outer_ub : *inner_ub;
            break;

        default:
            elog(ERROR, "unrecognized join type: %d", (int) jointype);
    }
}
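
/*
 * Worked example (for illustration only; the ranges are hypothetical): with
 * an outer partition covering [10, 20) and an inner partition covering
 * [15, 25), lb_cmpval < 0 and ub_cmpval < 0, so the switch above yields
 * [15, 20) for an INNER/SEMI join, [10, 20) for a LEFT/ANTI join, and
 * [10, 25) for a FULL join.
 */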
|
|
|
|
/*
|
|
* add_merged_range_bounds
|
|
* Add the bounds of a merged partition to the lists of range bounds
|
|
*/
|
|
static void
|
|
add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
|
|
Oid *partcollations,
|
|
PartitionRangeBound *merged_lb,
|
|
PartitionRangeBound *merged_ub,
|
|
int merged_index,
|
|
List **merged_datums,
|
|
List **merged_kinds,
|
|
List **merged_indexes)
|
|
{
|
|
int cmpval;
|
|
|
|
if (!*merged_datums)
|
|
{
|
|
/* First merged partition */
|
|
Assert(!*merged_kinds);
|
|
Assert(!*merged_indexes);
|
|
cmpval = 1;
|
|
}
|
|
else
|
|
{
|
|
PartitionRangeBound prev_ub;
|
|
|
|
Assert(*merged_datums);
|
|
Assert(*merged_kinds);
|
|
Assert(*merged_indexes);
|
|
|
|
/* Get the last upper bound. */
|
|
prev_ub.index = llast_int(*merged_indexes);
|
|
prev_ub.datums = (Datum *) llast(*merged_datums);
|
|
prev_ub.kind = (PartitionRangeDatumKind *) llast(*merged_kinds);
|
|
prev_ub.lower = false;
|
|
|
|
/*
|
|
* We pass lower1 = false to partition_rbound_cmp() to prevent it from
|
|
* considering the last upper bound to be smaller than the lower bound
|
|
* of the merged partition when the values of the two range bounds
|
|
* compare equal.
|
|
*/
|
|
cmpval = partition_rbound_cmp(partnatts, partsupfuncs, partcollations,
|
|
merged_lb->datums, merged_lb->kind,
|
|
false, &prev_ub);
|
|
Assert(cmpval >= 0);
|
|
}
|
|
|
|
/*
|
|
* If the lower bound is higher than the last upper bound, add the lower
|
|
* bound with the index as -1 indicating that that is a lower bound; else,
|
|
* the last upper bound will be reused as the lower bound of the merged
|
|
* partition, so skip this.
|
|
*/
|
|
if (cmpval > 0)
|
|
{
|
|
*merged_datums = lappend(*merged_datums, merged_lb->datums);
|
|
*merged_kinds = lappend(*merged_kinds, merged_lb->kind);
|
|
*merged_indexes = lappend_int(*merged_indexes, -1);
|
|
}
|
|
|
|
/* Add the upper bound and index of the merged partition. */
|
|
*merged_datums = lappend(*merged_datums, merged_ub->datums);
|
|
*merged_kinds = lappend(*merged_kinds, merged_ub->kind);
|
|
*merged_indexes = lappend_int(*merged_indexes, merged_index);
|
|
}
|
|
|
|
/*
 * partitions_are_ordered
 *      Determine whether the partitions described by 'boundinfo' are ordered,
 *      that is, partitions appearing earlier in the PartitionDesc sequence
 *      contain partition keys strictly less than those appearing later.
 *      Also, if NULL values are possible, they must come in the last
 *      partition defined in the PartitionDesc.  'live_parts' marks which
 *      partitions we should include when checking the ordering.  Partitions
 *      that do not appear in 'live_parts' are ignored.
 *
 *      If out of order, or there is insufficient info to know the order,
 *      then we return false.
 */
bool
partitions_are_ordered(PartitionBoundInfo boundinfo, Bitmapset *live_parts)
{
    Assert(boundinfo != NULL);

    switch (boundinfo->strategy)
    {
        case PARTITION_STRATEGY_RANGE:

            /*
             * RANGE-type partitioning guarantees that the partitions can be
             * scanned in the order that they're defined in the PartitionDesc
             * to provide sequential, non-overlapping ranges of tuples.
             * However, if a DEFAULT partition exists and it's contained
             * within live_parts, then the partitions are not ordered.
             */
            if (!partition_bound_has_default(boundinfo) ||
                !bms_is_member(boundinfo->default_index, live_parts))
                return true;
            break;

        case PARTITION_STRATEGY_LIST:

            /*
             * LIST partitions are ordered provided that none of live_parts
             * overlap with the partitioned table's interleaved partitions.
             */
            if (!bms_overlap(live_parts, boundinfo->interleaved_parts))
                return true;

            break;
        case PARTITION_STRATEGY_HASH:
            break;
    }

    return false;
}
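
/*
 * Example (illustrative, not part of the original commentary): a table
 * partitioned BY RANGE with p1 FOR VALUES FROM (1) TO (10) and p2 FOR VALUES
 * FROM (10) TO (20) is ordered so long as no DEFAULT partition is among the
 * live parts.  A table partitioned BY LIST with p1 FOR VALUES IN (1, 3) and
 * p2 FOR VALUES IN (2) is not ordered when p1 is live, because p1's values
 * straddle p2's and p1 is therefore recorded as an interleaved partition.
 */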
|
|
|
|
/*
|
|
* check_new_partition_bound
|
|
*
|
|
* Checks if the new partition's bound overlaps any of the existing partitions
|
|
* of parent. Also performs additional checks as necessary per strategy.
|
|
*/
|
|
void
|
|
check_new_partition_bound(char *relname, Relation parent,
|
|
PartitionBoundSpec *spec, ParseState *pstate)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
|
int with = -1;
|
|
bool overlap = false;
|
|
int overlap_location = -1;
|
|
|
|
if (spec->is_default)
|
|
{
|
|
/*
|
|
* The default partition bound never conflicts with any other
|
|
* partition's; if that's what we're attaching, the only possible
|
|
* problem is that one already exists, so check for that and we're
|
|
* done.
|
|
*/
|
|
if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
|
|
return;
|
|
|
|
/* Default partition already exists, error out. */
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
|
|
relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
|
|
parser_errposition(pstate, spec->location)));
|
|
}
|
|
|
|
switch (key->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_HASH:
|
|
{
|
|
Assert(spec->strategy == PARTITION_STRATEGY_HASH);
|
|
Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
|
|
|
|
if (partdesc->nparts > 0)
|
|
{
|
|
int greatest_modulus;
|
|
int remainder;
|
|
int offset;
|
|
|
|
/*
|
|
* Check rule that every modulus must be a factor of the
|
|
* next larger modulus. (For example, if you have a bunch
|
|
* of partitions that all have modulus 5, you can add a
|
|
* new partition with modulus 10 or a new partition with
|
|
* modulus 15, but you cannot add both a partition with
|
|
* modulus 10 and a partition with modulus 15, because 10
|
|
* is not a factor of 15.) We need only check the next
|
|
* smaller and next larger existing moduli, relying on
|
|
* previous enforcement of this rule to be sure that the
|
|
* rest are in line.
|
|
*/
|
|
|
|
/*
|
|
* Get the greatest (modulus, remainder) pair contained in
|
|
* boundinfo->datums that is less than or equal to the
|
|
* (spec->modulus, spec->remainder) pair.
|
|
*/
|
|
offset = partition_hash_bsearch(boundinfo,
|
|
spec->modulus,
|
|
spec->remainder);
|
|
if (offset < 0)
|
|
{
|
|
int next_modulus;
|
|
|
|
/*
|
|
* All existing moduli are greater or equal, so the
|
|
* new one must be a factor of the smallest one, which
|
|
* is first in the boundinfo.
|
|
*/
|
|
next_modulus = DatumGetInt32(boundinfo->datums[0][0]);
|
|
if (next_modulus % spec->modulus != 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("every hash partition modulus must be a factor of the next larger modulus"),
|
|
errdetail("The new modulus %d is not a factor of %d, the modulus of existing partition \"%s\".",
|
|
spec->modulus, next_modulus,
|
|
get_rel_name(partdesc->oids[0]))));
|
|
}
|
|
else
|
|
{
|
|
int prev_modulus;
|
|
|
|
/*
|
|
* We found the largest (modulus, remainder) pair less
|
|
* than or equal to the new one. That modulus must be
|
|
* a divisor of, or equal to, the new modulus.
|
|
*/
|
|
prev_modulus = DatumGetInt32(boundinfo->datums[offset][0]);
|
|
|
|
if (spec->modulus % prev_modulus != 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("every hash partition modulus must be a factor of the next larger modulus"),
|
|
errdetail("The new modulus %d is not divisible by %d, the modulus of existing partition \"%s\".",
|
|
spec->modulus,
|
|
prev_modulus,
|
|
get_rel_name(partdesc->oids[offset]))));
|
|
|
|
if (offset + 1 < boundinfo->ndatums)
|
|
{
|
|
int next_modulus;
|
|
|
|
/*
|
|
* Look at the next higher (modulus, remainder)
|
|
* pair. That could have the same modulus and a
|
|
* larger remainder than the new pair, in which
|
|
* case we're good. If it has a larger modulus,
|
|
* the new modulus must divide that one.
|
|
*/
|
|
next_modulus = DatumGetInt32(boundinfo->datums[offset + 1][0]);
|
|
|
|
if (next_modulus % spec->modulus != 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("every hash partition modulus must be a factor of the next larger modulus"),
|
|
errdetail("The new modulus %d is not a factor of %d, the modulus of existing partition \"%s\".",
|
|
spec->modulus, next_modulus,
|
|
get_rel_name(partdesc->oids[offset + 1]))));
|
|
}
|
|
}
|
|
|
|
greatest_modulus = boundinfo->nindexes;
|
|
remainder = spec->remainder;
|
|
|
|
/*
|
|
* Normally, the lowest remainder that could conflict with
|
|
* the new partition is equal to the remainder specified
|
|
* for the new partition, but when the new partition has a
|
|
* modulus higher than any used so far, we need to adjust.
|
|
*/
|
|
if (remainder >= greatest_modulus)
|
|
remainder = remainder % greatest_modulus;
|
|
|
|
/* Check every potentially-conflicting remainder. */
|
|
do
|
|
{
|
|
if (boundinfo->indexes[remainder] != -1)
|
|
{
|
|
overlap = true;
|
|
overlap_location = spec->location;
|
|
with = boundinfo->indexes[remainder];
|
|
break;
|
|
}
|
|
remainder += spec->modulus;
|
|
} while (remainder < greatest_modulus);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
{
|
|
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
|
|
|
|
if (partdesc->nparts > 0)
|
|
{
|
|
ListCell *cell;
|
|
|
|
Assert(boundinfo &&
|
|
boundinfo->strategy == PARTITION_STRATEGY_LIST &&
|
|
(boundinfo->ndatums > 0 ||
|
|
partition_bound_accepts_nulls(boundinfo) ||
|
|
partition_bound_has_default(boundinfo)));
|
|
|
|
foreach(cell, spec->listdatums)
|
|
{
|
|
Const *val = lfirst_node(Const, cell);
|
|
|
|
overlap_location = val->location;
|
|
if (!val->constisnull)
|
|
{
|
|
int offset;
|
|
bool equal;
|
|
|
|
offset = partition_list_bsearch(&key->partsupfunc[0],
|
|
key->partcollation,
|
|
boundinfo,
|
|
val->constvalue,
|
|
&equal);
|
|
if (offset >= 0 && equal)
|
|
{
|
|
overlap = true;
|
|
with = boundinfo->indexes[offset];
|
|
break;
|
|
}
|
|
}
|
|
else if (partition_bound_accepts_nulls(boundinfo))
|
|
{
|
|
overlap = true;
|
|
with = boundinfo->null_index;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
{
|
|
PartitionRangeBound *lower,
|
|
*upper;
|
|
int cmpval;
|
|
|
|
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
|
|
lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
|
|
upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
|
|
|
|
/*
|
|
* First check if the resulting range would be empty with
|
|
* specified lower and upper bounds. partition_rbound_cmp
|
|
* cannot return zero here, since the lower-bound flags are
|
|
* different.
|
|
*/
|
|
cmpval = partition_rbound_cmp(key->partnatts,
|
|
key->partsupfunc,
|
|
key->partcollation,
|
|
lower->datums, lower->kind,
|
|
true, upper);
|
|
Assert(cmpval != 0);
|
|
if (cmpval > 0)
|
|
{
|
|
/* Point to problematic key in the lower datums list. */
|
|
PartitionRangeDatum *datum = list_nth(spec->lowerdatums,
|
|
cmpval - 1);
|
|
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("empty range bound specified for partition \"%s\"",
|
|
relname),
|
|
errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
|
|
get_range_partbound_string(spec->lowerdatums),
|
|
get_range_partbound_string(spec->upperdatums)),
|
|
parser_errposition(pstate, datum->location)));
|
|
}
|
|
|
|
if (partdesc->nparts > 0)
|
|
{
|
|
int offset;
|
|
|
|
Assert(boundinfo &&
|
|
boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
|
|
(boundinfo->ndatums > 0 ||
|
|
partition_bound_has_default(boundinfo)));
|
|
|
|
/*
|
|
* Test whether the new lower bound (which is treated
|
|
* inclusively as part of the new partition) lies inside
|
|
* an existing partition, or in a gap.
|
|
*
|
|
* If it's inside an existing partition, the bound at
|
|
* offset + 1 will be the upper bound of that partition,
|
|
* and its index will be >= 0.
|
|
*
|
|
* If it's in a gap, the bound at offset + 1 will be the
|
|
* lower bound of the next partition, and its index will
|
|
* be -1. This is also true if there is no next partition,
|
|
* since the index array is initialised with an extra -1
|
|
* at the end.
|
|
*/
|
|
offset = partition_range_bsearch(key->partnatts,
|
|
key->partsupfunc,
|
|
key->partcollation,
|
|
boundinfo, lower,
|
|
&cmpval);
|
|
|
|
if (boundinfo->indexes[offset + 1] < 0)
|
|
{
|
|
/*
|
|
* Check that the new partition will fit in the gap.
|
|
* For it to fit, the new upper bound must be less
|
|
* than or equal to the lower bound of the next
|
|
* partition, if there is one.
|
|
*/
|
|
if (offset + 1 < boundinfo->ndatums)
|
|
{
|
|
Datum *datums;
|
|
PartitionRangeDatumKind *kind;
|
|
bool is_lower;
|
|
|
|
datums = boundinfo->datums[offset + 1];
|
|
kind = boundinfo->kind[offset + 1];
|
|
is_lower = (boundinfo->indexes[offset + 1] == -1);
|
|
|
|
cmpval = partition_rbound_cmp(key->partnatts,
|
|
key->partsupfunc,
|
|
key->partcollation,
|
|
datums, kind,
|
|
is_lower, upper);
|
|
if (cmpval < 0)
|
|
{
|
|
/*
|
|
* Point to problematic key in the upper
|
|
* datums list.
|
|
*/
|
|
PartitionRangeDatum *datum =
|
|
list_nth(spec->upperdatums, abs(cmpval) - 1);
|
|
|
|
/*
|
|
* The new partition overlaps with the
|
|
* existing partition between offset + 1 and
|
|
* offset + 2.
|
|
*/
|
|
overlap = true;
|
|
overlap_location = datum->location;
|
|
with = boundinfo->indexes[offset + 2];
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The new partition overlaps with the existing
|
|
* partition between offset and offset + 1.
|
|
*/
|
|
PartitionRangeDatum *datum;
|
|
|
|
/*
|
|
* Point to problematic key in the list of lower
|
|
* datums; if we have equality, point to the first
|
|
* one.
|
|
*/
|
|
datum = cmpval == 0 ? linitial(spec->lowerdatums) :
|
|
list_nth(spec->lowerdatums, abs(cmpval) - 1);
|
|
overlap = true;
|
|
overlap_location = datum->location;
|
|
with = boundinfo->indexes[offset + 1];
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (overlap)
|
|
{
|
|
Assert(with >= 0);
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("partition \"%s\" would overlap partition \"%s\"",
|
|
relname, get_rel_name(partdesc->oids[with])),
|
|
parser_errposition(pstate, overlap_location)));
|
|
}
|
|
}
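
/*
 * Illustrative example (hypothetical partitions, added for clarity): if a
 * range-partitioned table already has a partition FOR VALUES FROM (1) TO
 * (10), attaching a new partition FOR VALUES FROM (5) TO (15) trips the
 * overlap check above, and the error names both partitions and points at the
 * offending bound in the command text via parser_errposition().
 */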
|
|
|
|
/*
|
|
* check_default_partition_contents
|
|
*
|
|
* This function checks if there exists a row in the default partition that
|
|
* would properly belong to the new partition being added. If it finds one,
|
|
* it throws an error.
|
|
*/
|
|
void
|
|
check_default_partition_contents(Relation parent, Relation default_rel,
|
|
PartitionBoundSpec *new_spec)
|
|
{
|
|
List *new_part_constraints;
|
|
List *def_part_constraints;
|
|
List *all_parts;
|
|
ListCell *lc;
|
|
|
|
new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
|
|
? get_qual_for_list(parent, new_spec)
|
|
: get_qual_for_range(parent, new_spec, false);
|
|
def_part_constraints =
|
|
get_proposed_default_constraint(new_part_constraints);
|
|
|
|
/*
|
|
* Map the Vars in the constraint expression from parent's attnos to
|
|
* default_rel's.
|
|
*/
|
|
def_part_constraints =
|
|
map_partition_varattnos(def_part_constraints, 1, default_rel,
|
|
parent);
|
|
|
|
/*
|
|
* If the existing constraints on the default partition imply that it will
|
|
* not contain any row that would belong to the new partition, we can
|
|
* avoid scanning the default partition.
|
|
*/
|
|
if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
|
|
{
|
|
ereport(DEBUG1,
|
|
(errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints",
|
|
RelationGetRelationName(default_rel))));
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Scan the default partition and its subpartitions, and check for rows
|
|
* that do not satisfy the revised partition constraints.
|
|
*/
|
|
if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
|
|
all_parts = find_all_inheritors(RelationGetRelid(default_rel),
|
|
AccessExclusiveLock, NULL);
|
|
else
|
|
all_parts = list_make1_oid(RelationGetRelid(default_rel));
|
|
|
|
foreach(lc, all_parts)
|
|
{
|
|
Oid part_relid = lfirst_oid(lc);
|
|
Relation part_rel;
|
|
Expr *partition_constraint;
|
|
EState *estate;
|
|
ExprState *partqualstate = NULL;
|
|
Snapshot snapshot;
|
|
ExprContext *econtext;
|
|
TableScanDesc scan;
|
|
MemoryContext oldCxt;
|
|
TupleTableSlot *tupslot;
|
|
|
|
/* Lock already taken above. */
|
|
if (part_relid != RelationGetRelid(default_rel))
|
|
{
|
|
part_rel = table_open(part_relid, NoLock);
|
|
|
|
            /*
             * Map the Vars in the constraint expression from default_rel's
             * attnos to the sub-partition's.
             */
|
|
partition_constraint = make_ands_explicit(def_part_constraints);
|
|
partition_constraint = (Expr *)
|
|
map_partition_varattnos((List *) partition_constraint, 1,
|
|
part_rel, default_rel);
|
|
|
|
/*
|
|
* If the partition constraints on default partition child imply
|
|
* that it will not contain any row that would belong to the new
|
|
* partition, we can avoid scanning the child table.
|
|
*/
|
|
if (PartConstraintImpliedByRelConstraint(part_rel,
|
|
def_part_constraints))
|
|
{
|
|
ereport(DEBUG1,
|
|
(errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints",
|
|
RelationGetRelationName(part_rel))));
|
|
|
|
table_close(part_rel, NoLock);
|
|
continue;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
part_rel = default_rel;
|
|
partition_constraint = make_ands_explicit(def_part_constraints);
|
|
}
|
|
|
|
/*
|
|
* Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
|
|
* scanned.
|
|
*/
|
|
if (part_rel->rd_rel->relkind != RELKIND_RELATION)
|
|
{
|
|
if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
|
|
ereport(WARNING,
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
|
errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
|
|
RelationGetRelationName(part_rel),
|
|
RelationGetRelationName(default_rel))));
|
|
|
|
if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
|
|
table_close(part_rel, NoLock);
|
|
|
|
continue;
|
|
}
|
|
|
|
estate = CreateExecutorState();
|
|
|
|
/* Build expression execution states for partition check quals */
|
|
partqualstate = ExecPrepareExpr(partition_constraint, estate);
|
|
|
|
econtext = GetPerTupleExprContext(estate);
|
|
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
|
tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
|
|
scan = table_beginscan(part_rel, snapshot, 0, NULL);
|
|
|
|
/*
|
|
* Switch to per-tuple memory context and reset it for each tuple
|
|
* produced, so we don't leak memory.
|
|
*/
|
|
oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
|
|
|
|
while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
|
|
{
|
|
econtext->ecxt_scantuple = tupslot;
|
|
|
|
if (!ExecCheck(partqualstate, econtext))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
|
errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
|
|
RelationGetRelationName(default_rel)),
|
|
errtable(default_rel)));
|
|
|
|
ResetExprContext(econtext);
|
|
CHECK_FOR_INTERRUPTS();
|
|
}
|
|
|
|
MemoryContextSwitchTo(oldCxt);
|
|
table_endscan(scan);
|
|
UnregisterSnapshot(snapshot);
|
|
ExecDropSingleTupleTableSlot(tupslot);
|
|
FreeExecutorState(estate);
|
|
|
|
if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
|
|
table_close(part_rel, NoLock); /* keep the lock until commit */
|
|
}
|
|
}
|
|
|
|
/*
 * get_hash_partition_greatest_modulus
 *
 * Returns the greatest modulus of the hash partition bound.
 * This is no longer used in the core code, but we keep it around
 * in case external modules are using it.
 */
int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
{
    Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
    return bound->nindexes;
}
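
/*
 * Illustrative usage sketch for an external module (hypothetical caller, not
 * part of core): for a hash-partitioned table whose largest modulus is 8, the
 * bound info has nindexes == 8, so
 *
 *      greatest = get_hash_partition_greatest_modulus(boundinfo);
 *
 * simply returns 8.
 */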
|
|
|
|
/*
|
|
* make_one_partition_rbound
|
|
*
|
|
* Return a PartitionRangeBound given a list of PartitionRangeDatum elements
|
|
* and a flag telling whether the bound is lower or not. Made into a function
|
|
* because there are multiple sites that want to use this facility.
|
|
*/
|
|
static PartitionRangeBound *
|
|
make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
|
|
{
|
|
PartitionRangeBound *bound;
|
|
ListCell *lc;
|
|
int i;
|
|
|
|
Assert(datums != NIL);
|
|
|
|
bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
|
|
bound->index = index;
|
|
bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
|
|
bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
|
|
sizeof(PartitionRangeDatumKind));
|
|
bound->lower = lower;
|
|
|
|
i = 0;
|
|
foreach(lc, datums)
|
|
{
|
|
PartitionRangeDatum *datum = lfirst_node(PartitionRangeDatum, lc);
|
|
|
|
/* What's contained in this range datum? */
|
|
bound->kind[i] = datum->kind;
|
|
|
|
if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
|
|
{
|
|
Const *val = castNode(Const, datum->value);
|
|
|
|
if (val->constisnull)
|
|
elog(ERROR, "invalid range bound datum");
|
|
bound->datums[i] = val->constvalue;
|
|
}
|
|
|
|
i++;
|
|
}
|
|
|
|
return bound;
|
|
}
|
|
|
|
/*
|
|
* partition_rbound_cmp
|
|
*
|
|
* For two range bounds this decides whether the 1st one (specified by
|
|
* datums1, kind1, and lower1) is <, =, or > the bound specified in *b2.
|
|
*
|
|
* 0 is returned if they are equal, otherwise a non-zero integer whose sign
|
|
* indicates the ordering, and whose absolute value gives the 1-based
|
|
* partition key number of the first mismatching column.
|
|
*
|
|
* partnatts, partsupfunc and partcollation give the number of attributes in the
|
|
* bounds to be compared, comparison function to be used and the collations of
|
|
* attributes, respectively.
|
|
*
|
|
* Note that if the values of the two range bounds compare equal, then we take
|
|
* into account whether they are upper or lower bounds, and an upper bound is
|
|
* considered to be smaller than a lower bound. This is important to the way
|
|
* that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
|
|
* structure, which only stores the upper bound of a common boundary between
|
|
* two contiguous partitions.
|
|
*/
|
|
static int32
|
|
partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
|
|
Oid *partcollation,
|
|
Datum *datums1, PartitionRangeDatumKind *kind1,
|
|
bool lower1, PartitionRangeBound *b2)
|
|
{
|
|
int32 colnum = 0;
|
|
int32 cmpval = 0; /* placate compiler */
|
|
int i;
|
|
Datum *datums2 = b2->datums;
|
|
PartitionRangeDatumKind *kind2 = b2->kind;
|
|
bool lower2 = b2->lower;
|
|
|
|
for (i = 0; i < partnatts; i++)
|
|
{
|
|
/* Track column number in case we need it for result */
|
|
colnum++;
|
|
|
|
/*
|
|
* First, handle cases where the column is unbounded, which should not
|
|
* invoke the comparison procedure, and should not consider any later
|
|
* columns. Note that the PartitionRangeDatumKind enum elements
|
|
* compare the same way as the values they represent.
|
|
*/
|
|
if (kind1[i] < kind2[i])
|
|
return -colnum;
|
|
else if (kind1[i] > kind2[i])
|
|
return colnum;
|
|
else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
|
|
{
|
|
/*
|
|
* The column bounds are both MINVALUE or both MAXVALUE. No later
|
|
* columns should be considered, but we still need to compare
|
|
* whether they are upper or lower bounds.
|
|
*/
|
|
break;
|
|
}
|
|
|
|
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
|
|
partcollation[i],
|
|
datums1[i],
|
|
datums2[i]));
|
|
if (cmpval != 0)
|
|
break;
|
|
}
|
|
|
|
	/*
	 * If the comparison is anything other than equal, we're done.  If they
	 * compare equal though, we still have to consider whether the boundaries
	 * are inclusive or exclusive.  An exclusive bound is considered the
	 * smaller of the two.
	 */
if (cmpval == 0 && lower1 != lower2)
|
|
cmpval = lower1 ? 1 : -1;
|
|
|
|
return cmpval == 0 ? 0 : (cmpval < 0 ? -colnum : colnum);
|
|
}
|
|
|
|
/*
 * partition_rbound_datum_cmp
 *
 * Return whether the range bound (specified in rb_datums and rb_kind)
 * is <, =, or > the partition key of the tuple (tuple_datums)
 *
 * n_tuple_datums, partsupfunc and partcollation give the number of attributes
 * in the bounds to be compared, the comparison functions to be used, and the
 * collations of the attributes, respectively.
 */
int32
|
|
partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
|
|
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
|
|
Datum *tuple_datums, int n_tuple_datums)
|
|
{
|
|
int i;
|
|
int32 cmpval = -1;
|
|
|
|
for (i = 0; i < n_tuple_datums; i++)
|
|
{
|
|
if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
|
|
return -1;
|
|
else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
|
|
return 1;
|
|
|
|
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
|
|
partcollation[i],
|
|
rb_datums[i],
|
|
tuple_datums[i]));
|
|
if (cmpval != 0)
|
|
break;
|
|
}
|
|
|
|
return cmpval;
|
|
}
|
|
|
|
/*
 * partition_hbound_cmp
 *
 * Compares modulus first, then remainder if modulus is equal.
 */
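/*
 * Example (added for illustration): under this ordering, (modulus, remainder)
 * pairs sort as (2, 0) < (2, 1) < (4, 1) < (8, 0).
 */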
static int32
|
|
partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
|
|
{
|
|
if (modulus1 < modulus2)
|
|
return -1;
|
|
if (modulus1 > modulus2)
|
|
return 1;
|
|
if (modulus1 == modulus2 && remainder1 != remainder2)
|
|
return (remainder1 > remainder2) ? 1 : -1;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * partition_list_bsearch
 *		Returns the index of the greatest bound datum that is less than or
 *		equal to the given value or -1 if all of the bound datums are greater
 *
 * *is_equal is set to true if the bound datum at the returned index is equal
 * to the input value.
 */
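/*
 * Example (added for illustration, assuming bound datums 10, 20 and 30 stored
 * at indexes 0..2): a probe value of 25 returns 1 with *is_equal = false, a
 * probe of 20 returns 1 with *is_equal = true, and a probe of 5 returns -1.
 */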
int
|
|
partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
|
|
PartitionBoundInfo boundinfo,
|
|
Datum value, bool *is_equal)
|
|
{
|
|
int lo,
|
|
hi,
|
|
mid;
|
|
|
|
lo = -1;
|
|
hi = boundinfo->ndatums - 1;
|
|
while (lo < hi)
|
|
{
|
|
int32 cmpval;
|
|
|
|
mid = (lo + hi + 1) / 2;
|
|
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
|
|
partcollation[0],
|
|
boundinfo->datums[mid][0],
|
|
value));
|
|
if (cmpval <= 0)
|
|
{
|
|
lo = mid;
|
|
*is_equal = (cmpval == 0);
|
|
if (*is_equal)
|
|
break;
|
|
}
|
|
else
|
|
hi = mid - 1;
|
|
}
|
|
|
|
return lo;
|
|
}
|
|
|
|
/*
|
|
* partition_range_bsearch
|
|
* Returns the index of the greatest range bound that is less than or
|
|
* equal to the given range bound or -1 if all of the range bounds are
|
|
* greater
|
|
*
|
|
* Upon return from this function, *cmpval is set to 0 if the bound at the
|
|
* returned index matches the input range bound exactly, otherwise a
|
|
* non-zero integer whose sign indicates the ordering, and whose absolute
|
|
* value gives the 1-based partition key number of the first mismatching
|
|
* column.
|
|
*/
|
|
static int
|
|
partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
|
|
Oid *partcollation,
|
|
PartitionBoundInfo boundinfo,
|
|
PartitionRangeBound *probe, int32 *cmpval)
|
|
{
|
|
int lo,
|
|
hi,
|
|
mid;
|
|
|
|
lo = -1;
|
|
hi = boundinfo->ndatums - 1;
|
|
while (lo < hi)
|
|
{
|
|
mid = (lo + hi + 1) / 2;
|
|
*cmpval = partition_rbound_cmp(partnatts, partsupfunc,
|
|
partcollation,
|
|
boundinfo->datums[mid],
|
|
boundinfo->kind[mid],
|
|
(boundinfo->indexes[mid] == -1),
|
|
probe);
|
|
if (*cmpval <= 0)
|
|
{
|
|
lo = mid;
|
|
if (*cmpval == 0)
|
|
break;
|
|
}
|
|
else
|
|
hi = mid - 1;
|
|
}
|
|
|
|
return lo;
|
|
}
|
|
|
|
/*
 * partition_range_datum_bsearch
 *		Returns the index of the greatest range bound that is less than or
 *		equal to the given tuple or -1 if all of the range bounds are greater
 *
 * *is_equal is set to true if the range bound at the returned index is equal
 * to the input tuple.
 */
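/*
 * Example (added for illustration, assuming single-column range bounds 1, 10
 * and 20 stored at indexes 0..2): a tuple with key value 15 returns 1 (the
 * bound 10) with *is_equal = false; a key value of 0 returns -1.
 */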
int
|
|
partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
|
|
PartitionBoundInfo boundinfo,
|
|
int nvalues, Datum *values, bool *is_equal)
|
|
{
|
|
int lo,
|
|
hi,
|
|
mid;
|
|
|
|
lo = -1;
|
|
hi = boundinfo->ndatums - 1;
|
|
while (lo < hi)
|
|
{
|
|
int32 cmpval;
|
|
|
|
mid = (lo + hi + 1) / 2;
|
|
cmpval = partition_rbound_datum_cmp(partsupfunc,
|
|
partcollation,
|
|
boundinfo->datums[mid],
|
|
boundinfo->kind[mid],
|
|
values,
|
|
nvalues);
|
|
if (cmpval <= 0)
|
|
{
|
|
lo = mid;
|
|
*is_equal = (cmpval == 0);
|
|
|
|
if (*is_equal)
|
|
break;
|
|
}
|
|
else
|
|
hi = mid - 1;
|
|
}
|
|
|
|
return lo;
|
|
}
|
|
|
|
/*
|
|
* partition_hash_bsearch
|
|
* Returns the index of the greatest (modulus, remainder) pair that is
|
|
* less than or equal to the given (modulus, remainder) pair or -1 if
|
|
* all of them are greater
|
|
*/
|
|
int
|
|
partition_hash_bsearch(PartitionBoundInfo boundinfo,
|
|
int modulus, int remainder)
|
|
{
|
|
int lo,
|
|
hi,
|
|
mid;
|
|
|
|
lo = -1;
|
|
hi = boundinfo->ndatums - 1;
|
|
while (lo < hi)
|
|
{
|
|
int32 cmpval,
|
|
bound_modulus,
|
|
bound_remainder;
|
|
|
|
mid = (lo + hi + 1) / 2;
|
|
bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
|
|
bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
|
|
cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
|
|
modulus, remainder);
|
|
if (cmpval <= 0)
|
|
{
|
|
lo = mid;
|
|
|
|
if (cmpval == 0)
|
|
break;
|
|
}
|
|
else
|
|
hi = mid - 1;
|
|
}
|
|
|
|
return lo;
|
|
}
|
|
|
|
/*
|
|
* qsort_partition_hbound_cmp
|
|
*
|
|
* Hash bounds are sorted by modulus, then by remainder.
|
|
*/
|
|
static int32
|
|
qsort_partition_hbound_cmp(const void *a, const void *b)
|
|
{
|
|
const PartitionHashBound *h1 = (const PartitionHashBound *) a;
|
|
const PartitionHashBound *h2 = (const PartitionHashBound *) b;
|
|
|
|
return partition_hbound_cmp(h1->modulus, h1->remainder,
|
|
h2->modulus, h2->remainder);
|
|
}
|
|
|
|
/*
|
|
* qsort_partition_list_value_cmp
|
|
*
|
|
* Compare two list partition bound datums.
|
|
*/
|
|
static int32
|
|
qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
|
|
{
|
|
Datum val1 = ((const PartitionListValue *) a)->value,
|
|
val2 = ((const PartitionListValue *) b)->value;
|
|
PartitionKey key = (PartitionKey) arg;
|
|
|
|
return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
|
|
key->partcollation[0],
|
|
val1, val2));
|
|
}
|
|
|
|
/*
|
|
* qsort_partition_rbound_cmp
|
|
*
|
|
* Used when sorting range bounds across all range partitions.
|
|
*/
|
|
static int32
|
|
qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
|
|
{
|
|
PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
|
|
PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
|
|
PartitionKey key = (PartitionKey) arg;
|
|
|
|
return compare_range_bounds(key->partnatts, key->partsupfunc,
|
|
key->partcollation,
|
|
b1, b2);
|
|
}
|
|
|
|
/*
 * get_partition_operator
 *
 * Return oid of the operator of the given strategy for the given partition
 * key column.  It is assumed that the partitioning key is of the same type as
 * the chosen partitioning opclass, or at least binary-compatible.  In the
 * latter case, *need_relabel is set to true if the opclass is not of a
 * polymorphic type (indicating a RelabelType node needed on top), otherwise
 * false.
 */
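/*
 * Example (added for illustration): for an int4 partition key column using
 * the default btree operator class, asking for BTLessStrategyNumber returns
 * the OID of the int4 "<" operator from the integer_ops operator family, and
 * *need_relabel is set to false.
 */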
static Oid
|
|
get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
|
|
bool *need_relabel)
|
|
{
|
|
Oid operoid;
|
|
|
|
/*
|
|
* Get the operator in the partitioning opfamily using the opclass'
|
|
* declared input type as both left- and righttype.
|
|
*/
|
|
operoid = get_opfamily_member(key->partopfamily[col],
|
|
key->partopcintype[col],
|
|
key->partopcintype[col],
|
|
strategy);
|
|
if (!OidIsValid(operoid))
|
|
elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
|
|
strategy, key->partopcintype[col], key->partopcintype[col],
|
|
key->partopfamily[col]);
|
|
|
|
/*
|
|
* If the partition key column is not of the same type as the operator
|
|
* class and not polymorphic, tell caller to wrap the non-Const expression
|
|
* in a RelabelType. This matches what parse_coerce.c does.
|
|
*/
|
|
*need_relabel = (key->parttypid[col] != key->partopcintype[col] &&
|
|
key->partopcintype[col] != RECORDOID &&
|
|
!IsPolymorphicType(key->partopcintype[col]));
|
|
|
|
return operoid;
|
|
}
|
|
|
|
/*
|
|
* make_partition_op_expr
|
|
* Returns an Expr for the given partition key column with arg1 and
|
|
* arg2 as its leftop and rightop, respectively
|
|
*/
|
|
static Expr *
|
|
make_partition_op_expr(PartitionKey key, int keynum,
|
|
uint16 strategy, Expr *arg1, Expr *arg2)
|
|
{
|
|
Oid operoid;
|
|
bool need_relabel = false;
|
|
Expr *result = NULL;
|
|
|
|
/* Get the correct btree operator for this partitioning column */
|
|
operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
|
|
|
|
	/*
	 * The chosen operator may be such that the non-Const operand needs to be
	 * coerced, so wrap it in a RelabelType if needed; see the comment in
	 * get_partition_operator().
	 */
if (!IsA(arg1, Const) &&
|
|
(need_relabel ||
|
|
key->partcollation[keynum] != key->parttypcoll[keynum]))
|
|
arg1 = (Expr *) makeRelabelType(arg1,
|
|
key->partopcintype[keynum],
|
|
-1,
|
|
key->partcollation[keynum],
|
|
COERCE_EXPLICIT_CAST);
|
|
|
|
/* Generate the actual expression */
|
|
switch (key->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_LIST:
|
|
{
|
|
List *elems = (List *) arg2;
|
|
int nelems = list_length(elems);
|
|
|
|
Assert(nelems >= 1);
|
|
Assert(keynum == 0);
|
|
|
|
if (nelems > 1 &&
|
|
!type_is_array(key->parttypid[keynum]))
|
|
{
|
|
ArrayExpr *arrexpr;
|
|
ScalarArrayOpExpr *saopexpr;
|
|
|
|
/* Construct an ArrayExpr for the right-hand inputs */
|
|
arrexpr = makeNode(ArrayExpr);
|
|
arrexpr->array_typeid =
|
|
get_array_type(key->parttypid[keynum]);
|
|
arrexpr->array_collid = key->parttypcoll[keynum];
|
|
arrexpr->element_typeid = key->parttypid[keynum];
|
|
arrexpr->elements = elems;
|
|
arrexpr->multidims = false;
|
|
arrexpr->location = -1;
|
|
|
|
/* Build leftop = ANY (rightop) */
|
|
saopexpr = makeNode(ScalarArrayOpExpr);
|
|
saopexpr->opno = operoid;
|
|
saopexpr->opfuncid = get_opcode(operoid);
|
|
saopexpr->hashfuncid = InvalidOid;
|
|
saopexpr->negfuncid = InvalidOid;
|
|
saopexpr->useOr = true;
|
|
saopexpr->inputcollid = key->partcollation[keynum];
|
|
saopexpr->args = list_make2(arg1, arrexpr);
|
|
saopexpr->location = -1;
|
|
|
|
result = (Expr *) saopexpr;
|
|
}
|
|
else
|
|
{
|
|
List *elemops = NIL;
|
|
ListCell *lc;
|
|
|
|
foreach(lc, elems)
|
|
{
|
|
Expr *elem = lfirst(lc),
|
|
*elemop;
|
|
|
|
elemop = make_opclause(operoid,
|
|
BOOLOID,
|
|
false,
|
|
arg1, elem,
|
|
InvalidOid,
|
|
key->partcollation[keynum]);
|
|
elemops = lappend(elemops, elemop);
|
|
}
|
|
|
|
result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
result = make_opclause(operoid,
|
|
BOOLOID,
|
|
false,
|
|
arg1, arg2,
|
|
InvalidOid,
|
|
key->partcollation[keynum]);
|
|
break;
|
|
|
|
case PARTITION_STRATEGY_HASH:
|
|
Assert(false);
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
 * get_qual_for_hash
 *
 * Returns a CHECK constraint expression to use as a hash partition's
 * constraint, given the parent relation and partition bound structure.
 *
 * The partition constraint for a hash partition is always a call to the
 * built-in function satisfies_hash_partition().
 */
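/*
 * Example (added for illustration; c1 and c2 are hypothetical columns): for a
 * partition declared FOR VALUES WITH (MODULUS 4, REMAINDER 1) of a table
 * hash-partitioned on (c1, c2), the generated constraint is a call such as
 *
 *		satisfies_hash_partition('<parent oid>'::oid, 4, 1, c1, c2)
 */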
static List *
|
|
get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
FuncExpr *fexpr;
|
|
Node *relidConst;
|
|
Node *modulusConst;
|
|
Node *remainderConst;
|
|
List *args;
|
|
ListCell *partexprs_item;
|
|
int i;
|
|
|
|
/* Fixed arguments. */
|
|
relidConst = (Node *) makeConst(OIDOID,
|
|
-1,
|
|
InvalidOid,
|
|
sizeof(Oid),
|
|
ObjectIdGetDatum(RelationGetRelid(parent)),
|
|
false,
|
|
true);
|
|
|
|
modulusConst = (Node *) makeConst(INT4OID,
|
|
-1,
|
|
InvalidOid,
|
|
sizeof(int32),
|
|
Int32GetDatum(spec->modulus),
|
|
false,
|
|
true);
|
|
|
|
remainderConst = (Node *) makeConst(INT4OID,
|
|
-1,
|
|
InvalidOid,
|
|
sizeof(int32),
|
|
Int32GetDatum(spec->remainder),
|
|
false,
|
|
true);
|
|
|
|
args = list_make3(relidConst, modulusConst, remainderConst);
|
|
partexprs_item = list_head(key->partexprs);
|
|
|
|
/* Add an argument for each key column. */
|
|
for (i = 0; i < key->partnatts; i++)
|
|
{
|
|
Node *keyCol;
|
|
|
|
/* Left operand */
|
|
if (key->partattrs[i] != 0)
|
|
{
|
|
keyCol = (Node *) makeVar(1,
|
|
key->partattrs[i],
|
|
key->parttypid[i],
|
|
key->parttypmod[i],
|
|
key->parttypcoll[i],
|
|
0);
|
|
}
|
|
else
|
|
{
|
|
keyCol = (Node *) copyObject(lfirst(partexprs_item));
|
|
partexprs_item = lnext(key->partexprs, partexprs_item);
|
|
}
|
|
|
|
args = lappend(args, keyCol);
|
|
}
|
|
|
|
fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
|
|
BOOLOID,
|
|
args,
|
|
InvalidOid,
|
|
InvalidOid,
|
|
COERCE_EXPLICIT_CALL);
|
|
|
|
return list_make1(fexpr);
|
|
}
|
|
|
|
/*
 * get_qual_for_list
 *
 * Returns an implicit-AND list of expressions to use as a list partition's
 * constraint, given the parent relation and partition bound structure.
 *
 * The function returns NIL for a default partition when it's the only
 * partition since in that case there is no constraint.
 */
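/*
 * Example (added for illustration; a is a hypothetical integer column): a
 * partition declared FOR VALUES IN (1, 2) gets the implicit-AND list
 *
 *		a IS NOT NULL,  a = ANY (ARRAY[1, 2])
 *
 * while FOR VALUES IN (1, NULL) gets the single expression
 *
 *		(a IS NULL) OR (a = 1)
 */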
static List *
|
|
get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
List *result;
|
|
Expr *keyCol;
|
|
Expr *opexpr;
|
|
NullTest *nulltest;
|
|
ListCell *cell;
|
|
List *elems = NIL;
|
|
bool list_has_null = false;
|
|
|
|
/*
|
|
* Only single-column list partitioning is supported, so we are worried
|
|
* only about the partition key with index 0.
|
|
*/
|
|
Assert(key->partnatts == 1);
|
|
|
|
/* Construct Var or expression representing the partition column */
|
|
if (key->partattrs[0] != 0)
|
|
keyCol = (Expr *) makeVar(1,
|
|
key->partattrs[0],
|
|
key->parttypid[0],
|
|
key->parttypmod[0],
|
|
key->parttypcoll[0],
|
|
0);
|
|
else
|
|
keyCol = (Expr *) copyObject(linitial(key->partexprs));
|
|
|
|
/*
|
|
* For default list partition, collect datums for all the partitions. The
|
|
* default partition constraint should check that the partition key is
|
|
* equal to none of those.
|
|
*/
|
|
if (spec->is_default)
|
|
{
|
|
int i;
|
|
int ndatums = 0;
|
|
PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
|
|
PartitionBoundInfo boundinfo = pdesc->boundinfo;
|
|
|
|
if (boundinfo)
|
|
{
|
|
ndatums = boundinfo->ndatums;
|
|
|
|
if (partition_bound_accepts_nulls(boundinfo))
|
|
list_has_null = true;
|
|
}
|
|
|
|
/*
|
|
* If default is the only partition, there need not be any partition
|
|
* constraint on it.
|
|
*/
|
|
if (ndatums == 0 && !list_has_null)
|
|
return NIL;
|
|
|
|
for (i = 0; i < ndatums; i++)
|
|
{
|
|
Const *val;
|
|
|
|
/*
|
|
* Construct Const from known-not-null datum. We must be careful
|
|
* to copy the value, because our result has to be able to outlive
|
|
* the relcache entry we're copying from.
|
|
*/
|
|
val = makeConst(key->parttypid[0],
|
|
key->parttypmod[0],
|
|
key->parttypcoll[0],
|
|
key->parttyplen[0],
|
|
datumCopy(*boundinfo->datums[i],
|
|
key->parttypbyval[0],
|
|
key->parttyplen[0]),
|
|
false, /* isnull */
|
|
key->parttypbyval[0]);
|
|
|
|
elems = lappend(elems, val);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Create list of Consts for the allowed values, excluding any nulls.
|
|
*/
|
|
foreach(cell, spec->listdatums)
|
|
{
|
|
Const *val = lfirst_node(Const, cell);
|
|
|
|
if (val->constisnull)
|
|
list_has_null = true;
|
|
else
|
|
elems = lappend(elems, copyObject(val));
|
|
}
|
|
}
|
|
|
|
if (elems)
|
|
{
|
|
/*
|
|
* Generate the operator expression from the non-null partition
|
|
* values.
|
|
*/
|
|
opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
|
|
keyCol, (Expr *) elems);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* If there are no partition values, we don't need an operator
|
|
* expression.
|
|
*/
|
|
opexpr = NULL;
|
|
}
|
|
|
|
if (!list_has_null)
|
|
{
|
|
/*
|
|
* Gin up a "col IS NOT NULL" test that will be ANDed with the main
|
|
* expression. This might seem redundant, but the partition routing
|
|
* machinery needs it.
|
|
*/
|
|
nulltest = makeNode(NullTest);
|
|
nulltest->arg = keyCol;
|
|
nulltest->nulltesttype = IS_NOT_NULL;
|
|
nulltest->argisrow = false;
|
|
nulltest->location = -1;
|
|
|
|
result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Gin up a "col IS NULL" test that will be OR'd with the main
|
|
* expression.
|
|
*/
|
|
nulltest = makeNode(NullTest);
|
|
nulltest->arg = keyCol;
|
|
nulltest->nulltesttype = IS_NULL;
|
|
nulltest->argisrow = false;
|
|
nulltest->location = -1;
|
|
|
|
if (opexpr)
|
|
{
|
|
Expr *or;
|
|
|
|
or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
|
|
result = list_make1(or);
|
|
}
|
|
else
|
|
result = list_make1(nulltest);
|
|
}
|
|
|
|
/*
|
|
* Note that, in general, applying NOT to a constraint expression doesn't
|
|
* necessarily invert the set of rows it accepts, because NOT (NULL) is
|
|
* NULL. However, the partition constraints we construct here never
|
|
* evaluate to NULL, so applying NOT works as intended.
|
|
*/
|
|
if (spec->is_default)
|
|
{
|
|
result = list_make1(make_ands_explicit(result));
|
|
result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* get_qual_for_range
|
|
*
|
|
* Returns an implicit-AND list of expressions to use as a range partition's
|
|
* constraint, given the parent relation and partition bound structure.
|
|
*
|
|
* For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
|
|
* as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
|
|
* generate an expression tree of the following form:
|
|
*
|
|
* (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
|
|
* AND
|
|
* (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
|
|
* AND
|
|
* (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
|
|
*
|
|
* It is often the case that a prefix of lower and upper bound tuples contains
|
|
* the same values, for example, (al = au), in which case, we will emit an
|
|
* expression tree of the following form:
|
|
*
|
|
* (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
|
|
* AND
|
|
* (a = al)
|
|
* AND
|
|
* (b > bl OR (b = bl AND c >= cl))
|
|
* AND
|
|
* (b < bu OR (b = bu AND c < cu))
|
|
*
|
|
* If a bound datum is either MINVALUE or MAXVALUE, these expressions are
|
|
* simplified using the fact that any value is greater than MINVALUE and less
|
|
* than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
|
|
* true, and we need not emit any expression for it, and the last line becomes
|
|
*
|
|
* (b < bu) OR (b = bu), which is simplified to (b <= bu)
|
|
*
|
|
* In most common cases with only one partition column, say a, the following
|
|
* expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
|
|
*
|
|
* For default partition, it returns the negation of the constraints of all
|
|
* the other partitions.
|
|
*
|
|
* External callers should pass for_default as false; we set it to true only
|
|
* when recursing.
|
|
*/
|
|
static List *
|
|
get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
|
|
bool for_default)
|
|
{
|
|
List *result = NIL;
|
|
ListCell *cell1,
|
|
*cell2,
|
|
*partexprs_item,
|
|
*partexprs_item_saved;
|
|
int i,
|
|
j;
|
|
PartitionRangeDatum *ldatum,
|
|
*udatum;
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
Expr *keyCol;
|
|
Const *lower_val,
|
|
*upper_val;
|
|
List *lower_or_arms,
|
|
*upper_or_arms;
|
|
int num_or_arms,
|
|
current_or_arm;
|
|
ListCell *lower_or_start_datum,
|
|
*upper_or_start_datum;
|
|
bool need_next_lower_arm,
|
|
need_next_upper_arm;
|
|
|
|
if (spec->is_default)
|
|
{
|
|
List *or_expr_args = NIL;
|
|
PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
|
|
Oid *inhoids = pdesc->oids;
|
|
int nparts = pdesc->nparts,
|
|
k;
|
|
|
|
for (k = 0; k < nparts; k++)
|
|
{
|
|
Oid inhrelid = inhoids[k];
|
|
HeapTuple tuple;
|
|
Datum datum;
|
|
PartitionBoundSpec *bspec;
|
|
|
|
tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(inhrelid));
|
|
if (!HeapTupleIsValid(tuple))
|
|
elog(ERROR, "cache lookup failed for relation %u", inhrelid);
|
|
|
|
datum = SysCacheGetAttrNotNull(RELOID, tuple,
|
|
Anum_pg_class_relpartbound);
|
|
bspec = (PartitionBoundSpec *)
|
|
stringToNode(TextDatumGetCString(datum));
|
|
if (!IsA(bspec, PartitionBoundSpec))
|
|
elog(ERROR, "expected PartitionBoundSpec");
|
|
|
|
if (!bspec->is_default)
|
|
{
|
|
List *part_qual;
|
|
|
|
part_qual = get_qual_for_range(parent, bspec, true);
|
|
|
|
/*
|
|
* AND the constraints of the partition and add to
|
|
* or_expr_args
|
|
*/
|
|
or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
|
|
? makeBoolExpr(AND_EXPR, part_qual, -1)
|
|
: linitial(part_qual));
|
|
}
|
|
ReleaseSysCache(tuple);
|
|
}
|
|
|
|
if (or_expr_args != NIL)
|
|
{
|
|
Expr *other_parts_constr;
|
|
|
|
			/*
			 * Combine the constraints obtained for non-default partitions
			 * using OR.  As requested, each of the OR's args omits the NOT
			 * NULL test on the partition keys (to avoid repeating it
			 * uselessly); add that test back here once.
			 */
other_parts_constr =
|
|
makeBoolExpr(AND_EXPR,
|
|
lappend(get_range_nulltest(key),
|
|
list_length(or_expr_args) > 1
|
|
? makeBoolExpr(OR_EXPR, or_expr_args,
|
|
-1)
|
|
: linitial(or_expr_args)),
|
|
-1);
|
|
|
|
/*
|
|
* Finally, the default partition contains everything *NOT*
|
|
* contained in the non-default partitions.
|
|
*/
|
|
result = list_make1(makeBoolExpr(NOT_EXPR,
|
|
list_make1(other_parts_constr), -1));
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* If it is the recursive call for default, we skip the get_range_nulltest
|
|
* to avoid accumulating the NullTest on the same keys for each partition.
|
|
*/
|
|
if (!for_default)
|
|
result = get_range_nulltest(key);
|
|
|
|
/*
|
|
* Iterate over the key columns and check if the corresponding lower and
|
|
* upper datums are equal using the btree equality operator for the
|
|
* column's type. If equal, we emit single keyCol = common_value
|
|
* expression. Starting from the first column for which the corresponding
|
|
* lower and upper bound datums are not equal, we generate OR expressions
|
|
* as shown in the function's header comment.
|
|
*/
|
|
i = 0;
|
|
partexprs_item = list_head(key->partexprs);
|
|
partexprs_item_saved = partexprs_item; /* placate compiler */
|
|
forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
|
|
{
|
|
EState *estate;
|
|
MemoryContext oldcxt;
|
|
Expr *test_expr;
|
|
ExprState *test_exprstate;
|
|
Datum test_result;
|
|
bool isNull;
|
|
|
|
ldatum = lfirst_node(PartitionRangeDatum, cell1);
|
|
udatum = lfirst_node(PartitionRangeDatum, cell2);
|
|
|
|
/*
|
|
* Since get_range_key_properties() modifies partexprs_item, and we
|
|
* might need to start over from the previous expression in the later
|
|
* part of this function, save away the current value.
|
|
*/
|
|
partexprs_item_saved = partexprs_item;
|
|
|
|
get_range_key_properties(key, i, ldatum, udatum,
|
|
&partexprs_item,
|
|
&keyCol,
|
|
&lower_val, &upper_val);
|
|
|
|
/*
|
|
* If either value is NULL, the corresponding partition bound is
|
|
* either MINVALUE or MAXVALUE, and we treat them as unequal, because
|
|
* even if they're the same, there is no common value to equate the
|
|
* key column with.
|
|
*/
|
|
if (!lower_val || !upper_val)
|
|
break;
|
|
|
|
/* Create the test expression */
|
|
estate = CreateExecutorState();
|
|
oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
|
|
(Expr *) lower_val,
|
|
(Expr *) upper_val);
|
|
fix_opfuncids((Node *) test_expr);
|
|
test_exprstate = ExecInitExpr(test_expr, NULL);
|
|
test_result = ExecEvalExprSwitchContext(test_exprstate,
|
|
GetPerTupleExprContext(estate),
|
|
&isNull);
|
|
MemoryContextSwitchTo(oldcxt);
|
|
FreeExecutorState(estate);
|
|
|
|
/* If not equal, go generate the OR expressions */
|
|
if (!DatumGetBool(test_result))
|
|
break;
|
|
|
|
/*
|
|
* The bounds for the last key column can't be equal, because such a
|
|
* range partition would never be allowed to be defined (it would have
|
|
* an empty range otherwise).
|
|
*/
|
|
if (i == key->partnatts - 1)
|
|
elog(ERROR, "invalid range bound specification");
|
|
|
|
/* Equal, so generate keyCol = lower_val expression */
|
|
result = lappend(result,
|
|
make_partition_op_expr(key, i, BTEqualStrategyNumber,
|
|
keyCol, (Expr *) lower_val));
|
|
|
|
i++;
|
|
}
|
|
|
|
/* First pair of lower_val and upper_val that are not equal. */
|
|
lower_or_start_datum = cell1;
|
|
upper_or_start_datum = cell2;
|
|
|
|
/* OR will have as many arms as there are key columns left. */
|
|
num_or_arms = key->partnatts - i;
|
|
current_or_arm = 0;
|
|
lower_or_arms = upper_or_arms = NIL;
|
|
need_next_lower_arm = need_next_upper_arm = true;
|
|
while (current_or_arm < num_or_arms)
|
|
{
|
|
List *lower_or_arm_args = NIL,
|
|
*upper_or_arm_args = NIL;
|
|
|
|
/* Restart scan of columns from the i'th one */
|
|
j = i;
|
|
partexprs_item = partexprs_item_saved;
|
|
|
|
for_both_cell(cell1, spec->lowerdatums, lower_or_start_datum,
|
|
cell2, spec->upperdatums, upper_or_start_datum)
|
|
{
|
|
PartitionRangeDatum *ldatum_next = NULL,
|
|
*udatum_next = NULL;
|
|
|
|
ldatum = lfirst_node(PartitionRangeDatum, cell1);
|
|
if (lnext(spec->lowerdatums, cell1))
|
|
ldatum_next = castNode(PartitionRangeDatum,
|
|
lfirst(lnext(spec->lowerdatums, cell1)));
|
|
udatum = lfirst_node(PartitionRangeDatum, cell2);
|
|
if (lnext(spec->upperdatums, cell2))
|
|
udatum_next = castNode(PartitionRangeDatum,
|
|
lfirst(lnext(spec->upperdatums, cell2)));
|
|
get_range_key_properties(key, j, ldatum, udatum,
|
|
&partexprs_item,
|
|
&keyCol,
|
|
&lower_val, &upper_val);
|
|
|
|
if (need_next_lower_arm && lower_val)
|
|
{
|
|
uint16 strategy;
|
|
|
|
/*
|
|
* For the non-last columns of this arm, use the EQ operator.
|
|
* For the last column of this arm, use GT, unless this is the
|
|
* last column of the whole bound check, or the next bound
|
|
* datum is MINVALUE, in which case use GE.
|
|
*/
|
|
if (j - i < current_or_arm)
|
|
strategy = BTEqualStrategyNumber;
|
|
else if (j == key->partnatts - 1 ||
|
|
(ldatum_next &&
|
|
ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
|
|
strategy = BTGreaterEqualStrategyNumber;
|
|
else
|
|
strategy = BTGreaterStrategyNumber;
|
|
|
|
lower_or_arm_args = lappend(lower_or_arm_args,
|
|
make_partition_op_expr(key, j,
|
|
strategy,
|
|
keyCol,
|
|
(Expr *) lower_val));
|
|
}
|
|
|
|
if (need_next_upper_arm && upper_val)
|
|
{
|
|
uint16 strategy;
|
|
|
|
/*
|
|
* For the non-last columns of this arm, use the EQ operator.
|
|
* For the last column of this arm, use LT, unless the next
|
|
* bound datum is MAXVALUE, in which case use LE.
|
|
*/
|
|
if (j - i < current_or_arm)
|
|
strategy = BTEqualStrategyNumber;
|
|
else if (udatum_next &&
|
|
udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
|
|
strategy = BTLessEqualStrategyNumber;
|
|
else
|
|
strategy = BTLessStrategyNumber;
|
|
|
|
upper_or_arm_args = lappend(upper_or_arm_args,
|
|
make_partition_op_expr(key, j,
|
|
strategy,
|
|
keyCol,
|
|
(Expr *) upper_val));
|
|
}
|
|
|
|
/*
|
|
* Did we generate enough of OR's arguments? First arm considers
|
|
* the first of the remaining columns, second arm considers first
|
|
* two of the remaining columns, and so on.
|
|
*/
|
|
++j;
|
|
if (j - i > current_or_arm)
|
|
{
|
|
/*
|
|
* We must not emit any more arms if the new column that will
|
|
* be considered is unbounded, or this one was.
|
|
*/
|
|
if (!lower_val || !ldatum_next ||
|
|
ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
|
|
need_next_lower_arm = false;
|
|
if (!upper_val || !udatum_next ||
|
|
udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
|
|
need_next_upper_arm = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (lower_or_arm_args != NIL)
|
|
lower_or_arms = lappend(lower_or_arms,
|
|
list_length(lower_or_arm_args) > 1
|
|
? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
|
|
: linitial(lower_or_arm_args));
|
|
|
|
if (upper_or_arm_args != NIL)
|
|
upper_or_arms = lappend(upper_or_arms,
|
|
list_length(upper_or_arm_args) > 1
|
|
? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
|
|
: linitial(upper_or_arm_args));
|
|
|
|
/* If no work to do in the next iteration, break away. */
|
|
if (!need_next_lower_arm && !need_next_upper_arm)
|
|
break;
|
|
|
|
++current_or_arm;
|
|
}
|
|
|
|
/*
|
|
* Generate the OR expressions for each of lower and upper bounds (if
|
|
* required), and append to the list of implicitly ANDed list of
|
|
* expressions.
|
|
*/
|
|
if (lower_or_arms != NIL)
|
|
result = lappend(result,
|
|
list_length(lower_or_arms) > 1
|
|
? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
|
|
: linitial(lower_or_arms));
|
|
if (upper_or_arms != NIL)
|
|
result = lappend(result,
|
|
list_length(upper_or_arms) > 1
|
|
? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
|
|
: linitial(upper_or_arms));
|
|
|
|
	/*
	 * As noted above, for non-default partitions, we return a list with a
	 * constant TRUE.  If the result is NIL during the recursive call for
	 * default, it implies this is the only other partition which can hold
	 * every value of the key except NULL.  Hence we return the NullTest
	 * result skipped earlier.
	 */
if (result == NIL)
|
|
result = for_default
|
|
? get_range_nulltest(key)
|
|
: list_make1(makeBoolConst(true, false));
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* get_range_key_properties
|
|
* Returns range partition key information for a given column
|
|
*
|
|
* This is a subroutine for get_qual_for_range, and its API is pretty
|
|
* specialized to that caller.
|
|
*
|
|
* Constructs an Expr for the key column (returned in *keyCol) and Consts
|
|
* for the lower and upper range limits (returned in *lower_val and
|
|
* *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
|
|
* a Const. All of these structures are freshly palloc'd.
|
|
*
|
|
* *partexprs_item points to the cell containing the next expression in
|
|
* the key->partexprs list, or NULL. It may be advanced upon return.
|
|
*/
|
|
static void
|
|
get_range_key_properties(PartitionKey key, int keynum,
|
|
PartitionRangeDatum *ldatum,
|
|
PartitionRangeDatum *udatum,
|
|
ListCell **partexprs_item,
|
|
Expr **keyCol,
|
|
Const **lower_val, Const **upper_val)
|
|
{
|
|
/* Get partition key expression for this column */
|
|
if (key->partattrs[keynum] != 0)
|
|
{
|
|
*keyCol = (Expr *) makeVar(1,
|
|
key->partattrs[keynum],
|
|
key->parttypid[keynum],
|
|
key->parttypmod[keynum],
|
|
key->parttypcoll[keynum],
|
|
0);
|
|
}
|
|
else
|
|
{
|
|
if (*partexprs_item == NULL)
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
*keyCol = copyObject(lfirst(*partexprs_item));
|
|
*partexprs_item = lnext(key->partexprs, *partexprs_item);
|
|
}
|
|
|
|
/* Get appropriate Const nodes for the bounds */
|
|
if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
|
|
*lower_val = castNode(Const, copyObject(ldatum->value));
|
|
else
|
|
*lower_val = NULL;
|
|
|
|
if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
|
|
*upper_val = castNode(Const, copyObject(udatum->value));
|
|
else
|
|
*upper_val = NULL;
|
|
}
|
|
|
|
/*
|
|
* get_range_nulltest
|
|
*
|
|
* A non-default range partition table does not currently allow partition
|
|
* keys to be null, so emit an IS NOT NULL expression for each key column.
|
|
*/
|
|
static List *
|
|
get_range_nulltest(PartitionKey key)
|
|
{
|
|
List *result = NIL;
|
|
NullTest *nulltest;
|
|
ListCell *partexprs_item;
|
|
int i;
|
|
|
|
partexprs_item = list_head(key->partexprs);
|
|
for (i = 0; i < key->partnatts; i++)
|
|
{
|
|
Expr *keyCol;
|
|
|
|
if (key->partattrs[i] != 0)
|
|
{
|
|
keyCol = (Expr *) makeVar(1,
|
|
key->partattrs[i],
|
|
key->parttypid[i],
|
|
key->parttypmod[i],
|
|
key->parttypcoll[i],
|
|
0);
|
|
}
|
|
else
|
|
{
|
|
if (partexprs_item == NULL)
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
keyCol = copyObject(lfirst(partexprs_item));
|
|
partexprs_item = lnext(key->partexprs, partexprs_item);
|
|
}
|
|
|
|
nulltest = makeNode(NullTest);
|
|
nulltest->arg = keyCol;
|
|
nulltest->nulltesttype = IS_NOT_NULL;
|
|
nulltest->argisrow = false;
|
|
nulltest->location = -1;
|
|
result = lappend(result, nulltest);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
 * compute_partition_hash_value
 *
 * Compute the hash value for the given partition key values.
 */
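/*
 * Example (added for illustration): for a two-column key (c1, c2) with
 * non-null values, the result is
 *
 *		hash_combine64(hash_combine64(0, h1), h2)
 *
 * where h1 and h2 are the extended hash function results for c1 and c2
 * computed with HASH_PARTITION_SEED; columns that are NULL are simply
 * skipped.
 */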
uint64
|
|
compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, const Oid *partcollation,
|
|
const Datum *values, const bool *isnull)
|
|
{
|
|
int i;
|
|
uint64 rowHash = 0;
|
|
Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
|
|
|
|
for (i = 0; i < partnatts; i++)
|
|
{
|
|
/* Nulls are just ignored */
|
|
if (!isnull[i])
|
|
{
|
|
Datum hash;
|
|
|
|
Assert(OidIsValid(partsupfunc[i].fn_oid));
|
|
|
|
/*
|
|
* Compute hash for each datum value by calling respective
|
|
* datatype-specific hash functions of each partition key
|
|
* attribute.
|
|
*/
|
|
hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i],
|
|
values[i], seed);
|
|
|
|
/* Form a single 64-bit hash value */
|
|
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
|
|
}
|
|
}
|
|
|
|
return rowHash;
|
|
}
|
|
|
|
/*
 * satisfies_hash_partition
 *
 * This is an SQL-callable function for use in hash partition constraints.
 * The first three arguments are the parent table OID, modulus, and remainder.
 * The remaining arguments are the values of the partitioning columns (or
 * expressions); these are hashed and the results are combined into a single
 * hash value by calling hash_combine64.
 *
 * Returns true if the remainder produced when this computed single hash value
 * is divided by the given modulus is equal to the given remainder, otherwise
 * false.  NB: it's important that this never return null, as the constraint
 * machinery would consider that to be a "pass".
 *
 * See get_qual_for_hash() for usage.
 */
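/*
 * Usage sketch (added for illustration; "t" is a hypothetical table hash
 * partitioned on column c1):
 *
 *		SELECT satisfies_hash_partition('t'::regclass, 4, 0, c1) FROM t;
 *
 * reports, for each row, whether it would belong to the partition declared
 * FOR VALUES WITH (MODULUS 4, REMAINDER 0).
 */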
Datum
|
|
satisfies_hash_partition(PG_FUNCTION_ARGS)
|
|
{
|
|
typedef struct ColumnsHashData
|
|
{
|
|
Oid relid;
|
|
int nkeys;
|
|
Oid variadic_type;
|
|
int16 variadic_typlen;
|
|
bool variadic_typbyval;
|
|
char variadic_typalign;
|
|
Oid partcollid[PARTITION_MAX_KEYS];
|
|
FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
|
|
} ColumnsHashData;
|
|
Oid parentId;
|
|
int modulus;
|
|
int remainder;
|
|
Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
|
|
ColumnsHashData *my_extra;
|
|
uint64 rowHash = 0;
|
|
|
|
/* Return false if the parent OID, modulus, or remainder is NULL. */
|
|
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
|
|
PG_RETURN_BOOL(false);
|
|
parentId = PG_GETARG_OID(0);
|
|
modulus = PG_GETARG_INT32(1);
|
|
remainder = PG_GETARG_INT32(2);
|
|
|
|
/* Sanity check modulus and remainder. */
|
|
if (modulus <= 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("modulus for hash partition must be an integer value greater than zero")));
|
|
if (remainder < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("remainder for hash partition must be an integer value greater than or equal to zero")));
|
|
if (remainder >= modulus)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("remainder for hash partition must be less than modulus")));
|
|
|
|
/*
|
|
* Cache hash function information.
|
|
*/
|
|
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
|
|
if (my_extra == NULL || my_extra->relid != parentId)
|
|
{
|
|
Relation parent;
|
|
PartitionKey key;
|
|
int j;
|
|
|
|
/* Open parent relation and fetch partition key info */
|
|
parent = relation_open(parentId, AccessShareLock);
|
|
key = RelationGetPartitionKey(parent);
|
|
|
|
/* Reject parent table that is not hash-partitioned. */
|
|
if (key == NULL || key->strategy != PARTITION_STRATEGY_HASH)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("\"%s\" is not a hash partitioned table",
|
|
get_rel_name(parentId))));
|
|
|
|
if (!get_fn_expr_variadic(fcinfo->flinfo))
|
|
{
|
|
int nargs = PG_NARGS() - 3;
|
|
|
|
/* complain if wrong number of column values */
|
|
if (key->partnatts != nargs)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
|
|
key->partnatts, nargs)));
|
|
|
|
/* allocate space for our cache */
|
|
fcinfo->flinfo->fn_extra =
|
|
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
|
|
offsetof(ColumnsHashData, partsupfunc) +
|
|
sizeof(FmgrInfo) * nargs);
|
|
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
|
|
my_extra->relid = parentId;
|
|
my_extra->nkeys = key->partnatts;
|
|
memcpy(my_extra->partcollid, key->partcollation,
|
|
key->partnatts * sizeof(Oid));
|
|
|
|
/* check argument types and save fmgr_infos */
|
|
for (j = 0; j < key->partnatts; ++j)
|
|
{
|
|
Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
|
|
|
|
if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("column %d of the partition key has type %s, but supplied value is of type %s",
|
|
j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
|
|
|
|
fmgr_info_copy(&my_extra->partsupfunc[j],
|
|
&key->partsupfunc[j],
|
|
fcinfo->flinfo->fn_mcxt);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
|
|
|
|
/* allocate space for our cache -- just one FmgrInfo in this case */
|
|
fcinfo->flinfo->fn_extra =
|
|
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
|
|
offsetof(ColumnsHashData, partsupfunc) +
|
|
sizeof(FmgrInfo));
|
|
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
|
|
my_extra->relid = parentId;
|
|
my_extra->nkeys = key->partnatts;
|
|
my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
|
|
get_typlenbyvalalign(my_extra->variadic_type,
|
|
&my_extra->variadic_typlen,
|
|
&my_extra->variadic_typbyval,
|
|
&my_extra->variadic_typalign);
|
|
my_extra->partcollid[0] = key->partcollation[0];
|
|
|
|
/* check argument types */
|
|
for (j = 0; j < key->partnatts; ++j)
|
|
if (key->parttypid[j] != my_extra->variadic_type)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
|
|
j + 1,
|
|
format_type_be(key->parttypid[j]),
|
|
format_type_be(my_extra->variadic_type))));
|
|
|
|
fmgr_info_copy(&my_extra->partsupfunc[0],
|
|
&key->partsupfunc[0],
|
|
fcinfo->flinfo->fn_mcxt);
|
|
}
|
|
|
|
/* Hold lock until commit */
|
|
relation_close(parent, NoLock);
|
|
}
|
|
|
|
if (!OidIsValid(my_extra->variadic_type))
|
|
{
|
|
int nkeys = my_extra->nkeys;
|
|
int i;
|
|
|
|
/*
|
|
* For a non-variadic call, neither the number of arguments nor their
|
|
* types can change across calls, so avoid the expense of rechecking
|
|
* here.
|
|
*/
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
{
|
|
Datum hash;
|
|
|
|
/* keys start from fourth argument of function. */
|
|
int argno = i + 3;
|
|
|
|
if (PG_ARGISNULL(argno))
|
|
continue;
|
|
|
|
hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
|
|
my_extra->partcollid[i],
|
|
PG_GETARG_DATUM(argno),
|
|
seed);
|
|
|
|
/* Form a single 64-bit hash value */
|
|
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
|
|
int i;
|
|
int nelems;
|
|
Datum *datum;
|
|
bool *isnull;
|
|
|
|
deconstruct_array(variadic_array,
|
|
my_extra->variadic_type,
|
|
my_extra->variadic_typlen,
|
|
my_extra->variadic_typbyval,
|
|
my_extra->variadic_typalign,
|
|
&datum, &isnull, &nelems);
|
|
|
|
/* complain if wrong number of column values */
|
|
if (nelems != my_extra->nkeys)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
|
|
my_extra->nkeys, nelems)));
|
|
|
|
for (i = 0; i < nelems; i++)
|
|
{
|
|
Datum hash;
|
|
|
|
if (isnull[i])
|
|
continue;
|
|
|
|
hash = FunctionCall2Coll(&my_extra->partsupfunc[0],
|
|
my_extra->partcollid[0],
|
|
datum[i],
|
|
seed);
|
|
|
|
/* Form a single 64-bit hash value */
|
|
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
|
|
}
|
|
}
|
|
|
|
PG_RETURN_BOOL(rowHash % modulus == remainder);
|
|
}
|
|
|
|
/*
 * check_two_partitions_bounds_range
 *
 * (function for BY RANGE partitioning)
 *
 * This is a helper function for check_partitions_for_split() and
 * calculate_partition_bound_for_merge().
 * It compares the upper bound of first_bound with the lower bound of
 * second_bound.  These bounds should be equal unless "defaultPart == true"
 * (which means that one of the split partitions is the DEFAULT partition).
 * In that case the upper bound of first_bound can be less than the lower
 * bound of second_bound, because the space between these bounds will be
 * covered by the DEFAULT partition.
 *
 * parent: partitioned table
 * first_name: name of first partition
 * first_bound: bound of first partition
 * second_name: name of second partition
 * second_bound: bound of second partition
 * defaultPart: true if one of split partitions is DEFAULT
 * pstate: pointer to ParseState struct for determining error position
 */
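/*
 * Illustrative sketch (hypothetical bounds, added for clarity): when a
 * partition FOR VALUES FROM (1) TO (100) is split into FROM (1) TO (50) and
 * FROM (50) TO (100), the first partition's upper bound (50) must equal the
 * second partition's lower bound (50).  If one of the resulting partitions is
 * DEFAULT, a gap such as ... TO (50) followed by FROM (60) ... is also
 * accepted, since the DEFAULT partition takes the values in between.
 */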
static void
|
|
check_two_partitions_bounds_range(Relation parent,
|
|
RangeVar *first_name,
|
|
PartitionBoundSpec *first_bound,
|
|
RangeVar *second_name,
|
|
PartitionBoundSpec *second_bound,
|
|
bool defaultPart,
|
|
ParseState *pstate)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
PartitionRangeBound *first_upper;
|
|
PartitionRangeBound *second_lower;
|
|
int cmpval;
|
|
|
|
Assert(key->strategy == PARTITION_STRATEGY_RANGE);
|
|
|
|
first_upper = make_one_partition_rbound(key, -1, first_bound->upperdatums, false);
|
|
second_lower = make_one_partition_rbound(key, -1, second_bound->lowerdatums, true);
|
|
|
|
/*
|
|
* lower1=false (the second to last argument) for correct comparison of
|
|
* lower and upper bounds.
|
|
*/
|
|
cmpval = partition_rbound_cmp(key->partnatts,
|
|
key->partsupfunc,
|
|
key->partcollation,
|
|
second_lower->datums, second_lower->kind,
|
|
false, first_upper);
|
|
if ((!defaultPart && cmpval) || (defaultPart && cmpval < 0))
|
|
{
|
|
PartitionRangeDatum *datum = linitial(second_bound->lowerdatums);
|
|
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("lower bound of partition \"%s\" conflicts with upper bound of previous partition \"%s\"",
|
|
second_name->relname, first_name->relname),
|
|
parser_errposition(pstate, datum->location)));
|
|
}
|
|
}
|
|
|
|
/*
 * check_partitions_not_overlap_list
 *
 * (function for BY LIST partitioning)
 *
 * This is a helper function for check_partitions_for_split().
 * Checks that the values of the new partitions do not overlap.
 *
 * parent: partitioned table
 * parts: array of SinglePartitionSpec structs with info about split partitions
 * nparts: size of array "parts"
 * pstate: pointer to ParseState struct for determining error position
 */
static void
|
|
check_partitions_not_overlap_list(Relation parent,
|
|
SinglePartitionSpec **parts,
|
|
int nparts,
|
|
ParseState *pstate)
|
|
{
|
|
PartitionKey key PG_USED_FOR_ASSERTS_ONLY = RelationGetPartitionKey(parent);
|
|
int overlap_location = -1;
|
|
int i,
|
|
j;
|
|
SinglePartitionSpec *sps1,
|
|
*sps2;
|
|
List *overlap;
|
|
|
|
Assert(key->strategy == PARTITION_STRATEGY_LIST);
|
|
|
|
for (i = 0; i < nparts; i++)
|
|
{
|
|
sps1 = parts[i];
|
|
|
|
for (j = i + 1; j < nparts; j++)
|
|
{
|
|
sps2 = parts[j];
|
|
|
|
/*
|
|
* Calculate intersection between values of two partitions.
|
|
*/
|
|
overlap = list_intersection(sps1->bound->listdatums,
|
|
sps2->bound->listdatums);
|
|
if (list_length(overlap) > 0)
|
|
{
|
|
Const *val = (Const *) lfirst(list_head(overlap));
|
|
|
|
overlap_location = val->location;
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("new partition \"%s\" would overlap with another new partition \"%s\"",
|
|
sps1->name->relname, sps2->name->relname),
|
|
parser_errposition(pstate, overlap_location)));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * get_partition_bound_spec
 *
 * Returns the bound specification of the partition with Oid "partOid" and
 * name "name".
 *
 * partOid: partition Oid
 * name: partition name
 */
static PartitionBoundSpec *
|
|
get_partition_bound_spec(Oid partOid, RangeVar *name)
|
|
{
|
|
HeapTuple tuple;
|
|
Datum datum;
|
|
bool isnull;
|
|
PartitionBoundSpec *boundspec = NULL;
|
|
|
|
/* Try fetching the tuple from the catcache, for speed. */
|
|
tuple = SearchSysCache1(RELOID, partOid);
|
|
if (!HeapTupleIsValid(tuple))
|
|
elog(ERROR, "cache lookup failed for relation \"%s\"",
|
|
name->relname);
|
|
|
|
datum = SysCacheGetAttr(RELOID, tuple,
|
|
Anum_pg_class_relpartbound,
|
|
&isnull);
|
|
if (isnull)
|
|
elog(ERROR, "partition bound for relation \"%s\" is null",
|
|
name->relname);
|
|
|
|
boundspec = stringToNode(TextDatumGetCString(datum));
|
|
|
|
if (!IsA(boundspec, PartitionBoundSpec))
|
|
elog(ERROR, "expected PartitionBoundSpec for relation \"%s\"",
|
|
name->relname);
|
|
|
|
ReleaseSysCache(tuple);
|
|
return boundspec;
|
|
}
|
|
|
|
/*
 * check_partition_bounds_for_split_range
 *
 * (function for BY RANGE partitioning)
 *
 * Checks that the bounds of the new partition "spec" lie inside the bounds of
 * the split partition (with Oid splitPartOid).  If first=true ("spec" is the
 * first of the new partitions), the lower bound of "spec" should be equal (or
 * greater than or equal, in case defaultPart=true) to the lower bound of the
 * split partition.  If last=true ("spec" is the last of the new partitions),
 * the upper bound of "spec" should be equal (or less than or equal, in case
 * defaultPart=true) to the upper bound of the split partition.
 *
 * parent: partitioned table
 * relname: name of the new partition
 * spec: bounds specification of the new partition
 * splitPartOid: split partition Oid
 * splitPartName: split partition name
 * first: true in case new partition "spec" is first of new partitions
 * last: true in case new partition "spec" is last of new partitions
 * defaultPart: true in case partitioned table has DEFAULT partition
 * pstate: pointer to ParseState struct for determining error position
 */
static void
|
|
check_partition_bounds_for_split_range(Relation parent,
|
|
char *relname,
|
|
PartitionBoundSpec *spec,
|
|
Oid splitPartOid,
|
|
RangeVar *splitPartName,
|
|
bool first,
|
|
bool last,
|
|
bool defaultPart,
|
|
ParseState *pstate)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
PartitionRangeBound *lower,
|
|
*upper;
|
|
int cmpval;
|
|
|
|
Assert(key->strategy == PARTITION_STRATEGY_RANGE);
|
|
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
|
|
|
|
lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
|
|
upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
|
|
|
|
/*
|
|
* First check if the resulting range would be empty with specified lower
|
|
* and upper bounds. partition_rbound_cmp cannot return zero here, since
|
|
* the lower-bound flags are different.
|
|
*/
|
|
cmpval = partition_rbound_cmp(key->partnatts,
|
|
key->partsupfunc,
|
|
key->partcollation,
|
|
lower->datums, lower->kind,
|
|
true, upper);
|
|
Assert(cmpval != 0);
|
|
if (cmpval > 0)
|
|
{
|
|
/* Point to problematic key in the lower datums list. */
|
|
PartitionRangeDatum *datum = list_nth(spec->lowerdatums, cmpval - 1);
|
|
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("empty range bound specified for partition \"%s\"",
|
|
relname),
|
|
errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
|
|
get_range_partbound_string(spec->lowerdatums),
|
|
get_range_partbound_string(spec->upperdatums)),
|
|
parser_errposition(pstate, datum->location)));
|
|
}
|
|
|
|
/* Need to check first and last partitions (from set of new partitions) */
|
|
if (first || last)
|
|
{
|
|
PartitionBoundSpec *split_spec = get_partition_bound_spec(splitPartOid, splitPartName);
|
|
bool overlap = false;
|
|
|
|
if (first)
|
|
{
|
|
PartitionRangeBound *split_lower;
|
|
|
|
split_lower = make_one_partition_rbound(key, -1, split_spec->lowerdatums, true);
|
|
|
|
cmpval = partition_rbound_cmp(key->partnatts,
|
|
key->partsupfunc,
|
|
key->partcollation,
|
|
lower->datums, lower->kind,
|
|
true, split_lower);
|
|
|
|
/*
|
|
* Lower bound of "spec" should be equal (or greater than or equal
|
|
* in case defaultPart=true) to lower bound of split partition.
|
|
*/
|
|
if ((!defaultPart && cmpval) || (defaultPart && cmpval < 0))
|
|
overlap = true;
|
|
}
|
|
else
|
|
{
|
|
PartitionRangeBound *split_upper;
|
|
|
|
split_upper = make_one_partition_rbound(key, -1, split_spec->upperdatums, false);
|
|
|
|
cmpval = partition_rbound_cmp(key->partnatts,
|
|
key->partsupfunc,
|
|
key->partcollation,
|
|
upper->datums, upper->kind,
|
|
false, split_upper);
|
|
|
|
/*
|
|
* Upper bound of "spec" should be equal (or less than or equal in
|
|
* case defaultPart=true) to upper bound of split partition.
|
|
*/
|
|
if ((!defaultPart && cmpval) || (defaultPart && cmpval > 0))
|
|
overlap = true;
|
|
}
|
|
|
|
if (overlap)
|
|
{
|
|
PartitionRangeDatum *datum;
|
|
|
|
datum = list_nth(first ? spec->lowerdatums : spec->upperdatums, abs(cmpval) - 1);
|
|
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("%s bound of partition \"%s\" is %s %s bound of split partition",
|
|
first ? "lower" : "upper",
|
|
relname,
|
|
defaultPart ? (first ? "less than" : "greater than") : "not equal to",
|
|
first ? "lower" : "upper"),
|
|
parser_errposition(pstate, datum->location)));
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * check_partition_bounds_for_split_list
 *
 * (function for BY LIST partitioning)
 *
 * Checks that the bounds of the new partition lie inside the bounds of the
 * split partition (with Oid splitPartOid).
 *
 * parent: partitioned table
 * relname: name of the new partition
 * spec: bounds specification of the new partition
 * splitPartOid: split partition Oid
 * pstate: pointer to ParseState struct for determining error position
 */
static void
|
|
check_partition_bounds_for_split_list(Relation parent, char *relname,
|
|
PartitionBoundSpec *spec,
|
|
Oid splitPartOid,
|
|
ParseState *pstate)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
|
int with = -1;
|
|
bool overlap = false;
|
|
int overlap_location = -1;
|
|
ListCell *cell;
|
|
|
|
Assert(key->strategy == PARTITION_STRATEGY_LIST);
|
|
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
|
|
Assert(boundinfo && boundinfo->strategy == PARTITION_STRATEGY_LIST);
|
|
|
|
	/*
	 * Look up each value of the new partition "spec" among the existing
	 * partitions.  All of them must belong to the split partition (with Oid
	 * splitPartOid).
	 */
foreach(cell, spec->listdatums)
|
|
{
|
|
Const *val = lfirst_node(Const, cell);
|
|
|
|
overlap_location = val->location;
|
|
if (!val->constisnull)
|
|
{
|
|
int offset;
|
|
bool equal;
|
|
|
|
offset = partition_list_bsearch(&key->partsupfunc[0],
|
|
key->partcollation,
|
|
boundinfo,
|
|
val->constvalue,
|
|
&equal);
|
|
if (offset >= 0 && equal)
|
|
{
|
|
with = boundinfo->indexes[offset];
|
|
if (partdesc->oids[with] != splitPartOid)
|
|
{
|
|
overlap = true;
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("new partition \"%s\" cannot have this value because split partition does not have",
|
|
relname),
|
|
parser_errposition(pstate, overlap_location)));
|
|
}
|
|
else if (partition_bound_accepts_nulls(boundinfo))
|
|
{
|
|
with = boundinfo->null_index;
|
|
if (partdesc->oids[with] != splitPartOid)
|
|
{
|
|
overlap = true;
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("new partition \"%s\" cannot have NULL value because split partition does not have",
|
|
relname),
|
|
parser_errposition(pstate, overlap_location)));
|
|
}
|
|
|
|
if (overlap)
|
|
{
|
|
Assert(with >= 0);
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("new partition \"%s\" would overlap with another (not split) partition \"%s\"",
|
|
relname, get_rel_name(partdesc->oids[with])),
|
|
parser_errposition(pstate, overlap_location)));
|
|
}
|
|
}
|
|
|
|
/*
 * find_value_in_new_partitions_list
 *
 * (function for BY LIST partitioning)
 *
 * Returns true if any of the new partitions contains the value "value".
 *
 * partsupfunc: information about the comparison function associated with the partition key
 * partcollation: partitioning collation
 * parts: pointer to an array of new partition descriptions
 * nparts: number of new partitions
 * value: the value that we are looking for
 * isnull: true if the value that we are looking for is NULL
 */
static bool
|
|
find_value_in_new_partitions_list(FmgrInfo *partsupfunc,
|
|
Oid *partcollation,
|
|
SinglePartitionSpec **parts,
|
|
int nparts,
|
|
Datum value,
|
|
bool isnull)
|
|
{
|
|
ListCell *valptr;
|
|
int i;
|
|
|
|
for (i = 0; i < nparts; i++)
|
|
{
|
|
SinglePartitionSpec *sps = parts[i];
|
|
|
|
foreach(valptr, sps->bound->listdatums)
|
|
{
|
|
Const *val = lfirst_node(Const, valptr);
|
|
|
|
if (isnull && val->constisnull)
|
|
return true;
|
|
|
|
if (!isnull && !val->constisnull)
|
|
{
|
|
if (DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
|
|
partcollation[0],
|
|
val->constvalue,
|
|
value)) == 0)
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
 * check_parent_values_in_new_partitions
 *
 * (function for BY LIST partitioning)
 *
 * Checks that all values of the split partition (with Oid partOid) are
 * contained in the new partitions.
 *
 * parent: partitioned table
 * partOid: split partition Oid
 * parts: pointer to an array of new partition descriptions
 * nparts: number of new partitions
 * pstate: pointer to ParseState struct for determining error position
 */
static void
|
|
check_parent_values_in_new_partitions(Relation parent,
|
|
Oid partOid,
|
|
SinglePartitionSpec **parts,
|
|
int nparts,
|
|
ParseState *pstate)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
|
int i;
|
|
bool found = true;
|
|
bool searchNull = false;
|
|
Datum datum = PointerGetDatum(NULL);
|
|
|
|
Assert(key->strategy == PARTITION_STRATEGY_LIST);
|
|
|
|
	/*
	 * Special processing for the NULL value: if the split partition (partOid)
	 * accepts NULLs, check that one of the new partitions does too.
	 */
if (partition_bound_accepts_nulls(boundinfo) &&
|
|
partdesc->oids[boundinfo->null_index] == partOid)
|
|
{
|
|
if (!find_value_in_new_partitions_list(&key->partsupfunc[0],
|
|
key->partcollation, parts, nparts, datum, true))
|
|
{
|
|
found = false;
|
|
searchNull = true;
|
|
}
|
|
}
|
|
|
|
	/*
	 * Search the partitioned table's PartitionDesc for all values belonging
	 * to the split partition (partOid), and check that each of them appears
	 * in one of the new partitions.
	 */
for (i = 0; i < boundinfo->ndatums; i++)
|
|
{
|
|
if (partdesc->oids[boundinfo->indexes[i]] == partOid)
|
|
{
|
|
/* We found value that split partition contains. */
|
|
datum = boundinfo->datums[i][0];
|
|
if (!find_value_in_new_partitions_list(&key->partsupfunc[0],
|
|
key->partcollation, parts, nparts, datum, false))
|
|
{
|
|
found = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!found)
|
|
{
|
|
Const *notFoundVal;
|
|
|
|
if (!searchNull)
|
|
|
|
			/*
			 * Make a Const so that we can get a string representation of the
			 * value that was not found.
			 */
notFoundVal = makeConst(key->parttypid[0],
|
|
key->parttypmod[0],
|
|
key->parttypcoll[0],
|
|
key->parttyplen[0],
|
|
datum,
|
|
false, /* isnull */
|
|
key->parttypbyval[0]);
|
|
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("new partitions do not have value %s but split partition does",
|
|
searchNull ? "NULL" : get_list_partvalue_string(notFoundVal))));
|
|
}
|
|
}
|
|
|
|
/*
 * check_partitions_for_split
 *
 * Checks new partitions for the SPLIT PARTITION command:
 * 1. At most one of the new partitions can be DEFAULT.
 * 2. New partitions should have different names (also different from the
 *    names of existing partitions).
 * 3. Bounds of new partitions should not overlap with each other or with
 *    existing partitions.
 * 4. If the split partition is the DEFAULT partition, one of the new
 *    partitions should be DEFAULT.
 * 5. If the new partitions or the existing partitions include a DEFAULT
 *    partition, the new partitions can have any bounds inside the split
 *    partition's bound (there can be gaps between partition bounds).
 * 6. If the partitioned table does not have a DEFAULT partition, one of the
 *    new partitions can be defined as DEFAULT.
 * 7. If neither the new partitions nor the partitioned table have a DEFAULT
 *    partition, the combined bounds of the new partitions should be equal
 *    to the bound of the split partition.
 *
 * parent: partitioned table
 * splitPartOid: split partition Oid
 * splitPartName: split partition name
 * partlist: list of new partitions
 * pstate: pointer to ParseState struct for determining error position
 */
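/*
 * Hypothetical example (names and syntax shown only for illustration): a
 * command roughly of the form
 *     ALTER TABLE tbl SPLIT PARTITION tbl_1_20 INTO
 *       (PARTITION tbl_1_10 FOR VALUES FROM (1) TO (10),
 *        PARTITION tbl_10_20 FOR VALUES FROM (10) TO (20));
 * passes these checks when the new bounds together cover exactly the bound
 * of tbl_1_20 and do not overlap each other or any existing partition.
 */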
void
check_partitions_for_split(Relation parent,
                           Oid splitPartOid,
                           RangeVar *splitPartName,
                           List *partlist,
                           ParseState *pstate)
{
    PartitionKey key;
    char        strategy;
    Oid         defaultPartOid;
    bool        isSplitPartDefault;
    bool        existsDefaultPart;
    ListCell   *listptr;
    int         default_index = -1;
    int         i,
                j;
    SinglePartitionSpec **new_parts;
    SinglePartitionSpec *spsPrev = NULL;
    int         nparts = 0;

    key = RelationGetPartitionKey(parent);
    strategy = get_partition_strategy(key);

    switch (strategy)
    {
        case PARTITION_STRATEGY_LIST:
        case PARTITION_STRATEGY_RANGE:
            {
                /*
                 * Build the new_parts array with the new partitions,
                 * excluding the DEFAULT partition.
                 */
                new_parts = (SinglePartitionSpec **)
                    palloc0(list_length(partlist) * sizeof(SinglePartitionSpec *));
                i = 0;
                foreach(listptr, partlist)
                {
                    SinglePartitionSpec *sps =
                        (SinglePartitionSpec *) lfirst(listptr);

                    if (sps->bound->is_default)
                    {
                        if (default_index >= 0)
                            ereport(ERROR,
                                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                                     errmsg("DEFAULT partition should be one")),
                                    parser_errposition(pstate, sps->name->location));
                        default_index = i;
                    }
                    else
                    {
                        new_parts[nparts++] = sps;
                    }
                    i++;
                }
            }
            break;

        case PARTITION_STRATEGY_HASH:
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("partition of hash-partitioned table cannot be split")));
            break;

        default:
            elog(ERROR, "unexpected partition strategy: %d",
                 (int) key->strategy);
            break;
    }

    if (strategy == PARTITION_STRATEGY_RANGE)
    {
        PartitionRangeBound **lower_bounds;
        SinglePartitionSpec **tmp_new_parts;

        /*
         * To simplify checking the ranges of the new partitions, sort all
         * partitions in ascending order of their bounds (we compare lower
         * bounds only).
         */
        lower_bounds = (PartitionRangeBound **)
            palloc0(nparts * sizeof(PartitionRangeBound *));

        /* Create array of lower bounds. */
        for (i = 0; i < nparts; i++)
        {
            lower_bounds[i] = make_one_partition_rbound(key, i,
                                                        new_parts[i]->bound->lowerdatums, true);
        }

        /* Sort array of lower bounds. */
        qsort_arg(lower_bounds, nparts, sizeof(PartitionRangeBound *),
                  qsort_partition_rbound_cmp, (void *) key);

        /* Reorder array of partitions. */
        tmp_new_parts = new_parts;
        new_parts = (SinglePartitionSpec **)
            palloc0(nparts * sizeof(SinglePartitionSpec *));
        for (i = 0; i < nparts; i++)
            new_parts[i] = tmp_new_parts[lower_bounds[i]->index];

        pfree(tmp_new_parts);
        pfree(lower_bounds);
    }
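    /*
     * Hypothetical illustration: if the new partitions were written in the
     * order FOR VALUES FROM (20) TO (30), FROM (10) TO (20), the reordering
     * above yields FROM (10) TO (20), FROM (20) TO (30), so the checks below
     * only need to compare neighbouring partitions.
     */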

    defaultPartOid =
        get_default_oid_from_partdesc(RelationGetPartitionDesc(parent, true));

    /* isSplitPartDefault flag: is split partition the DEFAULT partition? */
    isSplitPartDefault = (defaultPartOid == splitPartOid);

    if (isSplitPartDefault && default_index < 0)
    {
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("one partition in the list should be DEFAULT because split partition is DEFAULT")),
                parser_errposition(pstate, ((SinglePartitionSpec *) linitial(partlist))->name->location));
    }
    else if (!isSplitPartDefault && (default_index >= 0) && OidIsValid(defaultPartOid))
    {
        SinglePartitionSpec *spsDef =
            (SinglePartitionSpec *) list_nth(partlist, default_index);

        ereport(ERROR,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("new partition cannot be DEFAULT because DEFAULT partition already exists")),
                parser_errposition(pstate, spsDef->name->location));
    }

    /*
     * Indicator of whether the partitioned table has (or will have) a
     * DEFAULT partition.
     */
    existsDefaultPart = OidIsValid(defaultPartOid) || (default_index >= 0);

    for (i = 0; i < nparts; i++)
    {
        SinglePartitionSpec *sps = new_parts[i];

        if (isSplitPartDefault)
        {
            /*
             * If the split partition is the DEFAULT partition, we can use
             * any free ranges, just as when creating a new partition.
             */
            check_new_partition_bound(sps->name->relname, parent, sps->bound,
                                      pstate);
        }
        else
        {
            /*
             * Check that the bound of the current partition lies within the
             * bound of the split partition.  For range partitioning, also
             * check that the upper bound of the previous partition equals
             * the lower bound of the current partition.  For list
             * partitioning, check that the split partition contains all
             * values of the current partition.
             */
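            /*
             * Hypothetical example: when splitting a partition bounded by
             * FOR VALUES FROM (10) TO (30) and no DEFAULT partition exists,
             * new partitions FROM (10) TO (15) and FROM (20) TO (30) are
             * rejected, since the gap between 15 and 20 would leave part of
             * the split partition's range uncovered.
             */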
            if (strategy == PARTITION_STRATEGY_RANGE)
            {
                bool        first = (i == 0);
                bool        last = (i == (nparts - 1));

                check_partition_bounds_for_split_range(parent, sps->name->relname, sps->bound,
                                                       splitPartOid, splitPartName,
                                                       first, last,
                                                       existsDefaultPart, pstate);
            }
            else
                check_partition_bounds_for_split_list(parent, sps->name->relname,
                                                      sps->bound, splitPartOid, pstate);
        }

        /* Ranges of new partitions should not overlap. */
        if (strategy == PARTITION_STRATEGY_RANGE && spsPrev)
            check_two_partitions_bounds_range(parent, spsPrev->name, spsPrev->bound,
                                              sps->name, sps->bound, existsDefaultPart, pstate);

        spsPrev = sps;

        /* Check: new partitions should have different names. */
        for (j = i + 1; j < nparts; j++)
        {
            SinglePartitionSpec *sps2 = new_parts[j];

            if (equal(sps->name, sps2->name))
                ereport(ERROR,
                        (errcode(ERRCODE_DUPLICATE_TABLE),
                         errmsg("name \"%s\" is already used", sps2->name->relname)),
                        parser_errposition(pstate, sps2->name->location));
        }
    }

    if (strategy == PARTITION_STRATEGY_LIST)
    {
        /* Values of new partitions should not overlap. */
        check_partitions_not_overlap_list(parent, new_parts, nparts,
                                          pstate);

        /*
         * We need to check that all values of the split partition are
         * contained in the new partitions.  Skip this check if a DEFAULT
         * partition exists.
         */
        if (!existsDefaultPart)
            check_parent_values_in_new_partitions(parent, splitPartOid,
                                                  new_parts, nparts, pstate);
    }

    pfree(new_parts);
}

/*
 * calculate_partition_bound_for_merge
 *
 * Calculates the bound of the merged partition "spec" using the bounds of
 * the partitions to be merged.
 *
 * parent: partitioned table
 * partNames: names of partitions to be merged
 * partOids: Oids of partitions to be merged
 * spec (out): bounds specification of the merged partition
 * pstate: pointer to ParseState struct for determining error position
 */
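/*
 * Hypothetical example (illustrative only): merging range partitions bounded
 * by FOR VALUES FROM (1) TO (10) and FOR VALUES FROM (10) TO (20) produces
 * the bound FOR VALUES FROM (1) TO (20), i.e. the lower bound of the first
 * partition and the upper bound of the last partition after sorting by
 * lower bound.
 */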
void
calculate_partition_bound_for_merge(Relation parent,
                                    List *partNames,
                                    List *partOids,
                                    PartitionBoundSpec *spec,
                                    ParseState *pstate)
{
    PartitionKey key = RelationGetPartitionKey(parent);
    PartitionBoundSpec *bound;

    Assert(!spec->is_default);

    switch (key->strategy)
    {
        case PARTITION_STRATEGY_RANGE:
            {
                int         i;
                PartitionRangeBound **lower_bounds;
                int         nparts = list_length(partOids);
                List       *bounds = NIL;

                lower_bounds = (PartitionRangeBound **)
                    palloc0(nparts * sizeof(PartitionRangeBound *));

                /*
                 * Create array of lower bounds and list of
                 * PartitionBoundSpec.
                 */
                for (i = 0; i < nparts; i++)
                {
                    bound = get_partition_bound_spec(list_nth_oid(partOids, i),
                                                     (RangeVar *) list_nth(partNames, i));

                    lower_bounds[i] = make_one_partition_rbound(key, i, bound->lowerdatums, true);
                    bounds = lappend(bounds, bound);
                }

                /* Sort array of lower bounds. */
                qsort_arg(lower_bounds, nparts, sizeof(PartitionRangeBound *),
                          qsort_partition_rbound_cmp, (void *) key);

                /* Ranges of partitions should not overlap. */
                for (i = 1; i < nparts; i++)
                {
                    int         index = lower_bounds[i]->index;
                    int         prev_index = lower_bounds[i - 1]->index;

                    check_two_partitions_bounds_range(parent,
                                                      (RangeVar *) list_nth(partNames, prev_index),
                                                      (PartitionBoundSpec *) list_nth(bounds, prev_index),
                                                      (RangeVar *) list_nth(partNames, index),
                                                      (PartitionBoundSpec *) list_nth(bounds, index),
                                                      false, pstate);
                }

                /*
                 * The lower bound of the first partition is the lower bound
                 * of the merged partition.
                 */
                spec->lowerdatums =
                    ((PartitionBoundSpec *) list_nth(bounds, lower_bounds[0]->index))->lowerdatums;

                /*
                 * The upper bound of the last partition is the upper bound
                 * of the merged partition.
                 */
                spec->upperdatums =
                    ((PartitionBoundSpec *) list_nth(bounds, lower_bounds[nparts - 1]->index))->upperdatums;

                pfree(lower_bounds);
                list_free(bounds);
                break;
            }

        case PARTITION_STRATEGY_LIST:
            {
                ListCell   *listptr,
                           *listptr2;

                /* Consolidate bounds for all partitions in the list. */
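                /*
                 * Hypothetical example: merging partitions bounded by
                 * FOR VALUES IN (1, 2) and FOR VALUES IN (3) yields
                 * FOR VALUES IN (1, 2, 3); duplicates cannot arise because
                 * sibling list partitions never share values.
                 */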
                forboth(listptr, partOids, listptr2, partNames)
                {
                    RangeVar   *name = (RangeVar *) lfirst(listptr2);
                    Oid         curOid = lfirst_oid(listptr);

                    bound = get_partition_bound_spec(curOid, name);
                    spec->listdatums = list_concat(spec->listdatums, bound->listdatums);
                }
                break;
            }

        default:
            elog(ERROR, "unexpected partition strategy: %d",
                 (int) key->strategy);
    }
}