mirror of
https://github.com/postgres/postgres.git
synced 2025-05-18 00:02:16 -04:00
Edit SGML documentation related to extended statistics.
Use the "statistics object" terminology uniformly here too. Assorted copy-editing. Put new catalogs.sgml sections into alphabetical order.
This commit is contained in:
parent
e84c019598
commit
93ece9cc88
@ -221,13 +221,13 @@
|
|||||||
</row>
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry><link linkend="catalog-pg-pltemplate"><structname>pg_pltemplate</structname></link></entry>
|
<entry><link linkend="catalog-pg-partitioned-table"><structname>pg_partitioned_table</structname></link></entry>
|
||||||
<entry>template data for procedural languages</entry>
|
<entry>information about partition key of tables</entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry><link linkend="catalog-pg-partitioned-table"><structname>pg_partitioned_table</structname></link></entry>
|
<entry><link linkend="catalog-pg-pltemplate"><structname>pg_pltemplate</structname></link></entry>
|
||||||
<entry>information about partition key of tables</entry>
|
<entry>template data for procedural languages</entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
@ -4271,108 +4271,6 @@ SCRAM-SHA-256$<replaceable><iteration count></>:<replaceable><salt><
|
|||||||
</table>
|
</table>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="catalog-pg-statistic-ext">
|
|
||||||
<title><structname>pg_statistic_ext</structname></title>
|
|
||||||
|
|
||||||
<indexterm zone="catalog-pg-statistic-ext">
|
|
||||||
<primary>pg_statistic_ext</primary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
The catalog <structname>pg_statistic_ext</structname>
|
|
||||||
holds extended planner statistics.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<table>
|
|
||||||
<title><structname>pg_statistic_ext</> Columns</title>
|
|
||||||
|
|
||||||
<tgroup cols="4">
|
|
||||||
<thead>
|
|
||||||
<row>
|
|
||||||
<entry>Name</entry>
|
|
||||||
<entry>Type</entry>
|
|
||||||
<entry>References</entry>
|
|
||||||
<entry>Description</entry>
|
|
||||||
</row>
|
|
||||||
</thead>
|
|
||||||
|
|
||||||
<tbody>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>stxrelid</structfield></entry>
|
|
||||||
<entry><type>oid</type></entry>
|
|
||||||
<entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
|
|
||||||
<entry>The table that the described columns belongs to</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>stxname</structfield></entry>
|
|
||||||
<entry><type>name</type></entry>
|
|
||||||
<entry></entry>
|
|
||||||
<entry>Name of the statistic.</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>stxnamespace</structfield></entry>
|
|
||||||
<entry><type>oid</type></entry>
|
|
||||||
<entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.oid</literal></entry>
|
|
||||||
<entry>
|
|
||||||
The OID of the namespace that contains this statistic
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>stxowner</structfield></entry>
|
|
||||||
<entry><type>oid</type></entry>
|
|
||||||
<entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
|
|
||||||
<entry>Owner of the statistic</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>stxkeys</structfield></entry>
|
|
||||||
<entry><type>int2vector</type></entry>
|
|
||||||
<entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
|
|
||||||
<entry>
|
|
||||||
This is an array of values that indicate which table columns this
|
|
||||||
statistic covers. For example a value of <literal>1 3</literal> would
|
|
||||||
mean that the first and the third table columns make up the statistic key.
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>stxkind</structfield></entry>
|
|
||||||
<entry><type>char[]</type></entry>
|
|
||||||
<entry></entry>
|
|
||||||
<entry>
|
|
||||||
An array with the modes of the enabled statistic types. Valid values
|
|
||||||
are:
|
|
||||||
<literal>d</literal> for ndistinct coefficients,
|
|
||||||
<literal>f</literal> for functional dependencies.
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>stxndistinct</structfield></entry>
|
|
||||||
<entry><type>pg_ndistinct</type></entry>
|
|
||||||
<entry></entry>
|
|
||||||
<entry>
|
|
||||||
N-distinct coefficients, serialized as <structname>pg_ndistinct</> type.
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>stxdependencies</structfield></entry>
|
|
||||||
<entry><type>pg_dependencies</type></entry>
|
|
||||||
<entry></entry>
|
|
||||||
<entry>
|
|
||||||
Functional dependencies, serialized as <structname>pg_dependencies</> type.
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
</tbody>
|
|
||||||
</tgroup>
|
|
||||||
</table>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="catalog-pg-namespace">
|
<sect1 id="catalog-pg-namespace">
|
||||||
<title><structname>pg_namespace</structname></title>
|
<title><structname>pg_namespace</structname></title>
|
||||||
@ -4790,6 +4688,111 @@ SCRAM-SHA-256$<replaceable><iteration count></>:<replaceable><salt><
|
|||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
|
<sect1 id="catalog-pg-partitioned-table">
|
||||||
|
<title><structname>pg_partitioned_table</structname></title>
|
||||||
|
|
||||||
|
<indexterm zone="catalog-pg-partitioned-table">
|
||||||
|
<primary>pg_partitioned_table</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The catalog <structname>pg_partitioned_table</structname> stores
|
||||||
|
information about how tables are partitioned.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<title><structname>pg_partitioned_table</> Columns</title>
|
||||||
|
|
||||||
|
<tgroup cols="4">
|
||||||
|
<thead>
|
||||||
|
<row>
|
||||||
|
<entry>Name</entry>
|
||||||
|
<entry>Type</entry>
|
||||||
|
<entry>References</entry>
|
||||||
|
<entry>Description</entry>
|
||||||
|
</row>
|
||||||
|
</thead>
|
||||||
|
|
||||||
|
<tbody>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>partrelid</structfield></entry>
|
||||||
|
<entry><type>oid</type></entry>
|
||||||
|
<entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
|
||||||
|
<entry>The OID of the <structname>pg_class</> entry for this partitioned table</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>partstrat</structfield></entry>
|
||||||
|
<entry><type>char</type></entry>
|
||||||
|
<entry></entry>
|
||||||
|
<entry>
|
||||||
|
Partitioning strategy; <literal>l</> = list partitioned table,
|
||||||
|
<literal>r</> = range partitioned table
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>partnatts</structfield></entry>
|
||||||
|
<entry><type>int2</type></entry>
|
||||||
|
<entry></entry>
|
||||||
|
<entry>The number of columns in partition key</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>partattrs</structfield></entry>
|
||||||
|
<entry><type>int2vector</type></entry>
|
||||||
|
<entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
|
||||||
|
<entry>
|
||||||
|
This is an array of <structfield>partnatts</structfield> values that
|
||||||
|
indicate which table columns are part of the partition key. For
|
||||||
|
example, a value of <literal>1 3</literal> would mean that the first
|
||||||
|
and the third table columns make up the partition key. A zero in this
|
||||||
|
array indicates that the corresponding partition key column is an
|
||||||
|
expression, rather than a simple column reference.
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>partclass</structfield></entry>
|
||||||
|
<entry><type>oidvector</type></entry>
|
||||||
|
<entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry>
|
||||||
|
<entry>
|
||||||
|
For each column in the partition key, this contains the OID of the
|
||||||
|
operator class to use. See
|
||||||
|
<link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link> for details.
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>partcollation</structfield></entry>
|
||||||
|
<entry><type>oidvector</type></entry>
|
||||||
|
<entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry>
|
||||||
|
<entry>
|
||||||
|
For each column in the partition key, this contains the OID of the
|
||||||
|
the collation to use for partitioning.
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>partexprs</structfield></entry>
|
||||||
|
<entry><type>pg_node_tree</type></entry>
|
||||||
|
<entry></entry>
|
||||||
|
<entry>
|
||||||
|
Expression trees (in <function>nodeToString()</function>
|
||||||
|
representation) for partition key columns that are not simple column
|
||||||
|
references. This is a list with one element for each zero
|
||||||
|
entry in <structfield>partattrs</>. Null if all partition key columns
|
||||||
|
are simple references.
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
</tbody>
|
||||||
|
</tgroup>
|
||||||
|
</table>
|
||||||
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
<sect1 id="catalog-pg-pltemplate">
|
<sect1 id="catalog-pg-pltemplate">
|
||||||
<title><structname>pg_pltemplate</structname></title>
|
<title><structname>pg_pltemplate</structname></title>
|
||||||
|
|
||||||
@ -4896,109 +4899,6 @@ SCRAM-SHA-256$<replaceable><iteration count></>:<replaceable><salt><
|
|||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="catalog-pg-partitioned-table">
|
|
||||||
<title><structname>pg_partitioned_table</structname></title>
|
|
||||||
|
|
||||||
<indexterm zone="catalog-pg-partitioned-table">
|
|
||||||
<primary>pg_partitioned_table</primary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
The catalog <structname>pg_partitioned_table</structname> stores
|
|
||||||
information about how tables are partitioned.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<table>
|
|
||||||
<title><structname>pg_partitioned_table</> Columns</title>
|
|
||||||
|
|
||||||
<tgroup cols="4">
|
|
||||||
<thead>
|
|
||||||
<row>
|
|
||||||
<entry>Name</entry>
|
|
||||||
<entry>Type</entry>
|
|
||||||
<entry>References</entry>
|
|
||||||
<entry>Description</entry>
|
|
||||||
</row>
|
|
||||||
</thead>
|
|
||||||
|
|
||||||
<tbody>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>partrelid</structfield></entry>
|
|
||||||
<entry><type>oid</type></entry>
|
|
||||||
<entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
|
|
||||||
<entry>The OID of the <structname>pg_class</> entry for this partitioned table</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>partstrat</structfield></entry>
|
|
||||||
<entry><type>char</type></entry>
|
|
||||||
<entry></entry>
|
|
||||||
<entry>
|
|
||||||
Partitioning strategy; <literal>l</> = list partitioned table,
|
|
||||||
<literal>r</> = range partitioned table
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>partnatts</structfield></entry>
|
|
||||||
<entry><type>int2</type></entry>
|
|
||||||
<entry></entry>
|
|
||||||
<entry>The number of columns in partition key</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>partattrs</structfield></entry>
|
|
||||||
<entry><type>int2vector</type></entry>
|
|
||||||
<entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
|
|
||||||
<entry>
|
|
||||||
This is an array of <structfield>partnatts</structfield> values that
|
|
||||||
indicate which table columns are part of the partition key. For
|
|
||||||
example, a value of <literal>1 3</literal> would mean that the first
|
|
||||||
and the third table columns make up the partition key. A zero in this
|
|
||||||
array indicates that the corresponding partition key column is an
|
|
||||||
expression, rather than a simple column reference.
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>partclass</structfield></entry>
|
|
||||||
<entry><type>oidvector</type></entry>
|
|
||||||
<entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry>
|
|
||||||
<entry>
|
|
||||||
For each column in the partition key, this contains the OID of the
|
|
||||||
operator class to use. See
|
|
||||||
<link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link> for details.
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>partcollation</structfield></entry>
|
|
||||||
<entry><type>oidvector</type></entry>
|
|
||||||
<entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry>
|
|
||||||
<entry>
|
|
||||||
For each column in the partition key, this contains the OID of the
|
|
||||||
the collation to use for partitioning.
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry><structfield>partexprs</structfield></entry>
|
|
||||||
<entry><type>pg_node_tree</type></entry>
|
|
||||||
<entry></entry>
|
|
||||||
<entry>
|
|
||||||
Expression trees (in <function>nodeToString()</function>
|
|
||||||
representation) for partition key columns that are not simple column
|
|
||||||
references. This is a list with one element for each zero
|
|
||||||
entry in <structfield>partattrs</>. Null if all partition key columns
|
|
||||||
are simple references.
|
|
||||||
</entry>
|
|
||||||
</row>
|
|
||||||
|
|
||||||
</tbody>
|
|
||||||
</tgroup>
|
|
||||||
</table>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="catalog-pg-policy">
|
<sect1 id="catalog-pg-policy">
|
||||||
<title><structname>pg_policy</structname></title>
|
<title><structname>pg_policy</structname></title>
|
||||||
@ -6466,6 +6366,120 @@ SCRAM-SHA-256$<replaceable><iteration count></>:<replaceable><salt><
|
|||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
<sect1 id="catalog-pg-statistic-ext">
|
||||||
|
<title><structname>pg_statistic_ext</structname></title>
|
||||||
|
|
||||||
|
<indexterm zone="catalog-pg-statistic-ext">
|
||||||
|
<primary>pg_statistic_ext</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The catalog <structname>pg_statistic_ext</structname>
|
||||||
|
holds extended planner statistics.
|
||||||
|
Each row in this catalog corresponds to a <firstterm>statistics object</>
|
||||||
|
created with <xref linkend="sql-createstatistics">.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<title><structname>pg_statistic_ext</> Columns</title>
|
||||||
|
|
||||||
|
<tgroup cols="4">
|
||||||
|
<thead>
|
||||||
|
<row>
|
||||||
|
<entry>Name</entry>
|
||||||
|
<entry>Type</entry>
|
||||||
|
<entry>References</entry>
|
||||||
|
<entry>Description</entry>
|
||||||
|
</row>
|
||||||
|
</thead>
|
||||||
|
|
||||||
|
<tbody>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>stxrelid</structfield></entry>
|
||||||
|
<entry><type>oid</type></entry>
|
||||||
|
<entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
|
||||||
|
<entry>Table containing the columns described by this object</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>stxname</structfield></entry>
|
||||||
|
<entry><type>name</type></entry>
|
||||||
|
<entry></entry>
|
||||||
|
<entry>Name of the statistics object</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>stxnamespace</structfield></entry>
|
||||||
|
<entry><type>oid</type></entry>
|
||||||
|
<entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.oid</literal></entry>
|
||||||
|
<entry>
|
||||||
|
The OID of the namespace that contains this statistics object
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>stxowner</structfield></entry>
|
||||||
|
<entry><type>oid</type></entry>
|
||||||
|
<entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
|
||||||
|
<entry>Owner of the statistics object</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>stxkeys</structfield></entry>
|
||||||
|
<entry><type>int2vector</type></entry>
|
||||||
|
<entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
|
||||||
|
<entry>
|
||||||
|
An array of attribute numbers, indicating which table columns are
|
||||||
|
covered by this statistics object;
|
||||||
|
for example a value of <literal>1 3</literal> would
|
||||||
|
mean that the first and the third table columns are covered
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>stxkind</structfield></entry>
|
||||||
|
<entry><type>char[]</type></entry>
|
||||||
|
<entry></entry>
|
||||||
|
<entry>
|
||||||
|
An array containing codes for the enabled statistic types;
|
||||||
|
valid values are:
|
||||||
|
<literal>d</literal> for n-distinct statistics,
|
||||||
|
<literal>f</literal> for functional dependency statistics
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>stxndistinct</structfield></entry>
|
||||||
|
<entry><type>pg_ndistinct</type></entry>
|
||||||
|
<entry></entry>
|
||||||
|
<entry>
|
||||||
|
N-distinct counts, serialized as <structname>pg_ndistinct</> type
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry><structfield>stxdependencies</structfield></entry>
|
||||||
|
<entry><type>pg_dependencies</type></entry>
|
||||||
|
<entry></entry>
|
||||||
|
<entry>
|
||||||
|
Functional dependency statistics, serialized
|
||||||
|
as <structname>pg_dependencies</> type
|
||||||
|
</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
</tbody>
|
||||||
|
</tgroup>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The <structfield>stxkind</structfield> field is filled at creation of the
|
||||||
|
statistics object, indicating which statistic type(s) are desired.
|
||||||
|
The fields after it are initially NULL and are filled only when the
|
||||||
|
corresponding statistic has been computed by <command>ANALYZE</>.
|
||||||
|
</para>
|
||||||
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="catalog-pg-subscription">
|
<sect1 id="catalog-pg-subscription">
|
||||||
<title><structname>pg_subscription</structname></title>
|
<title><structname>pg_subscription</structname></title>
|
||||||
|
|
||||||
|
@ -1071,25 +1071,41 @@ WHERE tablename = 'road';
|
|||||||
are independent of each other,
|
are independent of each other,
|
||||||
an assumption that does not hold when column values are correlated.
|
an assumption that does not hold when column values are correlated.
|
||||||
Regular statistics, because of their per-individual-column nature,
|
Regular statistics, because of their per-individual-column nature,
|
||||||
do not capture the knowledge of cross-column correlation;
|
cannot capture any knowledge about cross-column correlation.
|
||||||
<firstterm>multivariate statistics</firstterm> can be used to instruct
|
However, <productname>PostgreSQL</> has the ability to compute
|
||||||
the server to obtain statistics across such a set of columns,
|
<firstterm>multivariate statistics</firstterm>, which can capture
|
||||||
which are later used by the query optimizer
|
such information.
|
||||||
to determine cardinality and selectivity
|
|
||||||
of clauses involving those columns.
|
|
||||||
Multivariate statistics are currently the only use of
|
|
||||||
<firstterm>extended statistics</firstterm>.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Extended statistics are created using
|
Because the number of possible column combinations is very large,
|
||||||
|
it's impractical to compute multivariate statistics automatically.
|
||||||
|
Instead, <firstterm>extended statistics objects</firstterm>, more often
|
||||||
|
called just <firstterm>statistics objects</>, can be created to instruct
|
||||||
|
the server to obtain statistics across interesting sets of columns.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Statistics objects are created using
|
||||||
<xref linkend="sql-createstatistics">, which see for more details.
|
<xref linkend="sql-createstatistics">, which see for more details.
|
||||||
Data collection is deferred until the next <command>ANALYZE</command>
|
Creation of such an object merely creates a catalog entry expressing
|
||||||
on the table, after which the stored values can be examined in the
|
interest in the statistics. Actual data collection is performed
|
||||||
|
by <command>ANALYZE</command> (either a manual command, or background
|
||||||
|
auto-analyze). The collected values can be examined in the
|
||||||
<link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>
|
<link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>
|
||||||
catalog.
|
catalog.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<command>ANALYZE</command> computes extended statistics based on the same
|
||||||
|
sample of table rows that it takes for computing regular single-column
|
||||||
|
statistics. Since the sample size is increased by increasing the
|
||||||
|
statistics target for the table or any of its columns (as described in
|
||||||
|
the previous section), a larger statistics target will normally result in
|
||||||
|
more accurate extended statistics, as well as more time spent calculating
|
||||||
|
them.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The following subsections describe the types of extended statistics
|
The following subsections describe the types of extended statistics
|
||||||
that are currently supported.
|
that are currently supported.
|
||||||
@ -1099,142 +1115,162 @@ WHERE tablename = 'road';
|
|||||||
<title>Functional Dependencies</title>
|
<title>Functional Dependencies</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The simplest type of extended statistics are functional dependencies,
|
The simplest type of extended statistics tracks <firstterm>functional
|
||||||
a concept used in definitions of database normal forms.
|
dependencies</>, a concept used in definitions of database normal forms.
|
||||||
Put simply, it is said that column <literal>b</> is functionally
|
We say that column <structfield>b</> is functionally dependent on
|
||||||
dependent on column <literal>a</> if knowledge of the value of
|
column <structfield>a</> if knowledge of the value of
|
||||||
<literal>a</> is sufficient to determine the value of <literal>b</>.
|
<structfield>a</> is sufficient to determine the value
|
||||||
In normalized databases, functional dependencies are allowed only on
|
of <structfield>b</>, that is there are no two rows having the same value
|
||||||
primary keys and superkeys. However, many data sets are in practice not
|
of <structfield>a</> but different values of <structfield>b</>.
|
||||||
fully normalized for various reasons; intentional denormalization for
|
In a fully normalized database, functional dependencies should exist
|
||||||
performance reasons is a common example.
|
only on primary keys and superkeys. However, in practice many data sets
|
||||||
|
are not fully normalized for various reasons; intentional
|
||||||
|
denormalization for performance reasons is a common example.
|
||||||
|
Even in a fully normalized database, there may be partial correlation
|
||||||
|
between some columns, which can be expressed as partial functional
|
||||||
|
dependency.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The existance of functional dependencies directly affects the accuracy
|
The existence of functional dependencies directly affects the accuracy
|
||||||
of estimates in certain queries.
|
of estimates in certain queries. If a query contains conditions on
|
||||||
The reason is that conditions on the dependent columns do not
|
both the independent and the dependent column(s), the
|
||||||
restrict the result set, but the query planner (lacking functional
|
conditions on the dependent columns do not further reduce the result
|
||||||
dependency knowledge) considers them independent, resulting in
|
size; but without knowledge of the functional dependency, the query
|
||||||
underestimates.
|
planner will assume that the conditions are independent, resulting
|
||||||
To inform the planner about the functional dependencies, we collect
|
in underestimating the result size.
|
||||||
measurements of dependency during <command>ANALYZE</>. Assessing
|
|
||||||
the degree of dependency between all sets of columns would be
|
|
||||||
prohibitively expensive, so the search is limited to potential
|
|
||||||
dependencies defined using the <literal>dependencies</> option of
|
|
||||||
extended statistics. It is advisable to create
|
|
||||||
<literal>dependencies</> statistics if and only if functional
|
|
||||||
dependencies actually exist, to avoid unnecessary overhead on both
|
|
||||||
<command>ANALYZE</> and query planning.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
To inspect functional dependencies on a statistics
|
To inform the planner about functional dependencies, <command>ANALYZE</>
|
||||||
<literal>stts</literal>, you may do this:
|
can collect measurements of cross-column dependency. Assessing the
|
||||||
|
degree of dependency between all sets of columns would be prohibitively
|
||||||
|
expensive, so data collection is limited to those groups of columns
|
||||||
|
appearing together in a statistics object defined with
|
||||||
|
the <literal>dependencies</> option. It is advisable to create
|
||||||
|
<literal>dependencies</> statistics only for column groups that are
|
||||||
|
strongly correlated, to avoid unnecessary overhead in both
|
||||||
|
<command>ANALYZE</> and later query planning.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Here is an example of collecting functional-dependency statistics:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
CREATE STATISTICS stts (dependencies)
|
CREATE STATISTICS stts (dependencies) ON zip, city FROM zipcodes;
|
||||||
ON zip, city FROM zipcodes;
|
|
||||||
ANALYZE zipcodes;
|
ANALYZE zipcodes;
|
||||||
|
|
||||||
SELECT stxname, stxkeys, stxdependencies
|
SELECT stxname, stxkeys, stxdependencies
|
||||||
FROM pg_statistic_ext
|
FROM pg_statistic_ext
|
||||||
WHERE stxname = 'stts';
|
WHERE stxname = 'stts';
|
||||||
stxname | stxkeys | stxdependencies
|
stxname | stxkeys | stxdependencies
|
||||||
---------+---------+------------------------------------------
|
---------+---------+------------------------------------------
|
||||||
stts | 1 5 | {"1 => 5": 1.000000, "5 => 1": 0.423130}
|
stts | 1 5 | {"1 => 5": 1.000000, "5 => 1": 0.423130}
|
||||||
(1 row)
|
(1 row)
|
||||||
</programlisting>
|
</programlisting>
|
||||||
where it can be seen that column 1 (a zip code) fully determines column
|
Here it can be seen that column 1 (zip code) fully determines column
|
||||||
5 (city) so the coefficient is 1.0, while city only determines zip code
|
5 (city) so the coefficient is 1.0, while city only determines zip code
|
||||||
about 42% of the time, meaning that there are many cities (58%) that are
|
about 42% of the time, meaning that there are many cities (58%) that are
|
||||||
represented by more than a single ZIP code.
|
represented by more than a single ZIP code.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
When computing the selectivity, the planner inspects all conditions and
|
When computing the selectivity for a query involving functionally
|
||||||
attempts to identify which conditions are already implied by other
|
dependent columns, the planner adjusts the per-condition selectivity
|
||||||
conditions. The selectivity estimates from any redundant conditions are
|
estimates using the dependency coefficients so as not to produce
|
||||||
ignored from a selectivity point of view. In the example query above,
|
an underestimate.
|
||||||
the selectivity estimates for either of the conditions may be eliminated,
|
|
||||||
thus improving the overall estimate.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<sect4>
|
<sect4>
|
||||||
<title>Limitations of Functional Dependencies</title>
|
<title>Limitations of Functional Dependencies</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Functional dependencies are a very simple type of statistics, and
|
Functional dependencies are currently only applied when considering
|
||||||
as such have several limitations. The first limitation is that they
|
simple equality conditions that compare columns to constant values.
|
||||||
only work with simple equality conditions, comparing columns and constant
|
They are not used to improve estimates for equality conditions
|
||||||
values. It's not possible to use them to eliminate equality conditions
|
comparing two columns or comparing a column to an expression, nor for
|
||||||
comparing two columns or a column to an expression, range clauses,
|
range clauses, <literal>LIKE</> or any other type of condition.
|
||||||
<literal>LIKE</> or any other type of conditions.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
When eliminating the implied conditions, the planner assumes that the
|
When estimating with functional dependencies, the planner assumes that
|
||||||
conditions are compatible. Consider the following example, where
|
conditions on the involved columns are compatible and hence redundant.
|
||||||
this assumption does not hold:
|
If they are incompatible, the correct estimate would be zero rows, but
|
||||||
|
that possibility is not considered. For example, given a query like
|
||||||
<programlisting>
|
<programlisting>
|
||||||
EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a = 1 AND b = 10;
|
SELECT * FROM zipcodes WHERE city = 'San Francisco' AND zip = '94105';
|
||||||
QUERY PLAN
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Seq Scan on t (cost=0.00..195.00 rows=100 width=8) (actual rows=0 loops=1)
|
|
||||||
Filter: ((a = 1) AND (b = 10))
|
|
||||||
Rows Removed by Filter: 10000
|
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
the planner will disregard the <structfield>city</> clause as not
|
||||||
While there are no rows with such combination of values, the planner
|
changing the selectivity, which is correct. However, it will make
|
||||||
is unable to verify whether the values match — it only knows that
|
the same assumption about
|
||||||
the columns are functionally dependent.
|
<programlisting>
|
||||||
|
SELECT * FROM zipcodes WHERE city = 'San Francisco' AND zip = '90210';
|
||||||
|
</programlisting>
|
||||||
|
even though there will really be zero rows satisfying this query.
|
||||||
|
Functional dependency statistics do not provide enough information
|
||||||
|
to conclude that, however.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
This assumption is related to queries executed on the database; in many
|
In many practical situations, this assumption is usually satisfied;
|
||||||
cases, it's actually satisfied (e.g. when the GUI only allows selecting
|
for example, there might be a GUI in the application that only allows
|
||||||
compatible values). But if that's not the case, functional dependencies
|
selecting compatible city and zipcode values to use in a query.
|
||||||
may not be a viable option.
|
But if that's not the case, functional dependencies may not be a viable
|
||||||
|
option.
|
||||||
</para>
|
</para>
|
||||||
</sect4>
|
</sect4>
|
||||||
</sect3>
|
</sect3>
|
||||||
|
|
||||||
<sect3>
|
<sect3>
|
||||||
<title>Multivariate N-Distinct Coefficients</title>
|
<title>Multivariate N-Distinct Counts</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Single-column statistics store the number of distinct values in each
|
Single-column statistics store the number of distinct values in each
|
||||||
column. Estimates of the number of distinct values on more than one
|
column. Estimates of the number of distinct values when combining more
|
||||||
column (for example, for <literal>GROUP BY a, b</literal>) are
|
than one column (for example, for <literal>GROUP BY a, b</literal>) are
|
||||||
frequently wrong when the planner only has single-column statistical
|
frequently wrong when the planner only has single-column statistical
|
||||||
data, however, causing it to select bad plans.
|
data, causing it to select bad plans.
|
||||||
In order to improve n-distinct estimation when multiple columns are
|
|
||||||
grouped together, the <literal>ndistinct</> option of extended statistics
|
|
||||||
can be used, which instructs <command>ANALYZE</> to collect n-distinct
|
|
||||||
estimates for all possible combinations of two or more columns of the set
|
|
||||||
of columns in the statistics object (the per-column estimates are already
|
|
||||||
available in <structname>pg_statistic</>).
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Continuing the above example, the n-distinct coefficients in a ZIP
|
To improve such estimates, <command>ANALYZE</> can collect n-distinct
|
||||||
code table may look like the following:
|
statistics for groups of columns. As before, it's impractical to do
|
||||||
|
this for every possible column grouping, so data is collected only for
|
||||||
|
those groups of columns appearing together in a statistics object
|
||||||
|
defined with the <literal>ndistinct</> option. Data will be collected
|
||||||
|
for each possible combination of two or more columns from the set of
|
||||||
|
listed columns.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Continuing the previous example, the n-distinct counts in a
|
||||||
|
table of ZIP codes might look like the following:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
CREATE STATISTICS stts2 (ndistinct)
|
CREATE STATISTICS stts2 (ndistinct) ON zip, state, city FROM zipcodes;
|
||||||
ON zip, state, city FROM zipcodes;
|
|
||||||
ANALYZE zipcodes;
|
ANALYZE zipcodes;
|
||||||
|
|
||||||
SELECT stxkeys AS k, stxndistinct AS nd
|
SELECT stxkeys AS k, stxndistinct AS nd
|
||||||
FROM pg_statistic_ext
|
FROM pg_statistic_ext
|
||||||
WHERE stxname = 'stts2';
|
WHERE stxname = 'stts2';
|
||||||
-[ RECORD 1 ]--------------------------------------------------------
|
-[ RECORD 1 ]--------------------------------------------------------
|
||||||
k | 1 2 5
|
k | 1 2 5
|
||||||
nd | {"1, 2": 33178, "1, 5": 33178, "2, 5": 27435, "1, 2, 5": 33178}
|
nd | {"1, 2": 33178, "1, 5": 33178, "2, 5": 27435, "1, 2, 5": 33178}
|
||||||
(1 row)
|
(1 row)
|
||||||
</programlisting>
|
</programlisting>
|
||||||
which indicates that there are three combinations of columns that
|
This indicates that there are three combinations of columns that
|
||||||
have 33178 distinct values: ZIP code and state; ZIP code and city;
|
have 33178 distinct values: ZIP code and state; ZIP code and city;
|
||||||
and ZIP code, city and state (the fact that they are all equal is
|
and ZIP code, city and state (the fact that they are all equal is
|
||||||
expected given the nature of ZIP-code data). On the other hand,
|
expected given that ZIP code alone is unique in this table). On the
|
||||||
the combination of city and state only has 27435 distinct values.
|
other hand, the combination of city and state has only 27435 distinct
|
||||||
|
values.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
It's advisable to create <literal>ndistinct</> statistics objects only
|
||||||
|
on combinations of columns that are actually used for grouping, and
|
||||||
|
for which misestimation of the number of groups is resulting in bad
|
||||||
|
plans. Otherwise, the <command>ANALYZE</> cycles are just wasted.
|
||||||
</para>
|
</para>
|
||||||
</sect3>
|
</sect3>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
@ -456,10 +456,11 @@ rows = (outer_cardinality * inner_cardinality) * selectivity
|
|||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<sect2>
|
<sect2>
|
||||||
<title>Functional dependencies</title>
|
<title>Functional Dependencies</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Multivariate correlation can be seen with a very simple data set — a
|
Multivariate correlation can be demonstrated with a very simple data set
|
||||||
table with two columns, both containing the same values:
|
— a table with two columns, both containing the same values:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
CREATE TABLE t (a INT, b INT);
|
CREATE TABLE t (a INT, b INT);
|
||||||
@ -501,8 +502,8 @@ EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a = 1;
|
|||||||
number of rows, we see that the estimate is very accurate
|
number of rows, we see that the estimate is very accurate
|
||||||
(in fact exact, as the table is very small). Changing the
|
(in fact exact, as the table is very small). Changing the
|
||||||
<literal>WHERE</> to use the <structfield>b</> column, an identical
|
<literal>WHERE</> to use the <structfield>b</> column, an identical
|
||||||
plan is generated. Observe what happens if we apply the same
|
plan is generated. But observe what happens if we apply the same
|
||||||
condition on both columns combining them with <literal>AND</>:
|
condition on both columns, combining them with <literal>AND</>:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a = 1 AND b = 1;
|
EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a = 1 AND b = 1;
|
||||||
@ -514,15 +515,16 @@ EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a = 1 AND b = 1;
|
|||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
The planner estimates the selectivity for each condition individually,
|
The planner estimates the selectivity for each condition individually,
|
||||||
arriving to the 1% estimates as above, and then multiplies them, getting
|
arriving at the same 1% estimates as above. Then it assumes that the
|
||||||
the final 0.01% estimate. The <quote>actual</quote> figures, however,
|
conditions are independent, and so it multiplies their selectivities,
|
||||||
show that this results in a significant underestimate, as the actual
|
producing a final selectivity estimate of just 0.01%.
|
||||||
number of rows matching the conditions (100) is two orders of magnitude
|
This is a significant underestimate, as the actual number of rows
|
||||||
higher than the estimated value.
|
matching the conditions (100) is two orders of magnitude higher.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
This problem can be fixed by applying functional-dependency
|
This problem can be fixed by creating a statistics object that
|
||||||
|
directs <command>ANALYZE</> to calculate functional-dependency
|
||||||
multivariate statistics on the two columns:
|
multivariate statistics on the two columns:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
@ -539,13 +541,15 @@ EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a = 1 AND b = 1;
|
|||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2>
|
<sect2>
|
||||||
<title>Multivariate N-Distinct coefficients</title>
|
<title>Multivariate N-Distinct Counts</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
A similar problem occurs with estimation of the cardinality of distinct
|
A similar problem occurs with estimation of the cardinality of sets of
|
||||||
elements, used to determine the number of groups that would be generated
|
multiple columns, such as the number of groups that would be generated by
|
||||||
by a <command>GROUP BY</command> clause. When <command>GROUP BY</command>
|
a <command>GROUP BY</command> clause. When <command>GROUP BY</command>
|
||||||
lists a single column, the n-distinct estimate (which can be seen as the
|
lists a single column, the n-distinct estimate (which is visible as the
|
||||||
number of rows returned by the aggregate execution node) is very accurate:
|
estimated number of rows returned by the HashAggregate node) is very
|
||||||
|
accurate:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
EXPLAIN (ANALYZE, TIMING OFF) SELECT COUNT(*) FROM t GROUP BY a;
|
EXPLAIN (ANALYZE, TIMING OFF) SELECT COUNT(*) FROM t GROUP BY a;
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
@ -565,8 +569,8 @@ EXPLAIN (ANALYZE, TIMING OFF) SELECT COUNT(*) FROM t GROUP BY a, b;
|
|||||||
Group Key: a, b
|
Group Key: a, b
|
||||||
-> Seq Scan on t (cost=0.00..145.00 rows=10000 width=8) (actual rows=10000 loops=1)
|
-> Seq Scan on t (cost=0.00..145.00 rows=10000 width=8) (actual rows=10000 loops=1)
|
||||||
</programlisting>
|
</programlisting>
|
||||||
By dropping the existing statistics and re-creating it to include n-distinct
|
By redefining the statistics object to include n-distinct counts for the
|
||||||
calculation, the estimate is much improved:
|
two columns, the estimate is much improved:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
DROP STATISTICS stts;
|
DROP STATISTICS stts;
|
||||||
CREATE STATISTICS stts (dependencies, ndistinct) ON a, b FROM t;
|
CREATE STATISTICS stts (dependencies, ndistinct) ON a, b FROM t;
|
||||||
|
@ -79,11 +79,13 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
|
|||||||
<term><replaceable class="PARAMETER">statistic_type</replaceable></term>
|
<term><replaceable class="PARAMETER">statistic_type</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
A statistic type to be computed in this statistics object. Currently
|
A statistic type to be computed in this statistics object.
|
||||||
supported types are <literal>ndistinct</literal>, which enables
|
Currently supported types are
|
||||||
n-distinct coefficient tracking,
|
<literal>ndistinct</literal>, which enables n-distinct statistics, and
|
||||||
and <literal>dependencies</literal>, which enables functional
|
<literal>dependencies</literal>, which enables functional
|
||||||
dependencies.
|
dependency statistics.
|
||||||
|
For more information, see <xref linkend="planner-stats-extended">
|
||||||
|
and <xref linkend="multivariate-statistics-examples">.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -92,7 +94,8 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
|
|||||||
<term><replaceable class="PARAMETER">column_name</replaceable></term>
|
<term><replaceable class="PARAMETER">column_name</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
The name of a table column to be included in the statistics object.
|
The name of a table column to be covered by the computed statistics.
|
||||||
|
At least two column names must be given.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -114,7 +117,9 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
|
|||||||
<title>Notes</title>
|
<title>Notes</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
You must be the owner of a table to create or change statistics on it.
|
You must be the owner of a table to create a statistics object
|
||||||
|
reading it. Once created, however, the ownership of the statistics
|
||||||
|
object is independent of the underlying table(s).
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
@ -124,8 +129,8 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
|
|||||||
<para>
|
<para>
|
||||||
Create table <structname>t1</> with two functionally dependent columns, i.e.
|
Create table <structname>t1</> with two functionally dependent columns, i.e.
|
||||||
knowledge of a value in the first column is sufficient for determining the
|
knowledge of a value in the first column is sufficient for determining the
|
||||||
value in the other column. Then functional dependencies are built on those
|
value in the other column. Then functional dependency statistics are built
|
||||||
columns:
|
on those columns:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
CREATE TABLE t1 (
|
CREATE TABLE t1 (
|
||||||
@ -136,21 +141,25 @@ CREATE TABLE t1 (
|
|||||||
INSERT INTO t1 SELECT i/100, i/500
|
INSERT INTO t1 SELECT i/100, i/500
|
||||||
FROM generate_series(1,1000000) s(i);
|
FROM generate_series(1,1000000) s(i);
|
||||||
|
|
||||||
|
ANALYZE t1;
|
||||||
|
|
||||||
|
-- the number of matching rows will be drastically underestimated:
|
||||||
|
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
|
||||||
|
|
||||||
CREATE STATISTICS s1 (dependencies) ON a, b FROM t1;
|
CREATE STATISTICS s1 (dependencies) ON a, b FROM t1;
|
||||||
|
|
||||||
ANALYZE t1;
|
ANALYZE t1;
|
||||||
|
|
||||||
-- valid combination of values
|
-- now the rowcount estimate is more accurate:
|
||||||
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
|
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
|
||||||
|
|
||||||
-- invalid combination of values
|
|
||||||
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1);
|
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
Without functional-dependency statistics, the planner would make the
|
Without functional-dependency statistics, the planner would assume
|
||||||
same estimate of the number of matching rows for these two queries.
|
that the two <literal>WHERE</> conditions are independent, and would
|
||||||
With such statistics, it is able to tell that one case has matches
|
multiply their selectivities together to arrive at a much-too-small
|
||||||
and the other does not.
|
rowcount estimate.
|
||||||
|
With such statistics, the planner recognizes that the <literal>WHERE</>
|
||||||
|
conditions are redundant and does not underestimate the rowcount.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user