mirror of
https://github.com/postgres/postgres.git
synced 2025-05-28 00:03:23 -04:00
Update/improve documentation about creating aggregate functions.
This commit is contained in:
parent
0a27641c1a
commit
ee4dcf1478
@ -1,5 +1,5 @@
|
||||
<!--
|
||||
$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_aggregate.sgml,v 1.7 1999/07/22 15:09:07 thomas Exp $
|
||||
$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_aggregate.sgml,v 1.8 2000/03/26 19:45:21 tgl Exp $
|
||||
Postgres documentation
|
||||
-->
|
||||
|
||||
@ -24,9 +24,9 @@ Postgres documentation
|
||||
<date>1999-07-20</date>
|
||||
</refsynopsisdivinfo>
|
||||
<synopsis>
|
||||
CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASETYPE = <replaceable class="PARAMETER">data_type</replaceable>
|
||||
[ , SFUNC1 = <replaceable class="PARAMETER">sfunc1</replaceable>, STYPE1 = <replaceable class="PARAMETER">sfunc1_return_type</replaceable> ]
|
||||
[ , SFUNC2 = <replaceable class="PARAMETER">sfunc2</replaceable>, STYPE2 = <replaceable class="PARAMETER">sfunc2_return_type</replaceable> ]
|
||||
CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASETYPE = <replaceable class="PARAMETER">input_data_type</replaceable>
|
||||
[ , SFUNC1 = <replaceable class="PARAMETER">sfunc1</replaceable>, STYPE1 = <replaceable class="PARAMETER">state1_type</replaceable> ]
|
||||
[ , SFUNC2 = <replaceable class="PARAMETER">sfunc2</replaceable>, STYPE2 = <replaceable class="PARAMETER">state2_type</replaceable> ]
|
||||
[ , FINALFUNC = <replaceable class="PARAMETER">ffunc</replaceable> ]
|
||||
[ , INITCOND1 = <replaceable class="PARAMETER">initial_condition1</replaceable> ]
|
||||
[ , INITCOND2 = <replaceable class="PARAMETER">initial_condition2</replaceable> ] )
|
||||
@ -51,10 +51,10 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><replaceable class="PARAMETER">data_type</replaceable></term>
|
||||
<term><replaceable class="PARAMETER">input_data_type</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The fundamental data type on which this aggregate function operates.
|
||||
The input data type on which this aggregate function operates.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -63,21 +63,25 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
|
||||
<term><replaceable class="PARAMETER">sfunc1</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The state transition function
|
||||
to be called for every non-NULL field from the source column.
|
||||
It takes a variable of
|
||||
type <replaceable class="PARAMETER">sfunc1_return_type</replaceable> as
|
||||
the first argument and that field as the
|
||||
second argument.
|
||||
A state transition function
|
||||
to be called for every non-NULL input data value.
|
||||
This must be a function of two arguments, the first being of
|
||||
type <replaceable class="PARAMETER">state1_type</replaceable>
|
||||
and the second of
|
||||
type <replaceable class="PARAMETER">input_data_type</replaceable>.
|
||||
The function must return a value of
|
||||
type <replaceable class="PARAMETER">state1_type</replaceable>.
|
||||
This function takes the current state value 1 and the current
|
||||
input data item, and returns the next state value 1.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><replaceable class="PARAMETER">sfunc1_return_type</replaceable></term>
|
||||
<term><replaceable class="PARAMETER">state1_type</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The return type of the first transition function.
|
||||
The data type for the first state value of the aggregate.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -86,20 +90,22 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
|
||||
<term><replaceable class="PARAMETER">sfunc2</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The state transition function
|
||||
to be called for every non-NULL field from the source column.
|
||||
It takes a variable
|
||||
of type <replaceable class="PARAMETER">sfunc2_return_type</replaceable>
|
||||
as the only argument and returns a variable of the same type.
|
||||
A state transition function
|
||||
to be called for every non-NULL input data value.
|
||||
This must be a function of one argument of
|
||||
type <replaceable class="PARAMETER">state2_type</replaceable>,
|
||||
returning a value of the same type.
|
||||
This function takes the current state value 2 and
|
||||
returns the next state value 2.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><replaceable class="PARAMETER">sfunc2_return_type</replaceable></term>
|
||||
<term><replaceable class="PARAMETER">state2_type</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The return type of the second transition function.
|
||||
The data type for the second state value of the aggregate.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -108,12 +114,17 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
|
||||
<term><replaceable class="PARAMETER">ffunc</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The final function
|
||||
called after traversing all input fields. This function must
|
||||
The final function called to compute the aggregate's result
|
||||
after all input data has been traversed.
|
||||
If both state values are used, the final function must
|
||||
take two arguments of types
|
||||
<replaceable class="PARAMETER">sfunc1_return_type</replaceable>
|
||||
<replaceable class="PARAMETER">state1_type</replaceable>
|
||||
and
|
||||
<replaceable class="PARAMETER">sfunc2_return_type</replaceable>.
|
||||
<replaceable class="PARAMETER">state2_type</replaceable>.
|
||||
If only one state value is used, the final function must
|
||||
take a single argument of that state value's type.
|
||||
The output data type of the aggregate is defined as the return
|
||||
type of this function.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -122,7 +133,7 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
|
||||
<term><replaceable class="PARAMETER">initial_condition1</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The initial value for the first transition function argument.
|
||||
The initial value for state value 1.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -131,7 +142,7 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
|
||||
<term><replaceable class="PARAMETER">initial_condition2</replaceable></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The initial value for the second transition function argument.
|
||||
The initial value for state value 2.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -182,84 +193,66 @@ CREATE
|
||||
can be used to provide the desired features.
|
||||
</para>
|
||||
<para>
|
||||
An aggregate function can require up to three functions, two
|
||||
state transition functions,
|
||||
An aggregate function is identified by its name and input data type.
|
||||
Two aggregates can have the same name if they operate on different
|
||||
input types. To avoid confusion, do not make an ordinary function
|
||||
of the same name and input data type as an aggregate.
|
||||
</para>
|
||||
<para>
|
||||
An aggregate function is made from between one and three ordinary
|
||||
functions:
|
||||
two state transition functions,
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable>
|
||||
and <replaceable class="PARAMETER">sfunc2</replaceable>:
|
||||
<programlisting>
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable>( internal-state1, next-data_item ) ---> next-internal-state1 <replaceable class="PARAMETER">sfunc2</replaceable>( internal-state2 ) ---> next-internal-state2
|
||||
</programlisting>
|
||||
and <replaceable class="PARAMETER">sfunc2</replaceable>,
|
||||
and a final calculation function,
|
||||
<replaceable class="PARAMETER">ffunc</replaceable>:
|
||||
<replaceable class="PARAMETER">ffunc</replaceable>.
|
||||
These are used as follows:
|
||||
<programlisting>
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable>( internal-state1, next-data-item ) ---> next-internal-state1
|
||||
<replaceable class="PARAMETER">sfunc2</replaceable>( internal-state2 ) ---> next-internal-state2
|
||||
<replaceable class="PARAMETER">ffunc</replaceable>(internal-state1, internal-state2) ---> aggregate-value
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
<productname>Postgres</productname> creates up to two temporary variables
|
||||
(referred to here as <replaceable class="PARAMETER">temp1</replaceable>
|
||||
and <replaceable class="PARAMETER">temp2</replaceable>)
|
||||
to hold intermediate results used as arguments to the transition functions.
|
||||
<productname>Postgres</productname> creates one or two temporary variables
|
||||
(of data types <replaceable class="PARAMETER">state1_type</replaceable> and/or
|
||||
<replaceable class="PARAMETER">state2_type</replaceable>) to hold the
|
||||
current internal states of the aggregate. At each input data item,
|
||||
the state transition function(s) are invoked to calculate new values
|
||||
for the internal state values. After all the data has been processed,
|
||||
the final function is invoked once to calculate the aggregate's output
|
||||
value.
|
||||
</para>
|
||||
<para>
|
||||
These transition functions are required to have the following properties:
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
The arguments to
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable>
|
||||
must be
|
||||
<replaceable class="PARAMETER">temp1</replaceable>
|
||||
of type
|
||||
<replaceable class="PARAMETER">sfunc1_return_type</replaceable>
|
||||
and
|
||||
<replaceable class="PARAMETER">column_value</replaceable>
|
||||
of type <replaceable class="PARAMETER">data_type</replaceable>.
|
||||
The return value must be of type
|
||||
<replaceable class="PARAMETER">sfunc1_return_type</replaceable>
|
||||
and will be used as the first argument in the next call to
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable>.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
The argument and return value of
|
||||
<replaceable class="PARAMETER">sfunc2</replaceable>
|
||||
must be
|
||||
<replaceable class="PARAMETER">temp2</replaceable>
|
||||
of type
|
||||
<replaceable class="PARAMETER">sfunc2_return_type</replaceable>.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
The arguments to the final-calculation-function
|
||||
must be
|
||||
<replaceable class="PARAMETER">temp1</replaceable>
|
||||
and
|
||||
<replaceable class="PARAMETER">temp2</replaceable>
|
||||
and its return value must
|
||||
be a <productname>Postgres</productname>
|
||||
base type (not necessarily
|
||||
<replaceable class="PARAMETER">data_type</replaceable>
|
||||
which had been specified for BASETYPE).
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
FINALFUNC should be specified
|
||||
if and only if both state-transition functions are
|
||||
specified.
|
||||
</para></listitem>
|
||||
</itemizedlist>
|
||||
<replaceable class="PARAMETER">ffunc</replaceable> must be specified if
|
||||
both transition functions are specified. If only one transition function
|
||||
is used, then <replaceable class="PARAMETER">ffunc</replaceable> is
|
||||
optional. The default behavior when
|
||||
<replaceable class="PARAMETER">ffunc</replaceable> is not provided is
|
||||
to return the ending value of the internal state value being used
|
||||
(and, therefore, the aggregate's output type is the same as that
|
||||
state value's type).
|
||||
</para>
|
||||
|
||||
<para>
|
||||
An aggregate function may also require one or two initial conditions,
|
||||
one for
|
||||
each transition function. These are specified and stored
|
||||
in the database as fields of type <type>text</type>.
|
||||
An aggregate function may also provide one or two initial conditions,
|
||||
that is, initial values for the internal state values being used.
|
||||
These are specified and stored in the database as fields of type
|
||||
<type>text</type>, but they must be valid external representations
|
||||
of constants of the state value data types. If
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable> is specified
|
||||
without an <replaceable class="PARAMETER">initial_condition1</replaceable> value,
|
||||
then the system does not call
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable>
|
||||
at the first input item; instead, the internal state value 1 is
|
||||
initialized with the first input value, and
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable> is called beginning
|
||||
at the second input item. This is useful for aggregates like MIN and
|
||||
MAX. Note that an aggregate using this feature will return NULL when
|
||||
called with no input values. There is no comparable provision for
|
||||
state value 2; if <replaceable class="PARAMETER">sfunc2</replaceable> is
|
||||
specified then an <replaceable class="PARAMETER">initial_condition2</replaceable> is
|
||||
required.
|
||||
</para>
|
||||
|
||||
<refsect2 id="R2-SQL-CREATEAGGREGATE-3">
|
||||
@ -274,18 +267,32 @@ CREATE
|
||||
to drop aggregate functions.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The parameters of <command>CREATE AGGREGATE</command> can be written
|
||||
in any order, not just the order illustrated above.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
It is possible to specify aggregate functions
|
||||
that have varying combinations of state and final functions.
|
||||
For example, the <function>count</function> aggregate requires SFUNC2
|
||||
(an incrementing function) but not SFUNC1 or FINALFUNC,
|
||||
whereas the <function>sum</function> aggregate requires SFUNC1 (an addition
|
||||
function) but not SFUNC2 or FINALFUNC and the <function>avg</function>
|
||||
For example, the <function>count</function> aggregate requires
|
||||
<replaceable class="PARAMETER">sfunc2</replaceable>
|
||||
(an incrementing function) but not
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable> or
|
||||
<replaceable class="PARAMETER">ffunc</replaceable>,
|
||||
whereas the <function>sum</function> aggregate requires
|
||||
<replaceable class="PARAMETER">sfunc1</replaceable> (an addition
|
||||
function) but not <replaceable class="PARAMETER">sfunc2</replaceable> or
|
||||
<replaceable class="PARAMETER">ffunc</replaceable>, and the
|
||||
<function>avg</function>
|
||||
aggregate requires
|
||||
both of the above state functions as
|
||||
well as a FINALFUNC (a division function) to produce its
|
||||
both state functions as
|
||||
well as a <replaceable class="PARAMETER">ffunc</replaceable> (a division
|
||||
function) to produce its
|
||||
answer. In any case, at least one state function must be
|
||||
defined, and any SFUNC2 must have a corresponding INITCOND2.
|
||||
defined, and any <replaceable class="PARAMETER">sfunc2</replaceable> must
|
||||
have a corresponding
|
||||
<replaceable class="PARAMETER">initial_condition2</replaceable>.
|
||||
</para>
|
||||
|
||||
</refsect2>
|
||||
|
@ -2,26 +2,57 @@
|
||||
<Title>Extending <Acronym>SQL</Acronym>: Aggregates</Title>
|
||||
|
||||
<Para>
|
||||
Aggregates in <ProductName>Postgres</ProductName>
|
||||
are expressed in terms of state
|
||||
transition functions. That is, an aggregate can be
|
||||
Aggregate functions in <ProductName>Postgres</ProductName>
|
||||
are expressed in terms of <firstterm>state values</firstterm>
|
||||
and <firstterm>state transition functions</firstterm>.
|
||||
That is, an aggregate can be
|
||||
defined in terms of state that is modified whenever an
|
||||
instance is processed. Some state functions look at a
|
||||
particular value in the instance when computing the new
|
||||
state (<Acronym>sfunc1</Acronym> in the
|
||||
create aggregate syntax) while
|
||||
others only keep track of their own internal state
|
||||
(<Acronym>sfunc2</Acronym>).
|
||||
If we define an aggregate that uses only
|
||||
<Acronym>sfunc1</Acronym>, we
|
||||
define an aggregate that computes a running function of
|
||||
input item is processed. To define a new aggregate
|
||||
function, one selects a datatype for the state value,
|
||||
an initial value for the state, and a state transition
|
||||
function. The state transition function is just an
|
||||
ordinary function that could also be used outside the
|
||||
context of the aggregate.
|
||||
</Para>
|
||||
|
||||
<Para>
|
||||
Actually, in order to make it easier to construct useful
|
||||
aggregates from existing functions, an aggregate can have
|
||||
one or two separate state values, one or two transition
|
||||
functions to update those state values, and a
|
||||
<firstterm>final function</firstterm> that computes the
|
||||
actual aggregate result from the ending state values.
|
||||
</Para>
|
||||
|
||||
<Para>
|
||||
Thus there can be as many as four datatypes involved:
|
||||
the type of the input data items, the type of the aggregate's
|
||||
result, and the types of the two state values. Only the
|
||||
input and result datatypes are seen by a user of the aggregate.
|
||||
</Para>
|
||||
|
||||
<Para>
|
||||
Some state transition functions need to look at each successive
|
||||
input to compute the next state value, while others ignore the
|
||||
specific input value and simply update their internal state.
|
||||
(The most useful example of the second kind is a running count
|
||||
of the number of input items.) The <ProductName>Postgres</ProductName>
|
||||
aggregate machinery defines <Acronym>sfunc1</Acronym> for
|
||||
an aggregate as a function that is passed both the old state
|
||||
value and the current input value, while <Acronym>sfunc2</Acronym>
|
||||
is a function that is passed only the old state value.
|
||||
</Para>
|
||||
|
||||
<Para>
|
||||
If we define an aggregate that uses only <Acronym>sfunc1</Acronym>,
|
||||
we have an aggregate that computes a running function of
|
||||
the attribute values from each instance. "Sum" is an
|
||||
example of this kind of aggregate. "Sum" starts at
|
||||
zero and always adds the current instance's value to
|
||||
its running total. We will use the
|
||||
<Acronym>int4pl</Acronym> that is
|
||||
built into <ProductName>Postgres</ProductName>
|
||||
to perform this addition.
|
||||
its running total. For example, if we want to make a Sum
|
||||
aggregate to work on a datatype for complex numbers,
|
||||
we only need the addition function for that datatype.
|
||||
The aggregate definition is:
|
||||
|
||||
<ProgramListing>
|
||||
CREATE AGGREGATE complex_sum (
|
||||
@ -39,11 +70,15 @@ SELECT complex_sum(a) FROM test_complex;
|
||||
|(34,53.9) |
|
||||
+------------+
|
||||
</ProgramListing>
|
||||
|
||||
(In practice, we'd just name the aggregate "sum", and rely on
|
||||
<ProductName>Postgres</ProductName> to figure out which kind
|
||||
of sum to apply to a complex column.)
|
||||
</Para>
|
||||
|
||||
<Para>
|
||||
If we define only <Acronym>sfunc2</Acronym>, we are
|
||||
specifying an aggregate
|
||||
specifying an aggregate
|
||||
that computes a running function that is independent of
|
||||
the attribute values from each instance.
|
||||
"Count" is the most common example of this kind of
|
||||
@ -104,4 +139,10 @@ SELECT my_average(salary) as emp_average FROM EMP;
|
||||
+------------+
|
||||
</ProgramListing>
|
||||
</Para>
|
||||
|
||||
<Para>
|
||||
For further details see
|
||||
<xref endterm="sql-createaggregate-title"
|
||||
linkend="sql-createaggregate-title">.
|
||||
</Para>
|
||||
</Chapter>
|
||||
|
Loading…
x
Reference in New Issue
Block a user