mirror of
https://github.com/postgres/postgres.git
synced 2025-05-13 01:13:08 -04:00
This also involves renaming src/include/utils/hashutils.h, which becomes src/include/common/hashfn.h. Perhaps an argument can be made for keeping the hashutils.h name, but it seemed more consistent to make it match the name of the file, and also more descriptive of what is actually going on here. Patch by me, reviewed by Suraj Kharage and Mark Dilger. Off-list advice on how not to break the Windows build from Davinder Singh and Amit Kapila. Discussion: http://postgr.es/m/CA+TgmoaRiG4TXND8QuM6JXFRkM_1wL2ZNhzaUKsuec9-4yrkgw@mail.gmail.com
1225 lines
28 KiB
C
1225 lines
28 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* varchar.c
|
|
* Functions for the built-in types char(n) and varchar(n).
|
|
*
|
|
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/utils/adt/varchar.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "access/detoast.h"
|
|
#include "catalog/pg_collation.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "common/hashfn.h"
|
|
#include "libpq/pqformat.h"
|
|
#include "mb/pg_wchar.h"
|
|
#include "nodes/nodeFuncs.h"
|
|
#include "nodes/supportnodes.h"
|
|
#include "utils/array.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/pg_locale.h"
|
|
#include "utils/varlena.h"
|
|
|
|
/* common code for bpchartypmodin and varchartypmodin */
|
|
static int32
|
|
anychar_typmodin(ArrayType *ta, const char *typename)
|
|
{
|
|
int32 typmod;
|
|
int32 *tl;
|
|
int n;
|
|
|
|
tl = ArrayGetIntegerTypmods(ta, &n);
|
|
|
|
/*
|
|
* we're not too tense about good error message here because grammar
|
|
* shouldn't allow wrong number of modifiers for CHAR
|
|
*/
|
|
if (n != 1)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid type modifier")));
|
|
|
|
if (*tl < 1)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("length for type %s must be at least 1", typename)));
|
|
if (*tl > MaxAttrSize)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("length for type %s cannot exceed %d",
|
|
typename, MaxAttrSize)));
|
|
|
|
/*
|
|
* For largely historical reasons, the typmod is VARHDRSZ plus the number
|
|
* of characters; there is enough client-side code that knows about that
|
|
* that we'd better not change it.
|
|
*/
|
|
typmod = VARHDRSZ + *tl;
|
|
|
|
return typmod;
|
|
}
|
|
|
|
/* common code for bpchartypmodout and varchartypmodout */
|
|
static char *
|
|
anychar_typmodout(int32 typmod)
|
|
{
|
|
char *res = (char *) palloc(64);
|
|
|
|
if (typmod > VARHDRSZ)
|
|
snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
|
|
else
|
|
*res = '\0';
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
/*
 * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
 * is for a blank-padded string whose length is specified in CREATE TABLE.
 * VARCHAR is for storing a string whose length is at most the length
 * specified at CREATE TABLE time.
 *
 * It's hard to implement these types because we cannot figure out
 * the length of the type from the type itself. I changed (hopefully all) the
 * fmgr calls that invoke input functions of a data type to supply the
 * length also. (E.g., in INSERTs, we have the tupleDescriptor which contains
 * the length of the attributes and hence the exact length of the char() or
 * varchar(). We pass this to bpcharin() or varcharin().) In the case where
 * we cannot determine the length, we pass in -1 instead and the input
 * converter does not enforce any length check.
 *
 * We actually implement this as a varlena so that we don't have to pass in
 * the length for the comparison functions. (The difference between these
 * types and "text" is that we truncate and possibly blank-pad the string
 * at insertion time.)
 *
 *															  - ay 6/95
 */
|
|
|
|
|
|
/*****************************************************************************
|
|
* bpchar - char() *
|
|
*****************************************************************************/
|
|
|
|
/*
|
|
* bpchar_input -- common guts of bpcharin and bpcharrecv
|
|
*
|
|
* s is the input text of length len (may not be null-terminated)
|
|
* atttypmod is the typmod value to apply
|
|
*
|
|
* Note that atttypmod is measured in characters, which
|
|
* is not necessarily the same as the number of bytes.
|
|
*
|
|
* If the input string is too long, raise an error, unless the extra
|
|
* characters are spaces, in which case they're truncated. (per SQL)
|
|
*/
|
|
static BpChar *
|
|
bpchar_input(const char *s, size_t len, int32 atttypmod)
|
|
{
|
|
BpChar *result;
|
|
char *r;
|
|
size_t maxlen;
|
|
|
|
/* If typmod is -1 (or invalid), use the actual string length */
|
|
if (atttypmod < (int32) VARHDRSZ)
|
|
maxlen = len;
|
|
else
|
|
{
|
|
size_t charlen; /* number of CHARACTERS in the input */
|
|
|
|
maxlen = atttypmod - VARHDRSZ;
|
|
charlen = pg_mbstrlen_with_len(s, len);
|
|
if (charlen > maxlen)
|
|
{
|
|
/* Verify that extra characters are spaces, and clip them off */
|
|
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
|
|
size_t j;
|
|
|
|
/*
|
|
* at this point, len is the actual BYTE length of the input
|
|
* string, maxlen is the max number of CHARACTERS allowed for this
|
|
* bpchar type, mbmaxlen is the length in BYTES of those chars.
|
|
*/
|
|
for (j = mbmaxlen; j < len; j++)
|
|
{
|
|
if (s[j] != ' ')
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
|
|
errmsg("value too long for type character(%d)",
|
|
(int) maxlen)));
|
|
}
|
|
|
|
/*
|
|
* Now we set maxlen to the necessary byte length, not the number
|
|
* of CHARACTERS!
|
|
*/
|
|
maxlen = len = mbmaxlen;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Now we set maxlen to the necessary byte length, not the number
|
|
* of CHARACTERS!
|
|
*/
|
|
maxlen = len + (maxlen - charlen);
|
|
}
|
|
}
|
|
|
|
result = (BpChar *) palloc(maxlen + VARHDRSZ);
|
|
SET_VARSIZE(result, maxlen + VARHDRSZ);
|
|
r = VARDATA(result);
|
|
memcpy(r, s, len);
|
|
|
|
/* blank pad the string if necessary */
|
|
if (maxlen > len)
|
|
memset(r + len, ' ', maxlen - len);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Convert a C string to CHARACTER internal representation. atttypmod
|
|
* is the declared length of the type plus VARHDRSZ.
|
|
*/
|
|
Datum
|
|
bpcharin(PG_FUNCTION_ARGS)
|
|
{
|
|
char *s = PG_GETARG_CSTRING(0);
|
|
|
|
#ifdef NOT_USED
|
|
Oid typelem = PG_GETARG_OID(1);
|
|
#endif
|
|
int32 atttypmod = PG_GETARG_INT32(2);
|
|
BpChar *result;
|
|
|
|
result = bpchar_input(s, strlen(s), atttypmod);
|
|
PG_RETURN_BPCHAR_P(result);
|
|
}
|
|
|
|
|
|
/*
|
|
* Convert a CHARACTER value to a C string.
|
|
*
|
|
* Uses the text conversion functions, which is only appropriate if BpChar
|
|
* and text are equivalent types.
|
|
*/
|
|
Datum
|
|
bpcharout(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum txt = PG_GETARG_DATUM(0);
|
|
|
|
PG_RETURN_CSTRING(TextDatumGetCString(txt));
|
|
}
|
|
|
|
/*
|
|
* bpcharrecv - converts external binary format to bpchar
|
|
*/
|
|
Datum
|
|
bpcharrecv(PG_FUNCTION_ARGS)
|
|
{
|
|
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
|
|
|
#ifdef NOT_USED
|
|
Oid typelem = PG_GETARG_OID(1);
|
|
#endif
|
|
int32 atttypmod = PG_GETARG_INT32(2);
|
|
BpChar *result;
|
|
char *str;
|
|
int nbytes;
|
|
|
|
str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
|
|
result = bpchar_input(str, nbytes, atttypmod);
|
|
pfree(str);
|
|
PG_RETURN_BPCHAR_P(result);
|
|
}
|
|
|
|
/*
|
|
* bpcharsend - converts bpchar to binary format
|
|
*/
|
|
Datum
|
|
bpcharsend(PG_FUNCTION_ARGS)
|
|
{
|
|
/* Exactly the same as textsend, so share code */
|
|
return textsend(fcinfo);
|
|
}
|
|
|
|
|
|
/*
|
|
* Converts a CHARACTER type to the specified size.
|
|
*
|
|
* maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
|
|
* isExplicit is true if this is for an explicit cast to char(N).
|
|
*
|
|
* Truncation rules: for an explicit cast, silently truncate to the given
|
|
* length; for an implicit cast, raise error unless extra characters are
|
|
* all spaces. (This is sort-of per SQL: the spec would actually have us
|
|
* raise a "completion condition" for the explicit cast case, but Postgres
|
|
* hasn't got such a concept.)
|
|
*/
|
|
Datum
|
|
bpchar(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *source = PG_GETARG_BPCHAR_PP(0);
|
|
int32 maxlen = PG_GETARG_INT32(1);
|
|
bool isExplicit = PG_GETARG_BOOL(2);
|
|
BpChar *result;
|
|
int32 len;
|
|
char *r;
|
|
char *s;
|
|
int i;
|
|
int charlen; /* number of characters in the input string +
|
|
* VARHDRSZ */
|
|
|
|
/* No work if typmod is invalid */
|
|
if (maxlen < (int32) VARHDRSZ)
|
|
PG_RETURN_BPCHAR_P(source);
|
|
|
|
maxlen -= VARHDRSZ;
|
|
|
|
len = VARSIZE_ANY_EXHDR(source);
|
|
s = VARDATA_ANY(source);
|
|
|
|
charlen = pg_mbstrlen_with_len(s, len);
|
|
|
|
/* No work if supplied data matches typmod already */
|
|
if (charlen == maxlen)
|
|
PG_RETURN_BPCHAR_P(source);
|
|
|
|
if (charlen > maxlen)
|
|
{
|
|
/* Verify that extra characters are spaces, and clip them off */
|
|
size_t maxmblen;
|
|
|
|
maxmblen = pg_mbcharcliplen(s, len, maxlen);
|
|
|
|
if (!isExplicit)
|
|
{
|
|
for (i = maxmblen; i < len; i++)
|
|
if (s[i] != ' ')
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
|
|
errmsg("value too long for type character(%d)",
|
|
maxlen)));
|
|
}
|
|
|
|
len = maxmblen;
|
|
|
|
/*
|
|
* At this point, maxlen is the necessary byte length, not the number
|
|
* of CHARACTERS!
|
|
*/
|
|
maxlen = len;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* At this point, maxlen is the necessary byte length, not the number
|
|
* of CHARACTERS!
|
|
*/
|
|
maxlen = len + (maxlen - charlen);
|
|
}
|
|
|
|
Assert(maxlen >= len);
|
|
|
|
result = palloc(maxlen + VARHDRSZ);
|
|
SET_VARSIZE(result, maxlen + VARHDRSZ);
|
|
r = VARDATA(result);
|
|
|
|
memcpy(r, s, len);
|
|
|
|
/* blank pad the string if necessary */
|
|
if (maxlen > len)
|
|
memset(r + len, ' ', maxlen - len);
|
|
|
|
PG_RETURN_BPCHAR_P(result);
|
|
}
|
|
|
|
|
|
/* char_bpchar()
|
|
* Convert char to bpchar(1).
|
|
*/
|
|
Datum
|
|
char_bpchar(PG_FUNCTION_ARGS)
|
|
{
|
|
char c = PG_GETARG_CHAR(0);
|
|
BpChar *result;
|
|
|
|
result = (BpChar *) palloc(VARHDRSZ + 1);
|
|
|
|
SET_VARSIZE(result, VARHDRSZ + 1);
|
|
*(VARDATA(result)) = c;
|
|
|
|
PG_RETURN_BPCHAR_P(result);
|
|
}
|
|
|
|
|
|
/* bpchar_name()
|
|
* Converts a bpchar() type to a NameData type.
|
|
*/
|
|
Datum
|
|
bpchar_name(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *s = PG_GETARG_BPCHAR_PP(0);
|
|
char *s_data;
|
|
Name result;
|
|
int len;
|
|
|
|
len = VARSIZE_ANY_EXHDR(s);
|
|
s_data = VARDATA_ANY(s);
|
|
|
|
/* Truncate oversize input */
|
|
if (len >= NAMEDATALEN)
|
|
len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
|
|
|
|
/* Remove trailing blanks */
|
|
while (len > 0)
|
|
{
|
|
if (s_data[len - 1] != ' ')
|
|
break;
|
|
len--;
|
|
}
|
|
|
|
/* We use palloc0 here to ensure result is zero-padded */
|
|
result = (Name) palloc0(NAMEDATALEN);
|
|
memcpy(NameStr(*result), s_data, len);
|
|
|
|
PG_RETURN_NAME(result);
|
|
}
|
|
|
|
/* name_bpchar()
|
|
* Converts a NameData type to a bpchar type.
|
|
*
|
|
* Uses the text conversion functions, which is only appropriate if BpChar
|
|
* and text are equivalent types.
|
|
*/
|
|
Datum
|
|
name_bpchar(PG_FUNCTION_ARGS)
|
|
{
|
|
Name s = PG_GETARG_NAME(0);
|
|
BpChar *result;
|
|
|
|
result = (BpChar *) cstring_to_text(NameStr(*s));
|
|
PG_RETURN_BPCHAR_P(result);
|
|
}
|
|
|
|
Datum
|
|
bpchartypmodin(PG_FUNCTION_ARGS)
|
|
{
|
|
ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
|
|
|
|
PG_RETURN_INT32(anychar_typmodin(ta, "char"));
|
|
}
|
|
|
|
Datum
|
|
bpchartypmodout(PG_FUNCTION_ARGS)
|
|
{
|
|
int32 typmod = PG_GETARG_INT32(0);
|
|
|
|
PG_RETURN_CSTRING(anychar_typmodout(typmod));
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
* varchar - varchar(n)
|
|
*
|
|
* Note: varchar piggybacks on type text for most operations, and so has no
|
|
* C-coded functions except for I/O and typmod checking.
|
|
*****************************************************************************/
|
|
|
|
/*
|
|
* varchar_input -- common guts of varcharin and varcharrecv
|
|
*
|
|
* s is the input text of length len (may not be null-terminated)
|
|
* atttypmod is the typmod value to apply
|
|
*
|
|
* Note that atttypmod is measured in characters, which
|
|
* is not necessarily the same as the number of bytes.
|
|
*
|
|
* If the input string is too long, raise an error, unless the extra
|
|
* characters are spaces, in which case they're truncated. (per SQL)
|
|
*
|
|
* Uses the C string to text conversion function, which is only appropriate
|
|
* if VarChar and text are equivalent types.
|
|
*/
|
|
static VarChar *
|
|
varchar_input(const char *s, size_t len, int32 atttypmod)
|
|
{
|
|
VarChar *result;
|
|
size_t maxlen;
|
|
|
|
maxlen = atttypmod - VARHDRSZ;
|
|
|
|
if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
|
|
{
|
|
/* Verify that extra characters are spaces, and clip them off */
|
|
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
|
|
size_t j;
|
|
|
|
for (j = mbmaxlen; j < len; j++)
|
|
{
|
|
if (s[j] != ' ')
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
|
|
errmsg("value too long for type character varying(%d)",
|
|
(int) maxlen)));
|
|
}
|
|
|
|
len = mbmaxlen;
|
|
}
|
|
|
|
result = (VarChar *) cstring_to_text_with_len(s, len);
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Convert a C string to VARCHAR internal representation. atttypmod
|
|
* is the declared length of the type plus VARHDRSZ.
|
|
*/
|
|
Datum
|
|
varcharin(PG_FUNCTION_ARGS)
|
|
{
|
|
char *s = PG_GETARG_CSTRING(0);
|
|
|
|
#ifdef NOT_USED
|
|
Oid typelem = PG_GETARG_OID(1);
|
|
#endif
|
|
int32 atttypmod = PG_GETARG_INT32(2);
|
|
VarChar *result;
|
|
|
|
result = varchar_input(s, strlen(s), atttypmod);
|
|
PG_RETURN_VARCHAR_P(result);
|
|
}
|
|
|
|
|
|
/*
|
|
* Convert a VARCHAR value to a C string.
|
|
*
|
|
* Uses the text to C string conversion function, which is only appropriate
|
|
* if VarChar and text are equivalent types.
|
|
*/
|
|
Datum
|
|
varcharout(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum txt = PG_GETARG_DATUM(0);
|
|
|
|
PG_RETURN_CSTRING(TextDatumGetCString(txt));
|
|
}
|
|
|
|
/*
|
|
* varcharrecv - converts external binary format to varchar
|
|
*/
|
|
Datum
|
|
varcharrecv(PG_FUNCTION_ARGS)
|
|
{
|
|
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
|
|
|
#ifdef NOT_USED
|
|
Oid typelem = PG_GETARG_OID(1);
|
|
#endif
|
|
int32 atttypmod = PG_GETARG_INT32(2);
|
|
VarChar *result;
|
|
char *str;
|
|
int nbytes;
|
|
|
|
str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
|
|
result = varchar_input(str, nbytes, atttypmod);
|
|
pfree(str);
|
|
PG_RETURN_VARCHAR_P(result);
|
|
}
|
|
|
|
/*
|
|
* varcharsend - converts varchar to binary format
|
|
*/
|
|
Datum
|
|
varcharsend(PG_FUNCTION_ARGS)
|
|
{
|
|
/* Exactly the same as textsend, so share code */
|
|
return textsend(fcinfo);
|
|
}
|
|
|
|
|
|
/*
|
|
* varchar_support()
|
|
*
|
|
* Planner support function for the varchar() length coercion function.
|
|
*
|
|
* Currently, the only interesting thing we can do is flatten calls that set
|
|
* the new maximum length >= the previous maximum length. We can ignore the
|
|
* isExplicit argument, since that only affects truncation cases.
|
|
*/
|
|
Datum
|
|
varchar_support(PG_FUNCTION_ARGS)
|
|
{
|
|
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
|
Node *ret = NULL;
|
|
|
|
if (IsA(rawreq, SupportRequestSimplify))
|
|
{
|
|
SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
|
|
FuncExpr *expr = req->fcall;
|
|
Node *typmod;
|
|
|
|
Assert(list_length(expr->args) >= 2);
|
|
|
|
typmod = (Node *) lsecond(expr->args);
|
|
|
|
if (IsA(typmod, Const) &&!((Const *) typmod)->constisnull)
|
|
{
|
|
Node *source = (Node *) linitial(expr->args);
|
|
int32 old_typmod = exprTypmod(source);
|
|
int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
|
|
int32 old_max = old_typmod - VARHDRSZ;
|
|
int32 new_max = new_typmod - VARHDRSZ;
|
|
|
|
if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
|
|
ret = relabel_to_typmod(source, new_typmod);
|
|
}
|
|
}
|
|
|
|
PG_RETURN_POINTER(ret);
|
|
}
|
|
|
|
/*
|
|
* Converts a VARCHAR type to the specified size.
|
|
*
|
|
* maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
|
|
* isExplicit is true if this is for an explicit cast to varchar(N).
|
|
*
|
|
* Truncation rules: for an explicit cast, silently truncate to the given
|
|
* length; for an implicit cast, raise error unless extra characters are
|
|
* all spaces. (This is sort-of per SQL: the spec would actually have us
|
|
* raise a "completion condition" for the explicit cast case, but Postgres
|
|
* hasn't got such a concept.)
|
|
*/
|
|
Datum
|
|
varchar(PG_FUNCTION_ARGS)
|
|
{
|
|
VarChar *source = PG_GETARG_VARCHAR_PP(0);
|
|
int32 typmod = PG_GETARG_INT32(1);
|
|
bool isExplicit = PG_GETARG_BOOL(2);
|
|
int32 len,
|
|
maxlen;
|
|
size_t maxmblen;
|
|
int i;
|
|
char *s_data;
|
|
|
|
len = VARSIZE_ANY_EXHDR(source);
|
|
s_data = VARDATA_ANY(source);
|
|
maxlen = typmod - VARHDRSZ;
|
|
|
|
/* No work if typmod is invalid or supplied data fits it already */
|
|
if (maxlen < 0 || len <= maxlen)
|
|
PG_RETURN_VARCHAR_P(source);
|
|
|
|
/* only reach here if string is too long... */
|
|
|
|
/* truncate multibyte string preserving multibyte boundary */
|
|
maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
|
|
|
|
if (!isExplicit)
|
|
{
|
|
for (i = maxmblen; i < len; i++)
|
|
if (s_data[i] != ' ')
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
|
|
errmsg("value too long for type character varying(%d)",
|
|
maxlen)));
|
|
}
|
|
|
|
PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
|
|
maxmblen));
|
|
}
|
|
|
|
Datum
|
|
varchartypmodin(PG_FUNCTION_ARGS)
|
|
{
|
|
ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
|
|
|
|
PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
|
|
}
|
|
|
|
Datum
|
|
varchartypmodout(PG_FUNCTION_ARGS)
|
|
{
|
|
int32 typmod = PG_GETARG_INT32(0);
|
|
|
|
PG_RETURN_CSTRING(anychar_typmodout(typmod));
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
* Exported functions
|
|
*****************************************************************************/
|
|
|
|
/* "True" length (not counting trailing blanks) of a BpChar */
|
|
static inline int
|
|
bcTruelen(BpChar *arg)
|
|
{
|
|
return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
|
|
}
|
|
|
|
/*
 * bpchartruelen -- length of the first len bytes of s once trailing blanks
 * are discounted.
 */
int
bpchartruelen(char *s, int len)
{
	/*
	 * This relies on ' ' being a single-byte unit in every supported
	 * multibyte server encoding, so scanning backwards bytewise is safe.
	 */
	while (len > 0 && s[len - 1] == ' ')
		len--;

	return len;
}
|
|
|
|
Datum
|
|
bpcharlen(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg = PG_GETARG_BPCHAR_PP(0);
|
|
int len;
|
|
|
|
/* get number of bytes, ignoring trailing spaces */
|
|
len = bcTruelen(arg);
|
|
|
|
/* in multibyte encoding, convert to number of characters */
|
|
if (pg_database_encoding_max_length() != 1)
|
|
len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
|
|
|
|
PG_RETURN_INT32(len);
|
|
}
|
|
|
|
Datum
|
|
bpcharoctetlen(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum arg = PG_GETARG_DATUM(0);
|
|
|
|
/* We need not detoast the input at all */
|
|
PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
* Comparison Functions used for bpchar
|
|
*
|
|
* Note: btree indexes need these routines not to leak memory; therefore,
|
|
* be careful to free working copies of toasted datums. Most places don't
|
|
* need to be so careful.
|
|
*****************************************************************************/
|
|
|
|
static void
|
|
check_collation_set(Oid collid)
|
|
{
|
|
if (!OidIsValid(collid))
|
|
{
|
|
/*
|
|
* This typically means that the parser could not resolve a conflict
|
|
* of implicit collations, so report it that way.
|
|
*/
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INDETERMINATE_COLLATION),
|
|
errmsg("could not determine which collation to use for string comparison"),
|
|
errhint("Use the COLLATE clause to set the collation explicitly.")));
|
|
}
|
|
}
|
|
|
|
Datum
|
|
bpchareq(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
bool result;
|
|
Oid collid = PG_GET_COLLATION();
|
|
|
|
check_collation_set(collid);
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
if (lc_collate_is_c(collid) ||
|
|
collid == DEFAULT_COLLATION_OID ||
|
|
pg_newlocale_from_collation(collid)->deterministic)
|
|
{
|
|
/*
|
|
* Since we only care about equality or not-equality, we can avoid all
|
|
* the expense of strcoll() here, and just do bitwise comparison.
|
|
*/
|
|
if (len1 != len2)
|
|
result = false;
|
|
else
|
|
result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
|
|
}
|
|
else
|
|
{
|
|
result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
collid) == 0);
|
|
}
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(result);
|
|
}
|
|
|
|
Datum
|
|
bpcharne(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
bool result;
|
|
Oid collid = PG_GET_COLLATION();
|
|
|
|
check_collation_set(collid);
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
if (lc_collate_is_c(collid) ||
|
|
collid == DEFAULT_COLLATION_OID ||
|
|
pg_newlocale_from_collation(collid)->deterministic)
|
|
{
|
|
/*
|
|
* Since we only care about equality or not-equality, we can avoid all
|
|
* the expense of strcoll() here, and just do bitwise comparison.
|
|
*/
|
|
if (len1 != len2)
|
|
result = true;
|
|
else
|
|
result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
|
|
}
|
|
else
|
|
{
|
|
result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
collid) != 0);
|
|
}
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(result);
|
|
}
|
|
|
|
Datum
|
|
bpcharlt(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
int cmp;
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
PG_GET_COLLATION());
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(cmp < 0);
|
|
}
|
|
|
|
Datum
|
|
bpcharle(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
int cmp;
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
PG_GET_COLLATION());
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(cmp <= 0);
|
|
}
|
|
|
|
Datum
|
|
bpchargt(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
int cmp;
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
PG_GET_COLLATION());
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(cmp > 0);
|
|
}
|
|
|
|
Datum
|
|
bpcharge(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
int cmp;
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
PG_GET_COLLATION());
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(cmp >= 0);
|
|
}
|
|
|
|
Datum
|
|
bpcharcmp(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
int cmp;
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
PG_GET_COLLATION());
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_INT32(cmp);
|
|
}
|
|
|
|
Datum
|
|
bpchar_sortsupport(PG_FUNCTION_ARGS)
|
|
{
|
|
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
|
Oid collid = ssup->ssup_collation;
|
|
MemoryContext oldcontext;
|
|
|
|
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
|
|
|
|
/* Use generic string SortSupport */
|
|
varstr_sortsupport(ssup, BPCHAROID, collid);
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
Datum
|
|
bpchar_larger(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
int cmp;
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
PG_GET_COLLATION());
|
|
|
|
PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
|
|
}
|
|
|
|
Datum
|
|
bpchar_smaller(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int len1,
|
|
len2;
|
|
int cmp;
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
|
|
PG_GET_COLLATION());
|
|
|
|
PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
|
|
}
|
|
|
|
|
|
/*
|
|
* bpchar needs a specialized hash function because we want to ignore
|
|
* trailing blanks in comparisons.
|
|
*/
|
|
Datum
|
|
hashbpchar(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *key = PG_GETARG_BPCHAR_PP(0);
|
|
Oid collid = PG_GET_COLLATION();
|
|
char *keydata;
|
|
int keylen;
|
|
pg_locale_t mylocale = 0;
|
|
Datum result;
|
|
|
|
if (!collid)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INDETERMINATE_COLLATION),
|
|
errmsg("could not determine which collation to use for string hashing"),
|
|
errhint("Use the COLLATE clause to set the collation explicitly.")));
|
|
|
|
keydata = VARDATA_ANY(key);
|
|
keylen = bcTruelen(key);
|
|
|
|
if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
|
|
mylocale = pg_newlocale_from_collation(collid);
|
|
|
|
if (!mylocale || mylocale->deterministic)
|
|
{
|
|
result = hash_any((unsigned char *) keydata, keylen);
|
|
}
|
|
else
|
|
{
|
|
#ifdef USE_ICU
|
|
if (mylocale->provider == COLLPROVIDER_ICU)
|
|
{
|
|
int32_t ulen = -1;
|
|
UChar *uchar = NULL;
|
|
Size bsize;
|
|
uint8_t *buf;
|
|
|
|
ulen = icu_to_uchar(&uchar, keydata, keylen);
|
|
|
|
bsize = ucol_getSortKey(mylocale->info.icu.ucol,
|
|
uchar, ulen, NULL, 0);
|
|
buf = palloc(bsize);
|
|
ucol_getSortKey(mylocale->info.icu.ucol,
|
|
uchar, ulen, buf, bsize);
|
|
|
|
result = hash_any(buf, bsize);
|
|
|
|
pfree(buf);
|
|
}
|
|
else
|
|
#endif
|
|
/* shouldn't happen */
|
|
elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
|
|
}
|
|
|
|
/* Avoid leaking memory for toasted inputs */
|
|
PG_FREE_IF_COPY(key, 0);
|
|
|
|
return result;
|
|
}
|
|
|
|
Datum
|
|
hashbpcharextended(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *key = PG_GETARG_BPCHAR_PP(0);
|
|
Oid collid = PG_GET_COLLATION();
|
|
char *keydata;
|
|
int keylen;
|
|
pg_locale_t mylocale = 0;
|
|
Datum result;
|
|
|
|
if (!collid)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INDETERMINATE_COLLATION),
|
|
errmsg("could not determine which collation to use for string hashing"),
|
|
errhint("Use the COLLATE clause to set the collation explicitly.")));
|
|
|
|
keydata = VARDATA_ANY(key);
|
|
keylen = bcTruelen(key);
|
|
|
|
if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
|
|
mylocale = pg_newlocale_from_collation(collid);
|
|
|
|
if (!mylocale || mylocale->deterministic)
|
|
{
|
|
result = hash_any_extended((unsigned char *) keydata, keylen,
|
|
PG_GETARG_INT64(1));
|
|
}
|
|
else
|
|
{
|
|
#ifdef USE_ICU
|
|
if (mylocale->provider == COLLPROVIDER_ICU)
|
|
{
|
|
int32_t ulen = -1;
|
|
UChar *uchar = NULL;
|
|
Size bsize;
|
|
uint8_t *buf;
|
|
|
|
ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
|
|
|
|
bsize = ucol_getSortKey(mylocale->info.icu.ucol,
|
|
uchar, ulen, NULL, 0);
|
|
buf = palloc(bsize);
|
|
ucol_getSortKey(mylocale->info.icu.ucol,
|
|
uchar, ulen, buf, bsize);
|
|
|
|
result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
|
|
|
|
pfree(buf);
|
|
}
|
|
else
|
|
#endif
|
|
/* shouldn't happen */
|
|
elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
|
|
}
|
|
|
|
PG_FREE_IF_COPY(key, 0);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* The following operators support character-by-character comparison
|
|
* of bpchar datums, to allow building indexes suitable for LIKE clauses.
|
|
* Note that the regular bpchareq/bpcharne comparison operators, and
|
|
* regular support functions 1 and 2 with "C" collation are assumed to be
|
|
* compatible with these!
|
|
*/
|
|
|
|
static int
|
|
internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
|
|
{
|
|
int result;
|
|
int len1,
|
|
len2;
|
|
|
|
len1 = bcTruelen(arg1);
|
|
len2 = bcTruelen(arg2);
|
|
|
|
result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
|
|
if (result != 0)
|
|
return result;
|
|
else if (len1 < len2)
|
|
return -1;
|
|
else if (len1 > len2)
|
|
return 1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
|
|
Datum
|
|
bpchar_pattern_lt(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int result;
|
|
|
|
result = internal_bpchar_pattern_compare(arg1, arg2);
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(result < 0);
|
|
}
|
|
|
|
|
|
Datum
|
|
bpchar_pattern_le(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int result;
|
|
|
|
result = internal_bpchar_pattern_compare(arg1, arg2);
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(result <= 0);
|
|
}
|
|
|
|
|
|
Datum
|
|
bpchar_pattern_ge(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int result;
|
|
|
|
result = internal_bpchar_pattern_compare(arg1, arg2);
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(result >= 0);
|
|
}
|
|
|
|
|
|
Datum
|
|
bpchar_pattern_gt(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int result;
|
|
|
|
result = internal_bpchar_pattern_compare(arg1, arg2);
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_BOOL(result > 0);
|
|
}
|
|
|
|
|
|
Datum
|
|
btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
|
|
{
|
|
BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
|
|
BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
|
|
int result;
|
|
|
|
result = internal_bpchar_pattern_compare(arg1, arg2);
|
|
|
|
PG_FREE_IF_COPY(arg1, 0);
|
|
PG_FREE_IF_COPY(arg2, 1);
|
|
|
|
PG_RETURN_INT32(result);
|
|
}
|
|
|
|
|
|
Datum
|
|
btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
|
|
{
|
|
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
|
MemoryContext oldcontext;
|
|
|
|
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
|
|
|
|
/* Use generic string SortSupport, forcing "C" collation */
|
|
varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
PG_RETURN_VOID();
|
|
}
|