mirror of
https://github.com/postgres/postgres.git
synced 2025-10-04 00:02:26 -04:00
Add support for base64url encoding and decoding
This adds support for base64url encoding and decoding, a base64 variant which is safe to use in filenames and URLs. base64url replaces '+' in the base64 alphabet with '-' and '/' with '_', thus making it safe for URL addresses and file systems. Support for base64url was originally suggested by Przemysław Sztoch. Author: Florents Tselai <florents.tselai@gmail.com> Reviewed-by: Aleksander Alekseev <aleksander@timescale.com> Reviewed-by: David E. Wheeler <david@justatheory.com> Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com> Reviewed-by: Daniel Gustafsson <daniel@yesql.se> Reviewed-by: Chao Li (Evan) <li.evan.chao@gmail.com> Discussion: https://postgr.es/m/70f2b6a8-486a-4fdb-a951-84cef35e22ab@sztoch.pl
This commit is contained in:
parent
261f89a976
commit
e1d917182c
@ -728,6 +728,7 @@
|
||||
Encodes binary data into a textual representation; supported
|
||||
<parameter>format</parameter> values are:
|
||||
<link linkend="encode-format-base64"><literal>base64</literal></link>,
|
||||
<link linkend="encode-format-base64url"><literal>base64url</literal></link>,
|
||||
<link linkend="encode-format-escape"><literal>escape</literal></link>,
|
||||
<link linkend="encode-format-hex"><literal>hex</literal></link>.
|
||||
</para>
|
||||
@ -785,6 +786,24 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="encode-format-base64url">
|
||||
<term>base64url
|
||||
<indexterm>
|
||||
<primary>base64url format</primary>
|
||||
</indexterm></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The <literal>base64url</literal> format is that of
|
||||
<ulink url="https://datatracker.ietf.org/doc/html/rfc4648#section-5">
|
||||
RFC 4648 Section 5</ulink>, a <literal>base64</literal> variant safe to
|
||||
use in filenames and URLs. The <literal>base64url</literal> alphabet
|
||||
uses <literal>'-'</literal> instead of <literal>'+'</literal> and
|
||||
<literal>'_'</literal> instead of <literal>'/'</literal> and also omits
|
||||
the <literal>'='</literal> padding character.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="encode-format-escape">
|
||||
<term>escape
|
||||
<indexterm>
|
||||
|
@ -267,12 +267,15 @@ hex_dec_len(const char *src, size_t srclen)
|
||||
}
|
||||
|
||||
/*
|
||||
* BASE64
|
||||
* BASE64 and BASE64URL
|
||||
*/
|
||||
|
||||
static const char _base64[] =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
|
||||
static const char _base64url[] =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
|
||||
|
||||
static const int8 b64lookup[128] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
@ -284,8 +287,15 @@ static const int8 b64lookup[128] = {
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
/*
|
||||
* pg_base64_encode_internal
|
||||
*
|
||||
* Helper for encoding base64 or base64url. When url is passed as true the
|
||||
* input will be encoded using base64url. len bytes in src are encoded into
|
||||
* dst.
|
||||
*/
|
||||
static uint64
|
||||
pg_base64_encode(const char *src, size_t len, char *dst)
|
||||
pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url)
|
||||
{
|
||||
char *p,
|
||||
*lend = dst + 76;
|
||||
@ -293,6 +303,7 @@ pg_base64_encode(const char *src, size_t len, char *dst)
|
||||
*end = src + len;
|
||||
int pos = 2;
|
||||
uint32 buf = 0;
|
||||
const char *alphabet = url ? _base64url : _base64;
|
||||
|
||||
s = src;
|
||||
p = dst;
|
||||
@ -306,33 +317,64 @@ pg_base64_encode(const char *src, size_t len, char *dst)
|
||||
/* write it out */
|
||||
if (pos < 0)
|
||||
{
|
||||
*p++ = _base64[(buf >> 18) & 0x3f];
|
||||
*p++ = _base64[(buf >> 12) & 0x3f];
|
||||
*p++ = _base64[(buf >> 6) & 0x3f];
|
||||
*p++ = _base64[buf & 0x3f];
|
||||
*p++ = alphabet[(buf >> 18) & 0x3f];
|
||||
*p++ = alphabet[(buf >> 12) & 0x3f];
|
||||
*p++ = alphabet[(buf >> 6) & 0x3f];
|
||||
*p++ = alphabet[buf & 0x3f];
|
||||
|
||||
pos = 2;
|
||||
buf = 0;
|
||||
}
|
||||
if (p >= lend)
|
||||
{
|
||||
*p++ = '\n';
|
||||
lend = p + 76;
|
||||
|
||||
if (!url && p >= lend)
|
||||
{
|
||||
*p++ = '\n';
|
||||
lend = p + 76;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle remaining bytes in buf */
|
||||
if (pos != 2)
|
||||
{
|
||||
*p++ = _base64[(buf >> 18) & 0x3f];
|
||||
*p++ = _base64[(buf >> 12) & 0x3f];
|
||||
*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
|
||||
*p++ = '=';
|
||||
*p++ = alphabet[(buf >> 18) & 0x3f];
|
||||
*p++ = alphabet[(buf >> 12) & 0x3f];
|
||||
|
||||
if (pos == 0)
|
||||
{
|
||||
*p++ = alphabet[(buf >> 6) & 0x3f];
|
||||
if (!url)
|
||||
*p++ = '=';
|
||||
}
|
||||
else if (!url)
|
||||
{
|
||||
*p++ = '=';
|
||||
*p++ = '=';
|
||||
}
|
||||
}
|
||||
|
||||
return p - dst;
|
||||
}
|
||||
|
||||
static uint64
|
||||
pg_base64_decode(const char *src, size_t len, char *dst)
|
||||
pg_base64_encode(const char *src, size_t len, char *dst)
|
||||
{
|
||||
return pg_base64_encode_internal(src, len, dst, false);
|
||||
}
|
||||
|
||||
static uint64
|
||||
pg_base64url_encode(const char *src, size_t len, char *dst)
|
||||
{
|
||||
return pg_base64_encode_internal(src, len, dst, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* pg_base64_decode_internal
|
||||
*
|
||||
* Helper for decoding base64 or base64url. When url is passed as true the
|
||||
* input will be assumed to be encoded using base64url.
|
||||
*/
|
||||
static uint64
|
||||
pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
|
||||
{
|
||||
const char *srcend = src + len,
|
||||
*s = src;
|
||||
@ -350,6 +392,15 @@ pg_base64_decode(const char *src, size_t len, char *dst)
|
||||
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
|
||||
continue;
|
||||
|
||||
/* convert base64url to base64 */
|
||||
if (url)
|
||||
{
|
||||
if (c == '-')
|
||||
c = '+';
|
||||
else if (c == '_')
|
||||
c = '/';
|
||||
}
|
||||
|
||||
if (c == '=')
|
||||
{
|
||||
/* end sequence */
|
||||
@ -360,9 +411,12 @@ pg_base64_decode(const char *src, size_t len, char *dst)
|
||||
else if (pos == 3)
|
||||
end = 2;
|
||||
else
|
||||
{
|
||||
/* translator: %s is the name of an encoding scheme */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unexpected \"=\" while decoding base64 sequence")));
|
||||
errmsg("unexpected \"=\" while decoding %s sequence", url ? "base64url" : "base64")));
|
||||
}
|
||||
}
|
||||
b = 0;
|
||||
}
|
||||
@ -372,10 +426,14 @@ pg_base64_decode(const char *src, size_t len, char *dst)
|
||||
if (c > 0 && c < 127)
|
||||
b = b64lookup[(unsigned char) c];
|
||||
if (b < 0)
|
||||
{
|
||||
/* translator: %s is the name of an encoding scheme */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
|
||||
pg_mblen(s - 1), s - 1)));
|
||||
errmsg("invalid symbol \"%.*s\" found while decoding %s sequence",
|
||||
pg_mblen(s - 1), s - 1,
|
||||
url ? "base64url" : "base64")));
|
||||
}
|
||||
}
|
||||
/* add it to buffer */
|
||||
buf = (buf << 6) + b;
|
||||
@ -392,15 +450,40 @@ pg_base64_decode(const char *src, size_t len, char *dst)
|
||||
}
|
||||
}
|
||||
|
||||
if (pos != 0)
|
||||
if (pos == 2)
|
||||
{
|
||||
buf <<= 12;
|
||||
*p++ = (buf >> 16) & 0xFF;
|
||||
}
|
||||
else if (pos == 3)
|
||||
{
|
||||
buf <<= 6;
|
||||
*p++ = (buf >> 16) & 0xFF;
|
||||
*p++ = (buf >> 8) & 0xFF;
|
||||
}
|
||||
else if (pos != 0)
|
||||
{
|
||||
/* translator: %s is the name of an encoding scheme */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid base64 end sequence"),
|
||||
errmsg("invalid %s end sequence", url ? "base64url" : "base64"),
|
||||
errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
|
||||
}
|
||||
|
||||
return p - dst;
|
||||
}
|
||||
|
||||
static uint64
|
||||
pg_base64_decode(const char *src, size_t len, char *dst)
|
||||
{
|
||||
return pg_base64_decode_internal(src, len, dst, false);
|
||||
}
|
||||
|
||||
static uint64
|
||||
pg_base64url_decode(const char *src, size_t len, char *dst)
|
||||
{
|
||||
return pg_base64_decode_internal(src, len, dst, true);
|
||||
}
|
||||
|
||||
static uint64
|
||||
pg_base64_enc_len(const char *src, size_t srclen)
|
||||
@ -415,6 +498,32 @@ pg_base64_dec_len(const char *src, size_t srclen)
|
||||
return ((uint64) srclen * 3) >> 2;
|
||||
}
|
||||
|
||||
static uint64
|
||||
pg_base64url_enc_len(const char *src, size_t srclen)
|
||||
{
|
||||
/*
|
||||
* Unlike standard base64, base64url doesn't use padding characters when
|
||||
* the input length is not divisible by 3
|
||||
*/
|
||||
return (srclen + 2) / 3 * 4;
|
||||
}
|
||||
|
||||
static uint64
|
||||
pg_base64url_dec_len(const char *src, size_t srclen)
|
||||
{
|
||||
/*
|
||||
* For base64, each 4 characters of input produce at most 3 bytes of
|
||||
* output. For base64url without padding, we need to round up to the
|
||||
* nearest 4
|
||||
*/
|
||||
size_t adjusted_len = srclen;
|
||||
|
||||
if (srclen % 4 != 0)
|
||||
adjusted_len += 4 - (srclen % 4);
|
||||
|
||||
return (adjusted_len * 3) / 4;
|
||||
}
|
||||
|
||||
/*
|
||||
* Escape
|
||||
* Minimally escape bytea to text.
|
||||
@ -606,6 +715,12 @@ static const struct
|
||||
pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
|
||||
}
|
||||
},
|
||||
{
|
||||
"base64url",
|
||||
{
|
||||
pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
|
||||
}
|
||||
},
|
||||
{
|
||||
"escape",
|
||||
{
|
||||
|
@ -2517,6 +2517,156 @@ SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
|
||||
\x1234567890abcdef00
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- base64url encoding/decoding
|
||||
--
|
||||
SET bytea_output TO hex;
|
||||
-- Simple encoding/decoding
|
||||
SELECT encode('\x69b73eff', 'base64url'); -- abc-_w
|
||||
encode
|
||||
--------
|
||||
abc-_w
|
||||
(1 row)
|
||||
|
||||
SELECT decode('abc-_w', 'base64url'); -- \x69b73eff
|
||||
decode
|
||||
------------
|
||||
\x69b73eff
|
||||
(1 row)
|
||||
|
||||
-- Round-trip: decode(encode(x)) = x
|
||||
SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- \x1234567890abcdef00
|
||||
decode
|
||||
----------------------
|
||||
\x1234567890abcdef00
|
||||
(1 row)
|
||||
|
||||
-- Empty input
|
||||
SELECT encode('', 'base64url'); -- ''
|
||||
encode
|
||||
--------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT decode('', 'base64url'); -- ''
|
||||
decode
|
||||
--------
|
||||
\x
|
||||
(1 row)
|
||||
|
||||
-- 1 byte input
|
||||
SELECT encode('\x01', 'base64url'); -- AQ
|
||||
encode
|
||||
--------
|
||||
AQ
|
||||
(1 row)
|
||||
|
||||
SELECT decode('AQ', 'base64url'); -- \x01
|
||||
decode
|
||||
--------
|
||||
\x01
|
||||
(1 row)
|
||||
|
||||
-- 2 byte input
|
||||
SELECT encode('\x0102'::bytea, 'base64url'); -- AQI
|
||||
encode
|
||||
--------
|
||||
AQI
|
||||
(1 row)
|
||||
|
||||
SELECT decode('AQI', 'base64url'); -- \x0102
|
||||
decode
|
||||
--------
|
||||
\x0102
|
||||
(1 row)
|
||||
|
||||
-- 3 byte input (no padding needed)
|
||||
SELECT encode('\x010203'::bytea, 'base64url'); -- AQID
|
||||
encode
|
||||
--------
|
||||
AQID
|
||||
(1 row)
|
||||
|
||||
SELECT decode('AQID', 'base64url'); -- \x010203
|
||||
decode
|
||||
----------
|
||||
\x010203
|
||||
(1 row)
|
||||
|
||||
-- 4 byte input (results in 6 base64 chars)
|
||||
SELECT encode('\xdeadbeef'::bytea, 'base64url'); -- 3q2-7w
|
||||
encode
|
||||
--------
|
||||
3q2-7w
|
||||
(1 row)
|
||||
|
||||
SELECT decode('3q2-7w', 'base64url'); -- \xdeadbeef
|
||||
decode
|
||||
------------
|
||||
\xdeadbeef
|
||||
(1 row)
|
||||
|
||||
-- Round-trip test for all lengths from 0–4
|
||||
SELECT encode(decode(encode(E'\\x', 'base64url'), 'base64url'), 'base64url');
|
||||
encode
|
||||
--------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT encode(decode(encode(E'\\x00', 'base64url'), 'base64url'), 'base64url');
|
||||
encode
|
||||
--------
|
||||
AA
|
||||
(1 row)
|
||||
|
||||
SELECT encode(decode(encode(E'\\x0001', 'base64url'), 'base64url'), 'base64url');
|
||||
encode
|
||||
--------
|
||||
AAE
|
||||
(1 row)
|
||||
|
||||
SELECT encode(decode(encode(E'\\x000102', 'base64url'), 'base64url'), 'base64url');
|
||||
encode
|
||||
--------
|
||||
AAEC
|
||||
(1 row)
|
||||
|
||||
SELECT encode(decode(encode(E'\\x00010203', 'base64url'), 'base64url'), 'base64url');
|
||||
encode
|
||||
--------
|
||||
AAECAw
|
||||
(1 row)
|
||||
|
||||
-- Invalid inputs (should ERROR)
|
||||
-- invalid character '@'
|
||||
SELECT decode('QQ@=', 'base64url');
|
||||
ERROR: invalid symbol "@" found while decoding base64url sequence
|
||||
-- missing characters (incomplete group)
|
||||
SELECT decode('QQ', 'base64url'); -- ok (1 byte)
|
||||
decode
|
||||
--------
|
||||
\x41
|
||||
(1 row)
|
||||
|
||||
SELECT decode('QQI', 'base64url'); -- ok (2 bytes)
|
||||
decode
|
||||
--------
|
||||
\x4102
|
||||
(1 row)
|
||||
|
||||
SELECT decode('QQIDQ', 'base64url'); -- ERROR: invalid base64url end sequence
|
||||
ERROR: invalid base64url end sequence
|
||||
HINT: Input data is missing padding, is truncated, or is otherwise corrupted.
|
||||
-- unexpected '=' at start
|
||||
SELECT decode('=QQQ', 'base64url');
|
||||
ERROR: unexpected "=" while decoding base64url sequence
|
||||
-- valid base64 padding in base64url (optional, but accepted)
|
||||
SELECT decode('abc-_w==', 'base64url'); -- should decode to \x69b73eff
|
||||
decode
|
||||
------------
|
||||
\x69b73eff
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- get_bit/set_bit etc
|
||||
--
|
||||
|
@ -799,6 +799,60 @@ SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
|
||||
SELECT encode('\x1234567890abcdef00', 'escape');
|
||||
SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
|
||||
|
||||
--
|
||||
-- base64url encoding/decoding
|
||||
--
|
||||
SET bytea_output TO hex;
|
||||
|
||||
-- Simple encoding/decoding
|
||||
SELECT encode('\x69b73eff', 'base64url'); -- abc-_w
|
||||
SELECT decode('abc-_w', 'base64url'); -- \x69b73eff
|
||||
|
||||
-- Round-trip: decode(encode(x)) = x
|
||||
SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- \x1234567890abcdef00
|
||||
|
||||
-- Empty input
|
||||
SELECT encode('', 'base64url'); -- ''
|
||||
SELECT decode('', 'base64url'); -- ''
|
||||
|
||||
-- 1 byte input
|
||||
SELECT encode('\x01', 'base64url'); -- AQ
|
||||
SELECT decode('AQ', 'base64url'); -- \x01
|
||||
|
||||
-- 2 byte input
|
||||
SELECT encode('\x0102'::bytea, 'base64url'); -- AQI
|
||||
SELECT decode('AQI', 'base64url'); -- \x0102
|
||||
|
||||
-- 3 byte input (no padding needed)
|
||||
SELECT encode('\x010203'::bytea, 'base64url'); -- AQID
|
||||
SELECT decode('AQID', 'base64url'); -- \x010203
|
||||
|
||||
-- 4 byte input (results in 6 base64 chars)
|
||||
SELECT encode('\xdeadbeef'::bytea, 'base64url'); -- 3q2-7w
|
||||
SELECT decode('3q2-7w', 'base64url'); -- \xdeadbeef
|
||||
|
||||
-- Round-trip test for all lengths from 0–4
|
||||
SELECT encode(decode(encode(E'\\x', 'base64url'), 'base64url'), 'base64url');
|
||||
SELECT encode(decode(encode(E'\\x00', 'base64url'), 'base64url'), 'base64url');
|
||||
SELECT encode(decode(encode(E'\\x0001', 'base64url'), 'base64url'), 'base64url');
|
||||
SELECT encode(decode(encode(E'\\x000102', 'base64url'), 'base64url'), 'base64url');
|
||||
SELECT encode(decode(encode(E'\\x00010203', 'base64url'), 'base64url'), 'base64url');
|
||||
|
||||
-- Invalid inputs (should ERROR)
|
||||
-- invalid character '@'
|
||||
SELECT decode('QQ@=', 'base64url');
|
||||
|
||||
-- missing characters (incomplete group)
|
||||
SELECT decode('QQ', 'base64url'); -- ok (1 byte)
|
||||
SELECT decode('QQI', 'base64url'); -- ok (2 bytes)
|
||||
SELECT decode('QQIDQ', 'base64url'); -- ERROR: invalid base64url end sequence
|
||||
|
||||
-- unexpected '=' at start
|
||||
SELECT decode('=QQQ', 'base64url');
|
||||
|
||||
-- valid base64 padding in base64url (optional, but accepted)
|
||||
SELECT decode('abc-_w==', 'base64url'); -- should decode to \x69b73eff
|
||||
|
||||
--
|
||||
-- get_bit/set_bit etc
|
||||
--
|
||||
|
Loading…
x
Reference in New Issue
Block a user