mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 00:03:57 -04:00 
			
		
		
		
	Speed up lexing of long JSON strings
Use optimized linear search when looking ahead for end quotes, backslashes, and non-printable characters. This results in nearly 40% faster JSON parsing on x86-64 when most values are long strings, and all platforms should see some improvement.

Reviewed by Andres Freund and Nathan Bossart
Discussion: https://www.postgresql.org/message-id/CAFBsxsGhaR2KQ5eisaK%3D6Vm60t%3DaxhD8Ckj1qFoCH1pktZi%2B2w%40mail.gmail.com
Discussion: https://www.postgresql.org/message-id/CAFBsxsESLUyJ5spfOSyPrOvKUEYYNqsBosue9SV1j8ecgNXSKA%40mail.gmail.com
This commit is contained in:
		
							parent
							
								
									05519126a0
								
							
						
					
					
						commit
						0a8de93a48
					
				| @ -19,6 +19,7 @@ | ||||
| 
 | ||||
| #include "common/jsonapi.h" | ||||
| #include "mb/pg_wchar.h" | ||||
| #include "port/pg_lfind.h" | ||||
| 
 | ||||
| #ifndef FRONTEND | ||||
| #include "miscadmin.h" | ||||
| @ -844,7 +845,7 @@ json_lex_string(JsonLexContext *lex) | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			char	   *p; | ||||
| 			char	   *p = s; | ||||
| 
 | ||||
| 			if (hi_surrogate != -1) | ||||
| 				return JSON_UNICODE_LOW_SURROGATE; | ||||
| @ -853,11 +854,17 @@ json_lex_string(JsonLexContext *lex) | ||||
| 			 * Skip to the first byte that requires special handling, so we | ||||
| 			 * can batch calls to appendBinaryStringInfo. | ||||
| 			 */ | ||||
| 			for (p = s; p < end; p++) | ||||
| 			while (p < end - sizeof(Vector8) && | ||||
| 				   !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) && | ||||
| 				   !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) && | ||||
| 				   !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8))) | ||||
| 				p += sizeof(Vector8); | ||||
| 
 | ||||
| 			for (; p < end; p++) | ||||
| 			{ | ||||
| 				if (*p == '\\' || *p == '"') | ||||
| 					break; | ||||
| 				else if ((unsigned char) *p < 32) | ||||
| 				else if ((unsigned char) *p <= 31) | ||||
| 				{ | ||||
| 					/* Per RFC4627, these characters MUST be escaped. */ | ||||
| 					/*
 | ||||
|  | ||||
| @ -42,6 +42,19 @@ LINE 1: SELECT '"\v"'::json; | ||||
|                ^ | ||||
| DETAIL:  Escape sequence "\v" is invalid. | ||||
| CONTEXT:  JSON data, line 1: "\v... | ||||
| -- Check fast path for longer strings (at least 16 bytes long) | ||||
| SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK | ||||
|        json         | ||||
| ------------------- | ||||
|  "............abc" | ||||
| (1 row) | ||||
| 
 | ||||
| SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes | ||||
|         json          | ||||
| --------------------- | ||||
|  "............abc\n" | ||||
| (1 row) | ||||
| 
 | ||||
| -- see json_encoding test for input with unicode escapes | ||||
| -- Numbers. | ||||
| SELECT '1'::json;				-- OK | ||||
|  | ||||
| @ -7,6 +7,11 @@ SELECT '"abc | ||||
| def"'::json;					-- ERROR, unescaped newline in string constant | ||||
| SELECT '"\n\"\\"'::json;		-- OK, legal escapes | ||||
| SELECT '"\v"'::json;			-- ERROR, not a valid JSON escape | ||||
| 
 | ||||
| -- Check fast path for longer strings (at least 16 bytes long) | ||||
| SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK | ||||
| SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes | ||||
| 
 | ||||
| -- see json_encoding test for input with unicode escapes | ||||
| 
 | ||||
| -- Numbers. | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user