mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-04 00:02:52 -05:00 
			
		
		
		
	fuzzystrmatch's difference() function assumes that _soundex() always initializes its output buffer fully. This was not so for the case of a string containing no alphabetic characters, resulting in unstable output and Valgrind complaints. Fix by using memset() to fill the whole buffer in the early-exit case. Also make some cosmetic improvements (I didn't care for the random switches between "instr[0]" and "*instr" notation). Report and diagnosis by Alexander Lakhin (bug #17935). Back-patch to all supported branches. Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org
		
			
				
	
	
		
			245 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			245 lines
		
	
	
		
			4.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
CREATE EXTENSION fuzzystrmatch;
 | 
						|
SELECT soundex('hello world!');
 | 
						|
 soundex 
 | 
						|
---------
 | 
						|
 H464
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
 | 
						|
 soundex | soundex | difference 
 | 
						|
---------+---------+------------
 | 
						|
 A500    | A500    |          4
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
 | 
						|
 soundex | soundex | difference 
 | 
						|
---------+---------+------------
 | 
						|
 A500    | A536    |          2
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
 | 
						|
 soundex | soundex | difference 
 | 
						|
---------+---------+------------
 | 
						|
 A500    | M626    |          0
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT soundex(''), difference('', '');
 | 
						|
 soundex | difference 
 | 
						|
---------+------------
 | 
						|
         |          4
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT levenshtein('GUMBO', 'GAMBOL');
 | 
						|
 levenshtein 
 | 
						|
-------------
 | 
						|
           2
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT levenshtein('GUMBO', 'GAMBOL', 2, 1, 1);
 | 
						|
 levenshtein 
 | 
						|
-------------
 | 
						|
           3
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT levenshtein_less_equal('extensive', 'exhaustive', 2);
 | 
						|
 levenshtein_less_equal 
 | 
						|
------------------------
 | 
						|
                      3
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT levenshtein_less_equal('extensive', 'exhaustive', 4);
 | 
						|
 levenshtein_less_equal 
 | 
						|
------------------------
 | 
						|
                      4
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT metaphone('GUMBO', 4);
 | 
						|
 metaphone 
 | 
						|
-----------
 | 
						|
 KM
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT dmetaphone('gumbo');
 | 
						|
 dmetaphone 
 | 
						|
------------
 | 
						|
 KMP
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT dmetaphone_alt('gumbo');
 | 
						|
 dmetaphone_alt 
 | 
						|
----------------
 | 
						|
 KMP
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- Wovels
 | 
						|
SELECT daitch_mokotoff('Augsburg');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {054795}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Breuer');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {791900}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Freud');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {793000}
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- The letter "H"
 | 
						|
SELECT daitch_mokotoff('Halberstadt');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {587943,587433}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Mannheim');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {665600}
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- Adjacent sounds
 | 
						|
SELECT daitch_mokotoff('Chernowitz');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {596740,496740}
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- Adjacent letters with identical adjacent code digits
 | 
						|
SELECT daitch_mokotoff('Cherkassy');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {595400,495400}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Kleinman');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {586660}
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- More than one word
 | 
						|
SELECT daitch_mokotoff('Nowy Targ');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {673950}
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- Padded with "0"
 | 
						|
SELECT daitch_mokotoff('Berlin');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {798600}
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- Other examples from https://www.avotaynu.com/soundex.htm
 | 
						|
SELECT daitch_mokotoff('Ceniow');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {567000,467000}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Tsenyuv');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {467000}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Holubica');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {587500,587400}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Golubitsa');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {587400}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Przemysl');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {794648,746480}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Pshemeshil');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {746480}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Rosochowaciec');
 | 
						|
                      daitch_mokotoff                      
 | 
						|
-----------------------------------------------------------
 | 
						|
 {945755,945754,945745,945744,944755,944754,944745,944744}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('Rosokhovatsets');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {945744}
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- Ignored characters
 | 
						|
SELECT daitch_mokotoff('''OBrien');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {079600}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('O''Brien');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {079600}
 | 
						|
(1 row)
 | 
						|
 | 
						|
-- "Difficult" cases, likely to cause trouble for other implementations.
 | 
						|
SELECT daitch_mokotoff('CJC');
 | 
						|
               daitch_mokotoff               
 | 
						|
---------------------------------------------
 | 
						|
 {550000,540000,545000,450000,400000,440000}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('BESST');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {743000}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('BOUEY');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {710000}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('HANNMANN');
 | 
						|
 daitch_mokotoff 
 | 
						|
-----------------
 | 
						|
 {566600}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('MCCOYJR');
 | 
						|
                      daitch_mokotoff                      
 | 
						|
-----------------------------------------------------------
 | 
						|
 {651900,654900,654190,654490,645190,645490,641900,644900}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('ACCURSO');
 | 
						|
                      daitch_mokotoff                      
 | 
						|
-----------------------------------------------------------
 | 
						|
 {059400,054000,054940,054400,045940,045400,049400,044000}
 | 
						|
(1 row)
 | 
						|
 | 
						|
SELECT daitch_mokotoff('BIERSCHBACH');
 | 
						|
                      daitch_mokotoff                      
 | 
						|
-----------------------------------------------------------
 | 
						|
 {794575,794574,794750,794740,745750,745740,747500,747400}
 | 
						|
(1 row)
 | 
						|
 |