mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 00:03:57 -04:00 
			
		
		
		
	Make the pg_rewind regression tests more robust on slow systems.
There were a couple of hard-coded sleeps in the tests: one to wait for the standby to catch up with the master, and one to wait for promotion with "pg_ctl promote" to complete. Instead of a fixed, hard-coded sleep, poll the server with a query once a second. This isn't ideal either, and I wish we had a better solution for real-world applications too, but this should fix the immediate problem. Patch by Michael Paquier, with some editing by me.
This commit is contained in:
		
							parent
							
								
									cef939c347
								
							
						
					
					
						commit
						54a16df010
					
				| @ -125,6 +125,37 @@ sub check_query | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
# Run a query once a second, until it returns 't' (i.e. SQL boolean true).
#
# $query is executed with psql against the server identified by $connstr.
# Up to $max_attempts attempts are made.  Returns 1 as soon as an attempt
# prints exactly 't' on stdout, or 0 if none of them did; in the latter
# case the stderr of the last attempt is reported with diag.
sub poll_query_until
{
	my ($query, $connstr) = @_;

	my $max_attempts = 30;
	my ($stdout, $stderr);

	foreach my $attempt (1 .. $max_attempts)
	{
		# -A (unaligned) and -t (tuples only) make psql print the bare
		# value, so the output can be compared against 't' directly.
		my $cmd = [ 'psql', '-At', '-c', $query, '-d', $connstr ];

		# The exit status is deliberately ignored: a failed attempt is
		# simply retried, and only the captured stdout decides success.
		run($cmd, '>', \$stdout, '2>', \$stderr);

		# Treat missing output as empty so the comparison below is
		# warning-free.
		$stdout = '' unless defined $stdout;
		chomp($stdout);
		return 1 if $stdout eq "t";

		# Wait a second before retrying; there is no point in sleeping
		# after the final attempt.
		sleep 1 if $attempt < $max_attempts;
	}

	# The query result didn't become 't' within $max_attempts attempts.
	# Give up. Print the stderr from the last attempt, hopefully that's
	# useful for debugging.
	diag($stderr);
	return 0;
}
| 
 | ||||
| sub append_to_file | ||||
| { | ||||
| 	my($filename, $str) = @_; | ||||
| @ -185,7 +216,7 @@ sub create_standby | ||||
| 	# Base backup is taken with xlog files included | ||||
| 	system_or_bail("pg_basebackup -D $test_standby_datadir -p $port_master -x >>$log_path 2>&1"); | ||||
| 	append_to_file("$test_standby_datadir/recovery.conf", qq( | ||||
| primary_conninfo='$connstr_master' | ||||
| primary_conninfo='$connstr_master application_name=rewind_standby' | ||||
| standby_mode=on | ||||
| recovery_target_timeline='latest' | ||||
| )); | ||||
| @ -193,8 +224,11 @@ recovery_target_timeline='latest' | ||||
| 	# Start standby | ||||
| 	system_or_bail("pg_ctl -w -D $test_standby_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_standby\" start >>$log_path 2>&1"); | ||||
| 
 | ||||
| 	# sleep a bit to make sure the standby has caught up. | ||||
| 	sleep 1; | ||||
| 	# Wait until the standby has caught up with the primary, by polling | ||||
| 	# pg_stat_replication. | ||||
| 	my $caughtup_query = "SELECT pg_current_xlog_location() = replay_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';"; | ||||
| 	poll_query_until($caughtup_query, $connstr_master) | ||||
| 		or die "Timed out while waiting for standby to catch up"; | ||||
| } | ||||
| 
 | ||||
| sub promote_standby | ||||
| @ -203,9 +237,11 @@ sub promote_standby | ||||
| 	# up standby | ||||
| 
 | ||||
| 	# Now promote slave and insert some new data on master, this will put | ||||
| 	# the master out-of-sync with the standby. | ||||
| 	# the master out-of-sync with the standby. Wait until the standby is | ||||
| 	# out of recovery mode, and is ready to accept read-write connections. | ||||
| 	system_or_bail("pg_ctl -w -D $test_standby_datadir promote >>$log_path 2>&1"); | ||||
| 	sleep 2; | ||||
| 	poll_query_until("SELECT NOT pg_is_in_recovery()", $connstr_standby) | ||||
| 		or die "Timed out while waiting for promotion of standby"; | ||||
| } | ||||
| 
 | ||||
| sub run_pg_rewind | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user