Revert sampling patch for EXPLAIN ANALYZE; it turns out to be too unreliable

because node timing is much less predictable than the patch expects. I kept the API change for InstrStopNode, however.
2025-10-30 00:04:49 -04:00 · 2006-06-09 19:30:56 +00:00 · 2006-06-09 19:30:56 +00:00 · 5de0cbdf0c
commit 5de0cbdf0c
parent 9143144935
3 changed files with 30 additions and 179 deletions
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@ -12,7 +12,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.55 2006/05/30 14:01:57 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.56 2006/06/09 19:30:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -423,7 +423,7 @@ ExecProcNode(PlanState *node)
 	}
 	if (node->instrument)
-		InstrStopNode(node->instrument, TupIsNull(result) ? 0 : 1 );
+		InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0);
 	return result;
 }
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@ -7,112 +7,22 @@
 * Copyright (c) 2001-2006, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.17 2006/06/07 18:49:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.18 2006/06/09 19:30:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"
 #include <math.h>
 #include <unistd.h>
 #include "executor/instrument.h"
 /*
 * As of PostgreSQL 8.2, we try to reduce the overhead of EXPLAIN ANALYZE
 * by not calling INSTR_TIME_SET_CURRENT() for every single node execution.
 * (Because that requires a kernel call on most systems, it's expensive.)
 *
 * This macro determines the sampling interval: that is, after how many more
 * iterations will we take the next time sample, given that niters iterations
 * have occurred already.  In general, if the function is f(x), then for N
 * iterations we will take on the order of integral(1/f(x), x=0..N)
 * samples.  Some examples follow, with the number of samples that would be
 * collected over 1,000,000 iterations:
 *
 *		f(x) = x         =>   log2(N)                        20
 *		f(x) = x^(1/2)   =>   2 * N^(1/2)                  2000
 *		f(x) = x^(1/3)   =>   1.5 * N^(2/3)               15000
 *
 * I've chosen the last one as it seems to provide a good compromise between
 * low overhead but still getting a meaningful number of samples. However,
 * not all machines have the cbrt() function so on those we substitute
 * sqrt(). The difference is not very significant in the tests I made.
 *
 * The actual sampling interval is randomized with the SampleFunc() value
 * as the mean; this hopefully will reduce any measurement bias due to
 * variation in the node execution time.
 */
 #ifdef HAVE_CBRT
 #define SampleFunc(niters) cbrt(niters)
 #else
 #define SampleFunc(niters) sqrt(niters)
 #endif
 #define SampleInterval(niters) \
 	(SampleFunc(niters) * (double) random() / (double) (MAX_RANDOM_VALUE/2))
 /*
 * We sample at every node iteration until we've reached this threshold,
 * so that nodes not called a large number of times are completely accurate.
 * (Perhaps this should be a GUC variable?)
 */
 #define SAMPLE_THRESHOLD 50
 /*
 * Determine the sampling overhead.  This only needs to be done once per
 * backend (actually, probably once per postmaster would do ...)
 */
 static double SampleOverhead;
 static bool SampleOverheadCalculated = false;
 static void
 CalculateSampleOverhead(void)
 {
 	int i;
 	/*
 	 * We could get a wrong result due to being interrupted by task switch.
 	 * To minimize the risk we do it a few times and take the lowest.
 	 */
 	SampleOverhead = 1.0e6;
 	for (i = 0; i < 5; i++)
 	{
 		Instrumentation timer;
 		instr_time	tmptime;
 		int j;
 		double overhead;
 		memset(&timer, 0, sizeof(timer));
 		InstrStartNode(&timer);
 #define TEST_COUNT 100
 		for (j = 0; j < TEST_COUNT; j++)
 		{
 			INSTR_TIME_SET_CURRENT(tmptime);
 		}
 		InstrStopNode(&timer, 1);
 		overhead = INSTR_TIME_GET_DOUBLE(timer.counter) / TEST_COUNT;
 		if (overhead < SampleOverhead)
 			SampleOverhead = overhead;
 	}
 	SampleOverheadCalculated = true;
 }
 /* Allocate new instrumentation structure(s) */
 Instrumentation *
 InstrAlloc(int n)
 {
-	Instrumentation *instr;
+	Instrumentation *instr = palloc0(n * sizeof(Instrumentation));
 	/* Calculate sampling overhead, if not done yet in this backend */
 	if (!SampleOverheadCalculated)
 		CalculateSampleOverhead();
 	instr = palloc0(n * sizeof(Instrumentation));
 	/* we don't need to do any initialization except zero 'em */
@ -124,22 +34,7 @@ void
 InstrStartNode(Instrumentation *instr)
 {
 	if (INSTR_TIME_IS_ZERO(instr->starttime))
-	{
+		INSTR_TIME_SET_CURRENT(instr->starttime);
 		/*
 		 * Always sample if not yet up to threshold, else check whether
 		 * next threshold has been reached
 		 */
 		if (instr->itercount < SAMPLE_THRESHOLD)
 			instr->sampling = true;
 		else if (instr->itercount >= instr->nextsample)
 		{
 			instr->sampling = true;
 			instr->nextsample =
 				instr->itercount + SampleInterval(instr->itercount);
 		}
 		if (instr->sampling)
 			INSTR_TIME_SET_CURRENT(instr->starttime);
 	}
 	else
 		elog(DEBUG2, "InstrStartNode called twice in a row");
 }
@ -148,53 +43,39 @@ InstrStartNode(Instrumentation *instr)
 void
 InstrStopNode(Instrumentation *instr, double nTuples)
 {
-	/* count the returned tuples and iterations */
+	instr_time	endtime;
 	/* count the returned tuples */
 	instr->tuplecount += nTuples;
 	instr->itercount += 1;
-	/* measure runtime if appropriate */
+	if (INSTR_TIME_IS_ZERO(instr->starttime))
 	if (instr->sampling)
 	{
-		instr_time	endtime;
+		elog(DEBUG2, "InstrStopNode called without start");
 		return;
 	}
-		/*
+	INSTR_TIME_SET_CURRENT(endtime);
 		 * To be sure that SampleOverhead accurately reflects the extra
 		 * overhead, we must do INSTR_TIME_SET_CURRENT() as the *first*
 		 * action that is different between the sampling and non-sampling
 		 * code paths.
 		 */
 		INSTR_TIME_SET_CURRENT(endtime);
 		if (INSTR_TIME_IS_ZERO(instr->starttime))
 		{
 			elog(DEBUG2, "InstrStopNode called without start");
 			return;
 		}
 #ifndef WIN32
-		instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec;
+	instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec;
-		instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec;
+	instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec;
-		/* Normalize after each add to avoid overflow/underflow of tv_usec */
+	/* Normalize after each add to avoid overflow/underflow of tv_usec */
-		while (instr->counter.tv_usec < 0)
+	while (instr->counter.tv_usec < 0)
-		{
+	{
-			instr->counter.tv_usec += 1000000;
+		instr->counter.tv_usec += 1000000;
-			instr->counter.tv_sec--;
+		instr->counter.tv_sec--;
-		}
+	}
-		while (instr->counter.tv_usec >= 1000000)
+	while (instr->counter.tv_usec >= 1000000)
-		{
+	{
-			instr->counter.tv_usec -= 1000000;
+		instr->counter.tv_usec -= 1000000;
-			instr->counter.tv_sec++;
+		instr->counter.tv_sec++;
-		}
+	}
 #else							/* WIN32 */
-		instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart);
+	instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart);
 #endif
-		INSTR_TIME_SET_ZERO(instr->starttime);
+	INSTR_TIME_SET_ZERO(instr->starttime);
 		instr->samplecount += 1;
 		instr->sampling = false;
 	}
 	/* Is this the first tuple of this cycle? */
 	if (!instr->running)
@ -217,31 +98,9 @@ InstrEndLoop(Instrumentation *instr)
 	if (!INSTR_TIME_IS_ZERO(instr->starttime))
 		elog(DEBUG2, "InstrEndLoop called on running node");
-	/* Compute time spent in node */
+	/* Accumulate per-cycle statistics into totals */
 	totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
 	/*
 	 * If we didn't measure runtime on every iteration, then we must increase
 	 * the measured total to account for the other iterations.  Naively
 	 * multiplying totaltime by itercount/samplecount would be wrong because
 	 * it effectively assumes the sampling overhead applies to all iterations,
 	 * even the ones we didn't measure.  (Note that what we are trying to
 	 * estimate here is the actual time spent in the node, including the
 	 * actual measurement overhead; not the time exclusive of measurement
 	 * overhead.)  We exclude the first iteration from the correction basis,
 	 * because it often takes longer than others.
 	 */
 	if (instr->itercount > instr->samplecount)
 	{
 		double per_iter;
 		per_iter = (totaltime - instr->firsttuple) / (instr->samplecount - 1)
 			- SampleOverhead;
 		if (per_iter > 0)				/* sanity check */
 			totaltime += per_iter * (instr->itercount - instr->samplecount);
 	}
 	/* Accumulate per-cycle statistics into totals */
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
 	instr->ntuples += instr->tuplecount;
@ -249,12 +108,8 @@ InstrEndLoop(Instrumentation *instr)
 	/* Reset for next cycle (if any) */
 	instr->running = false;
 	instr->sampling = false;
 	INSTR_TIME_SET_ZERO(instr->starttime);
 	INSTR_TIME_SET_ZERO(instr->counter);
 	instr->firsttuple = 0;
 	instr->tuplecount = 0;
 	instr->itercount = 0;
 	instr->samplecount = 0;
 	instr->nextsample = 0;
 }
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@ -6,7 +6,7 @@
 *
 * Copyright (c) 2001-2006, PostgreSQL Global Development Group
 *
- * $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.15 2006/05/30 19:24:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.16 2006/06/09 19:30:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -57,14 +57,10 @@ typedef struct Instrumentation
 {
 	/* Info about current plan cycle: */
 	bool		running;		/* TRUE if we've completed first tuple */
 	bool		sampling;		/* Are we sampling in current iteration? */
 	instr_time	starttime;		/* Start time of current iteration of node */
 	instr_time	counter;		/* Accumulated runtime for this node */
 	double		firsttuple;		/* Time for first tuple of this cycle */
 	double		tuplecount;		/* Tuples emitted so far this cycle */
 	double		itercount;		/* Plan node iterations this cycle */
 	double		samplecount;	/* Iterations in which we sampled runtime */
 	double		nextsample;		/* Next itercount to sample at */
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* Total startup time (in seconds) */
 	double		total;			/* Total total time (in seconds) */