better statistics (#100)

* better statistics

* Prettyprint.
This commit is contained in:
simonpgit 2017-02-13 15:31:28 +00:00 committed by Douglas Stebila
parent c6da8e5af0
commit baab63ab88
4 changed files with 77 additions and 5 deletions

View File

@ -1,4 +1,5 @@
#include <assert.h>
#include <stdio.h>
#include <math.h>
#if defined(WINDOWS)
#include <windows.h>
@ -77,6 +78,71 @@ double OQS_RAND_test_statistical_distance_from_uniform(const unsigned long occur
return distance;
}
// Even for a perfectly uniform generator, if the number of samples is
// low then the std dev of the counts will be high. So, instead, whilst
// still assuming the number of samples isn't super-low, we calculate an
// approximate Chi-squared statistic and back-convert to the Normal
// distribution. The number of sigmas is reported: -3 to +3 is pretty
// ordinary, big negative is suspiciously-flat counts, big positive is
// wildly-fluctuating counts.
//
// occurrences: how many times each of the 256 byte values was observed.
//
// Returns the deviation in sigmas, in steps of 0.1 from -5.0 to +5.0, or
// one of the sentinel values:
//   ZSCORE_SPARSE  fewer than 5 samples per byte value on average
//   ZSCORE_BIGNEG  statistic at or below the lowest tabulated quantile
//   ZSCORE_BIGPOS  statistic above the highest tabulated quantile
double OQS_RAND_zscore_deviation_from_uniform(const unsigned long occurrences[256]) {
	// Chi-squared quantiles (255 degrees of freedom) at -5.05, -4.95, ...,
	// +5.05 sigma: qchisq(pnorm(seq(-5.05,5.05,length.out=102)),255)
	// Declared static const so the table is not rebuilt on the stack on
	// every call.
	static const double quantiles[102] = {
	    156.7872, 158.4155, 160.0555, 161.7072, 163.3707, 165.0460, 166.7331, 168.4321,
	    170.1430, 171.8658, 173.6006, 175.3475, 177.1064, 178.8773, 180.6604, 182.4557,
	    184.2631, 186.0828, 187.9147, 189.7589, 191.6155, 193.4844, 195.3657, 197.2594,
	    199.1656, 201.0843, 203.0155, 204.9593, 206.9157, 208.8847, 210.8663, 212.8607,
	    214.8678, 216.8877, 218.9203, 220.9658, 223.0241, 225.0953, 227.1794, 229.2765,
	    231.3866, 233.5096, 235.6457, 237.7949, 239.9572, 242.1326, 244.3212, 246.5230,
	    248.7380, 250.9663, 253.2079, 255.4627, 257.7310, 260.0126, 262.3076, 264.6160,
	    266.9379, 269.2733, 271.6222, 273.9846, 276.3607, 278.7503, 281.1536, 283.5705,
	    286.0011, 288.4454, 290.9035, 293.3754, 295.8610, 298.3605, 300.8739, 303.4011,
	    305.9422, 308.4973, 311.0663, 313.6493, 316.2463, 318.8574, 321.4825, 324.1217,
	    326.7751, 329.4426, 332.1242, 334.8201, 337.5301, 340.2544, 342.9930, 345.7459,
	    348.5131, 351.2947, 354.0906, 356.9009, 359.7256, 362.5648, 365.4184, 368.2866,
	    371.1692, 374.0664, 376.9782, 379.9045, 382.8454, 385.8010};
	const int num_quantiles = (int) (sizeof(quantiles) / sizeof(quantiles[0]));
	unsigned long total = 0;
	double expected;
	double chsq = 0.0;
	int i;

	for (i = 0; i < 256; i++) {
		total += occurrences[i];
	}

	// Expected count per byte value under a uniform distribution. With
	// fewer than 5 per value there are too few data to report a z-score.
	expected = total / 256.;
	if (expected < 5) {
		return ZSCORE_SPARSE;
	}

	// Sum over all byte values of (observed - expected)^2 / expected,
	// where 1 / expected is written as 256 / total.
	for (i = 0; i < 256; i++) {
		chsq += pow(occurrences[i] - expected, 2) * 256. / total;
	}

	if (chsq <= quantiles[0]) {
		return ZSCORE_BIGNEG;
	}
	for (i = 1; i < num_quantiles; i++) {
		if (chsq <= quantiles[i]) {
			// The interval (quantiles[i-1], quantiles[i]] is 0.1 sigma wide
			// and centred on (i - 51) / 10 sigma.
			return (i - 51) / 10.0;
		}
	}
	return ZSCORE_BIGPOS;
}
//
// Convenience function for statistics reporting: prints to stdout the
// statistical distance from uniform and the z-score deviation from
// uniform of the given byte counts, each line prefixed with `indent`.
void OQS_RAND_report_statistics(const unsigned long occurrences[256], const char *indent) {
	const double sigmas = OQS_RAND_zscore_deviation_from_uniform(occurrences);
	const double distance = OQS_RAND_test_statistical_distance_from_uniform(occurrences);

	printf("%sStatistical distance from uniform: %12.10f\n", indent, distance);
	printf("%s Z-score deviation from uniform: ", indent);

	// The three sentinel values are returned verbatim by the z-score
	// routine, so exact comparison is intended here.
	if (zscore_is(sigmas, ZSCORE_SPARSE)) {
		printf("(too few data)\n");
		return;
	}
	if (zscore_is(sigmas, ZSCORE_BIGNEG)) {
		printf("less than -5.0 sigma ***\n");
		return;
	}
	if (zscore_is(sigmas, ZSCORE_BIGPOS)) {
		printf("more than +5.0 sigma ***\n");
		return;
	}
	printf("about %.1f sigma\n", sigmas);
}
int OQS_RAND_get_system_entropy(uint8_t *buf, size_t n) {
int result = 0;

View File

@ -89,5 +89,11 @@ void OQS_RAND_free(OQS_RAND *r);
void OQS_RAND_test_record_occurrence(const unsigned char b, unsigned long occurrences[256]);
double OQS_RAND_test_statistical_distance_from_uniform(const unsigned long occurrences[256]);
// Sentinel values returned by OQS_RAND_zscore_deviation_from_uniform()
// in place of a z-score. Ordinary results lie between -5.0 and +5.0.
// ZSCORE_SPARSE: fewer than 5 samples per byte value on average.
#define ZSCORE_SPARSE (999.999)
// ZSCORE_BIGNEG: deviation is below the lowest tabulated quantile
// (reported as "less than -5.0 sigma").
#define ZSCORE_BIGNEG (-100.0)
// ZSCORE_BIGPOS: deviation is above the highest tabulated quantile
// (reported as "more than +5.0 sigma").
#define ZSCORE_BIGPOS (+100.0)
// Returns the approximate number of sigmas by which the 256 byte counts
// deviate from a uniform distribution, or one of the ZSCORE_* sentinels.
double OQS_RAND_zscore_deviation_from_uniform(const unsigned long occurrences[256]);
// Prints the statistical distance and the z-score deviation from uniform
// for the given byte counts to stdout; each line starts with `indent`.
void OQS_RAND_report_statistics(const unsigned long occurrences[256], const char *indent);
int OQS_RAND_get_system_entropy(uint8_t *buf, size_t n);
#endif

View File

@ -120,7 +120,7 @@ static int rand_test_distribution_wrapper(enum OQS_RAND_alg_name alg_name, int i
printf("1-byte mode for %d iterations\n", 8 * iterations);
rand_test_distribution_8(rand, occurrences, 8 * iterations);
printf(" Statistical distance from uniform: %12.10f\n", OQS_RAND_test_statistical_distance_from_uniform(occurrences));
OQS_RAND_report_statistics(occurrences, " ");
for (int i = 0; i < 256; i++) {
occurrences[i] = 0;
@ -128,7 +128,7 @@ static int rand_test_distribution_wrapper(enum OQS_RAND_alg_name alg_name, int i
printf("4-byte mode for %d iterations\n", 2 * iterations);
rand_test_distribution_32(rand, occurrences, 2 * iterations);
printf(" Statistical distance from uniform: %12.10f\n", OQS_RAND_test_statistical_distance_from_uniform(occurrences));
OQS_RAND_report_statistics(occurrences, " ");
for (int i = 0; i < 256; i++) {
occurrences[i] = 0;
@ -136,7 +136,7 @@ static int rand_test_distribution_wrapper(enum OQS_RAND_alg_name alg_name, int i
printf("8-byte mode for %d iterations\n", iterations);
rand_test_distribution_64(rand, occurrences, iterations);
printf(" Statistical distance from uniform: %12.10f\n", OQS_RAND_test_statistical_distance_from_uniform(occurrences));
OQS_RAND_report_statistics(occurrences, " ");
for (int i = 0; i < 256; i++) {
occurrences[i] = 0;
@ -144,7 +144,7 @@ static int rand_test_distribution_wrapper(enum OQS_RAND_alg_name alg_name, int i
printf("n-byte mode for %d bytes\n", 8 * iterations);
rand_test_distribution_n(rand, occurrences, 8 * iterations);
printf(" Statistical distance from uniform: %12.10f\n", OQS_RAND_test_statistical_distance_from_uniform(occurrences));
OQS_RAND_report_statistics(occurrences, " ");
OQS_RAND_free(rand);

View File

@ -181,7 +181,7 @@ static int kex_test_correctness_wrapper(OQS_RAND *rand, enum OQS_KEX_alg_name al
}
}
printf("All session keys matched.\n");
printf("Statistical distance from uniform: %12.10f\n", OQS_RAND_test_statistical_distance_from_uniform(occurrences));
OQS_RAND_report_statistics(occurrences, "");
printf("\n\n");
ret = 1;