Add QgsStatisticalSummary class for calculating stats from a list of values.
This commit is contained in:
Nyall Dawson 2015-05-02 23:19:39 +10:00
parent bb0e583f4b
commit 154468bd84
7 changed files with 504 additions and 0 deletions

View File

@ -103,6 +103,7 @@
%Include qgssnapper.sip
%Include qgssnappingutils.sip
%Include qgsspatialindex.sip
%Include qgsstatisticalsummary.sip
%Include qgstolerance.sip
%Include qgsvectordataprovider.sip
%Include qgsvectorfilewriter.sip

View File

@ -0,0 +1,135 @@
/** \ingroup core
* \class QgsStatisticalSummary
* \brief Calculator for summary statistics for a list of doubles.
*
* Statistics are calculated by calling @link calculate @endlink and passing a list of doubles. The
* individual statistics can then be retrieved using the associated methods. Note that not all statistics
* are calculated by default. Statistics which require slower computations are only calculated by
* specifying the statistic in the constructor or via @link setStatistics @endlink.
*
* This is the SIP (Python binding) declaration of the class; it must be kept in sync with
* the C++ declaration in qgsstatisticalsummary.h.
*
* \note Added in version 2.9
*/
class QgsStatisticalSummary
{
%TypeHeaderCode
#include <qgsstatisticalsummary.h>
%End
public:
//! Enumeration of flags that specify statistics to be calculated
enum Statistic
{
Count , //!< Count of values
Sum, //!< Sum of values
Mean, //!< Mean of values
Median, //!< Median of values
StDev, //!< Standard deviation of values
StDevSample, //!< Sample standard deviation of values
Min, //!< Min of values
Max, //!< Max of values
Range, //!< Range of values (max - min)
Minority, //!< Minority of values (value with the fewest occurrences)
Majority, //!< Majority of values (value with the most occurrences)
Variety, //!< Variety (count of distinct) values
All //!< Combination of statistic flags; the exact mask is defined in the C++ header
};
typedef QFlags<QgsStatisticalSummary::Statistic> Statistics;
/** Constructor for QgsStatisticalSummary
* @param stats flags for statistics to calculate
*/
QgsStatisticalSummary( QgsStatisticalSummary::Statistics stats = QgsStatisticalSummary::Statistics( 0 ) );
virtual ~QgsStatisticalSummary();
/** Returns flags which specify which statistics will be calculated. Some statistics
* are always calculated (eg sum, min and max).
* @see setStatistics
*/
QgsStatisticalSummary::Statistics statistics() const;
/** Sets flags which specify which statistics will be calculated. Some statistics
* are always calculated (eg sum, min and max).
* @param stats flags for statistics to calculate
* @see statistics
*/
void setStatistics( QgsStatisticalSummary::Statistics stats );
/** Resets the calculated values
*/
void reset();
/** Calculates summary statistics for a list of values. Any previously calculated
* values are reset first.
* @param values list of doubles
*/
void calculate( const QList<double>& values );
/** Returns calculated count of values
*/
int count() const;
/** Returns calculated sum of values
*/
double sum() const;
/** Returns calculated mean of values
*/
double mean() const;
/** Returns calculated median of values. This is only calculated if Statistic::Median has
* been specified in the constructor or via setStatistics.
*/
double median() const;
/** Returns calculated minimum from values.
*/
double min() const;
/** Returns calculated maximum from values.
*/
double max() const;
/** Returns calculated range (difference between maximum and minimum values).
*/
double range() const;
/** Returns population standard deviation. This is calculated when Statistic::StDev has
* been specified in the constructor or via setStatistics.
* @see sampleStDev
*/
double stDev() const;
/** Returns sample standard deviation. This is calculated when Statistic::StDev has
* been specified in the constructor or via setStatistics.
* @see stDev
*/
double sampleStDev() const;
/** Returns variety of values. The variety is the count of unique values from the list.
* The unique values are collected when Statistic::Variety, Statistic::Minority or
* Statistic::Majority has been specified in the constructor or via setStatistics.
*/
int variety() const;
/** Returns minority of values. The minority is the value with the fewest occurrences in the list.
* This is only calculated if Statistic::Minority has been specified in the constructor
* or via setStatistics.
* @see majority
*/
double minority() const;
/** Returns majority of values. The majority is the value with the most occurrences in the list.
* This is only calculated if Statistic::Majority has been specified in the constructor
* or via setStatistics.
* @see minority
*/
double majority() const;
};
// Exposes the flag-combining operator so Statistic values can be OR-ed together from Python.
QFlags<QgsStatisticalSummary::Statistic> operator|(QgsStatisticalSummary::Statistic f1, QFlags<QgsStatisticalSummary::Statistic> f2);

View File

@ -168,6 +168,7 @@ SET(QGIS_CORE_SRCS
qgssnapper.cpp
qgssnappingutils.cpp
qgsspatialindex.cpp
qgsstatisticalsummary.cpp
qgstransaction.cpp
qgstolerance.cpp
qgsvectordataprovider.cpp
@ -554,6 +555,7 @@ SET(QGIS_CORE_HDRS
qgssnapper.h
qgssnappingutils.h
qgsspatialindex.h
qgsstatisticalsummary.h
qgstolerance.h
qgstransaction.h
qgsvectordataprovider.h

View File

@ -0,0 +1,109 @@
/***************************************************************************
qgsstatisticalsummary.cpp
--------------------------------------
Date : May 2015
Copyright : (C) 2015 by Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#include "qgsstatisticalsummary.h"
#include <limits>
#include <qmath.h>
// Stores the requested statistic flags; every other member is given its
// initial value by reset(), so no member is left uninitialised.
QgsStatisticalSummary::QgsStatisticalSummary( Statistics stats )
: mStatistics( stats )
{
reset();
}
// Nothing to release explicitly: all members are values or Qt containers.
QgsStatisticalSummary::~QgsStatisticalSummary()
{
}
// Returns every calculated statistic to its initial state. The requested
// statistic flags (mStatistics) are deliberately left untouched.
void QgsStatisticalSummary::reset()
{
  mCount = 0;
  mSum = 0;
  mMean = 0;
  mMedian = 0;

  // Min/max start at opposite extremes so that the first value seen replaces
  // both of them. Note that std::numeric_limits<double>::min() is the smallest
  // POSITIVE double, not the most negative one; using it as the starting
  // maximum made the reported max wrong for all-negative input. The most
  // negative finite double is -max().
  mMin = std::numeric_limits<double>::max();
  mMax = -std::numeric_limits<double>::max();

  mStdev = 0;
  mSampleStdev = 0;
  mMinority = 0;
  mMajority = 0;
  mValueCount.clear();
}
void QgsStatisticalSummary::calculate( const QList<double> &values )
{
reset();
foreach ( double value, values )
{
mCount++;
mSum += value;
mMin = qMin( mMin, value );
mMax = qMax( mMax, value );
if ( mStatistics & QgsStatisticalSummary::Majority || mStatistics & QgsStatisticalSummary::Minority || mStatistics & QgsStatisticalSummary::Variety )
mValueCount.insert( value, mValueCount.value( value, 0 ) + 1 );
}
if ( mCount == 0 )
return;
mMean = mSum / mCount;
if ( mStatistics & QgsStatisticalSummary::StDev )
{
double sumSquared = 0;
foreach ( double value, values )
{
double diff = value - mMean;
sumSquared += diff * diff;
}
mStdev = qPow( sumSquared / values.count(), 0.5 );
mSampleStdev = qPow( sumSquared / ( values.count() - 1 ), 0.5 );
}
if ( mStatistics & QgsStatisticalSummary::Median )
{
QList<double> sorted = values;
qSort( sorted.begin(), sorted.end() );
bool even = ( mCount % 2 ) < 1;
if ( even )
{
mMedian = ( sorted[mCount / 2 - 1] + sorted[mCount / 2] ) / 2.0;
}
else //odd
{
mMedian = sorted[( mCount + 1 ) / 2 - 1];
}
}
if ( mStatistics & QgsStatisticalSummary::Minority || mStatistics & QgsStatisticalSummary::Majority )
{
QList<int> valueCounts = mValueCount.values();
qSort( valueCounts.begin(), valueCounts.end() );
if ( mStatistics & QgsStatisticalSummary::Minority )
{
mMinority = mValueCount.key( valueCounts.first() );
}
if ( mStatistics & QgsStatisticalSummary::Majority )
{
mMajority = mValueCount.key( valueCounts.last() );
}
}
}

View File

@ -0,0 +1,165 @@
/***************************************************************************
qgsstatisticalsummary.h
--------------------------------------
Date : May 2015
Copyright : (C) 2015 by Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#ifndef QGSSTATISTICALSUMMARY_H
#define QGSSTATISTICALSUMMARY_H
#include <QMap>
/** \ingroup core
* \class QgsStatisticalSummary
* \brief Calculator for summary statistics for a list of doubles.
*
* Statistics are calculated by calling @link calculate @endlink and passing a list of doubles. The
* individual statistics can then be retrieved using the associated methods. Note that not all statistics
* are calculated by default. Statistics which require slower computations are only calculated by
* specifying the statistic in the constructor or via @link setStatistics @endlink.
*
* \note Added in version 2.9
*/
class CORE_EXPORT QgsStatisticalSummary
{
public:
//! Enumeration of flags that specify statistics to be calculated
enum Statistic
{
Count = 1, //!< Count of values
Sum = 2, //!< Sum of values
Mean = 4, //!< Mean of values
Median = 8, //!< Median of values
StDev = 16, //!< Standard deviation of values
StDevSample = 32, //!< Sample standard deviation of values
Min = 64, //!< Min of values
Max = 128, //!< Max of values
Range = 256, //!< Range of values (max - min)
Minority = 512, //!< Minority of values (value with the fewest occurrences)
Majority = 1024, //!< Majority of values (value with the most occurrences)
Variety = 2048, //!< Variety (count of distinct) values
All = Count | Sum | Mean | Median | StDev | Max | Min | Range | Minority | Majority | Variety //!< Combination of the listed flags; note that StDevSample is not part of this mask
};
Q_DECLARE_FLAGS( Statistics, Statistic )
/** Constructor for QgsStatisticalSummary
* @param stats flags for statistics to calculate
*/
QgsStatisticalSummary( Statistics stats = Statistics( 0 ) );
virtual ~QgsStatisticalSummary();
/** Returns flags which specify which statistics will be calculated. Some statistics
* are always calculated (eg sum, min and max).
* @see setStatistics
*/
Statistics statistics() const { return mStatistics; }
/** Sets flags which specify which statistics will be calculated. Some statistics
* are always calculated (eg sum, min and max). Changing the flags does not
* recalculate anything; call calculate again for the new flags to apply.
* @param stats flags for statistics to calculate
* @see statistics
*/
void setStatistics( Statistics stats ) { mStatistics = stats; }
/** Resets the calculated values. The statistic flags are not changed.
*/
void reset();
/** Calculates summary statistics for a list of values. Any previously calculated
* values are reset first.
* @param values list of doubles
*/
void calculate( const QList<double>& values );
/** Returns calculated count of values
*/
int count() const { return mCount; }
/** Returns calculated sum of values
*/
double sum() const { return mSum; }
/** Returns calculated mean of values
*/
double mean() const { return mMean; }
/** Returns calculated median of values. This is only calculated if Statistic::Median has
* been specified in the constructor or via setStatistics.
*/
double median() const { return mMedian; }
/** Returns calculated minimum from values.
*/
double min() const { return mMin; }
/** Returns calculated maximum from values.
*/
double max() const { return mMax; }
/** Returns calculated range (difference between maximum and minimum values).
* The range is derived from the stored maximum and minimum on each call.
*/
double range() const { return mMax - mMin; }
/** Returns population standard deviation. This is calculated when Statistic::StDev has
* been specified in the constructor or via setStatistics.
* @see sampleStDev
*/
double stDev() const { return mStdev; }
/** Returns sample standard deviation. This is calculated when Statistic::StDev has
* been specified in the constructor or via setStatistics.
* @see stDev
*/
double sampleStDev() const { return mSampleStdev; }
/** Returns variety of values. The variety is the count of unique values from the list.
* The unique values are collected when Statistic::Variety, Statistic::Minority or
* Statistic::Majority has been specified in the constructor or via setStatistics;
* otherwise 0 is returned.
*/
int variety() const { return mValueCount.count(); }
/** Returns minority of values. The minority is the value with the fewest occurrences in the list.
* This is only calculated if Statistic::Minority has been specified in the constructor
* or via setStatistics.
* @see majority
*/
double minority() const { return mMinority; }
/** Returns majority of values. The majority is the value with the most occurrences in the list.
* This is only calculated if Statistic::Majority has been specified in the constructor
* or via setStatistics.
* @see minority
*/
double majority() const { return mMajority; }
private:
// Flags selecting which of the optional statistics calculate() computes
Statistics mStatistics;
// Results of the last calculate() call (or the initial values set by reset())
int mCount;
double mSum;
double mMean;
double mMedian;
double mMin;
double mMax;
double mStdev;
double mSampleStdev;
double mMinority;
double mMajority;
// Number of occurrences of each distinct value; backs variety, minority and majority
QMap< double, int > mValueCount;
};
// Enables type-safe operator| on Statistic values, yielding a Statistics flag set
Q_DECLARE_OPERATORS_FOR_FLAGS( QgsStatisticalSummary::Statistics )
#endif // QGSSTATISTICALSUMMARY_H

View File

@ -154,3 +154,5 @@ ADD_QGIS_TEST(snappingutilstest testqgssnappingutils.cpp )
ADD_QGIS_TEST(imageoperationtest testqgsimageoperation.cpp)
ADD_QGIS_TEST(painteffecttest testqgspainteffect.cpp)
ADD_QGIS_TEST(painteffectregistrytest testqgspainteffectregistry.cpp)
ADD_QGIS_TEST(statisticalsummarytest testqgsstatisticalsummary.cpp)

View File

@ -0,0 +1,90 @@
/***************************************************************************
testqgsstatisticalsummary.cpp
-----------------------------
Date : May 2015
Copyright : (C) 2015 Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#include <QtTest/QtTest>
#include <QObject>
#include <QString>
#include <QStringList>
#include <QSettings>
#include <QSharedPointer>
#include "qgsstatisticalsummary.h"
#include "qgis.h"
// Unit test for QgsStatisticalSummary, run through the QtTest framework.
// Every private slot is picked up by QtTest: the four fixture slots are
// invoked around the tests, and any other slot is run as a test function.
class TestQgsStatisticSummary: public QObject
{
Q_OBJECT
private slots:
void initTestCase();// will be called before the first test function is executed.
void cleanupTestCase();// will be called after the last test function was executed.
void init();// will be called before each test function is executed.
void cleanup();// will be called after every test function.
void stats();// checks the statistics calculated for small, hand-verified lists
private:
};
// The QtTest fixture slots below are intentionally empty: the test creates
// everything it needs locally, so there is no shared state to set up or
// tear down.
void TestQgsStatisticSummary::initTestCase()
{
}
void TestQgsStatisticSummary::cleanupTestCase()
{
}
void TestQgsStatisticSummary::init()
{
}
void TestQgsStatisticSummary::cleanup()
{
}
void TestQgsStatisticSummary::stats()
{
QgsStatisticalSummary s( QgsStatisticalSummary::All );
QList<double> values;
values << 4 << 2 << 3 << 2 << 5 << 8;
s.calculate( values );
QCOMPARE( s.count(), 6 );
QCOMPARE( s.sum(), 24.0 );
QCOMPARE( s.mean(), 4.0 );
QVERIFY( qgsDoubleNear( s.stDev(), 2.0816, 0.0001 ) );
QVERIFY( qgsDoubleNear( s.sampleStDev(), 2.2803, 0.0001 ) );
QCOMPARE( s.min(), 2.0 );
QCOMPARE( s.max(), 8.0 );
QCOMPARE( s.range(), 6.0 );
QCOMPARE( s.median(), 3.5 );
values << 9;
s.calculate( values );
QCOMPARE( s.median(), 4.0 );
values << 4 << 5 << 8 << 12 << 12 << 12;
s.calculate( values );
QCOMPARE( s.variety(), 7 );
QCOMPARE( s.minority(), 3.0 );
QCOMPARE( s.majority(), 12.0 );
}
QTEST_MAIN( TestQgsStatisticSummary )
#include "testqgsstatisticalsummary.moc"