Add QgsHistogram class for calculating numeric histograms from a list of values or a vector layer's attribute.
This commit is contained in:
Nyall Dawson 2015-05-02 19:55:57 +10:00
parent 7091d3bef8
commit 17962ef47b
7 changed files with 449 additions and 0 deletions

View File

@ -43,6 +43,7 @@
%Include qgsgeometry.sip
%Include qgsgeometryvalidator.sip
%Include qgsgeometrysimplifier.sip
%Include qgshistogram.sip
%Include qgsmaptopixelgeometrysimplifier.sip
%Include qgsgml.sip
%Include qgsgmlschema.sip

View File

@ -0,0 +1,66 @@
/** \ingroup core
* \class QgsHistogram
* \brief Calculator for a numeric histogram from a list of values.
*
* Source values are assigned with one of the setValues() overloads; bin widths,
* bin edges and per-bin counts can then be queried.
*
* \note Added in version 2.9
*/
class QgsHistogram
{
%TypeHeaderCode
#include "qgshistogram.h"
%End
public:
QgsHistogram();
virtual ~QgsHistogram();
/** Assigns numeric source values for the histogram.
* @param values list of doubles
*/
void setValues( const QList<double>& values );
/** Assigns numeric source values for the histogram from a vector layer's field or as the
* result of an expression. Any previously assigned values are discarded.
* @param layer vector layer
* @param fieldOrExpression field name or expression to be evaluated
* @returns true if values were successfully set, or false if the layer is null or
* the values could not be retrieved from the layer
*/
bool setValues( QgsVectorLayer* layer, const QString& fieldOrExpression );
/** Calculates the optimal bin width using the Freedman-Diaconis rule. Bin widths are
* determined by the inter-quartile range of values and the number of values.
* @returns optimal width for bins
* @see optimalNumberBins
* @note values must first be specified using @link setValues @endlink
*/
double optimalBinWidth() const;
/** Returns the optimal number of bins for the source values, calculated using the
* Freedman-Diaconis rule. The number of bins is determined by the inter-quartile range
* of values and the number of values.
* @returns optimal number of bins
* @see optimalBinWidth
* @note values must first be specified using @link setValues @endlink
*/
int optimalNumberBins() const;
/** Returns a list of edges for the histogram for a specified number of bins. The edges
* are evenly spaced between the minimum and maximum source values. This list
* will be length bins + 1, as both the first and last value are also included.
* @param bins number of bins
* @return list of bin edges
* @note values must first be specified using @link setValues @endlink
*/
QList<double> binEdges( int bins ) const;
/** Returns the calculated list of the counts for the histogram bins. The bins are
* delimited by the edges returned by binEdges() for the same number of bins.
* @param bins number of histogram bins
* @return list of histogram counts
* @see binEdges
* @note values must first be specified using @link setValues @endlink
*/
QList<int> counts( int bins ) const;
};

View File

@ -107,6 +107,7 @@ SET(QGIS_CORE_SRCS
qgsgeometryvalidator.cpp
qgsgml.cpp
qgsgmlschema.cpp
qgshistogram.cpp
qgslayerdefinition.cpp
qgslabel.cpp
qgslabelattributes.cpp
@ -510,6 +511,7 @@ SET(QGIS_CORE_HDRS
qgsfontutils.h
qgsgeometry.h
qgsgeometrycache.h
qgshistogram.h
qgslayerdefinition.h
qgslabel.h
qgslabelattributes.h

125
src/core/qgshistogram.cpp Normal file
View File

@ -0,0 +1,125 @@
/***************************************************************************
qgshistogram.cpp
----------------
begin : May 2015
copyright : (C) 2015 by Nyall Dawson
email : nyall dot dawson at gmail dot com
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#include "qgshistogram.h"
#include "qgsstatisticalsummary.h"
#include "qgsvectorlayer.h"
#include <qmath.h>
// Creates a histogram with no source values; the cached maximum, minimum and
// inter-quartile range all start at zero until setValues() is called.
QgsHistogram::QgsHistogram()
: mMax( 0 )
, mMin( 0 )
, mIQR( 0 )
{
}
// Intentionally empty: the destructor body performs no explicit clean-up.
QgsHistogram::~QgsHistogram()
{
}
void QgsHistogram::prepareValues()
{
qSort( mValues.begin(), mValues.end() );
QgsStatisticalSummary s;
s.setStatistics( QgsStatisticalSummary::Max | QgsStatisticalSummary::Min | QgsStatisticalSummary::InterQuartileRange );
s.calculate( mValues );
mMin = s.min();
mMax = s.max();
mIQR = s.interQuartileRange();
}
// Stores a copy of the supplied values, then sorts the copy and refreshes the
// cached statistics via prepareValues().
void QgsHistogram::setValues( const QList<double> &values )
{
mValues = values;
prepareValues();
}
// Assigns the histogram's source values from a vector layer's field, or from the
// result of evaluating an expression against the layer.
//
// Returns true if the values were retrieved and assigned. Returns false if the
// layer is null or the values could not be retrieved; in that case the histogram
// is left empty, with its cached minimum, maximum and inter-quartile range reset
// to zero, so no stale or partial data from an earlier call can be observed.
bool QgsHistogram::setValues( QgsVectorLayer *layer, const QString &fieldOrExpression )
{
  // start from the same clean state the constructor establishes
  mValues.clear();
  mMin = 0;
  mMax = 0;
  mIQR = 0;

  if ( !layer )
    return false;

  // fetch into a local list first, so that a failed retrieval can never leave
  // unsorted values without matching statistics in the member
  bool ok = false;
  const QList<double> layerValues = layer->getDoubleValues( fieldOrExpression, ok );
  if ( !ok )
    return false;

  mValues = layerValues;
  prepareValues();
  return true;
}
// Returns the optimal bin width for the assigned values according to the
// Freedman-Diaconis rule: width = 2 * IQR * n^(-1/3), where n is the number of
// values. Returns 0 when no values have been assigned.
double QgsHistogram::optimalBinWidth() const
{
  // with no values n^(-1/3) is infinite and the product below would be NaN
  if ( mValues.isEmpty() )
    return 0.0;

  //Freedman-Diaconis rule
  return 2.0 * mIQR * qPow( mValues.count(), -1 / 3.0 );
}
int QgsHistogram::optimalNumberBins() const
{
return ceil(( mMax - mMin ) / optimalBinWidth() );
}
// Returns the edges of 'bins' equally wide bins spanning the minimum to the
// maximum assigned value. The list has bins + 1 entries: the first is exactly the
// minimum and the last is exactly the maximum.
//
// For bins <= 0 there is nothing to subdivide and only the minimum is returned.
QList<double> QgsHistogram::binEdges( int bins ) const
{
  QList<double> edges;
  edges << mMin;
  if ( bins <= 0 )
    return edges; // also avoids dividing by zero below

  edges.reserve( bins + 1 );
  const double binWidth = ( mMax - mMin ) / bins;
  // each interior edge is derived directly from the minimum rather than by
  // repeatedly adding the width, so rounding errors do not accumulate
  for ( int i = 1; i < bins; ++i )
  {
    edges << mMin + i * binWidth;
  }
  // pin the final edge to the maximum so it cannot drift above or below it
  edges << mMax;
  return edges;
}

// Returns the number of assigned values falling in each of 'bins' equally wide
// bins, using the edges from binEdges(). A bin contains the values which are
// greater than or equal to its lower edge and less than its upper edge; the last
// bin additionally contains every value equal to the maximum.
//
// Returns an empty list for bins <= 0. When no values have been assigned, every
// count is zero. Relies on the values being sorted in ascending order, which
// prepareValues() takes care of.
QList<int> QgsHistogram::counts( int bins ) const
{
  QList<int> binCounts;
  if ( bins <= 0 )
    return binCounts;

  const QList<double> edges = binEdges( bins );
  binCounts.reserve( bins );

  const int valueCount = mValues.count();
  int currentValueIndex = 0;
  for ( int i = 0; i < bins; ++i )
  {
    const double upperEdge = edges.at( i + 1 );
    int count = 0;
    // the index is checked before every access, so an empty or already
    // exhausted value list is never read out of range
    while ( currentValueIndex < valueCount && mValues.at( currentValueIndex ) < upperEdge )
    {
      ++count;
      ++currentValueIndex;
    }
    binCounts << count;
  }

  // whatever is left is not below the final edge (i.e. it equals the maximum):
  // all of those values belong in the last bin, not just one of them
  binCounts.last() += valueCount - currentValueIndex;

  return binCounts;
}

97
src/core/qgshistogram.h Normal file
View File

@ -0,0 +1,97 @@
/***************************************************************************
qgshistogram.h
--------------
begin : May 2015
copyright : (C) 2015 by Nyall Dawson
email : nyall dot dawson at gmail dot com
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#ifndef QGSHISTOGRAM_H
#define QGSHISTOGRAM_H
#include <QList>
class QgsVectorLayer;
/** \ingroup core
 * \class QgsHistogram
 * \brief Calculator for a numeric histogram from a list of values.
 *
 * Source values are assigned with one of the setValues() overloads; bin widths,
 * bin edges and per-bin counts can then be queried.
 *
 * \note Added in version 2.9
 */
class CORE_EXPORT QgsHistogram
{
public:
QgsHistogram();
virtual ~QgsHistogram();
/** Assigns numeric source values for the histogram.
 * @param values list of doubles
 */
void setValues( const QList<double>& values );
/** Assigns numeric source values for the histogram from a vector layer's field or as the
 * result of an expression. Any previously assigned values are discarded.
 * @param layer vector layer
 * @param fieldOrExpression field name or expression to be evaluated
 * @returns true if values were successfully set, or false if the layer is null or
 * the values could not be retrieved from the layer
 */
bool setValues( QgsVectorLayer* layer, const QString& fieldOrExpression );
/** Calculates the optimal bin width using the Freedman-Diaconis rule. Bin widths are
 * determined by the inter-quartile range of values and the number of values.
 * @returns optimal width for bins
 * @see optimalNumberBins
 * @note values must first be specified using @link setValues @endlink
 */
double optimalBinWidth() const;
/** Returns the optimal number of bins for the source values, calculated using the
 * Freedman-Diaconis rule. The number of bins is determined by the inter-quartile range
 * of values and the number of values.
 * @returns optimal number of bins
 * @see optimalBinWidth
 * @note values must first be specified using @link setValues @endlink
 */
int optimalNumberBins() const;
/** Returns a list of edges for the histogram for a specified number of bins. The edges
 * are evenly spaced between the minimum and maximum source values. This list
 * will be length bins + 1, as both the first and last value are also included.
 * @param bins number of bins
 * @return list of bin edges
 * @note values must first be specified using @link setValues @endlink
 */
QList<double> binEdges( int bins ) const;
/** Returns the calculated list of the counts for the histogram bins. The bins are
 * delimited by the edges returned by binEdges() for the same number of bins.
 * @param bins number of histogram bins
 * @return list of histogram counts
 * @see binEdges
 * @note values must first be specified using @link setValues @endlink
 */
QList<int> counts( int bins ) const;
private:
//! Source values for the histogram
QList<double> mValues;
//! Cached maximum of the source values
double mMax;
//! Cached minimum of the source values
double mMin;
//! Cached inter-quartile range of the source values
double mIQR;
//! Sorts the source values and refreshes the cached minimum, maximum and inter-quartile range
void prepareValues();
};
#endif // QGSHISTOGRAM_H

View File

@ -156,4 +156,5 @@ ADD_QGIS_TEST(imageoperationtest testqgsimageoperation.cpp)
ADD_QGIS_TEST(painteffecttest testqgspainteffect.cpp)
ADD_QGIS_TEST(painteffectregistrytest testqgspainteffectregistry.cpp)
ADD_QGIS_TEST(statisticalsummarytest testqgsstatisticalsummary.cpp)
ADD_QGIS_TEST(histogramtest testqgshistogram.cpp)

View File

@ -0,0 +1,157 @@
/***************************************************************************
testqgshistogram.cpp
--------------------
Date : May 2015
Copyright : (C) 2015 by Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#include <QDir>
#include <QtTest/QtTest>
#include "qgsapplication.h"
#include "qgsvectorlayer.h"
#include "qgsvectordataprovider.h"
#include "qgshistogram.h"
/** \ingroup UnitTests
 * This is a unit test for QgsHistogram. It covers the optimal bin width and
 * bin count calculations, bin edge generation, per-bin counts, and assigning
 * values from a vector layer.
 */
class TestQgsHistogram : public QObject
{
Q_OBJECT
public:
TestQgsHistogram();
private slots:
// Qt Test fixture slots
void initTestCase();
void cleanupTestCase();
void init() {}
void cleanup() {}
// test cases
void optimalBinWidth();
void optimalBinCount();
void binEdges();
void counts();
void fromLayer();
private:
};
// Nothing to construct: the test class has no members.
TestQgsHistogram::TestQgsHistogram()
{
}
// Initializes the QGIS application before the tests run.
void TestQgsHistogram::initTestCase()
{
QgsApplication::init();
QgsApplication::initQgis();
}
// Shuts the QGIS application down again; counterpart of initTestCase().
void TestQgsHistogram::cleanupTestCase()
{
QgsApplication::exitQgis();
}
void TestQgsHistogram::optimalBinWidth()
{
QList<double> vals;
vals << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10;
QgsHistogram h;
h.setValues( vals );
QVERIFY( qgsDoubleNear( h.optimalBinWidth(), 4.641, 0.001 ) );
}
void TestQgsHistogram::optimalBinCount()
{
QList<double> vals;
vals << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10;
QgsHistogram h;
h.setValues( vals );
QCOMPARE( h.optimalNumberBins(), 2 );
}
void TestQgsHistogram::binEdges()
{
QList<double> vals;
vals << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10;
QgsHistogram h;
h.setValues( vals );
QList<double> edges = h.binEdges( 3 );
QCOMPARE( edges.count(), 4 );
QCOMPARE( edges.at( 0 ), 1.0 );
QCOMPARE( edges.at( 1 ), 4.0 );
QCOMPARE( edges.at( 2 ), 7.0 );
QCOMPARE( edges.at( 3 ), 10.0 );
}
void TestQgsHistogram::counts()
{
QList<double> vals;
vals << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10;
QgsHistogram h;
h.setValues( vals );
QList<int> counts = h.counts( 1 );
QList<int> expected;
expected << 10;
QCOMPARE( counts, expected );
counts = h.counts( 2 );
expected.clear();
expected << 5 << 5;
QCOMPARE( counts, expected );
counts = h.counts( 5 );
expected.clear();
expected << 2 << 2 << 2 << 2 << 2;
QCOMPARE( counts, expected );
counts = h.counts( 20 );
expected.clear();
expected << 1 << 0 << 1 << 0 << 1 << 0 << 1 << 0 << 1 << 0 << 0 << 1 << 0 << 1 << 0 << 1 << 0 << 1 << 0 << 1;
QCOMPARE( counts, expected );
}
void TestQgsHistogram::fromLayer()
{
QgsHistogram h;
QVERIFY( !h.setValues( 0, QString() ));
QgsVectorLayer* layer = new QgsVectorLayer( "Point?field=col1:real", "layer", "memory" );
QVERIFY( layer->isValid() );
QgsFeatureList features;
for ( int i = 1; i <= 10; ++i )
{
QgsFeature f( layer->dataProvider()->fields(), i );
f.setAttribute( "col1", i );
features << f;
}
layer->dataProvider()->addFeatures( features );
QVERIFY( !h.setValues( layer, QString() ));
QVERIFY( h.setValues( layer, QString( "col1" ) ) );
QList<int>counts = h.counts( 5 );
QList<int> expected;
expected << 2 << 2 << 2 << 2 << 2;
QCOMPARE( counts, expected );
delete layer;
}
QTEST_MAIN( TestQgsHistogram )
#include "testqgshistogram.moc"