mirror of
https://github.com/qgis/QGIS.git
synced 2025-04-13 00:03:09 -04:00
New class QgsStringStatisticalSummary, for calculating statistics
on lists of strings
This commit is contained in:
parent
ceba5264f7
commit
0493cbfc21
@ -127,6 +127,7 @@
|
||||
%Include qgssnappingutils.sip
|
||||
%Include qgsspatialindex.sip
|
||||
%Include qgsstatisticalsummary.sip
|
||||
%Include qgsstringstatisticalsummary.sip
|
||||
%Include qgsstringutils.sip
|
||||
%Include qgstolerance.sip
|
||||
%Include qgstracer.sip
|
||||
|
119
python/core/qgsstringstatisticalsummary.sip
Normal file
119
python/core/qgsstringstatisticalsummary.sip
Normal file
@ -0,0 +1,119 @@
|
||||
/** \ingroup core
|
||||
* \class QgsStringStatisticalSummary
|
||||
* \brief Calculator for summary statistics and aggregates for a list of strings.
|
||||
*
|
||||
* Statistics are calculated by calling @link calculate @endlink and passing a list of strings. The
|
||||
* individual statistics can then be retrieved using the associated methods. Note that not all statistics
|
||||
* are calculated by default. Statistics which require slower computations are only calculated by
|
||||
* specifying the statistic in the constructor or via @link setStatistics @endlink.
|
||||
*
|
||||
* \note Added in version 2.16
|
||||
*/
|
||||
|
||||
|
||||
class QgsStringStatisticalSummary
{
%TypeHeaderCode
#include <qgsstringstatisticalsummary.h>
%End

  public:

    //! Enumeration of flags that specify statistics to be calculated
    enum Statistic
    {
      Count,  //!< Count
      CountDistinct,  //!< Number of distinct string values
      CountMissing,  //!< Number of missing (null) values
      Min,  //!< Minimum string value
      Max,  //!< Maximum string value
      MinimumLength,  //!< Minimum length of string
      MaximumLength,  //!< Maximum length of string
      All, //!< All statistics
    };
    typedef QFlags<QgsStringStatisticalSummary::Statistic> Statistics;

    /** Constructor for QgsStringStatisticalSummary
     * @param stats flags for statistics to calculate
     */
    QgsStringStatisticalSummary( const QgsStringStatisticalSummary::Statistics& stats = All );

    /** Returns flags which specify which statistics will be calculated. Some statistics
     * are always calculated (eg count).
     * @see setStatistics
     */
    Statistics statistics() const;

    /** Sets flags which specify which statistics will be calculated. Some statistics
     * are always calculated (eg count).
     * @param stats flags for statistics to calculate
     * @see statistics
     */
    void setStatistics( const Statistics& stats );

    /** Resets the calculated values
     */
    void reset();

    /** Calculates summary statistics for a list of strings.
     * @param values list of strings
     */
    void calculate( const QStringList& values );

    /** Calculates summary statistics for a list of variants. Any non-string variants will be
     * ignored.
     * @param values list of variants
     */
    void calculate( const QVariantList& values );

    /** Returns the value of a specified statistic
     * @param stat statistic to return
     * @returns calculated value of statistic
     */
    QVariant statistic( Statistic stat ) const;

    /** Returns the calculated count of values.
     */
    int count() const;

    /** Returns the number of distinct string values.
     * @see distinctValues()
     */
    int countDistinct() const;

    /** Returns the set of distinct string values.
     * @see countDistinct()
     */
    QSet< QString > distinctValues() const;

    /** Returns the number of missing (null) string values.
     */
    int countMissing() const;

    /** Returns the minimum (non-null) string value.
     */
    QString min() const;

    /** Returns the maximum (non-null) string value.
     */
    QString max() const;

    /** Returns the minimum length of strings.
     */
    int minLength() const;

    /** Returns the maximum length of strings.
     */
    int maxLength() const;

    /** Returns the friendly display name for a statistic
     * @param statistic statistic to return name for
     */
    static QString displayName( Statistic statistic );

};
|
||||
|
||||
QFlags<QgsStringStatisticalSummary::Statistic> operator|(QgsStringStatisticalSummary::Statistic f1, QFlags<QgsStringStatisticalSummary::Statistic> f2);
|
||||
|
@ -194,6 +194,7 @@ SET(QGIS_CORE_SRCS
|
||||
qgssqlexpressioncompiler.cpp
|
||||
qgssqliteexpressioncompiler.cpp
|
||||
qgsstatisticalsummary.cpp
|
||||
qgsstringstatisticalsummary.cpp
|
||||
qgsstringutils.cpp
|
||||
qgstextlabelfeature.cpp
|
||||
qgstolerance.cpp
|
||||
@ -689,6 +690,7 @@ SET(QGIS_CORE_HDRS
|
||||
qgsspatialindex.h
|
||||
qgssqlexpressioncompiler.h
|
||||
qgsstatisticalsummary.h
|
||||
qgsstringstatisticalsummary.h
|
||||
qgsstringutils.h
|
||||
qgstextlabelfeature.h
|
||||
qgstolerance.h
|
||||
|
154
src/core/qgsstringstatisticalsummary.cpp
Normal file
154
src/core/qgsstringstatisticalsummary.cpp
Normal file
@ -0,0 +1,154 @@
|
||||
/***************************************************************************
|
||||
qgsstringstatisticalsummary.cpp
|
||||
-------------------------------
|
||||
Date : May 2016
|
||||
Copyright : (C) 2016 by Nyall Dawson
|
||||
Email : nyall dot dawson at gmail dot com
|
||||
***************************************************************************
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU General Public License as published by *
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
***************************************************************************/
|
||||
|
||||
#include "qgsstringstatisticalsummary.h"
|
||||
#include <QString>
|
||||
#include <QStringList>
|
||||
#include <QObject>
|
||||
#include <QVariant>
|
||||
#include <QVariantList>
|
||||
#include "limits.h"
|
||||
|
||||
/***************************************************************************
|
||||
* This class is considered CRITICAL and any change MUST be accompanied with
|
||||
* full unit tests in test_qgsstringstatisticalsummary.py.
|
||||
* See details in QEP #17
|
||||
****************************************************************************/
|
||||
|
||||
// Builds a summary calculator that will compute the statistics flagged in
// stats. All scalar members are given their "empty" values before reset()
// runs, and reset() then establishes the full initial state.
QgsStringStatisticalSummary::QgsStringStatisticalSummary( const QgsStringStatisticalSummary::Statistics& stats )
    : mStatistics( stats )
    , mCount( 0 )
    , mCountMissing( 0 )
    , mMinLength( INT_MAX )
    , mMaxLength( 0 )
{
  reset();
}
|
||||
|
||||
void QgsStringStatisticalSummary::reset()
|
||||
{
|
||||
mCount = 0;
|
||||
mValues.clear();
|
||||
mCountMissing = 0;
|
||||
mMin.clear();
|
||||
mMax.clear();
|
||||
mMinLength = INT_MAX;
|
||||
mMaxLength = 0;
|
||||
}
|
||||
|
||||
void QgsStringStatisticalSummary::calculate( const QStringList& values )
|
||||
{
|
||||
reset();
|
||||
|
||||
Q_FOREACH ( const QString& string, values )
|
||||
{
|
||||
testString( string );
|
||||
}
|
||||
}
|
||||
|
||||
void QgsStringStatisticalSummary::calculate( const QVariantList& values )
|
||||
{
|
||||
reset();
|
||||
|
||||
Q_FOREACH ( const QVariant& variant, values )
|
||||
{
|
||||
if ( variant.type() == QVariant::String )
|
||||
{
|
||||
testString( variant.toString() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void QgsStringStatisticalSummary::testString( const QString& string )
|
||||
{
|
||||
mCount++;
|
||||
|
||||
if ( string.isEmpty() )
|
||||
mCountMissing++;
|
||||
|
||||
if ( mStatistics & CountDistinct )
|
||||
{
|
||||
mValues << string;
|
||||
}
|
||||
if ( mStatistics & Min )
|
||||
{
|
||||
if ( !mMin.isEmpty() && !string.isEmpty() )
|
||||
{
|
||||
mMin = qMin( mMin, string );
|
||||
}
|
||||
else if ( mMin.isEmpty() && !string.isEmpty() )
|
||||
{
|
||||
mMin = string;
|
||||
}
|
||||
}
|
||||
if ( mStatistics & Max )
|
||||
{
|
||||
if ( !mMax.isEmpty() && !string.isEmpty() )
|
||||
{
|
||||
mMax = qMax( mMax, string );
|
||||
}
|
||||
else if ( mMax.isEmpty() && !string.isEmpty() )
|
||||
{
|
||||
mMax = string;
|
||||
}
|
||||
}
|
||||
mMinLength = qMin( mMinLength, string.length() );
|
||||
mMaxLength = qMax( mMaxLength, string.length() );
|
||||
}
|
||||
|
||||
// Returns the calculated value for a single statistic, wrapped in a QVariant.
// The combined All flag does not identify one statistic, so it yields 0.
QVariant QgsStringStatisticalSummary::statistic( QgsStringStatisticalSummary::Statistic stat ) const
{
  switch ( stat )
  {
    // counts
    case Count:
      return mCount;
    case CountDistinct:
      return mValues.count();
    case CountMissing:
      return mCountMissing;

    // string extremes
    case Min:
      return mMin;
    case Max:
      return mMax;

    // length extremes
    case MinimumLength:
      return mMinLength;
    case MaximumLength:
      return mMaxLength;

    case All:
      // not a single statistic - use the fallback below
      break;
  }

  return 0;
}
|
||||
|
||||
// Returns the translated, user-facing name of a single statistic. The
// combined All flag has no name of its own and yields an empty string.
QString QgsStringStatisticalSummary::displayName( QgsStringStatisticalSummary::Statistic statistic )
{
  switch ( statistic )
  {
    // counts
    case Count:
      return QObject::tr( "Count" );
    case CountDistinct:
      return QObject::tr( "Count (distinct)" );
    case CountMissing:
      return QObject::tr( "Count (missing)" );

    // string extremes
    case Min:
      return QObject::tr( "Minimum" );
    case Max:
      return QObject::tr( "Maximum" );

    // length extremes
    case MinimumLength:
      return QObject::tr( "Minimum length" );
    case MaximumLength:
      return QObject::tr( "Maximum length" );

    case All:
      // not a single statistic - use the fallback below
      break;
  }

  return QString();
}
|
||||
|
153
src/core/qgsstringstatisticalsummary.h
Normal file
153
src/core/qgsstringstatisticalsummary.h
Normal file
@ -0,0 +1,153 @@
|
||||
/***************************************************************************
|
||||
qgsstringstatisticalsummary.h
|
||||
-----------------------------
|
||||
Date : May 2016
|
||||
Copyright : (C) 2016 by Nyall Dawson
|
||||
Email : nyall dot dawson at gmail dot com
|
||||
***************************************************************************
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU General Public License as published by *
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef QGSSTRINGSTATISTICALSUMMARY_H
|
||||
#define QGSSTRINGSTATISTICALSUMMARY_H
|
||||
|
||||
#include <QSet>
|
||||
#include <QVariantList>
|
||||
|
||||
/***************************************************************************
|
||||
* This class is considered CRITICAL and any change MUST be accompanied with
|
||||
* full unit tests in test_qgsstringstatisticalsummary.py.
|
||||
* See details in QEP #17
|
||||
****************************************************************************/
|
||||
|
||||
/** \ingroup core
|
||||
* \class QgsStringStatisticalSummary
|
||||
* \brief Calculator for summary statistics and aggregates for a list of strings.
|
||||
*
|
||||
* Statistics are calculated by calling @link calculate @endlink and passing a list of strings. The
|
||||
* individual statistics can then be retrieved using the associated methods. Note that not all statistics
|
||||
* are calculated by default. Statistics which require slower computations are only calculated by
|
||||
* specifying the statistic in the constructor or via @link setStatistics @endlink.
|
||||
*
|
||||
* \note Added in version 2.16
|
||||
*/
|
||||
|
||||
class CORE_EXPORT QgsStringStatisticalSummary
|
||||
{
|
||||
public:
|
||||
|
||||
//! Enumeration of flags that specify statistics to be calculated
|
||||
enum Statistic
|
||||
{
|
||||
Count = 1, //!< Count
|
||||
CountDistinct = 2, //!< Number of distinct string values
|
||||
CountMissing = 4, //!< Number of missing (null) values
|
||||
Min = 8, //!< Minimum string value
|
||||
Max = 16, //!< Maximum string value
|
||||
MinimumLength = 32, //!< Minimum length of string
|
||||
MaximumLength = 64, //!< Maximum length of string
|
||||
All = Count | CountDistinct | CountMissing | Min | Max, //! All statistics
|
||||
};
|
||||
Q_DECLARE_FLAGS( Statistics, Statistic )
|
||||
|
||||
/** Constructor for QgsStringStatistics
|
||||
* @param stats flags for statistics to calculate
|
||||
*/
|
||||
QgsStringStatisticalSummary( const QgsStringStatisticalSummary::Statistics& stats = All );
|
||||
|
||||
/** Returns flags which specify which statistics will be calculated. Some statistics
|
||||
* are always calculated (eg count).
|
||||
* @see setStatistics
|
||||
*/
|
||||
Statistics statistics() const { return mStatistics; }
|
||||
|
||||
/** Sets flags which specify which statistics will be calculated. Some statistics
|
||||
* are always calculated (eg count).
|
||||
* @param stats flags for statistics to calculate
|
||||
* @see statistics
|
||||
*/
|
||||
void setStatistics( const Statistics& stats ) { mStatistics = stats; }
|
||||
|
||||
/** Resets the calculated values
|
||||
*/
|
||||
void reset();
|
||||
|
||||
/** Calculates summary statistics for a list of strings.
|
||||
* @param values list of strings
|
||||
*/
|
||||
void calculate( const QStringList& values );
|
||||
|
||||
/** Calculates summary statistics for a list of variants. Any non-string variants will be
|
||||
* ignored.
|
||||
* @param values list of variants
|
||||
*/
|
||||
void calculate( const QVariantList& values );
|
||||
|
||||
/** Returns the value of a specified statistic
|
||||
* @param stat statistic to return
|
||||
* @returns calculated value of statistic
|
||||
*/
|
||||
QVariant statistic( Statistic stat ) const;
|
||||
|
||||
/** Returns the calculated count of values.
|
||||
*/
|
||||
int count() const { return mCount; }
|
||||
|
||||
/** Returns the number of distinct string values.
|
||||
* @see distinctValues()
|
||||
*/
|
||||
int countDistinct() const { return mValues.count(); }
|
||||
|
||||
/** Returns the set of distinct string values.
|
||||
* @see countDistinct()
|
||||
*/
|
||||
QSet< QString > distinctValues() const { return mValues; }
|
||||
|
||||
/** Returns the number of missing (null) string values.
|
||||
*/
|
||||
int countMissing() const { return mCountMissing; }
|
||||
|
||||
/** Returns the minimum (non-null) string value.
|
||||
*/
|
||||
QString min() const { return mMin; }
|
||||
|
||||
/** Returns the maximum (non-null) string value.
|
||||
*/
|
||||
QString max() const { return mMax; }
|
||||
|
||||
/** Returns the minimum length of strings.
|
||||
*/
|
||||
int minLength() const { return mMinLength; }
|
||||
|
||||
/** Returns the maximum length of strings.
|
||||
*/
|
||||
int maxLength() const { return mMaxLength; }
|
||||
|
||||
/** Returns the friendly display name for a statistic
|
||||
* @param statistic statistic to return name for
|
||||
*/
|
||||
static QString displayName( Statistic statistic );
|
||||
|
||||
private:
|
||||
|
||||
Statistics mStatistics;
|
||||
|
||||
int mCount;
|
||||
QSet< QString > mValues;
|
||||
int mCountMissing;
|
||||
QString mMin;
|
||||
QString mMax;
|
||||
int mMinLength;
|
||||
int mMaxLength;
|
||||
|
||||
void testString( const QString& string );
|
||||
};
|
||||
|
||||
Q_DECLARE_OPERATORS_FOR_FLAGS( QgsStringStatisticalSummary::Statistics )
|
||||
|
||||
#endif // QGSSTRINGSTATISTICALSUMMARY_H
|
@ -66,6 +66,7 @@ ADD_PYTHON_TEST(PyQgsShapefileProvider test_provider_shapefile.py)
|
||||
ADD_PYTHON_TEST(PyQgsTabfileProvider test_provider_tabfile.py)
|
||||
ADD_PYTHON_TEST(PyQgsSpatialIndex test_qgsspatialindex.py)
|
||||
ADD_PYTHON_TEST(PyQgsSpatialiteProvider test_provider_spatialite.py)
|
||||
ADD_PYTHON_TEST(PyQgsStringStatisticalSummary test_qgsstringstatisticalsummary.py)
|
||||
ADD_PYTHON_TEST(PyQgsSymbolLayerV2 test_qgssymbollayerv2.py)
|
||||
ADD_PYTHON_TEST(PyQgsArrowSymbolLayer test_qgsarrowsymbollayer.py)
|
||||
ADD_PYTHON_TEST(PyQgsSymbolExpressionVariables test_qgssymbolexpressionvariables.py)
|
||||
|
78
tests/src/python/test_qgsstringstatisticalsummary.py
Normal file
78
tests/src/python/test_qgsstringstatisticalsummary.py
Normal file
@ -0,0 +1,78 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""QGIS Unit tests for QgsStringStatisticalSummary.
|
||||
|
||||
.. note:: This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
"""
|
||||
__author__ = 'Nyall Dawson'
|
||||
__date__ = '07/05/2016'
|
||||
__copyright__ = 'Copyright 2016, The QGIS Project'
|
||||
# This will get replaced with a git SHA1 when you do a git archive
|
||||
__revision__ = '$Format:%H$'
|
||||
|
||||
import qgis # NOQA
|
||||
|
||||
from qgis.core import (QgsStringStatisticalSummary
|
||||
)
|
||||
from qgis.testing import unittest
|
||||
|
||||
|
||||
class PyQgsStringStatisticalSummary(unittest.TestCase):
    """Unit tests for QgsStringStatisticalSummary."""

    def testStats(self):
        """Calculates every statistic from a plain list of strings."""
        summary = QgsStringStatisticalSummary()
        self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)

        strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
        summary.calculate(strings)
        self.assertEqual(summary.count(), 9)
        self.assertEqual(summary.countDistinct(), 6)
        self.assertEqual(set(summary.distinctValues()),
                         {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
        self.assertEqual(summary.countMissing(), 2)
        self.assertEqual(summary.min(), 'aaaa')
        self.assertEqual(summary.max(), 'eeee')
        self.assertEqual(summary.minLength(), 0)
        self.assertEqual(summary.maxLength(), 8)

        # extra check for minLength without empty strings
        summary.calculate(['1111111', '111', '11111'])
        self.assertEqual(summary.minLength(), 3)

    def testIndividualStats(self):
        """Calculates statistics one at a time.

        Makes sure statistic calculations are not dependent on each other.
        """
        # (statistic flag, expected value) pairs
        cases = [
            (QgsStringStatisticalSummary.Count, 9),
            (QgsStringStatisticalSummary.CountDistinct, 6),
            (QgsStringStatisticalSummary.CountMissing, 2),
            (QgsStringStatisticalSummary.Min, 'aaaa'),
            (QgsStringStatisticalSummary.Max, 'eeee'),
            (QgsStringStatisticalSummary.MinimumLength, 0),
            (QgsStringStatisticalSummary.MaximumLength, 8),
        ]
        strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

        # a single instance is reused so that the setter path is exercised too
        shared = QgsStringStatisticalSummary()
        for stat, expected in cases:
            # test constructor
            fresh = QgsStringStatisticalSummary(stat)
            self.assertEqual(fresh.statistics(), stat)

            # test setter followed by calculation
            shared.setStatistics(stat)
            self.assertEqual(shared.statistics(), stat)
            shared.calculate(strings)
            self.assertEqual(shared.statistic(stat), expected)

            # display name
            self.assertGreater(len(QgsStringStatisticalSummary.displayName(stat)), 0)

    def testVariantStats(self):
        """Calculates statistics from a mixed list; non-string values are ignored."""
        summary = QgsStringStatisticalSummary()
        self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)

        mixed = ['cc', 5, 'bbbb', 'aaaa', 'eeee', 6, 9, '9', '']
        summary.calculate(mixed)
        self.assertEqual(summary.count(), 6)
        self.assertEqual(set(summary.distinctValues()),
                         {'cc', 'aaaa', 'bbbb', 'eeee', '', '9'})
        self.assertEqual(summary.countMissing(), 1)
        self.assertEqual(summary.min(), '9')
        self.assertEqual(summary.max(), 'eeee')
|
||||
|
||||
if __name__ == '__main__':
    # run the test cases when this module is executed directly
    unittest.main()
|
Loading…
x
Reference in New Issue
Block a user