New class QgsDateTimeStatisticalSummary

Calculates summary statistics on lists of datetime values, such
as count, count distinct, min, max and range
This commit is contained in:
Nyall Dawson 2016-05-10 11:11:33 +10:00
parent 0d165e5621
commit b053ad8769
7 changed files with 528 additions and 0 deletions

View File

@ -39,6 +39,7 @@
%Include qgsdataitemproviderregistry.sip
%Include qgsdataprovider.sip
%Include qgsdatasourceuri.sip
%Include qgsdatetimestatisticalsummary.sip
%Include qgsdatumtransformstore.sip
%Include qgsdbfilterproxymodel.sip
%Include qgsdistancearea.sip

View File

@ -0,0 +1,104 @@
/** \ingroup core
* \class QgsDateTimeStatisticalSummary
* \brief Calculator for summary statistics and aggregates for a list of datetimes.
*
* Statistics are calculated by calling @link calculate @endlink and passing a list of datetimes. The
* individual statistics can then be retrieved using the associated methods. Note that not all statistics
* are calculated by default. Statistics which require slower computations are only calculated by
* specifying the statistic in the constructor or via @link setStatistics @endlink.
*
* \note Added in version 2.16
*/
class QgsDateTimeStatisticalSummary
{
%TypeHeaderCode
#include <qgsdatetimestatisticalsummary.h>
%End
public:
//! Enumeration of flags that specify statistics to be calculated
enum Statistic
{
Count, //!< Count
CountDistinct, //!< Number of distinct datetime values
CountMissing, //!< Number of missing (null) values
Min, //!< Minimum (earliest) datetime value
Max, //!< Maximum (latest) datetime value
Range, //!< Interval between earliest and latest datetime value
All, //!< All statistics
};
typedef QFlags<QgsDateTimeStatisticalSummary::Statistic> Statistics;
/** Constructor for QgsDateTimeStatisticalSummary
* @param stats flags for statistics to calculate
*/
QgsDateTimeStatisticalSummary( const QgsDateTimeStatisticalSummary::Statistics& stats = All );
/** Returns flags which specify which statistics will be calculated. Some statistics
* are always calculated (eg count and the number of missing values).
* @see setStatistics
*/
Statistics statistics() const;
/** Sets flags which specify which statistics will be calculated. Some statistics
* are always calculated (eg count and the number of missing values).
* @param stats flags for statistics to calculate
* @see statistics
*/
void setStatistics( const Statistics& stats );
/** Resets the calculated values
*/
void reset();
/** Calculates summary statistics for a list of variants. Any previously calculated
* values are reset first. Only datetime and date variants are considered; date values
* are treated as datetimes at 00:00:00 on that date. Variants of any other type are
* ignored.
* @param values list of variants
*/
void calculate( const QVariantList& values );
/** Returns the value of a specified statistic
* @param stat statistic to return
* @returns calculated value of statistic. For the All flag a value of 0 is returned.
*/
QVariant statistic( Statistic stat ) const;
/** Returns the calculated count of values. Missing (null) values are included in the count.
*/
int count() const;
/** Returns the number of distinct datetime values. A missing (null) value is counted
* as one of the distinct values.
*/
int countDistinct() const;
/** Returns the set of distinct datetime values. The set is only populated when the
* CountDistinct statistic has been requested.
*/
QSet< QDateTime > distinctValues() const;
/** Returns the number of missing (null) datetime values.
*/
int countMissing() const;
/** Returns the minimum (earliest) non-null datetime value. Only calculated when the
* Min or Range statistics have been requested.
*/
QDateTime min() const;
/** Returns the maximum (latest) non-null datetime value. Only calculated when the
* Max or Range statistics have been requested.
*/
QDateTime max() const;
/** Returns the range (interval between earliest and latest non-null datetime values).
*/
QgsInterval range() const;
/** Returns the friendly display name for a statistic
* @param statistic statistic to return name for
* @returns translated name of the statistic, or an empty string for the All flag
*/
static QString displayName( Statistic statistic );
};
// Exposes the bitwise OR of a single Statistic value with a Statistics flag set
QFlags<QgsDateTimeStatisticalSummary::Statistic> operator|(QgsDateTimeStatisticalSummary::Statistic f1, QFlags<QgsDateTimeStatisticalSummary::Statistic> f2);

View File

@ -100,6 +100,7 @@ SET(QGIS_CORE_SRCS
qgsdataitemprovider.cpp
qgsdataitemproviderregistry.cpp
qgsdatasourceuri.cpp
qgsdatetimestatisticalsummary.cpp
qgsdatumtransformstore.cpp
qgsdbfilterproxymodel.cpp
qgsdiagramrendererv2.cpp
@ -617,6 +618,7 @@ SET(QGIS_CORE_HDRS
qgsdataitemprovider.h
qgsdataitemproviderregistry.h
qgsdatasourceuri.h
qgsdatetimestatisticalsummary.h
qgsdatumtransformstore.h
qgsdbfilterproxymodel.h
qgsdiagramrendererv2.h

View File

@ -0,0 +1,144 @@
/***************************************************************************
qgsdatetimestatisticalsummary.cpp
---------------------------------
Date : May 2016
Copyright : (C) 2016 by Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#include "qgsdatetimestatisticalsummary.h"
#include <QString>
#include <QDateTime>
#include <QStringList>
#include <QObject>
#include <QVariant>
#include <QVariantList>
#include "limits.h"
/***************************************************************************
* This class is considered CRITICAL and any change MUST be accompanied with
* full unit tests in test_qgsdatetimestatisticalsummary.py.
* See details in QEP #17
****************************************************************************/
// Builds a calculator for the statistics selected by stats. The accumulated
// results start out in the same state reset() leaves them in: zero counts,
// no distinct values and invalid minimum/maximum datetimes.
QgsDateTimeStatisticalSummary::QgsDateTimeStatisticalSummary( const QgsDateTimeStatisticalSummary::Statistics& stats )
    : mStatistics( stats )
    , mCount( 0 )
    , mCountMissing( 0 )
{
  // mValues, mMin and mMax are default constructed, i.e. an empty set and
  // invalid datetimes respectively
}
void QgsDateTimeStatisticalSummary::reset()
{
mCount = 0;
mValues.clear();
mCountMissing = 0;
mMin = QDateTime();
mMax = QDateTime();
}
void QgsDateTimeStatisticalSummary::calculate( const QVariantList& values )
{
reset();
Q_FOREACH ( const QVariant& variant, values )
{
if ( variant.type() == QVariant::DateTime )
{
testDateTime( variant.toDateTime() );
}
else if ( variant.type() == QVariant::Date )
{
QDate date = variant.toDate();
testDateTime( date.isValid() ? QDateTime( date, QTime( 0, 0, 0 ) )
: QDateTime() );
}
// QTime?
}
}
// Folds a single datetime into the running statistics. Invalid datetimes are
// counted (both in the total and as missing) and may be stored as a distinct
// value, but never influence the minimum or maximum.
void QgsDateTimeStatisticalSummary::testDateTime( const QDateTime& dateTime )
{
  const bool isMissing = !dateTime.isValid();

  // the counts are always maintained, regardless of the requested statistics
  ++mCount;
  if ( isMissing )
    ++mCountMissing;

  if ( mStatistics & CountDistinct )
    mValues.insert( dateTime );

  // only valid datetimes can become the new minimum or maximum
  if ( isMissing )
    return;

  // the range is derived from the minimum and maximum, so requesting it
  // requires both to be tracked
  if ( ( mStatistics & Min ) || ( mStatistics & Range ) )
    mMin = mMin.isValid() ? qMin( mMin, dateTime ) : dateTime;

  if ( ( mStatistics & Max ) || ( mStatistics & Range ) )
    mMax = mMax.isValid() ? qMax( mMax, dateTime ) : dateTime;
}
// Returns the calculated value of a single statistic wrapped in a variant.
// The combined All flag has no value of its own and yields 0.
QVariant QgsDateTimeStatisticalSummary::statistic( QgsDateTimeStatisticalSummary::Statistic stat ) const
{
  switch ( stat )
  {
    case Count:
      return count();

    case CountDistinct:
      return countDistinct();

    case CountMissing:
      return countMissing();

    case Min:
      return min();

    case Max:
      return max();

    case Range:
      // the interval type has no implicit QVariant conversion
      return QVariant::fromValue( range() );

    case All:
      break;
  }

  return 0;
}
// Returns the translated, user facing name of a statistic. The combined All
// flag has no name and yields an empty string.
QString QgsDateTimeStatisticalSummary::displayName( QgsDateTimeStatisticalSummary::Statistic statistic )
{
  QString label;

  switch ( statistic )
  {
    case Count:
      label = QObject::tr( "Count" );
      break;

    case CountDistinct:
      label = QObject::tr( "Count (distinct)" );
      break;

    case CountMissing:
      label = QObject::tr( "Count (missing)" );
      break;

    case Min:
      label = QObject::tr( "Minimum (earliest)" );
      break;

    case Max:
      label = QObject::tr( "Maximum (latest)" );
      break;

    case Range:
      label = QObject::tr( "Range (interval)" );
      break;

    case All:
      // left empty on purpose
      break;
  }

  return label;
}

View File

@ -0,0 +1,142 @@
/***************************************************************************
qgsdatetimestatisticalsummary.h
-------------------------------
Date : May 2016
Copyright : (C) 2016 by Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#ifndef QGSDATETIMESTATISTICALSUMMARY_H
#define QGSDATETIMESTATISTICALSUMMARY_H
#include "qgis.h"
#include "qgsinterval.h"
#include <QSet>
#include <QDateTime>
#include <QVariantList>
/***************************************************************************
* This class is considered CRITICAL and any change MUST be accompanied with
* full unit tests in test_qgsdatetimestatisticalsummary.py.
* See details in QEP #17
****************************************************************************/
/** \ingroup core
* \class QgsDateTimeStatisticalSummary
* \brief Calculator for summary statistics and aggregates for a list of datetimes.
*
* Statistics are calculated by calling @link calculate @endlink and passing a list of datetimes. The
* individual statistics can then be retrieved using the associated methods. Note that not all statistics
* are calculated by default. Statistics which require slower computations are only calculated by
* specifying the statistic in the constructor or via @link setStatistics @endlink.
*
* \note Added in version 2.16
*/
class CORE_EXPORT QgsDateTimeStatisticalSummary
{
public:
//! Enumeration of flags that specify statistics to be calculated
enum Statistic
{
Count = 1, //!< Count
CountDistinct = 2, //!< Number of distinct datetime values
CountMissing = 4, //!< Number of missing (null) values
Min = 8, //!< Minimum (earliest) datetime value
Max = 16, //!< Maximum (latest) datetime value
Range = 32, //!< Interval between earliest and latest datetime value
All = Count | CountDistinct | CountMissing | Min | Max | Range, //!< All statistics
};
Q_DECLARE_FLAGS( Statistics, Statistic )
/** Constructor for QgsDateTimeStatisticalSummary
* @param stats flags for statistics to calculate
*/
QgsDateTimeStatisticalSummary( const QgsDateTimeStatisticalSummary::Statistics& stats = All );
/** Returns flags which specify which statistics will be calculated. Some statistics
* are always calculated (eg count and the number of missing values).
* @see setStatistics
*/
Statistics statistics() const { return mStatistics; }
/** Sets flags which specify which statistics will be calculated. Some statistics
* are always calculated (eg count and the number of missing values). Changing the
* flags does not alter values which have already been calculated.
* @param stats flags for statistics to calculate
* @see statistics
*/
void setStatistics( const Statistics& stats ) { mStatistics = stats; }
/** Resets the calculated values
*/
void reset();
/** Calculates summary statistics for a list of variants. Any previously calculated
* values are reset first. Only datetime and date variants are considered; date values
* are treated as datetimes at 00:00:00 on that date. Variants of any other type are
* ignored.
* @param values list of variants
*/
void calculate( const QVariantList& values );
/** Returns the value of a specified statistic
* @param stat statistic to return
* @returns calculated value of statistic. For the All flag a value of 0 is returned.
*/
QVariant statistic( Statistic stat ) const;
/** Returns the calculated count of values. Missing (null) values are included in the count.
*/
int count() const { return mCount; }
/** Returns the number of distinct datetime values. A missing (null) value is counted
* as one of the distinct values.
*/
int countDistinct() const { return mValues.count(); }
/** Returns the set of distinct datetime values. The set is only populated when the
* CountDistinct statistic has been requested.
*/
QSet< QDateTime > distinctValues() const { return mValues; }
/** Returns the number of missing (null) datetime values.
*/
int countMissing() const { return mCountMissing; }
/** Returns the minimum (earliest) non-null datetime value. Only calculated when the
* Min or Range statistics have been requested.
*/
QDateTime min() const { return mMin; }
/** Returns the maximum (latest) non-null datetime value. Only calculated when the
* Max or Range statistics have been requested.
*/
QDateTime max() const { return mMax; }
/** Returns the range (interval between earliest and latest non-null datetime values).
*/
QgsInterval range() const { return mMax - mMin; }
/** Returns the friendly display name for a statistic
* @param statistic statistic to return name for
* @returns translated name of the statistic, or an empty string for the All flag
*/
static QString displayName( Statistic statistic );
private:
//! Flags for the statistics which have been requested
Statistics mStatistics;
//! Number of datetime values processed, including missing ones
int mCount;
//! Distinct datetime values encountered (only filled when CountDistinct is requested)
QSet< QDateTime > mValues;
//! Number of invalid (null) datetime values processed
int mCountMissing;
//! Earliest valid datetime encountered, invalid if none has been recorded
QDateTime mMin;
//! Latest valid datetime encountered, invalid if none has been recorded
QDateTime mMax;
//! Updates the running statistics with a single datetime value
void testDateTime( const QDateTime& dateTime );
};
// Provides the global operator|() overloads so that Statistic values can be combined into a Statistics flag set
Q_DECLARE_OPERATORS_FOR_FLAGS( QgsDateTimeStatisticalSummary::Statistics )
#endif // QGSDATETIMESTATISTICALSUMMARY_H

View File

@ -28,6 +28,7 @@ ADD_PYTHON_TEST(PyQgsComposerPolyline test_qgscomposerpolyline.py)
ADD_PYTHON_TEST(PyQgsComposition test_qgscomposition.py)
ADD_PYTHON_TEST(PyQgsConditionalStyle test_qgsconditionalstyle.py)
ADD_PYTHON_TEST(PyQgsCoordinateTransform test_qgscoordinatetransform.py)
ADD_PYTHON_TEST(PyQgsDateTimeStatisticalSummary test_qgsdatetimestatisticalsummary.py)
ADD_PYTHON_TEST(PyQgsDelimitedTextProvider test_qgsdelimitedtextprovider.py)
ADD_PYTHON_TEST(PyQgsDistanceArea test_qgsdistancearea.py)
ADD_PYTHON_TEST(PyQgsEditWidgets test_qgseditwidgets.py)

View File

@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-
"""QGIS Unit tests for QgsDateTimeStatisticalSummary.
.. note:: This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
"""
__author__ = 'Nyall Dawson'
__date__ = '07/05/2016'
__copyright__ = 'Copyright 2016, The QGIS Project'
# This will get replaced with a git SHA1 when you do a git archive
__revision__ = '$Format:%H$'
import qgis # NOQA
from qgis.core import (QgsDateTimeStatisticalSummary,
QgsInterval
)
from qgis.PyQt.QtCore import QDateTime, QDate, QTime
from qgis.testing import unittest
class PyQgsDateTimeStatisticalSummary(unittest.TestCase):
    """Unit tests for QgsDateTimeStatisticalSummary."""

    @staticmethod
    def _dt(year, month, day, hour, minute, second):
        """Shorthand for building a QDateTime from its individual components."""
        return QDateTime(QDate(year, month, day), QTime(hour, minute, second))

    @classmethod
    def _sampleValues(cls):
        """Returns a fresh list of nine datetimes: six distinct values, two of them null."""
        return [cls._dt(2015, 3, 4, 11, 10, 54),
                cls._dt(2011, 1, 5, 15, 3, 1),
                cls._dt(2015, 3, 4, 11, 10, 54),
                cls._dt(2015, 3, 4, 11, 10, 54),
                cls._dt(2019, 12, 28, 23, 10, 1),
                QDateTime(),
                cls._dt(1998, 1, 2, 1, 10, 54),
                QDateTime(),
                cls._dt(2011, 1, 5, 11, 10, 54)]

    def testStats(self):
        """ test all statistics calculated in a single pass """
        earliest = self._dt(1998, 1, 2, 1, 10, 54)
        latest = self._dt(2019, 12, 28, 23, 10, 1)

        summary = QgsDateTimeStatisticalSummary()
        self.assertEqual(summary.statistics(), QgsDateTimeStatisticalSummary.All)
        summary.calculate(self._sampleValues())

        self.assertEqual(summary.count(), 9)
        self.assertEqual(summary.countDistinct(), 6)
        expected_distinct = set([self._dt(2015, 3, 4, 11, 10, 54),
                                 self._dt(2011, 1, 5, 15, 3, 1),
                                 latest,
                                 QDateTime(),
                                 earliest,
                                 self._dt(2011, 1, 5, 11, 10, 54)])
        self.assertEqual(set(summary.distinctValues()), expected_distinct)
        self.assertEqual(summary.countMissing(), 2)
        self.assertEqual(summary.min(), earliest)
        self.assertEqual(summary.max(), latest)
        self.assertEqual(summary.range(), QgsInterval(693871147))

    def testIndividualStats(self):
        # tests calculation of statistics one at a time, to make sure statistic calculations are not
        # dependent on each other
        cases = [
            (QgsDateTimeStatisticalSummary.Count, 9),
            (QgsDateTimeStatisticalSummary.CountDistinct, 6),
            (QgsDateTimeStatisticalSummary.CountMissing, 2),
            (QgsDateTimeStatisticalSummary.Min, self._dt(1998, 1, 2, 1, 10, 54)),
            (QgsDateTimeStatisticalSummary.Max, self._dt(2019, 12, 28, 23, 10, 1)),
            (QgsDateTimeStatisticalSummary.Range, QgsInterval(693871147)),
        ]

        # one summary object is deliberately reused across all statistics
        reused = QgsDateTimeStatisticalSummary()
        for stat, expected in cases:
            # test constructor
            constructed = QgsDateTimeStatisticalSummary(stat)
            self.assertEqual(constructed.statistics(), stat)

            reused.setStatistics(stat)
            self.assertEqual(reused.statistics(), stat)
            reused.calculate(self._sampleValues())
            self.assertEqual(reused.statistic(stat), expected)

            # display name
            self.assertTrue(len(QgsDateTimeStatisticalSummary.displayName(stat)) > 0)

    def testVariantStats(self):
        """ test with non-datetime values """
        earliest = self._dt(1998, 1, 2, 1, 10, 54)
        latest = self._dt(2019, 12, 28, 23, 10, 1)

        summary = QgsDateTimeStatisticalSummary()
        self.assertEqual(summary.statistics(), QgsDateTimeStatisticalSummary.All)
        # the string and the integer must not contribute to any statistic
        summary.calculate([self._dt(2015, 3, 4, 11, 10, 54),
                           'asdasd',
                           self._dt(2015, 3, 4, 11, 10, 54),
                           34,
                           latest,
                           QDateTime(),
                           earliest,
                           QDateTime(),
                           self._dt(2011, 1, 5, 11, 10, 54)])

        self.assertEqual(summary.count(), 7)
        expected_distinct = set([self._dt(2015, 3, 4, 11, 10, 54),
                                 latest,
                                 earliest,
                                 self._dt(2011, 1, 5, 11, 10, 54),
                                 QDateTime()])
        self.assertEqual(set(summary.distinctValues()), expected_distinct)
        self.assertEqual(summary.countMissing(), 2)
        self.assertEqual(summary.min(), earliest)
        self.assertEqual(summary.max(), latest)
        self.assertEqual(summary.range(), QgsInterval(693871147))

    def testDates(self):
        """ test with date values """
        first_day = QDate(1998, 1, 2)
        last_day = QDate(2019, 12, 28)

        summary = QgsDateTimeStatisticalSummary()
        self.assertEqual(summary.statistics(), QgsDateTimeStatisticalSummary.All)
        summary.calculate([QDate(2015, 3, 4),
                           QDate(2015, 3, 4),
                           last_day,
                           QDate(),
                           first_day,
                           QDate(),
                           QDate(2011, 1, 5)])

        self.assertEqual(summary.count(), 7)
        expected_distinct = set([QDateTime(QDate(2015, 3, 4), QTime()),
                                 QDateTime(last_day, QTime()),
                                 QDateTime(first_day, QTime()),
                                 QDateTime(),
                                 QDateTime(QDate(2011, 1, 5), QTime())])
        self.assertEqual(set(summary.distinctValues()), expected_distinct)
        self.assertEqual(summary.countMissing(), 2)
        self.assertEqual(summary.min(), QDateTime(first_day, QTime()))
        self.assertEqual(summary.max(), QDateTime(last_day, QTime()))
        self.assertEqual(summary.range(), QgsInterval(693792000))
# Run the test suite when this file is executed directly
if __name__ == '__main__':
    unittest.main()