Port the "Statistics by categories" algorithm to the new Processing API

Improvements:
- keep the category field's original name and type in the output table
- add a unit test for the algorithm
This commit is contained in:
Nyall Dawson 2017-08-05 07:01:06 +10:00
parent adda744576
commit b93be39c24
5 changed files with 153 additions and 32 deletions

View File

@ -132,6 +132,7 @@ from .SnapGeometries import SnapGeometriesToLayer
from .SpatialiteExecuteSQL import SpatialiteExecuteSQL
from .SpatialIndex import SpatialIndex
from .SplitWithLines import SplitWithLines
from .StatisticsByCategories import StatisticsByCategories
from .SumLines import SumLines
from .SymmetricalDifference import SymmetricalDifference
from .TextToFloat import TextToFloat
@ -149,7 +150,6 @@ from .ZonalStatistics import ZonalStatistics
# from .SelectByLocation import SelectByLocation
# from .SpatialJoin import SpatialJoin
# from .GeometryConvert import GeometryConvert
# from .StatisticsByCategories import StatisticsByCategories
# from .FieldsCalculator import FieldsCalculator
# from .FieldPyculator import FieldsPyculator
# from .PointsDisplacement import PointsDisplacement
@ -190,7 +190,7 @@ class QGISAlgorithmProvider(QgsProcessingProvider):
# SpatialJoin(),
# GeometryConvert(), FieldsCalculator(),
# FieldsPyculator(),
# StatisticsByCategories(),
#
# RasterLayerStatistics(), PointsDisplacement(),
# PointsFromPolygons(),
# PointsFromLines(),
@ -298,6 +298,7 @@ class QGISAlgorithmProvider(QgsProcessingProvider):
SpatialiteExecuteSQL(),
SpatialIndex(),
SplitWithLines(),
StatisticsByCategories(),
SumLines(),
SymmetricalDifference(),
TextToFloat(),

View File

@ -26,19 +26,24 @@ __copyright__ = '(C) 2012, Victor Olaya'
__revision__ = '$Format:%H$'
from qgis.core import (QgsApplication,
QgsFeatureSink,
from qgis.core import (QgsProcessingParameterFeatureSource,
QgsStatisticalSummary,
QgsProcessingUtils)
from processing.core.outputs import OutputTable
QgsFeatureRequest,
QgsProcessingParameterField,
QgsProcessingParameterFeatureSink,
QgsFields,
QgsField,
QgsWkbTypes,
QgsCoordinateReferenceSystem,
QgsFeature,
QgsFeatureSink)
from qgis.PyQt.QtCore import QVariant
from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm
from processing.core.parameters import ParameterVector
from processing.core.parameters import ParameterTableField
class StatisticsByCategories(QgisAlgorithm):
INPUT_LAYER = 'INPUT_LAYER'
INPUT = 'INPUT'
VALUES_FIELD_NAME = 'VALUES_FIELD_NAME'
CATEGORIES_FIELD_NAME = 'CATEGORIES_FIELD_NAME'
OUTPUT = 'OUTPUT'
@ -50,16 +55,16 @@ class StatisticsByCategories(QgisAlgorithm):
super().__init__()
def initAlgorithm(self, config=None):
self.addParameter(ParameterVector(self.INPUT_LAYER,
self.tr('Input vector layer')))
self.addParameter(ParameterTableField(self.VALUES_FIELD_NAME,
self.tr('Field to calculate statistics on'),
self.INPUT_LAYER, ParameterTableField.DATA_TYPE_NUMBER))
self.addParameter(ParameterTableField(self.CATEGORIES_FIELD_NAME,
self.tr('Field with categories'),
self.INPUT_LAYER, ParameterTableField.DATA_TYPE_ANY))
self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT,
self.tr('Input vector layer')))
self.addParameter(QgsProcessingParameterField(self.VALUES_FIELD_NAME,
self.tr('Field to calculate statistics on'),
parentLayerParameterName=self.INPUT, type=QgsProcessingParameterField.Numeric))
self.addParameter(QgsProcessingParameterField(self.CATEGORIES_FIELD_NAME,
self.tr('Field with categories'),
parentLayerParameterName=self.INPUT, type=QgsProcessingParameterField.Any))
self.addOutput(OutputTable(self.OUTPUT, self.tr('Statistics by category')))
self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT, self.tr('Statistics by category')))
def name(self):
return 'statisticsbycategories'
@ -68,36 +73,51 @@ class StatisticsByCategories(QgisAlgorithm):
return self.tr('Statistics by categories')
def processAlgorithm(self, parameters, context, feedback):
layer = QgsProcessingUtils.mapLayerFromString(self.getParameterValue(self.INPUT_LAYER), context)
valuesFieldName = self.getParameterValue(self.VALUES_FIELD_NAME)
categoriesFieldName = self.getParameterValue(self.CATEGORIES_FIELD_NAME)
source = self.parameterAsSource(parameters, self.INPUT, context)
value_field_name = self.parameterAsString(parameters, self.VALUES_FIELD_NAME, context)
category_field_name = self.parameterAsString(parameters, self.CATEGORIES_FIELD_NAME, context)
output = self.getOutputFromName(self.OUTPUT)
valuesField = layer.fields().lookupField(valuesFieldName)
categoriesField = layer.fields().lookupField(categoriesFieldName)
value_field_index = source.fields().lookupField(value_field_name)
category_field_index = source.fields().lookupField(category_field_name)
features = QgsProcessingUtils.getFeatures(layer, context)
total = 100.0 / layer.featureCount() if layer.featureCount() else 0
features = source.getFeatures(QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry))
total = 100.0 / source.featureCount() if source.featureCount() else 0
values = {}
for current, feat in enumerate(features):
if feedback.isCanceled():
break
feedback.setProgress(int(current * total))
attrs = feat.attributes()
try:
value = float(attrs[valuesField])
cat = str(attrs[categoriesField])
value = float(attrs[value_field_index])
cat = attrs[category_field_index]
if cat not in values:
values[cat] = []
values[cat].append(value)
except:
pass
fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
writer = output.getTableWriter(fields)
fields = QgsFields()
fields.append(source.fields().at(category_field_index))
fields.append(QgsField('min', QVariant.Double))
fields.append(QgsField('max', QVariant.Double))
fields.append(QgsField('mean', QVariant.Double))
fields.append(QgsField('stddev', QVariant.Double))
fields.append(QgsField('sum', QVariant.Double))
fields.append(QgsField('count', QVariant.Int))
(sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
fields, QgsWkbTypes.NoGeometry, QgsCoordinateReferenceSystem())
stat = QgsStatisticalSummary(QgsStatisticalSummary.Min | QgsStatisticalSummary.Max |
QgsStatisticalSummary.Mean | QgsStatisticalSummary.StDevSample |
QgsStatisticalSummary.Sum | QgsStatisticalSummary.Count)
for (cat, v) in list(values.items()):
stat.calculate(v)
record = [cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()]
writer.addRecord(record)
f = QgsFeature()
f.setAttributes([cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()])
sink.addFeature(f, QgsFeatureSink.FastInsert)
return {self.OUTPUT: dest_id}

View File

@ -0,0 +1,45 @@
<GMLFeatureClassList>
<GMLFeatureClass>
<Name>stats_by_category</Name>
<ElementPath>stats_by_category</ElementPath>
<GeometryType>100</GeometryType>
<DatasetSpecificInfo>
<FeatureCount>3</FeatureCount>
</DatasetSpecificInfo>
<PropertyDefn>
<Name>id2</Name>
<ElementPath>id2</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>min</Name>
<ElementPath>min</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>max</Name>
<ElementPath>max</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>mean</Name>
<ElementPath>mean</ElementPath>
<Type>Real</Type>
</PropertyDefn>
<PropertyDefn>
<Name>stddev</Name>
<ElementPath>stddev</ElementPath>
<Type>Real</Type>
</PropertyDefn>
<PropertyDefn>
<Name>sum</Name>
<ElementPath>sum</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
<PropertyDefn>
<Name>count</Name>
<ElementPath>count</ElementPath>
<Type>Integer</Type>
</PropertyDefn>
</GMLFeatureClass>
</GMLFeatureClassList>

View File

@ -0,0 +1,42 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation=""
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy><gml:null>missing</gml:null></gml:boundedBy>
<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.0">
<ogr:id2>2</ogr:id2>
<ogr:min>1</ogr:min>
<ogr:max>4</ogr:max>
<ogr:mean>2.5</ogr:mean>
<ogr:stddev>2.12132034355964</ogr:stddev>
<ogr:sum>5</ogr:sum>
<ogr:count>2</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.1">
<ogr:id2>1</ogr:id2>
<ogr:min>2</ogr:min>
<ogr:max>5</ogr:max>
<ogr:mean>3.5</ogr:mean>
<ogr:stddev>2.12132034355964</ogr:stddev>
<ogr:sum>7</ogr:sum>
<ogr:count>2</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
<gml:featureMember>
<ogr:stats_by_category fid="stats_by_category.2">
<ogr:id2>0</ogr:id2>
<ogr:min>3</ogr:min>
<ogr:max>9</ogr:max>
<ogr:mean>6.6</ogr:mean>
<ogr:stddev>2.30217288664427</ogr:stddev>
<ogr:sum>33</ogr:sum>
<ogr:count>5</ogr:count>
</ogr:stats_by_category>
</gml:featureMember>
</ogr:FeatureCollection>

View File

@ -2489,6 +2489,19 @@ tests:
name: expected/single_to_multi.gml
type: vector
- algorithm: qgis:statisticsbycategories
name: stats by category
params:
VALUES_FIELD_NAME: id
CATEGORIES_FIELD_NAME: id2
INPUT:
name: points.gml
type: vector
results:
OUTPUT:
name: expected/stats_by_category.gml
type: vector
# - algorithm: qgis:zonalstatistics
# name: simple zonal statistics
# params: