From 4e78e034a1fdea4bc8bede8a872c72be32cf9684 Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Wed, 6 Sep 2017 16:32:01 +1000 Subject: [PATCH 1/9] [FEATURE] Improve Stats by Categories algorithm - allow non spatial inputs - allow calculation of stats on any field type, with specific string and datetime stats calculated when field type matches - output a full set of stats for numeric fields (including median , quartiles, etc) - also calculate stats for 'null' category --- .../algs/qgis/StatisticsByCategories.py | 198 +++++++++++++++--- 1 file changed, 169 insertions(+), 29 deletions(-) mode change 100644 => 100755 python/plugins/processing/algs/qgis/StatisticsByCategories.py diff --git a/python/plugins/processing/algs/qgis/StatisticsByCategories.py b/python/plugins/processing/algs/qgis/StatisticsByCategories.py old mode 100644 new mode 100755 index dfe0097f06f..9008ea406e7 --- a/python/plugins/processing/algs/qgis/StatisticsByCategories.py +++ b/python/plugins/processing/algs/qgis/StatisticsByCategories.py @@ -28,6 +28,8 @@ __revision__ = '$Format:%H$' from qgis.core import (QgsProcessingParameterFeatureSource, QgsStatisticalSummary, + QgsDateTimeStatisticalSummary, + QgsStringStatisticalSummary, QgsFeatureRequest, QgsProcessingParameterField, QgsProcessingParameterFeatureSink, @@ -36,13 +38,16 @@ from qgis.core import (QgsProcessingParameterFeatureSource, QgsWkbTypes, QgsCoordinateReferenceSystem, QgsFeature, - QgsFeatureSink) + QgsFeatureSink, + QgsProcessing, + NULL) from qgis.PyQt.QtCore import QVariant from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm +from collections import defaultdict + class StatisticsByCategories(QgisAlgorithm): - INPUT = 'INPUT' VALUES_FIELD_NAME = 'VALUES_FIELD_NAME' CATEGORIES_FIELD_NAME = 'CATEGORIES_FIELD_NAME' @@ -56,13 +61,15 @@ class StatisticsByCategories(QgisAlgorithm): def initAlgorithm(self, config=None): self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT, - self.tr('Input vector layer'))) + self.tr('Input vector layer'), + types=[QgsProcessing.TypeVector])) self.addParameter(QgsProcessingParameterField(self.VALUES_FIELD_NAME, self.tr('Field to calculate statistics on'), - parentLayerParameterName=self.INPUT, type=QgsProcessingParameterField.Numeric)) + parentLayerParameterName=self.INPUT)) self.addParameter(QgsProcessingParameterField(self.CATEGORIES_FIELD_NAME, self.tr('Field with categories'), - parentLayerParameterName=self.INPUT, type=QgsProcessingParameterField.Any)) + parentLayerParameterName=self.INPUT, + type=QgsProcessingParameterField.Any)) self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT, self.tr('Statistics by category'))) @@ -78,11 +85,63 @@ class StatisticsByCategories(QgisAlgorithm): category_field_name = self.parameterAsString(parameters, self.CATEGORIES_FIELD_NAME, context) value_field_index = source.fields().lookupField(value_field_name) + value_field = source.fields().at(value_field_index) category_field_index = source.fields().lookupField(category_field_name) - features = source.getFeatures(QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry)) - total = 100.0 / source.featureCount() if source.featureCount() else 0 - values = {} + # generate output fields + fields = QgsFields() + fields.append(source.fields().at(category_field_index)) + + def addField(name): + """ + Adds a field to the output, keeping the same data type as the value_field + """ + field = value_field + field.setName(name) + fields.append(field) + + if value_field.isNumeric(): + field_type = 'numeric' + fields.append(QgsField('count', QVariant.Int)) + fields.append(QgsField('unique', QVariant.Int)) + fields.append(QgsField('min', QVariant.Double)) + fields.append(QgsField('max', QVariant.Double)) + fields.append(QgsField('range', QVariant.Double)) + fields.append(QgsField('sum', QVariant.Double)) + fields.append(QgsField('mean', QVariant.Double)) + fields.append(QgsField('median', QVariant.Double)) + fields.append(QgsField('stddev', QVariant.Double)) + fields.append(QgsField('minority', QVariant.Double)) + fields.append(QgsField('majority', QVariant.Double)) + fields.append(QgsField('q1', QVariant.Double)) + fields.append(QgsField('q3', QVariant.Double)) + fields.append(QgsField('iqr', QVariant.Double)) + elif value_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime): + field_type = 'datetime' + fields.append(QgsField('count', QVariant.Int)) + fields.append(QgsField('unique', QVariant.Int)) + fields.append(QgsField('empty', QVariant.Int)) + fields.append(QgsField('filled', QVariant.Int)) + # keep same data type for these fields + addField('min') + addField('max') + else: + field_type = 'string' + fields.append(QgsField('count', QVariant.Int)) + fields.append(QgsField('unique', QVariant.Int)) + fields.append(QgsField('empty', QVariant.Int)) + fields.append(QgsField('filled', QVariant.Int)) + # keep same data type for these fields + addField('min') + addField('max') + fields.append(QgsField('min_length', QVariant.Int)) + fields.append(QgsField('max_length', QVariant.Int)) + fields.append(QgsField('mean_length', QVariant.Double)) + + features = source.getFeatures(QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes( + [value_field_index, category_field_index])) + total = 50.0 / source.featureCount() if source.featureCount() else 0 + values = defaultdict(list) for current, feat in enumerate(features): if feedback.isCanceled(): break @@ -90,34 +149,115 @@ class StatisticsByCategories(QgisAlgorithm): feedback.setProgress(int(current * total)) attrs = feat.attributes() try: - value = float(attrs[value_field_index]) + if field_type == 'numeric': + if attrs[value_field_index] == NULL: + continue + else: + value = float(attrs[value_field_index]) + elif attrs[value_field_index] == NULL: + value = NULL + elif field_type == 'string': + value = str(attrs[value_field_index]) + else: + value = attrs[value_field_index] cat = attrs[category_field_index] - if cat not in values: - values[cat] = [] values[cat].append(value) except: pass - fields = QgsFields() - fields.append(source.fields().at(category_field_index)) - fields.append(QgsField('min', QVariant.Double)) - fields.append(QgsField('max', QVariant.Double)) - fields.append(QgsField('mean', QVariant.Double)) - fields.append(QgsField('stddev', QVariant.Double)) - fields.append(QgsField('sum', QVariant.Double)) - fields.append(QgsField('count', QVariant.Int)) - (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context, fields, QgsWkbTypes.NoGeometry, QgsCoordinateReferenceSystem()) - stat = QgsStatisticalSummary(QgsStatisticalSummary.Min | QgsStatisticalSummary.Max | - QgsStatisticalSummary.Mean | QgsStatisticalSummary.StDevSample | - QgsStatisticalSummary.Sum | QgsStatisticalSummary.Count) - - for (cat, v) in list(values.items()): - stat.calculate(v) - f = QgsFeature() - f.setAttributes([cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()]) - sink.addFeature(f, QgsFeatureSink.FastInsert) + if field_type == 'numeric': + self.calcNumericStats(values, sink, feedback) + elif field_type == 'datetime': + self.calcDateTimeStats(values, sink, feedback) + else: + self.calcStringStats(values, sink, feedback) return {self.OUTPUT: dest_id} + + def calcNumericStats(self, values, sink, feedback): + stat = QgsStatisticalSummary() + + total = 50.0 / len(values) if values else 0 + current = 0 + for cat, v in values.items(): + if feedback.isCanceled(): + break + + feedback.setProgress(int(current * total) + 50) + + stat.calculate(v) + f = QgsFeature() + f.setAttributes([cat, + stat.count(), + stat.variety(), + stat.min(), + stat.max(), + stat.range(), + stat.sum(), + stat.mean(), + stat.median(), + stat.stDev(), + stat.minority(), + stat.majority(), + stat.firstQuartile(), + stat.thirdQuartile(), + stat.interQuartileRange()]) + + sink.addFeature(f, QgsFeatureSink.FastInsert) + current += 1 + + def calcDateTimeStats(self, values, sink, feedback): + stat = QgsDateTimeStatisticalSummary() + + total = 50.0 / len(values) if values else 0 + current = 0 + for cat, v in values.items(): + if feedback.isCanceled(): + break + + feedback.setProgress(int(current * total) + 50) + + stat.calculate(v) + f = QgsFeature() + f.setAttributes([cat, + stat.count(), + stat.countDistinct(), + stat.countMissing(), + stat.count() - stat.countMissing(), + stat.statistic(QgsDateTimeStatisticalSummary.Min), + stat.statistic(QgsDateTimeStatisticalSummary.Max) + ]) + + sink.addFeature(f, QgsFeatureSink.FastInsert) + current += 1 + + def calcStringStats(self, values, sink, feedback): + stat = QgsStringStatisticalSummary() + + total = 50.0 / len(values) if values else 0 + current = 0 + for cat, v in values.items(): + if feedback.isCanceled(): + break + + feedback.setProgress(int(current * total) + 50) + + stat.calculate(v) + f = QgsFeature() + f.setAttributes([cat, + stat.count(), + stat.countDistinct(), + stat.countMissing(), + stat.count() - stat.countMissing(), + stat.min(), + stat.max(), + stat.minLength(), + stat.maxLength(), + stat.meanLength() + ]) + + sink.addFeature(f, QgsFeatureSink.FastInsert) + current += 1 From 30866190e984290583a9690f443db7a26e5f2bd5 Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Wed, 6 Sep 2017 16:45:48 +1000 Subject: [PATCH 2/9] [FEATURE] Allow multiple category fields in 'stats by category' --- .../algs/qgis/StatisticsByCategories.py | 89 ++++++++++--------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/python/plugins/processing/algs/qgis/StatisticsByCategories.py b/python/plugins/processing/algs/qgis/StatisticsByCategories.py index 9008ea406e7..249f3543d68 100755 --- a/python/plugins/processing/algs/qgis/StatisticsByCategories.py +++ b/python/plugins/processing/algs/qgis/StatisticsByCategories.py @@ -67,9 +67,9 @@ class StatisticsByCategories(QgisAlgorithm): self.tr('Field to calculate statistics on'), parentLayerParameterName=self.INPUT)) self.addParameter(QgsProcessingParameterField(self.CATEGORIES_FIELD_NAME, - self.tr('Field with categories'), + self.tr('Field(s) with categories'), parentLayerParameterName=self.INPUT, - type=QgsProcessingParameterField.Any)) + type=QgsProcessingParameterField.Any, allowMultiple=True)) self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT, self.tr('Statistics by category'))) @@ -82,15 +82,16 @@ class StatisticsByCategories(QgisAlgorithm): def processAlgorithm(self, parameters, context, feedback): source = self.parameterAsSource(parameters, self.INPUT, context) value_field_name = self.parameterAsString(parameters, self.VALUES_FIELD_NAME, context) - category_field_name = self.parameterAsString(parameters, self.CATEGORIES_FIELD_NAME, context) + category_field_names = self.parameterAsFields(parameters, self.CATEGORIES_FIELD_NAME, context) value_field_index = source.fields().lookupField(value_field_name) value_field = source.fields().at(value_field_index) - category_field_index = source.fields().lookupField(category_field_name) + category_field_indexes = [source.fields().lookupField(n) for n in category_field_names] # generate output fields fields = QgsFields() - fields.append(source.fields().at(category_field_index)) + for c in category_field_indexes: + fields.append(source.fields().at(c)) def addField(name): """ @@ -138,8 +139,11 @@ class StatisticsByCategories(QgisAlgorithm): fields.append(QgsField('max_length', QVariant.Int)) fields.append(QgsField('mean_length', QVariant.Double)) - features = source.getFeatures(QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes( - [value_field_index, category_field_index])) + request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry) + attrs = [value_field_index] + attrs.extend(category_field_indexes) + request.setSubsetOfAttributes(attrs) + features = source.getFeatures(request) total = 50.0 / source.featureCount() if source.featureCount() else 0 values = defaultdict(list) for current, feat in enumerate(features): @@ -148,7 +152,7 @@ class StatisticsByCategories(QgisAlgorithm): feedback.setProgress(int(current * total)) attrs = feat.attributes() - try: + if True: if field_type == 'numeric': if attrs[value_field_index] == NULL: continue @@ -160,9 +164,9 @@ class StatisticsByCategories(QgisAlgorithm): value = str(attrs[value_field_index]) else: value = attrs[value_field_index] - cat = attrs[category_field_index] + cat = tuple([attrs[c] for c in category_field_indexes]) values[cat].append(value) - except: + else: pass (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context, @@ -190,21 +194,20 @@ class StatisticsByCategories(QgisAlgorithm): stat.calculate(v) f = QgsFeature() - f.setAttributes([cat, - stat.count(), - stat.variety(), - stat.min(), - stat.max(), - stat.range(), - stat.sum(), - stat.mean(), - stat.median(), - stat.stDev(), - stat.minority(), - stat.majority(), - stat.firstQuartile(), - stat.thirdQuartile(), - stat.interQuartileRange()]) + f.setAttributes(list(cat) + [stat.count(), + stat.variety(), + stat.min(), + stat.max(), + stat.range(), + stat.sum(), + stat.mean(), + stat.median(), + stat.stDev(), + stat.minority(), + stat.majority(), + stat.firstQuartile(), + stat.thirdQuartile(), + stat.interQuartileRange()]) sink.addFeature(f, QgsFeatureSink.FastInsert) current += 1 @@ -222,14 +225,13 @@ class StatisticsByCategories(QgisAlgorithm): stat.calculate(v) f = QgsFeature() - f.setAttributes([cat, - stat.count(), - stat.countDistinct(), - stat.countMissing(), - stat.count() - stat.countMissing(), - stat.statistic(QgsDateTimeStatisticalSummary.Min), - stat.statistic(QgsDateTimeStatisticalSummary.Max) - ]) + f.setAttributes(list(cat) + [stat.count(), + stat.countDistinct(), + stat.countMissing(), + stat.count() - stat.countMissing(), + stat.statistic(QgsDateTimeStatisticalSummary.Min), + stat.statistic(QgsDateTimeStatisticalSummary.Max) + ]) sink.addFeature(f, QgsFeatureSink.FastInsert) current += 1 @@ -247,17 +249,16 @@ class StatisticsByCategories(QgisAlgorithm): stat.calculate(v) f = QgsFeature() - f.setAttributes([cat, - stat.count(), - stat.countDistinct(), - stat.countMissing(), - stat.count() - stat.countMissing(), - stat.min(), - stat.max(), - stat.minLength(), - stat.maxLength(), - stat.meanLength() - ]) + f.setAttributes(list(cat) + [stat.count(), + stat.countDistinct(), + stat.countMissing(), + stat.count() - stat.countMissing(), + stat.min(), + stat.max(), + stat.minLength(), + stat.maxLength(), + stat.meanLength() + ]) sink.addFeature(f, QgsFeatureSink.FastInsert) current += 1 From 30c663eaef5b0a043df658e52a5d9fd3f68a15b2 Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Wed, 6 Sep 2017 16:52:51 +1000 Subject: [PATCH 3/9] Fix calculation with null strings --- .../plugins/processing/algs/qgis/StatisticsByCategories.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/plugins/processing/algs/qgis/StatisticsByCategories.py b/python/plugins/processing/algs/qgis/StatisticsByCategories.py index 249f3543d68..8636cb2117a 100755 --- a/python/plugins/processing/algs/qgis/StatisticsByCategories.py +++ b/python/plugins/processing/algs/qgis/StatisticsByCategories.py @@ -158,10 +158,13 @@ class StatisticsByCategories(QgisAlgorithm): continue else: value = float(attrs[value_field_index]) + elif field_type == 'string': + if attrs[value_field_index] == NULL: + value='' + else: + value = str(attrs[value_field_index]) elif attrs[value_field_index] == NULL: value = NULL - elif field_type == 'string': - value = str(attrs[value_field_index]) else: value = attrs[value_field_index] cat = tuple([attrs[c] for c in category_field_indexes]) From ea2e537cd791be2f2a114f59e739da5dfd415dc4 Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Wed, 6 Sep 2017 17:03:15 +1000 Subject: [PATCH 4/9] Make value field in 'Stats by category' optional If not set, only the feature counts for each category will be calculated --- .../algs/qgis/StatisticsByCategories.py | 76 +++++++++++++------ 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/python/plugins/processing/algs/qgis/StatisticsByCategories.py b/python/plugins/processing/algs/qgis/StatisticsByCategories.py index 8636cb2117a..9e04aa20c9c 100755 --- a/python/plugins/processing/algs/qgis/StatisticsByCategories.py +++ b/python/plugins/processing/algs/qgis/StatisticsByCategories.py @@ -64,8 +64,9 @@ class StatisticsByCategories(QgisAlgorithm): self.tr('Input vector layer'), types=[QgsProcessing.TypeVector])) self.addParameter(QgsProcessingParameterField(self.VALUES_FIELD_NAME, - self.tr('Field to calculate statistics on'), - parentLayerParameterName=self.INPUT)) + self.tr( + 'Field to calculate statistics on (if empty, only count is calculated)'), + parentLayerParameterName=self.INPUT, optional=True)) self.addParameter(QgsProcessingParameterField(self.CATEGORIES_FIELD_NAME, self.tr('Field(s) with categories'), parentLayerParameterName=self.INPUT, @@ -85,7 +86,10 @@ class StatisticsByCategories(QgisAlgorithm): category_field_names = self.parameterAsFields(parameters, self.CATEGORIES_FIELD_NAME, context) value_field_index = source.fields().lookupField(value_field_name) - value_field = source.fields().at(value_field_index) + if value_field_index >= 0: + value_field = source.fields().at(value_field_index) + else: + value_field = None category_field_indexes = [source.fields().lookupField(n) for n in category_field_names] # generate output fields @@ -101,7 +105,10 @@ class StatisticsByCategories(QgisAlgorithm): field.setName(name) fields.append(field) - if value_field.isNumeric(): + if value_field is None: + field_type = 'none' + fields.append(QgsField('count', QVariant.Int)) + elif value_field.isNumeric(): field_type = 'numeric' fields.append(QgsField('count', QVariant.Int)) fields.append(QgsField('unique', QVariant.Int)) @@ -140,42 +147,50 @@ class StatisticsByCategories(QgisAlgorithm): fields.append(QgsField('mean_length', QVariant.Double)) request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry) - attrs = [value_field_index] + if value_field is not None: + attrs = [value_field_index] + else: + attrs = [] attrs.extend(category_field_indexes) request.setSubsetOfAttributes(attrs) features = source.getFeatures(request) total = 50.0 / source.featureCount() if source.featureCount() else 0 - values = defaultdict(list) + if field_type == 'none': + values = defaultdict(lambda: 0) + else: + values = defaultdict(list) for current, feat in enumerate(features): if feedback.isCanceled(): break feedback.setProgress(int(current * total)) attrs = feat.attributes() - if True: - if field_type == 'numeric': - if attrs[value_field_index] == NULL: - continue - else: - value = float(attrs[value_field_index]) - elif field_type == 'string': - if attrs[value_field_index] == NULL: - value='' - else: - value = str(attrs[value_field_index]) - elif attrs[value_field_index] == NULL: - value = NULL + cat = tuple([attrs[c] for c in category_field_indexes]) + if field_type == 'none': + values[cat] += 1 + continue + if field_type == 'numeric': + if attrs[value_field_index] == NULL: + continue else: - value = attrs[value_field_index] - cat = tuple([attrs[c] for c in category_field_indexes]) - values[cat].append(value) + value = float(attrs[value_field_index]) + elif field_type == 'string': + if attrs[value_field_index] == NULL: + value = '' + else: + value = str(attrs[value_field_index]) + elif attrs[value_field_index] == NULL: + value = NULL else: - pass + value = attrs[value_field_index] + values[cat].append(value) (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context, fields, QgsWkbTypes.NoGeometry, QgsCoordinateReferenceSystem()) - if field_type == 'numeric': + if field_type == 'none': + self.saveCounts(values, sink, feedback) + elif field_type == 'numeric': self.calcNumericStats(values, sink, feedback) elif field_type == 'datetime': self.calcDateTimeStats(values, sink, feedback) @@ -184,6 +199,19 @@ class StatisticsByCategories(QgisAlgorithm): return {self.OUTPUT: dest_id} + def saveCounts(self, values, sink, feedback): + total = 50.0 / len(values) if values else 0 + current = 0 + for cat, v in values.items(): + if feedback.isCanceled(): + break + + feedback.setProgress(int(current * total) + 50) + f = QgsFeature() + f.setAttributes(list(cat) + [v]) + sink.addFeature(f, QgsFeatureSink.FastInsert) + current += 1 + def calcNumericStats(self, values, sink, feedback): stat = QgsStatisticalSummary() From 9a091651bc50f06647804c0c4b78ef3cff8a7b25 Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Wed, 6 Sep 2017 17:06:45 +1000 Subject: [PATCH 5/9] [FEATURE] Drop 'Frequency analysis' and 'Number of unique values in classes' algs The functionality of both these algorithms is available in 'stats by categories' --- .../algs/qgis/scripts/Frequency_analysis.py | 47 ----------------- .../Number_of_unique_values_in_classes.py | 52 ------------------- 2 files changed, 99 deletions(-) delete mode 100644 python/plugins/processing/algs/qgis/scripts/Frequency_analysis.py delete mode 100644 python/plugins/processing/algs/qgis/scripts/Number_of_unique_values_in_classes.py diff --git a/python/plugins/processing/algs/qgis/scripts/Frequency_analysis.py b/python/plugins/processing/algs/qgis/scripts/Frequency_analysis.py deleted file mode 100644 index e6c1885ee33..00000000000 --- a/python/plugins/processing/algs/qgis/scripts/Frequency_analysis.py +++ /dev/null @@ -1,47 +0,0 @@ -##Vector analysis=group - -#inputs - -##Input=source -##Fields=field multiple Input -##Frequency=sink table - - -from processing.tools.vector import TableWriter -from collections import defaultdict -from qgis.core import QgsProcessingUtils, QgsFields, QgsField, QgsWkbTypes, QgsFeature -from qgis.PyQt.QtCore import QVariant -from processing.core.GeoAlgorithmExecutionException import GeoAlgorithmExecutionException - -inputFields = Input.fields() -fieldIdxs = [] -out_fields = QgsFields() -for f in Fields: - idx = inputFields.indexFromName(f) - if idx == -1: - raise GeoAlgorithmExecutionException('Field not found:' + f) - fieldIdxs.append(idx) - out_fields.append(inputFields.at(idx)) - -out_fields.append(QgsField('FREQ', QVariant.Int)) - -(sink, Frequency) = self.parameterAsSink(parameters, 'Frequency', context, - out_fields) - -counts = {} -feats = Input.getFeatures() -nFeats = Input.featureCount() -counts = defaultdict(int) -for i, feat in enumerate(feats): - feedback.setProgress(int(100 * i / nFeats)) - if feedback.isCanceled(): - break - - attrs = feat.attributes() - clazz = tuple([attrs[i] for i in fieldIdxs]) - counts[clazz] += 1 - -for c in counts: - f = QgsFeature() - f.setAttributes(list(c) + [counts[c]]) - sink.addFeature(f) diff --git a/python/plugins/processing/algs/qgis/scripts/Number_of_unique_values_in_classes.py b/python/plugins/processing/algs/qgis/scripts/Number_of_unique_values_in_classes.py deleted file mode 100644 index 5335c66e3f9..00000000000 --- a/python/plugins/processing/algs/qgis/scripts/Number_of_unique_values_in_classes.py +++ /dev/null @@ -1,52 +0,0 @@ -##Vector analysis=group - -# inputs - - -##input=source -##class_field=field input -##value_field=field input -##N_unique_values=sink - - -from qgis.PyQt.QtCore import QVariant -from qgis.core import QgsFeature, QgsField, QgsProcessingUtils - -fields = input.fields() -fields.append(QgsField('UNIQ_COUNT', QVariant.Int)) - -(sink, N_unique_values) = self.parameterAsSink(parameters, 'N_unique_values', context, - fields, input.wkbType(), input.sourceCrs()) - - -class_field_index = input.fields().lookupField(class_field) -value_field_index = input.fields().lookupField(value_field) - -outFeat = QgsFeature() -classes = {} -feats = input.getFeatures() -nFeat = input.featureCount() -for n, inFeat in enumerate(feats): - if feedback.isCanceled(): - break - feedback.setProgress(int(100 * n / nFeat)) - attrs = inFeat.attributes() - clazz = attrs[class_field_index] - value = attrs[value_field_index] - if clazz not in classes: - classes[clazz] = [] - if value not in classes[clazz]: - classes[clazz].append(value) - -feats = input.getFeatures() -for n, inFeat in enumerate(feats): - if feedback.isCanceled(): - break - feedback.setProgress(int(100 * n / nFeat)) - inGeom = inFeat.geometry() - outFeat.setGeometry(inGeom) - attrs = inFeat.attributes() - clazz = attrs[class_field_index] - attrs.append(len(classes[clazz])) - outFeat.setAttributes(attrs) - sink.addFeature(outFeat) From 2612c3957843b455e0cf022f3c2b7b76e740e77b Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Fri, 8 Sep 2017 09:10:17 +1000 Subject: [PATCH 6/9] [processing] A non optional, multiple field input must have at least one selected field to be considered valid --- src/core/processing/qgsprocessingparameters.cpp | 3 +++ tests/src/core/testqgsprocessing.cpp | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/core/processing/qgsprocessingparameters.cpp b/src/core/processing/qgsprocessingparameters.cpp index 59561855195..ac1265060a6 100644 --- a/src/core/processing/qgsprocessingparameters.cpp +++ b/src/core/processing/qgsprocessingparameters.cpp @@ -2301,6 +2301,9 @@ bool QgsProcessingParameterField::checkValueIsAcceptable( const QVariant &input, { if ( !mAllowMultiple ) return false; + + if ( input.toList().isEmpty() && !( mFlags & FlagOptional ) ) + return false; } else if ( input.type() == QVariant::String ) { diff --git a/tests/src/core/testqgsprocessing.cpp b/tests/src/core/testqgsprocessing.cpp index de11fe2c9cc..0bc2c122043 100644 --- a/tests/src/core/testqgsprocessing.cpp +++ b/tests/src/core/testqgsprocessing.cpp @@ -2258,6 +2258,8 @@ void TestQgsProcessing::parameterLayerList() QVERIFY( !def->checkValueIsAcceptable( true ) ); QVERIFY( !def->checkValueIsAcceptable( 5 ) ); QVERIFY( !def->checkValueIsAcceptable( "layer12312312" ) ); + QVERIFY( !def->checkValueIsAcceptable( QVariantList() ) ); + QVERIFY( !def->checkValueIsAcceptable( QStringList() ) ); QVERIFY( def->checkValueIsAcceptable( QStringList() << "layer12312312" << "layerB" ) ); QVERIFY( def->checkValueIsAcceptable( QVariantList() << "layer12312312" << "layerB" ) ); QVERIFY( !def->checkValueIsAcceptable( "" ) ); @@ -3183,6 +3185,8 @@ void TestQgsProcessing::parameterField() QVERIFY( def->checkValueIsAcceptable( QVariantList() << "a" << "b" ) ); QVERIFY( !def->checkValueIsAcceptable( "" ) ); QVERIFY( !def->checkValueIsAcceptable( QVariant() ) ); + QVERIFY( !def->checkValueIsAcceptable( QStringList() ) ); + QVERIFY( !def->checkValueIsAcceptable( QVariantList() ) ); params.insert( "non_optional", QString( "a;b" ) ); fields = QgsProcessingParameters::parameterAsFields( def.get(), params, context ); From c750cb2154842243e84a43a1161d332e604fe017 Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Fri, 8 Sep 2017 09:19:01 +1000 Subject: [PATCH 7/9] Fix existing unit test, add new tests --- .../testdata/expected/stats_by_cat_date.gfs | 48 +++++++ .../testdata/expected/stats_by_cat_date.gml | 50 +++++++ .../testdata/expected/stats_by_cat_float.gfs | 86 ++++++++++++ .../testdata/expected/stats_by_cat_float.gml | 103 ++++++++++++++ .../expected/stats_by_cat_no_value.gfs | 26 ++++ .../expected/stats_by_cat_no_value.gml | 48 +++++++ .../testdata/expected/stats_by_cat_string.gfs | 62 +++++++++ .../testdata/expected/stats_by_cat_string.gml | 64 +++++++++ .../expected/stats_by_cat_two_fields.gfs | 91 +++++++++++++ .../expected/stats_by_cat_two_fields.gml | 126 ++++++++++++++++++ .../testdata/expected/stats_by_category.gfs | 48 ++++++- .../testdata/expected/stats_by_category.gml | 42 ++++-- .../tests/testdata/qgis_algorithm_tests.yaml | 71 ++++++++++ 13 files changed, 852 insertions(+), 13 deletions(-) create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_date.gfs create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_date.gml create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_float.gfs create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_float.gml create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_no_value.gfs create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_no_value.gml create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_string.gfs create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_string.gml create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_two_fields.gfs create mode 100644 python/plugins/processing/tests/testdata/expected/stats_by_cat_two_fields.gml diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_date.gfs b/python/plugins/processing/tests/testdata/expected/stats_by_cat_date.gfs new file mode 100644 index 00000000000..1e740b97e92 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_date.gfs @@ -0,0 +1,48 @@ + + + stats_by_cat_date + stats_by_cat_date + 100 + + 4 + + + date + date + String + 10 + + + count + count + Integer + + + unique + unique + Integer + + + empty + empty + Integer + + + filled + filled + Integer + + + min + min + String + 10 + + + max + max + String + 10 + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_date.gml b/python/plugins/processing/tests/testdata/expected/stats_by_cat_date.gml new file mode 100644 index 00000000000..27c0f84cb3b --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_date.gml @@ -0,0 +1,50 @@ + + + missing + + + + 2016/11/30 + 1 + 1 + 0 + 1 + 2016/11/30 + 2016/11/30 + + + + + 2016/11/10 + 1 + 1 + 0 + 1 + 2016/11/10 + 2016/11/10 + + + + + 1 + 0 + 1 + 0 + + + + + 2014/11/30 + 1 + 1 + 0 + 1 + 2014/11/30 + 2014/11/30 + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_float.gfs b/python/plugins/processing/tests/testdata/expected/stats_by_cat_float.gfs new file mode 100644 index 00000000000..1069d98aeb8 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_float.gfs @@ -0,0 +1,86 @@ + + + stats_by_cat_float + stats_by_cat_float + 100 + + 5 + + + name + name + String + 2 + + + count + count + Integer + + + unique + unique + Integer + + + min + min + Real + + + max + max + Real + + + range + range + Real + + + sum + sum + Real + + + mean + mean + Real + + + median + median + Real + + + stddev + stddev + Real + + + minority + minority + Real + + + majority + majority + Real + + + q1 + q1 + Real + + + q3 + q3 + Real + + + iqr + iqr + Real + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_float.gml b/python/plugins/processing/tests/testdata/expected/stats_by_cat_float.gml new file mode 100644 index 00000000000..1fa21ebeadc --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_float.gml @@ -0,0 +1,103 @@ + + + missing + + + + aa + 2 + 2 + 3.33 + 44.123456 + 40.793456 + 47.453456 + 23.726728 + 23.726728 + 20.396728 + 3.33 + 3.33 + 3.33 + 44.123456 + 40.793456 + + + + + dd + 1 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + + bb + 4 + 1 + 0.123 + 0.123 + 0 + 0.492 + 0.123 + 0.123 + 0 + 0.123 + 0.123 + 0.123 + 0.123 + 0 + + + + + 1 + 1 + -100291.43213 + -100291.43213 + 0 + -100291.43213 + -100291.43213 + -100291.43213 + 0 + -100291.43213 + -100291.43213 + -100291.43213 + -100291.43213 + 0 + + + + + cc + 1 + 1 + 0.123 + 0.123 + 0 + 0.123 + 0.123 + 0.123 + 0 + 0.123 + 0.123 + 0.123 + 0.123 + 0 + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_no_value.gfs b/python/plugins/processing/tests/testdata/expected/stats_by_cat_no_value.gfs new file mode 100644 index 00000000000..b8719d76b93 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_no_value.gfs @@ -0,0 +1,26 @@ + + + stats_by_cat_no_value + stats_by_cat_no_value + 100 + + 6 + + + intval + intval + Integer + + + name + name + String + 2 + + + count + count + Integer + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_no_value.gml b/python/plugins/processing/tests/testdata/expected/stats_by_cat_no_value.gml new file mode 100644 index 00000000000..5590c2625a4 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_no_value.gml @@ -0,0 +1,48 @@ + + + missing + + + + 1 + aa + 2 + + + + + dd + 2 + + + + + 1 + bb + 3 + + + + + 120 + 1 + + + + + cc + 1 + + + + + 2 + bb + 1 + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_string.gfs b/python/plugins/processing/tests/testdata/expected/stats_by_cat_string.gfs new file mode 100644 index 00000000000..0fb2358c926 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_string.gfs @@ -0,0 +1,62 @@ + + + stats_by_cat_string + stats_by_cat_string + 100 + + 4 + + + intval + intval + Integer + + + count + count + Integer + + + unique + unique + Integer + + + empty + empty + Integer + + + filled + filled + Integer + + + min + min + String + 2 + + + max + max + String + 2 + + + min_length + min_length + Integer + + + max_length + max_length + Integer + + + mean_length + mean_length + Integer + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_string.gml b/python/plugins/processing/tests/testdata/expected/stats_by_cat_string.gml new file mode 100644 index 00000000000..600e578de7d --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_string.gml @@ -0,0 +1,64 @@ + + + missing + + + + 1 + 5 + 2 + 0 + 5 + aa + bb + 2 + 2 + 2 + + + + + 3 + 2 + 0 + 3 + cc + dd + 2 + 2 + 2 + + + + + 120 + 1 + 1 + 1 + 0 + + + 0 + 0 + 0 + + + + + 2 + 1 + 1 + 0 + 1 + bb + bb + 2 + 2 + 2 + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_two_fields.gfs b/python/plugins/processing/tests/testdata/expected/stats_by_cat_two_fields.gfs new file mode 100644 index 00000000000..348c809ee01 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_two_fields.gfs @@ -0,0 +1,91 @@ + + + stats_by_cat_two_fields + stats_by_cat_two_fields + 100 + + 6 + + + intval + intval + Integer + + + name + name + String + 2 + + + count + count + Integer + + + unique + unique + Integer + + + min + min + Real + + + max + max + Real + + + range + range + Real + + + sum + sum + Real + + + mean + mean + Real + + + median + median + Real + + + stddev + stddev + Real + + + minority + minority + Real + + + majority + majority + Real + + + q1 + q1 + Real + + + q3 + q3 + Real + + + iqr + iqr + Real + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_cat_two_fields.gml b/python/plugins/processing/tests/testdata/expected/stats_by_cat_two_fields.gml new file mode 100644 index 00000000000..e8668bfdc2b --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stats_by_cat_two_fields.gml @@ -0,0 +1,126 @@ + + + missing + + + + 1 + aa + 2 + 2 + 3.33 + 44.123456 + 40.793456 + 47.453456 + 23.726728 + 23.726728 + 20.396728 + 3.33 + 3.33 + 3.33 + 44.123456 + 40.793456 + + + + + dd + 1 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + + 1 + bb + 3 + 1 + 0.123 + 0.123 + 0 + 0.369 + 0.123 + 0.123 + 0 + 0.123 + 0.123 + 0.123 + 0.123 + 0 + + + + + 120 + 1 + 1 + -100291.43213 + -100291.43213 + 0 + -100291.43213 + -100291.43213 + -100291.43213 + 0 + -100291.43213 + -100291.43213 + -100291.43213 + -100291.43213 + 0 + + + + + cc + 1 + 1 + 0.123 + 0.123 + 0 + 0.123 + 0.123 + 0.123 + 0 + 0.123 + 0.123 + 0.123 + 0.123 + 0 + + + + + 2 + bb + 1 + 1 + 0.123 + 0.123 + 0 + 0.123 + 0.123 + 0.123 + 0 + 0.123 + 0.123 + 0.123 + 0.123 + 0 + + + diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_category.gfs b/python/plugins/processing/tests/testdata/expected/stats_by_category.gfs index ba663caa0c0..99c98501a5b 100644 --- a/python/plugins/processing/tests/testdata/expected/stats_by_category.gfs +++ b/python/plugins/processing/tests/testdata/expected/stats_by_category.gfs @@ -11,6 +11,16 @@ id2 Integer + + count + count + Integer + + + unique + unique + Integer + min min @@ -21,24 +31,54 @@ max Integer + + range + range + Integer + + + sum + sum + Integer + mean mean Real + + median + median + Real + stddev stddev Real - sum - sum + minority + minority Integer - count - count + majority + majority + Integer + + + q1 + q1 + Integer + + + q3 + q3 + Integer + + + iqr + iqr Integer diff --git a/python/plugins/processing/tests/testdata/expected/stats_by_category.gml b/python/plugins/processing/tests/testdata/expected/stats_by_category.gml index 4647a986f75..7fcd964d673 100644 --- a/python/plugins/processing/tests/testdata/expected/stats_by_category.gml +++ b/python/plugins/processing/tests/testdata/expected/stats_by_category.gml @@ -9,34 +9,58 @@ 2 + 2 + 2 1 4 - 2.5 - 2.12132034355964 + 3 5 - 2 + 2.5 + 2.5 + 1.5 + 1 + 1 + 1 + 4 + 3 1 + 2 + 2 2 5 - 3.5 - 2.12132034355964 + 3 7 - 2 + 3.5 + 3.5 + 1.5 + 2 + 2 + 2 + 5 + 3 0 + 5 + 5 3 9 - 6.6 - 2.30217288664427 + 6 33 - 5 + 6.6 + 7 + 2.0591260281974 + 3 + 3 + 6 + 8 + 2 diff --git a/python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml b/python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml index 0bcff7fcfa1..401aeb67d68 100644 --- a/python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml +++ b/python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml @@ -3476,3 +3476,74 @@ tests: name: expected/collect_two_fields.gml type: vector + + - algorithm: qgis:statisticsbycategories + name: Stats by cat (float field) + params: + CATEGORIES_FIELD_NAME: + - name + INPUT: + name: dissolve_polys.gml + type: vector + VALUES_FIELD_NAME: floatval + results: + OUTPUT: + name: expected/stats_by_cat_float.gml + type: vector + + - algorithm: qgis:statisticsbycategories + name: Stats by cat (string field) + params: + CATEGORIES_FIELD_NAME: + - intval + INPUT: + name: dissolve_polys.gml + type: vector + VALUES_FIELD_NAME: name + results: + OUTPUT: + name: expected/stats_by_cat_string.gml + type: vector + + - algorithm: qgis:statisticsbycategories + name: Stats by cat (two category fields) + params: + CATEGORIES_FIELD_NAME: + - intval + - name + INPUT: + name: dissolve_polys.gml + type: vector + VALUES_FIELD_NAME: floatval + results: + OUTPUT: + name: expected/stats_by_cat_two_fields.gml + type: vector + + - algorithm: qgis:statisticsbycategories + name: Stats by cat (no value field) + params: + CATEGORIES_FIELD_NAME: + - intval + - name + INPUT: + name: dissolve_polys.gml + type: vector + results: + OUTPUT: + name: expected/stats_by_cat_no_value.gml + type: vector + + - algorithm: qgis:statisticsbycategories + name: Stats by cat (date field) + params: + CATEGORIES_FIELD_NAME: + - date + INPUT: + name: custom/datetimes.tab + type: vector + VALUES_FIELD_NAME: date + results: + OUTPUT: + name: expected/stats_by_cat_date.gml + type: vector From e9fd4095521c8a5e641f7494911dd4af5f6785ac Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Fri, 8 Sep 2017 16:33:55 +1000 Subject: [PATCH 8/9] Allow multiple primary key fields to be specified for processing tests ` --- python/testing/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/testing/__init__.py b/python/testing/__init__.py index 7b8359de5ee..4cea7c414cf 100644 --- a/python/testing/__init__.py +++ b/python/testing/__init__.py @@ -105,7 +105,11 @@ class TestCase(_TestCase): def sort_by_pk_or_fid(f): if 'pk' in kwargs and kwargs['pk'] is not None: - return f[kwargs['pk']] + key = kwargs['pk'] + if isinstance(key, list) or isinstance(key, tuple): + return [f[k] for k in key] + else: + return f[kwargs['pk']] else: return f.id() From 4ca972a2ffb743dd95c5f71bdd750b2c3648a3b3 Mon Sep 17 00:00:00 2001 From: Nyall Dawson Date: Fri, 8 Sep 2017 16:35:18 +1000 Subject: [PATCH 9/9] Fix stats by categories tests on Travis --- .../tests/testdata/qgis_algorithm_tests.yaml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml b/python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml index 401aeb67d68..887813449e4 100644 --- a/python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml +++ b/python/plugins/processing/tests/testdata/qgis_algorithm_tests.yaml @@ -3490,6 +3490,10 @@ tests: OUTPUT: name: expected/stats_by_cat_float.gml type: vector + pk: name + compare: + fields: + fid: skip - algorithm: qgis:statisticsbycategories name: Stats by cat (string field) @@ -3504,6 +3508,10 @@ tests: OUTPUT: name: expected/stats_by_cat_string.gml type: vector + pk: intval + compare: + fields: + fid: skip - algorithm: qgis:statisticsbycategories name: Stats by cat (two category fields) @@ -3519,6 +3527,12 @@ tests: OUTPUT: name: expected/stats_by_cat_two_fields.gml type: vector + pk: + - intval + - name + compare: + fields: + fid: skip - algorithm: qgis:statisticsbycategories name: Stats by cat (no value field) @@ -3533,6 +3547,12 @@ tests: OUTPUT: name: expected/stats_by_cat_no_value.gml type: vector + pk: + - intval + - name + compare: + fields: + fid: skip - algorithm: qgis:statisticsbycategories name: Stats by cat (date field) @@ -3547,3 +3567,7 @@ tests: OUTPUT: name: expected/stats_by_cat_date.gml type: vector + pk: date + compare: + fields: + fid: skip \ No newline at end of file