diff --git a/python/plugins/processing/algs/qgis/BasicStatisticsStrings.py b/python/plugins/processing/algs/qgis/BasicStatisticsStrings.py index 149db62f506..3afc0f9213a 100644 --- a/python/plugins/processing/algs/qgis/BasicStatisticsStrings.py +++ b/python/plugins/processing/algs/qgis/BasicStatisticsStrings.py @@ -31,6 +31,9 @@ import codecs from qgis.PyQt.QtGui import QIcon +from qgis.core import (QgsStringStatisticalSummary, + QgsFeatureRequest) + from processing.core.GeoAlgorithm import GeoAlgorithm from processing.core.parameters import ParameterVector from processing.core.parameters import ParameterTableField @@ -54,6 +57,8 @@ class BasicStatisticsStrings(GeoAlgorithm): EMPTY = 'EMPTY' FILLED = 'FILLED' UNIQUE = 'UNIQUE' + MIN_VALUE = 'MIN_VALUE' + MAX_VALUE = 'MAX_VALUE' def getIcon(self): return QIcon(os.path.join(pluginPath, 'images', 'ftools', 'basic_statistics.png')) @@ -78,6 +83,8 @@ class BasicStatisticsStrings(GeoAlgorithm): self.addOutput(OutputNumber(self.EMPTY, self.tr('Number of empty values'))) self.addOutput(OutputNumber(self.FILLED, self.tr('Number of non-empty values'))) self.addOutput(OutputNumber(self.UNIQUE, self.tr('Number of unique values'))) + self.addOutput(OutputNumber(self.MIN_VALUE, self.tr('Minimum string value'))) + self.addOutput(OutputNumber(self.MAX_VALUE, self.tr('Maximum string value'))) def processAlgorithm(self, progress): layer = dataobjects.getObjectFromUri( @@ -86,72 +93,42 @@ class BasicStatisticsStrings(GeoAlgorithm): outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE) - index = layer.fields().lookupField(fieldName) - - sumValue = 0 - minValue = 0 - maxValue = 0 - meanValue = 0 - nullValues = 0 - filledValues = 0 - - isFirst = True - values = [] - - features = vector.features(layer) + request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName], + layer.fields()) + stat = QgsStringStatisticalSummary() + features = vector.features(layer, request) count = len(features) - total = 100.0 / count + total = 100.0 / float(count) for current, ft in enumerate(features): - value = ft[fieldName] - if value: - length = float(len(value)) - filledValues += 1 - else: - nullValues += 1 - progress.setPercentage(int(current * total)) - continue - - if isFirst: - minValue = length - maxValue = length - isFirst = False - else: - if length < minValue: - minValue = length - if length > maxValue: - maxValue = length - - values.append(length) - sumValue += length - + stat.addValue(ft[fieldName]) progress.setPercentage(int(current * total)) - n = float(len(values)) - if n > 0: - meanValue = sumValue / n - - uniqueValues = vector.getUniqueValuesCount(layer, index) + stat.finalize() data = [] data.append(self.tr('Analyzed layer: {}').format(layer.name())) data.append(self.tr('Analyzed field: {}').format(fieldName)) - data.append(self.tr('Minimum length: {}').format(minValue)) - data.append(self.tr('Maximum length: {}').format(maxValue)) - data.append(self.tr('Mean length: {}').format(meanValue)) - data.append(self.tr('Filled values: {}').format(filledValues)) - data.append(self.tr('NULL (missing) values: {}').format(nullValues)) - data.append(self.tr('Count: {}').format(count)) - data.append(self.tr('Unique: {}').format(uniqueValues)) + data.append(self.tr('Minimum length: {}').format(stat.minLength())) + data.append(self.tr('Maximum length: {}').format(stat.maxLength())) + data.append(self.tr('Mean length: {}').format(stat.meanLength())) + data.append(self.tr('Filled values: {}').format(stat.count() - stat.countMissing())) + data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing())) + data.append(self.tr('Count: {}').format(stat.count())) + data.append(self.tr('Unique: {}').format(stat.countDistinct())) + data.append(self.tr('Minimum string value: {}').format(stat.min())) + data.append(self.tr('Maximum string value: {}').format(stat.max())) self.createHTML(outputFile, data) - self.setOutputValue(self.MIN_LEN, minValue) - self.setOutputValue(self.MAX_LEN, maxValue) - self.setOutputValue(self.MEAN_LEN, meanValue) - self.setOutputValue(self.FILLED, filledValues) - self.setOutputValue(self.EMPTY, nullValues) - self.setOutputValue(self.COUNT, count) - self.setOutputValue(self.UNIQUE, uniqueValues) + self.setOutputValue(self.MIN_LEN, stat.minLength()) + self.setOutputValue(self.MAX_LEN, stat.maxLength()) + self.setOutputValue(self.MEAN_LEN, stat.meanLength()) + self.setOutputValue(self.FILLED, stat.count() - stat.countMissing()) + self.setOutputValue(self.EMPTY, stat.countMissing()) + self.setOutputValue(self.COUNT, stat.count()) + self.setOutputValue(self.UNIQUE, stat.countDistinct()) + self.setOutputValue(self.MIN_VALUE, stat.min()) + self.setOutputValue(self.MAX_VALUE, stat.max()) def createHTML(self, outputFile, algData): with codecs.open(outputFile, 'w', encoding='utf-8') as f: @@ -159,4 +136,4 @@ class BasicStatisticsStrings(GeoAlgorithm): f.write('\n') for s in algData: f.write('

' + str(s) + '

\n') - f.write('') + f.write('\n') diff --git a/python/plugins/processing/tests/testdata/expected/basic_statistics_string.html b/python/plugins/processing/tests/testdata/expected/basic_statistics_string.html index b213bb0b237..9dd6d2d3c60 100644 --- a/python/plugins/processing/tests/testdata/expected/basic_statistics_string.html +++ b/python/plugins/processing/tests/testdata/expected/basic_statistics_string.html @@ -2,11 +2,13 @@

Analyzed layer: multipolys.gml

Analyzed field: Bname

-

Minimum length: 4.0

-

Maximum length: 4.0

-

Mean length: 4.0

+

Minimum length: 0

+

Maximum length: 4

+

Mean length: 3.0

Filled values: 3

NULL (missing) values: 1

Count: 4

Unique: 2

- \ No newline at end of file +

Minimum string value: Test

+

Maximum string value: Test

+