[processing] Use QgsStringStatisticalSummary in basic stats for strings

Also further optimise the algorithm by requesting features without geometry and with only the analysed field's attribute.
This commit is contained in:
Nyall Dawson 2016-11-30 12:47:39 +10:00
parent e272bb3e9c
commit ab29f2de28
2 changed files with 39 additions and 60 deletions

View File

@ -31,6 +31,9 @@ import codecs
from qgis.PyQt.QtGui import QIcon
from qgis.core import (QgsStringStatisticalSummary,
QgsFeatureRequest)
from processing.core.GeoAlgorithm import GeoAlgorithm
from processing.core.parameters import ParameterVector
from processing.core.parameters import ParameterTableField
@ -54,6 +57,8 @@ class BasicStatisticsStrings(GeoAlgorithm):
# Output identifiers: each constant is the name passed to addOutput() when the
# output is declared and to setOutputValue() when it is filled in.
EMPTY = 'EMPTY'
FILLED = 'FILLED'
UNIQUE = 'UNIQUE'
# Identifiers for the minimum / maximum string value outputs.
MIN_VALUE = 'MIN_VALUE'
MAX_VALUE = 'MAX_VALUE'
# Return the algorithm icon: a QIcon built from
# <pluginPath>/images/ftools/basic_statistics.png.
def getIcon(self):
return QIcon(os.path.join(pluginPath, 'images', 'ftools', 'basic_statistics.png'))
@ -78,6 +83,8 @@ class BasicStatisticsStrings(GeoAlgorithm):
# Declare the outputs of the algorithm; the labels are passed through self.tr().
self.addOutput(OutputNumber(self.EMPTY, self.tr('Number of empty values')))
self.addOutput(OutputNumber(self.FILLED, self.tr('Number of non-empty values')))
self.addOutput(OutputNumber(self.UNIQUE, self.tr('Number of unique values')))
# NOTE(review): these two outputs are declared as OutputNumber, but they are
# labelled as string values and are filled from stat.min() / stat.max() in
# processAlgorithm - presumably strings. Confirm whether a string output type
# should be used here instead.
self.addOutput(OutputNumber(self.MIN_VALUE, self.tr('Minimum string value')))
self.addOutput(OutputNumber(self.MAX_VALUE, self.tr('Maximum string value')))
# Compute string statistics for the selected field, write them to the HTML
# report file and publish them as algorithm outputs.
#
# NOTE(review): this listing is a rendered diff whose +/- markers and
# indentation are missing, so removed and added lines are interleaved. The
# lines working on the local accumulators (sumValue, minValue, maxValue,
# meanValue, nullValues, filledValues, values) appear to be the removed
# pre-commit implementation, and the lines using `stat` / `request` the added
# one - confirm against the real diff before relying on this reading.
def processAlgorithm(self, progress):
layer = dataobjects.getObjectFromUri(
@ -86,72 +93,42 @@ class BasicStatisticsStrings(GeoAlgorithm):
# Hunk boundary above: the remainder of the getObjectFromUri(...) call and the
# assignment of fieldName are not part of this listing.
outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)
# NOTE(review): the only visible use of `index` is the
# vector.getUniqueValuesCount(layer, index) call further down, which looks like
# pre-commit code; if that call was removed, this lookup is unused - confirm.
index = layer.fields().lookupField(fieldName)
sumValue = 0
minValue = 0
maxValue = 0
meanValue = 0
nullValues = 0
filledValues = 0
isFirst = True
values = []
features = vector.features(layer)
# Feature request flagged NoGeometry and restricted to the fieldName attribute.
request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
layer.fields())
# Accumulator that receives every field value via addValue() in the loop below.
stat = QgsStringStatisticalSummary()
features = vector.features(layer, request)
count = len(features)
# Progress step per feature, in percent.
# NOTE(review): both variants of the next statement divide by count and raise
# ZeroDivisionError when count is 0 (presumably a layer or selection without
# features) - consider guarding against that case.
total = 100.0 / count
total = 100.0 / float(count)
for current, ft in enumerate(features):
value = ft[fieldName]
if value:
length = float(len(value))
filledValues += 1
else:
nullValues += 1
progress.setPercentage(int(current * total))
continue
if isFirst:
minValue = length
maxValue = length
isFirst = False
else:
if length < minValue:
minValue = length
if length > maxValue:
maxValue = length
values.append(length)
sumValue += length
# Feed the raw field value to the summary object.
stat.addValue(ft[fieldName])
progress.setPercentage(int(current * total))
n = float(len(values))
if n > 0:
meanValue = sumValue / n
uniqueValues = vector.getUniqueValuesCount(layer, index)
# finalize() is called once after the last addValue() and before any statistic
# is read from `stat`.
stat.finalize()
# Report lines; each entry is written as one paragraph by createHTML().
data = []
data.append(self.tr('Analyzed layer: {}').format(layer.name()))
data.append(self.tr('Analyzed field: {}').format(fieldName))
data.append(self.tr('Minimum length: {}').format(minValue))
data.append(self.tr('Maximum length: {}').format(maxValue))
data.append(self.tr('Mean length: {}').format(meanValue))
data.append(self.tr('Filled values: {}').format(filledValues))
data.append(self.tr('NULL (missing) values: {}').format(nullValues))
data.append(self.tr('Count: {}').format(count))
data.append(self.tr('Unique: {}').format(uniqueValues))
# NOTE(review): the expected HTML output updated in this commit changes
# 'Minimum length' from 4.0 to 0 and 'Mean length' from 4.0 to 3.0 for a field
# with 3 filled values and 1 NULL - presumably missing values now count as
# length 0 in the length statistics. Confirm this behaviour change is intended.
data.append(self.tr('Minimum length: {}').format(stat.minLength()))
data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
data.append(self.tr('Mean length: {}').format(stat.meanLength()))
# Filled values are derived as total count minus missing count.
data.append(self.tr('Filled values: {}').format(stat.count() - stat.countMissing()))
data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
data.append(self.tr('Count: {}').format(stat.count()))
data.append(self.tr('Unique: {}').format(stat.countDistinct()))
data.append(self.tr('Minimum string value: {}').format(stat.min()))
data.append(self.tr('Maximum string value: {}').format(stat.max()))
self.createHTML(outputFile, data)
# Publish the same statistics as algorithm outputs.
self.setOutputValue(self.MIN_LEN, minValue)
self.setOutputValue(self.MAX_LEN, maxValue)
self.setOutputValue(self.MEAN_LEN, meanValue)
self.setOutputValue(self.FILLED, filledValues)
self.setOutputValue(self.EMPTY, nullValues)
self.setOutputValue(self.COUNT, count)
self.setOutputValue(self.UNIQUE, uniqueValues)
self.setOutputValue(self.MIN_LEN, stat.minLength())
self.setOutputValue(self.MAX_LEN, stat.maxLength())
self.setOutputValue(self.MEAN_LEN, stat.meanLength())
self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
self.setOutputValue(self.EMPTY, stat.countMissing())
self.setOutputValue(self.COUNT, stat.count())
self.setOutputValue(self.UNIQUE, stat.countDistinct())
# NOTE(review): MIN_VALUE / MAX_VALUE are declared with OutputNumber, while
# stat.min() / stat.max() are reported above as string values - confirm the
# declared output type.
self.setOutputValue(self.MIN_VALUE, stat.min())
self.setOutputValue(self.MAX_VALUE, stat.max())
# Write the report to outputFile as a UTF-8 encoded HTML document with one
# <p> paragraph per entry of algData.
def createHTML(self, outputFile, algData):
with codecs.open(outputFile, 'w', encoding='utf-8') as f:
@ -159,4 +136,4 @@ class BasicStatisticsStrings(GeoAlgorithm):
# Hunk boundary above: the statement(s) between the open() call and the <meta>
# line are not part of this listing.
f.write('<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>\n')
for s in algData:
# NOTE(review): entries are written without HTML escaping; the layer name,
# field name and the minimum / maximum string values end up in algData, so
# markup characters in them would be emitted verbatim - consider escaping.
f.write('<p>' + str(s) + '</p>\n')
# The two writes below are the old and new variants of the closing line; they
# differ only in the trailing newline (this rendering carries no +/- markers).
f.write('</body></html>')
f.write('</body></html>\n')

View File

@ -2,11 +2,13 @@
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
<p>Analyzed layer: multipolys.gml</p>
<p>Analyzed field: Bname</p>
<p>Minimum length: 4.0</p>
<p>Maximum length: 4.0</p>
<p>Mean length: 4.0</p>
<p>Minimum length: 0</p>
<p>Maximum length: 4</p>
<p>Mean length: 3.0</p>
<p>Filled values: 3</p>
<p>NULL (missing) values: 1</p>
<p>Count: 4</p>
<p>Unique: 2</p>
</body></html>
<p>Minimum string value: Test</p>
<p>Maximum string value: Test</p>
</body></html>