mirror of
https://github.com/qgis/QGIS.git
synced 2025-04-15 00:04:00 -04:00
[processing] Use QgsStringStatisticalSummary in basic stats for strings
And also further optimise the algorithm
This commit is contained in:
parent
e272bb3e9c
commit
ab29f2de28
@ -31,6 +31,9 @@ import codecs
|
||||
|
||||
from qgis.PyQt.QtGui import QIcon
|
||||
|
||||
from qgis.core import (QgsStringStatisticalSummary,
|
||||
QgsFeatureRequest)
|
||||
|
||||
from processing.core.GeoAlgorithm import GeoAlgorithm
|
||||
from processing.core.parameters import ParameterVector
|
||||
from processing.core.parameters import ParameterTableField
|
||||
@ -54,6 +57,8 @@ class BasicStatisticsStrings(GeoAlgorithm):
|
||||
EMPTY = 'EMPTY'
|
||||
FILLED = 'FILLED'
|
||||
UNIQUE = 'UNIQUE'
|
||||
MIN_VALUE = 'MIN_VALUE'
|
||||
MAX_VALUE = 'MAX_VALUE'
|
||||
|
||||
def getIcon(self):
    """Return a QIcon built from the ``basic_statistics.png`` image.

    The image path is assembled from ``pluginPath`` followed by the
    ``images/ftools`` sub-directories.
    """
    icon_file = os.path.join(pluginPath, 'images', 'ftools', 'basic_statistics.png')
    return QIcon(icon_file)
|
||||
@ -78,6 +83,8 @@ class BasicStatisticsStrings(GeoAlgorithm):
|
||||
self.addOutput(OutputNumber(self.EMPTY, self.tr('Number of empty values')))
|
||||
self.addOutput(OutputNumber(self.FILLED, self.tr('Number of non-empty values')))
|
||||
self.addOutput(OutputNumber(self.UNIQUE, self.tr('Number of unique values')))
|
||||
self.addOutput(OutputNumber(self.MIN_VALUE, self.tr('Minimum string value')))
|
||||
self.addOutput(OutputNumber(self.MAX_VALUE, self.tr('Maximum string value')))
|
||||
|
||||
def processAlgorithm(self, progress):
|
||||
layer = dataobjects.getObjectFromUri(
|
||||
@ -86,72 +93,42 @@ class BasicStatisticsStrings(GeoAlgorithm):
|
||||
|
||||
outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)
|
||||
|
||||
index = layer.fields().lookupField(fieldName)
|
||||
|
||||
sumValue = 0
|
||||
minValue = 0
|
||||
maxValue = 0
|
||||
meanValue = 0
|
||||
nullValues = 0
|
||||
filledValues = 0
|
||||
|
||||
isFirst = True
|
||||
values = []
|
||||
|
||||
features = vector.features(layer)
|
||||
request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
|
||||
layer.fields())
|
||||
stat = QgsStringStatisticalSummary()
|
||||
features = vector.features(layer, request)
|
||||
count = len(features)
|
||||
total = 100.0 / count
|
||||
total = 100.0 / float(count)
|
||||
for current, ft in enumerate(features):
|
||||
value = ft[fieldName]
|
||||
if value:
|
||||
length = float(len(value))
|
||||
filledValues += 1
|
||||
else:
|
||||
nullValues += 1
|
||||
progress.setPercentage(int(current * total))
|
||||
continue
|
||||
|
||||
if isFirst:
|
||||
minValue = length
|
||||
maxValue = length
|
||||
isFirst = False
|
||||
else:
|
||||
if length < minValue:
|
||||
minValue = length
|
||||
if length > maxValue:
|
||||
maxValue = length
|
||||
|
||||
values.append(length)
|
||||
sumValue += length
|
||||
|
||||
stat.addValue(ft[fieldName])
|
||||
progress.setPercentage(int(current * total))
|
||||
|
||||
n = float(len(values))
|
||||
if n > 0:
|
||||
meanValue = sumValue / n
|
||||
|
||||
uniqueValues = vector.getUniqueValuesCount(layer, index)
|
||||
stat.finalize()
|
||||
|
||||
data = []
|
||||
data.append(self.tr('Analyzed layer: {}').format(layer.name()))
|
||||
data.append(self.tr('Analyzed field: {}').format(fieldName))
|
||||
data.append(self.tr('Minimum length: {}').format(minValue))
|
||||
data.append(self.tr('Maximum length: {}').format(maxValue))
|
||||
data.append(self.tr('Mean length: {}').format(meanValue))
|
||||
data.append(self.tr('Filled values: {}').format(filledValues))
|
||||
data.append(self.tr('NULL (missing) values: {}').format(nullValues))
|
||||
data.append(self.tr('Count: {}').format(count))
|
||||
data.append(self.tr('Unique: {}').format(uniqueValues))
|
||||
data.append(self.tr('Minimum length: {}').format(stat.minLength()))
|
||||
data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
|
||||
data.append(self.tr('Mean length: {}').format(stat.meanLength()))
|
||||
data.append(self.tr('Filled values: {}').format(stat.count() - stat.countMissing()))
|
||||
data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
|
||||
data.append(self.tr('Count: {}').format(stat.count()))
|
||||
data.append(self.tr('Unique: {}').format(stat.countDistinct()))
|
||||
data.append(self.tr('Minimum string value: {}').format(stat.min()))
|
||||
data.append(self.tr('Maximum string value: {}').format(stat.max()))
|
||||
|
||||
self.createHTML(outputFile, data)
|
||||
|
||||
self.setOutputValue(self.MIN_LEN, minValue)
|
||||
self.setOutputValue(self.MAX_LEN, maxValue)
|
||||
self.setOutputValue(self.MEAN_LEN, meanValue)
|
||||
self.setOutputValue(self.FILLED, filledValues)
|
||||
self.setOutputValue(self.EMPTY, nullValues)
|
||||
self.setOutputValue(self.COUNT, count)
|
||||
self.setOutputValue(self.UNIQUE, uniqueValues)
|
||||
self.setOutputValue(self.MIN_LEN, stat.minLength())
|
||||
self.setOutputValue(self.MAX_LEN, stat.maxLength())
|
||||
self.setOutputValue(self.MEAN_LEN, stat.meanLength())
|
||||
self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
|
||||
self.setOutputValue(self.EMPTY, stat.countMissing())
|
||||
self.setOutputValue(self.COUNT, stat.count())
|
||||
self.setOutputValue(self.UNIQUE, stat.countDistinct())
|
||||
self.setOutputValue(self.MIN_VALUE, stat.min())
|
||||
self.setOutputValue(self.MAX_VALUE, stat.max())
|
||||
|
||||
def createHTML(self, outputFile, algData):
|
||||
with codecs.open(outputFile, 'w', encoding='utf-8') as f:
|
||||
@ -159,4 +136,4 @@ class BasicStatisticsStrings(GeoAlgorithm):
|
||||
f.write('<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>\n')
|
||||
for s in algData:
|
||||
f.write('<p>' + str(s) + '</p>\n')
|
||||
f.write('</body></html>')
|
||||
f.write('</body></html>\n')
|
||||
|
@ -2,11 +2,13 @@
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
|
||||
<p>Analyzed layer: multipolys.gml</p>
|
||||
<p>Analyzed field: Bname</p>
|
||||
<p>Minimum length: 4.0</p>
|
||||
<p>Maximum length: 4.0</p>
|
||||
<p>Mean length: 4.0</p>
|
||||
<p>Minimum length: 0</p>
|
||||
<p>Maximum length: 4</p>
|
||||
<p>Mean length: 3.0</p>
|
||||
<p>Filled values: 3</p>
|
||||
<p>NULL (missing) values: 1</p>
|
||||
<p>Count: 4</p>
|
||||
<p>Unique: 2</p>
|
||||
</body></html>
|
||||
<p>Minimum string value: Test</p>
|
||||
<p>Maximum string value: Test</p>
|
||||
</body></html>
|
||||
|
Loading…
x
Reference in New Issue
Block a user