QGIS/python/plugins/processing/algs/qgis/BasicStatisticsNumbers.py

174 lines
6.3 KiB
Python
Raw Normal View History

2012-10-04 19:33:47 +02:00
# -*- coding: utf-8 -*-
"""
***************************************************************************
BasicStatisticsNumbers.py
---------------------
Date : September 2012
Copyright : (C) 2012 by Victor Olaya
Email : volayaf at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************
"""
__author__ = 'Victor Olaya'
__date__ = 'September 2012'
__copyright__ = '(C) 2012, Victor Olaya'
2012-10-04 19:33:47 +02:00
# This will get replaced with a git SHA1 when you do a git archive
2012-10-04 19:33:47 +02:00
__revision__ = '$Format:%H$'
2012-09-28 15:35:31 +03:00
import math
2013-08-12 20:44:27 +02:00
from processing.core.GeoAlgorithm import GeoAlgorithm
from processing.core.parameters import ParameterVector
from processing.core.parameters import ParameterTableField
from processing.core.outputs import OutputHTML
from processing.core.outputs import OutputNumber
from processing.tools import dataobjects, vector
2012-09-28 15:35:31 +03:00
class BasicStatisticsNumbers(GeoAlgorithm):
    """Processing algorithm reporting basic statistics of one numeric field.

    The algorithm reads every feature of the input vector layer, collects
    the values of the selected field and publishes the results twice: as an
    HTML report and as individual numeric outputs (see the constants below).

    ``COUNT`` is the number of features in the layer.  All other statistics
    are computed over the values that could be read as numbers, so features
    with a missing value do not distort the mean, standard deviation or
    median.
    """

    # Parameter identifiers.
    INPUT_LAYER = 'INPUT_LAYER'
    FIELD_NAME = 'FIELD_NAME'

    # Output identifiers.
    OUTPUT_HTML_FILE = 'OUTPUT_HTML_FILE'
    CV = 'CV'
    MIN = 'MIN'
    MAX = 'MAX'
    SUM = 'SUM'
    MEAN = 'MEAN'
    COUNT = 'COUNT'
    STD_DEV = 'STD_DEV'
    RANGE = 'RANGE'
    MEDIAN = 'MEDIAN'
    UNIQUE = 'UNIQUE'

    def defineCharacteristics(self):
        """Declare the algorithm name, group, parameters and outputs."""
        self.name, self.i18n_name = self.trAlgorithm('Basic statistics for numeric fields')
        self.group, self.i18n_group = self.trAlgorithm('Vector table tools')

        self.addParameter(ParameterVector(self.INPUT_LAYER,
            self.tr('Input vector layer'), ParameterVector.VECTOR_TYPE_ANY, False))
        self.addParameter(ParameterTableField(self.FIELD_NAME,
            self.tr('Field to calculate statistics on'),
            self.INPUT_LAYER, ParameterTableField.DATA_TYPE_NUMBER))

        self.addOutput(OutputHTML(self.OUTPUT_HTML_FILE,
            self.tr('Statistics')))

        # The tr() calls are kept as explicit literals (rather than driven
        # from a table) so the strings stay visible as plain source text.
        self.addOutput(OutputNumber(self.CV, self.tr('Coefficient of Variation')))
        self.addOutput(OutputNumber(self.MIN, self.tr('Minimum value')))
        self.addOutput(OutputNumber(self.MAX, self.tr('Maximum value')))
        self.addOutput(OutputNumber(self.SUM, self.tr('Sum')))
        self.addOutput(OutputNumber(self.MEAN, self.tr('Mean value')))
        self.addOutput(OutputNumber(self.COUNT, self.tr('Count')))
        self.addOutput(OutputNumber(self.RANGE, self.tr('Range')))
        self.addOutput(OutputNumber(self.MEDIAN, self.tr('Median')))
        self.addOutput(OutputNumber(self.UNIQUE, self.tr('Number of unique values')))
        self.addOutput(OutputNumber(self.STD_DEV, self.tr('Standard deviation')))

    def processAlgorithm(self, progress):
        """Compute the statistics and publish them.

        :param progress: object whose ``setPercentage(int)`` method is called
            once per processed feature.
        """
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)
        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)
        index = layer.fieldNameIndex(fieldName)

        features = vector.features(layer)
        count = len(features)
        # An empty layer must not abort the run with a ZeroDivisionError.
        total = 100.0 / count if count > 0 else 0.0

        values = []
        for current, ft in enumerate(features, 1):
            value = self._toFloat(ft.attributes()[index])
            # Compare against None instead of testing truthiness: a plain
            # truth test would also discard legitimate 0 values.
            if value is not None:
                values.append(value)
            progress.setPercentage(int(current * total))

        uniqueValue = vector.getUniqueValuesCount(layer, index)
        stats = self._summarize(values)

        data = []
        data.append('Count: ' + unicode(count))
        data.append('Unique values: ' + unicode(uniqueValue))
        data.append('Minimum value: ' + unicode(stats['min']))
        data.append('Maximum value: ' + unicode(stats['max']))
        data.append('Range: ' + unicode(stats['range']))
        data.append('Sum: ' + unicode(stats['sum']))
        data.append('Mean value: ' + unicode(stats['mean']))
        data.append('Median value: ' + unicode(stats['median']))
        data.append('Standard deviation: ' + unicode(stats['std_dev']))
        data.append('Coefficient of Variation: ' + unicode(stats['cv']))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, stats['min'])
        self.setOutputValue(self.MAX, stats['max'])
        self.setOutputValue(self.RANGE, stats['range'])
        self.setOutputValue(self.SUM, stats['sum'])
        self.setOutputValue(self.MEAN, stats['mean'])
        self.setOutputValue(self.MEDIAN, stats['median'])
        self.setOutputValue(self.STD_DEV, stats['std_dev'])
        self.setOutputValue(self.CV, stats['cv'])

    @staticmethod
    def _toFloat(raw):
        """Return ``raw`` as a float, or None when it is not a number.

        ``None`` and anything ``float()`` rejects with TypeError or
        ValueError are reported as missing.
        NOTE(review): QGIS NULL attribute values are expected to be rejected
        by ``float()`` as well -- confirm against the targeted QGIS version.
        """
        if raw is None:
            return None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _summarize(values):
        """Return a dict of statistics for the list of floats ``values``.

        Keys: 'min', 'max', 'range', 'sum', 'mean', 'median', 'std_dev' and
        'cv'.  Every entry is 0 when ``values`` is empty.  The standard
        deviation is the population one (divides by the number of values);
        the coefficient of variation stays 0 when the mean is 0 because the
        ratio is undefined there.
        """
        stats = {'min': 0, 'max': 0, 'range': 0, 'sum': 0, 'mean': 0,
                 'median': 0, 'std_dev': 0, 'cv': 0}
        n = len(values)
        if n == 0:
            return stats

        # sorted() leaves the caller's list untouched.
        ordered = sorted(values)
        stats['min'] = ordered[0]
        stats['max'] = ordered[-1]
        stats['range'] = ordered[-1] - ordered[0]
        stats['sum'] = sum(values)

        mean = stats['sum'] / n
        stats['mean'] = mean

        # Floor division keeps the indices integral under any division mode.
        middle = n // 2
        if n % 2 == 0:
            stats['median'] = 0.5 * (ordered[middle - 1] + ordered[middle])
        else:
            stats['median'] = ordered[middle]

        squares = sum((v - mean) * (v - mean) for v in values)
        stats['std_dev'] = math.sqrt(squares / n)
        if mean != 0:
            stats['cv'] = stats['std_dev'] / mean
        return stats

    def createHTML(self, outputFile, algData):
        """Write every item of ``algData`` to ``outputFile`` as a ``<p>``.

        :param outputFile: path of the file to create or overwrite.
        :param algData: iterable of report lines.
        """
        # The context manager closes the file even if a write fails.
        with open(outputFile, 'w') as f:
            for s in algData:
                f.write('<p>' + unicode(s) + '</p>')