refactor basic statistics tool

This commit is contained in:
Alexander Bruy 2012-09-28 15:35:31 +03:00
parent ad0ff93ac3
commit 1a67b91c4c
5 changed files with 375 additions and 195 deletions

View File

@ -1,190 +0,0 @@
from sextante.core.GeoAlgorithm import GeoAlgorithm
import os.path
from PyQt4 import QtGui
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from qgis.core import *
from sextante.parameters.ParameterVector import ParameterVector
from sextante.core.QGisLayers import QGisLayers
from sextante.core.GeoAlgorithmExecutionException import GeoAlgorithmExecutionException
from sextante.ftools import ftools_utils
import math
from sextante.outputs.OutputHTML import OutputHTML
from sextante.parameters.ParameterTableField import ParameterTableField
from sextante.parameters.ParameterBoolean import ParameterBoolean
class BasicStatistics(GeoAlgorithm):
    """Report basic statistics for one attribute field of a vector layer.

    Fields whose type name is listed in ``_TEXT_TYPES`` are summarised by
    the length of their values; every other field is read as a number.
    The statistics are written, one paragraph per line, to the HTML output.
    """

    INPUT = "INPUT"
    OUTPUT = "OUTPUT"
    FIELD = "FIELD"
    USE_SELECTION = "USE_SELECTION"

    # Field type names for which string-length statistics are produced.
    _TEXT_TYPES = ('String', 'varchar', 'char', 'text')

    def getIcon(self):
        """Return the icon stored in the ``icons`` folder next to this module."""
        return QtGui.QIcon(os.path.dirname(__file__) + "/icons/basic_statistics.png")

    def createHTML(self, outputFile, lstStats):
        """Write every entry of ``lstStats`` to ``outputFile`` as a ``<p>`` element."""
        f = open(outputFile, "w")
        try:
            for s in lstStats:
                f.write("<p>" + str(s) + "</p>")
        finally:
            # Release the handle even when a write fails.
            f.close()

    def processAlgorithm(self, progress):
        """Read the chosen field, compute its statistics and write the report.

        Raises GeoAlgorithmExecutionException when no feature was read.
        """
        outputFile = self.getOutputValue(BasicStatistics.OUTPUT)
        vlayer = QGisLayers.getObjectFromUri(self.getParameterValue(BasicStatistics.INPUT))
        attfield = self.getParameterValue(BasicStatistics.FIELD)
        useSelection = self.getParameterValue(BasicStatistics.USE_SELECTION)

        vprovider = vlayer.dataProvider()
        allAttrs = vprovider.attributeIndexes()
        vprovider.select(allAttrs)
        index = vprovider.fieldNameIndex(attfield)

        isText = ftools_utils.getFieldType(vlayer, attfield) in self._TEXT_TYPES
        values = self._readValues(vlayer, vprovider, allAttrs, index,
                                  useSelection, isText, progress)
        if not values:
            raise GeoAlgorithmExecutionException("Error:No features selected!")

        if isText:
            lstStats = self._textStats(values)
        else:
            uniqueVal = ftools_utils.getUniqueValuesCount(vlayer, index, bool(useSelection))
            lstStats = self._numericStats(values, uniqueVal)
        self.createHTML(outputFile, lstStats)

    def _providerFeatures(self, vprovider, allAttrs):
        """Yield the provider's features one by one after restarting its cursor."""
        vprovider.select(allAttrs)
        feat = QgsFeature()
        while vprovider.nextFeature(feat):
            yield feat

    def _readValues(self, vlayer, vprovider, allAttrs, index, useSelection, isText, progress):
        """Return the field values as floats (string lengths for text fields).

        Features come from the layer selection when ``useSelection`` is set,
        otherwise from the data provider.  Progress is reported per feature.
        """
        if useSelection:
            features = vlayer.selectedFeatures()
            nFeat = vlayer.selectedFeatureCount()
        else:
            nFeat = vprovider.featureCount()
            features = self._providerFeatures(vprovider, allAttrs)

        values = []
        for f in features:
            attr = f.attributeMap()[index]
            if isText:
                values.append(float(len(attr.toString())))
            else:
                values.append(float(attr.toDouble()[0]))
            if nFeat > 0:
                # Multiply by a float first: plain int / int would truncate
                # to 0 for every feature but the last one.
                progress.setPercentage(int(len(values) * 100.0 / nFeat))
        return values

    def _textStats(self, values):
        """Build the report lines for a non-empty list of string lengths."""
        nVal = float(len(values))
        meanVal = sum(values) / nVal
        fillVal = len([v for v in values if v != 0.00])
        emptyVal = len(values) - fillVal
        return [
            "Max. len:" + unicode(max(values)),
            "Min. len:" + unicode(min(values)),
            "Mean. len:" + unicode(meanVal),
            "Filled:" + unicode(fillVal),
            "Empty:" + unicode(emptyVal),
            "N:" + unicode(nVal),
        ]

    def _numericStats(self, values, uniqueVal):
        """Build the report lines for a non-empty list of numeric values."""
        nVal = float(len(values))
        sumVal = sum(values)
        minVal = min(values)
        maxVal = max(values)
        rangeVal = maxVal - minVal
        meanVal = sumVal / nVal

        # The deviation is defined for any mean; only the coefficient of
        # variation needs a non-zero mean to divide by.
        squares = sum([(v - meanVal) * (v - meanVal) for v in values])
        stdVal = math.sqrt(squares / nVal)
        cvVal = 0.00
        if meanVal != 0.00:
            cvVal = stdVal / meanVal

        # Median: middle element, or the mean of the two middle elements.
        ordered = sorted(values)
        middle = len(ordered) // 2
        if len(ordered) % 2 == 0:
            medianVal = 0.5 * (ordered[middle - 1] + ordered[middle])
        else:
            medianVal = ordered[middle]

        return [
            "Mean:" + unicode(meanVal),
            "StdDev:" + unicode(stdVal),
            "Sum:" + unicode(sumVal),
            "Min:" + unicode(minVal),
            "Max:" + unicode(maxVal),
            "N:" + unicode(nVal),
            "CV:" + unicode(cvVal),
            "Number of unique values:" + unicode(uniqueVal),
            "Range:" + unicode(rangeVal),
            "Median:" + unicode(medianVal),
        ]

    def defineCharacteristics(self):
        """Declare the algorithm name, group, parameters and HTML output."""
        self.name = "Basic statistics"
        self.group = "Analysis tools"
        self.addParameter(ParameterVector(BasicStatistics.INPUT, "Input layer", ParameterVector.VECTOR_TYPE_ANY))
        self.addParameter(ParameterTableField(BasicStatistics.FIELD, "Field", BasicStatistics.INPUT))
        self.addParameter(ParameterBoolean(BasicStatistics.USE_SELECTION, "Use selection", False))
        self.addOutput(OutputHTML(BasicStatistics.OUTPUT, "Statistics"))

View File

@ -0,0 +1,186 @@
import os.path
import math
from PyQt4 import QtGui
from PyQt4.QtCore import *
from qgis.core import *
from sextante.core.GeoAlgorithm import GeoAlgorithm
from sextante.core.QGisLayers import QGisLayers
from sextante.parameters.ParameterVector import ParameterVector
from sextante.parameters.ParameterTableField import ParameterTableField
from sextante.parameters.ParameterBoolean import ParameterBoolean
from sextante.outputs.OutputHTML import OutputHTML
from sextante.outputs.OutputNumber import OutputNumber
from sextante.ftools import FToolsUtils as utils
class BasicStatisticsNumbers(GeoAlgorithm):
    """Compute basic statistics for a numeric field of a vector layer.

    The results are written to an HTML report and also published as
    individual numeric outputs (count, unique, min, max, range, sum,
    mean, median, standard deviation and coefficient of variation).
    """

    INPUT_LAYER = "INPUT_LAYER"
    FIELD_NAME = "FIELD_NAME"
    USE_SELECTION = "USE_SELECTION"
    OUTPUT_HTML_FILE = "OUTPUT_HTML_FILE"

    CV = "CV"
    MIN = "MIN"
    MAX = "MAX"
    SUM = "SUM"
    MEAN = "MEAN"
    COUNT = "COUNT"
    RANGE = "RANGE"
    MEDIAN = "MEDIAN"
    UNIQUE = "UNIQUE"
    STD_DEV = "STD_DEV"

    def getIcon(self):
        """Return the icon stored in the ``icons`` folder next to this module."""
        return QtGui.QIcon(os.path.dirname(__file__) + "/icons/basic_statistics.png")

    def defineCharacteristics(self):
        """Declare the algorithm name, group, parameters and outputs."""
        self.name = "Basic statistics for numeric fields"
        self.group = "Analysis tools"

        self.addParameter(ParameterVector(self.INPUT_LAYER, "Input vector layer", ParameterVector.VECTOR_TYPE_ANY, False))
        self.addParameter(ParameterTableField(self.FIELD_NAME, "Field to calculate statistics on", self.INPUT_LAYER, ParameterTableField.DATA_TYPE_NUMBER))
        self.addParameter(ParameterBoolean(self.USE_SELECTION, "Use selection", False))

        self.addOutput(OutputHTML(self.OUTPUT_HTML_FILE, "Statistics for numeric field"))
        self.addOutput(OutputNumber(self.CV, "Coefficient of Variation"))
        self.addOutput(OutputNumber(self.MIN, "Minimum value"))
        self.addOutput(OutputNumber(self.MAX, "Maximum value"))
        self.addOutput(OutputNumber(self.SUM, "Sum"))
        self.addOutput(OutputNumber(self.MEAN, "Mean value"))
        self.addOutput(OutputNumber(self.COUNT, "Count"))
        self.addOutput(OutputNumber(self.RANGE, "Range"))
        self.addOutput(OutputNumber(self.MEDIAN, "Median"))
        self.addOutput(OutputNumber(self.UNIQUE, "Number of unique values"))
        self.addOutput(OutputNumber(self.STD_DEV, "Standard deviation"))

    def processAlgorithm(self, progress):
        """Read the field values, compute the statistics and publish them.

        With no features (empty layer or empty selection) every statistic
        is reported as 0 instead of failing.
        """
        layer = QGisLayers.getObjectFromUri(self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)
        useSelection = self.getParameterValue(self.USE_SELECTION)
        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        index = layer.fieldNameIndex(fieldName)
        layer.select([index], QgsRectangle(), False)

        if useSelection:
            features = layer.selectedFeatures()
            expected = layer.selectedFeatureCount()
        else:
            features = self._layerFeatures(layer)
            expected = layer.featureCount()

        values = []
        for f in features:
            values.append(float(f.attributeMap()[index].toDouble()[0]))
            # Skip the percentage when the expected total is 0 (or unknown)
            # so an empty input cannot trigger a division by zero.
            if expected > 0:
                progress.setPercentage(int(len(values) * 100.0 / expected))

        # Statistics are based on the values that were actually read.
        count = len(values)
        rValue = 0
        cvValue = 0
        minValue = 0
        maxValue = 0
        sumValue = 0
        meanValue = 0
        medianValue = 0
        stdDevValue = 0

        if count > 0:
            minValue = min(values)
            maxValue = max(values)
            sumValue = sum(values)
            rValue = maxValue - minValue
            meanValue = sumValue / count

            # Standard deviation dividing by count; it does not depend on
            # the mean being non-zero, only the CV does.
            squares = sum([(v - meanValue) * (v - meanValue) for v in values])
            stdDevValue = math.sqrt(squares / count)
            if meanValue != 0.00:
                cvValue = stdDevValue / meanValue

            medianValue = self._median(values)

        uniqueValue = utils.getUniqueValuesCount(layer, index, useSelection)

        data = []
        data.append("Count: " + unicode(count))
        data.append("Unique values: " + unicode(uniqueValue))
        data.append("Minimum value: " + unicode(minValue))
        data.append("Maximum value: " + unicode(maxValue))
        data.append("Range: " + unicode(rValue))
        data.append("Sum: " + unicode(sumValue))
        data.append("Mean value: " + unicode(meanValue))
        data.append("Median value: " + unicode(medianValue))
        data.append("Standard deviation: " + unicode(stdDevValue))
        data.append("Coefficient of Variation: " + unicode(cvValue))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, minValue)
        self.setOutputValue(self.MAX, maxValue)
        self.setOutputValue(self.RANGE, rValue)
        self.setOutputValue(self.SUM, sumValue)
        self.setOutputValue(self.MEAN, meanValue)
        self.setOutputValue(self.MEDIAN, medianValue)
        self.setOutputValue(self.STD_DEV, stdDevValue)
        self.setOutputValue(self.CV, cvValue)

    def _layerFeatures(self, layer):
        """Yield the features of the layer's current ``select()`` one by one."""
        ft = QgsFeature()
        while layer.nextFeature(ft):
            yield ft

    def _median(self, values):
        """Return the median of a non-empty list without modifying it."""
        ordered = sorted(values)
        # Floor division keeps the index an integer on every Python version.
        middle = len(ordered) // 2
        if len(ordered) % 2 == 0:
            return 0.5 * (ordered[middle - 1] + ordered[middle])
        return ordered[middle]

    def createHTML(self, outputFile, algData):
        """Write every entry of ``algData`` to ``outputFile`` as a ``<p>`` element."""
        f = open(outputFile, "w")
        try:
            for s in algData:
                f.write("<p>" + str(s) + "</p>")
        finally:
            # Release the handle even when a write fails.
            f.close()

View File

@ -0,0 +1,165 @@
import os.path
from PyQt4 import QtGui
from PyQt4.QtCore import *
from qgis.core import *
from sextante.core.GeoAlgorithm import GeoAlgorithm
from sextante.core.QGisLayers import QGisLayers
from sextante.parameters.ParameterVector import ParameterVector
from sextante.parameters.ParameterTableField import ParameterTableField
from sextante.parameters.ParameterBoolean import ParameterBoolean
from sextante.outputs.OutputHTML import OutputHTML
from sextante.outputs.OutputNumber import OutputNumber
from sextante.ftools import FToolsUtils as utils
class BasicStatisticsStrings(GeoAlgorithm):
    """Compute basic statistics for a text field of a vector layer.

    Values are summarised by their length.  The results are written to an
    HTML report and also published as individual numeric outputs (minimum,
    maximum and mean length, filled, empty, count and unique).
    """

    INPUT_LAYER = "INPUT_LAYER"
    FIELD_NAME = "FIELD_NAME"
    USE_SELECTION = "USE_SELECTION"
    OUTPUT_HTML_FILE = "OUTPUT_HTML_FILE"

    MIN_LEN = "MIN_LEN"
    MAX_LEN = "MAX_LEN"
    MEAN_LEN = "MEAN_LEN"
    COUNT = "COUNT"
    EMPTY = "EMPTY"
    FILLED = "FILLED"
    UNIQUE = "UNIQUE"

    def getIcon(self):
        """Return the icon stored in the ``icons`` folder next to this module."""
        return QtGui.QIcon(os.path.dirname(__file__) + "/icons/basic_statistics.png")

    def defineCharacteristics(self):
        """Declare the algorithm name, group, parameters and outputs."""
        self.name = "Basic statistics for text fields"
        self.group = "Analysis tools"

        self.addParameter(ParameterVector(self.INPUT_LAYER, "Input vector layer", ParameterVector.VECTOR_TYPE_ANY, False))
        self.addParameter(ParameterTableField(self.FIELD_NAME, "Field to calculate statistics on", self.INPUT_LAYER, ParameterTableField.DATA_TYPE_STRING))
        self.addParameter(ParameterBoolean(self.USE_SELECTION, "Use selection", False))

        self.addOutput(OutputHTML(self.OUTPUT_HTML_FILE, "Statistics for text field"))
        self.addOutput(OutputNumber(self.MIN_LEN, "Minimum length"))
        self.addOutput(OutputNumber(self.MAX_LEN, "Maximum length"))
        self.addOutput(OutputNumber(self.MEAN_LEN, "Mean length"))
        self.addOutput(OutputNumber(self.COUNT, "Count"))
        self.addOutput(OutputNumber(self.EMPTY, "Number of empty values"))
        self.addOutput(OutputNumber(self.FILLED, "Number of non-empty values"))
        self.addOutput(OutputNumber(self.UNIQUE, "Number of unique values"))

    def processAlgorithm(self, progress):
        """Read the field values, compute the length statistics and publish them.

        With no features (empty layer or empty selection) every statistic
        is reported as 0 instead of failing.
        """
        layer = QGisLayers.getObjectFromUri(self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)
        useSelection = self.getParameterValue(self.USE_SELECTION)
        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        index = layer.fieldNameIndex(fieldName)
        layer.select([index], QgsRectangle(), False)

        if useSelection:
            features = layer.selectedFeatures()
            expected = layer.selectedFeatureCount()
        else:
            features = self._layerFeatures(layer)
            expected = layer.featureCount()

        countEmpty = 0
        countFilled = 0
        lengths = []
        for f in features:
            length = float(len(f.attributeMap()[index].toString()))
            lengths.append(length)
            if length != 0.00:
                countFilled += 1
            else:
                countEmpty += 1
            # Skip the percentage when the expected total is 0 (or unknown)
            # so an empty input cannot trigger a division by zero.
            if expected > 0:
                progress.setPercentage(int(len(lengths) * 100.0 / expected))

        # Statistics are based on the values that were actually read.
        count = len(lengths)
        minValue = 0
        maxValue = 0
        meanValue = 0
        if count > 0:
            minValue = min(lengths)
            maxValue = max(lengths)
            meanValue = sum(lengths) / float(count)

        uniqueValues = utils.getUniqueValuesCount(layer, index, useSelection)

        data = []
        data.append("Minimum length: " + unicode(minValue))
        data.append("Maximum length: " + unicode(maxValue))
        data.append("Mean length: " + unicode(meanValue))
        data.append("Filled: " + unicode(countFilled))
        data.append("Empty: " + unicode(countEmpty))
        data.append("Count: " + unicode(count))
        data.append("Unique: " + unicode(uniqueValues))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.MIN_LEN, minValue)
        self.setOutputValue(self.MAX_LEN, maxValue)
        self.setOutputValue(self.MEAN_LEN, meanValue)
        self.setOutputValue(self.FILLED, countFilled)
        self.setOutputValue(self.EMPTY, countEmpty)
        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValues)

    def _layerFeatures(self, layer):
        """Yield the features of the layer's current ``select()`` one by one."""
        ft = QgsFeature()
        while layer.nextFeature(ft):
            yield ft

    def createHTML(self, outputFile, algData):
        """Write every entry of ``algData`` to ``outputFile`` as a ``<p>`` element."""
        f = open(outputFile, "w")
        try:
            for s in algData:
                f.write("<p>" + str(s) + "</p>")
        finally:
            # Release the handle even when a write fails.
            f.close()

View File

@ -9,7 +9,8 @@ from sextante.ftools.SumLines import SumLines
from sextante.ftools.MeanCoords import MeanCoords
from sextante.ftools.UniqueValues import UniqueValues
from sextante.ftools.PointDistance import PointDistance
from sextante.ftools.BasicStatistics import BasicStatistics
from sextante.ftools.BasicStatisticsStrings import BasicStatisticsStrings
from sextante.ftools.BasicStatisticsNumbers import BasicStatisticsNumbers
from sextante.ftools.PointsInPolygon import PointsInPolygon
from sextante.ftools.LinesIntersection import LinesIntersection
from sextante.ftools.NearestNeighbourAnalysis import NearestNeighbourAnalysis
@ -48,9 +49,9 @@ class FToolsAlgorithmProvider(AlgorithmProvider):
def __init__(self):
AlgorithmProvider.__init__(self)
self.alglist = [SumLines(), PointsInPolygon(), BasicStatistics(),
NearestNeighbourAnalysis(), MeanCoords(), LinesIntersection(),
UniqueValues(), PointDistance(),
self.alglist = [SumLines(), PointsInPolygon(), BasicStatisticsStrings(),
BasicStatisticsNumbers(), NearestNeighbourAnalysis(),
MeanCoords(), LinesIntersection(), UniqueValues(), PointDistance(),
# data management
# geometry
ExportGeometryInfo(), Centroids(), Delaunay(), VoronoiPolygons(),
@ -80,4 +81,4 @@ class FToolsAlgorithmProvider(AlgorithmProvider):
return ["csv"]
def supportsNonFileBasedOutput(self):
return True
return True

View File

@ -74,3 +74,21 @@ def extractPoints( geom ):
points.extend(line)
return points
def getUniqueValuesCount(layer, fieldIndex, useSelection):
    """Return the number of distinct values in one attribute field.

    Values are compared by their string representation (``toString()``).

    layer        -- vector layer to read from
    fieldIndex   -- index of the attribute to inspect
    useSelection -- when true only the selected features are examined,
                    otherwise every feature of the layer
    """
    layer.select([fieldIndex], QgsRectangle(), False)

    # A set gives constant-time membership tests; the values are converted
    # to unicode so equal strings always collapse into a single entry.
    seen = set()
    if useSelection:
        for f in layer.selectedFeatures():
            seen.add(unicode(f.attributeMap()[fieldIndex].toString()))
    else:
        feat = QgsFeature()
        while layer.nextFeature(feat):
            seen.add(unicode(feat.attributeMap()[fieldIndex].toString()))
    return len(seen)