QGIS/python/plugins/processing/algs/qgis/SpatialJoinSummary.py
2017-10-26 07:06:34 +10:00

349 lines
15 KiB
Python

# -*- coding: utf-8 -*-
"""
***************************************************************************
SpatialJoinSummary.py
---------------------
Date : September 2017
Copyright : (C) 2017 by Nyall Dawson
Email : nyall dot dawson at gmail dot com
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************
"""
from builtins import range
# Module metadata: author, creation date and copyright notice
__author__ = 'Nyall Dawson'
__date__ = 'September 2017'
__copyright__ = '(C) 2017, Nyall Dawson'
# This will get replaced with a git SHA1 when you do a git archive
__revision__ = '$Format:%H$'
import os
from collections import defaultdict
from qgis.PyQt.QtGui import QIcon
from qgis.PyQt.QtCore import QVariant
from qgis.core import (NULL,
QgsField,
QgsFields,
QgsFeatureSink,
QgsFeatureRequest,
QgsGeometry,
QgsCoordinateTransform,
QgsStatisticalSummary,
QgsDateTimeStatisticalSummary,
QgsStringStatisticalSummary,
QgsProcessing,
QgsProcessingUtils,
QgsProcessingParameterBoolean,
QgsProcessingParameterFeatureSource,
QgsProcessingParameterEnum,
QgsProcessingParameterField,
QgsProcessingParameterFeatureSink)
from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm
from processing.tools import vector
# Directory two levels above the one containing this file; used as the root
# for locating bundled resources such as icons
_this_dir = os.path.dirname(__file__)
pluginPath = os.path.dirname(os.path.dirname(_this_dir))
class SpatialJoinSummary(QgisAlgorithm):
    """
    Processing algorithm which joins attributes by location and summarises
    them.

    For every feature of the input layer, the features of the join layer
    which satisfy at least one of the selected geometric predicates are
    collected, and summary statistics of their attributes are appended to
    the input feature's attributes.
    """

    # Parameter / output identifiers
    INPUT = "INPUT"
    JOIN = "JOIN"
    PREDICATE = "PREDICATE"
    JOIN_FIELDS = "JOIN_FIELDS"
    SUMMARIES = "SUMMARIES"
    DISCARD_NONMATCHING = "DISCARD_NONMATCHING"
    OUTPUT = "OUTPUT"

    def icon(self):
        """
        Returns the icon shown for the algorithm.
        """
        return QIcon(os.path.join(pluginPath, 'images', 'ftools', 'join_location.png'))

    def group(self):
        """
        Returns the translated name of the group the algorithm belongs to.
        """
        return self.tr('Vector general')

    def __init__(self):
        super().__init__()

    def initAlgorithm(self, config=None):
        """
        Declares the available predicates and statistics, and registers the
        algorithm's parameters.
        """
        # (geometry engine method name, translated label) pairs; the position
        # of each entry is the value of the PREDICATE enum parameter
        self.predicates = (
            ('intersects', self.tr('intersects')),
            ('contains', self.tr('contains')),
            ('equals', self.tr('equals')),
            ('touches', self.tr('touches')),
            ('overlaps', self.tr('overlaps')),
            ('within', self.tr('within')),
            ('crosses', self.tr('crosses')))

        # (summary key, translated label) pairs; the position of each entry
        # is the value of the SUMMARIES enum parameter
        self.statistics = [
            ('count', self.tr('count')),
            ('unique', self.tr('unique')),
            ('min', self.tr('min')),
            ('max', self.tr('max')),
            ('range', self.tr('range')),
            ('sum', self.tr('sum')),
            ('mean', self.tr('mean')),
            ('median', self.tr('median')),
            ('stddev', self.tr('stddev')),
            ('minority', self.tr('minority')),
            ('majority', self.tr('majority')),
            ('q1', self.tr('q1')),
            ('q3', self.tr('q3')),
            ('iqr', self.tr('iqr')),
            ('empty', self.tr('empty')),
            ('filled', self.tr('filled')),
            ('min_length', self.tr('min_length')),
            ('max_length', self.tr('max_length')),
            ('mean_length', self.tr('mean_length'))]

        self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT,
                                                              self.tr('Input layer'),
                                                              [QgsProcessing.TypeVectorAnyGeometry]))
        self.addParameter(QgsProcessingParameterFeatureSource(self.JOIN,
                                                              self.tr('Join layer'),
                                                              [QgsProcessing.TypeVectorAnyGeometry]))

        predicate = QgsProcessingParameterEnum(self.PREDICATE,
                                               self.tr('Geometric predicate'),
                                               options=[p[1] for p in self.predicates],
                                               allowMultiple=True, defaultValue=[0])
        predicate.setMetadata({
            'widget_wrapper': {
                'class': 'processing.gui.wrappers.EnumWidgetWrapper',
                'useCheckBoxes': True,
                'columns': 2}})
        self.addParameter(predicate)

        self.addParameter(QgsProcessingParameterField(self.JOIN_FIELDS,
                                                      self.tr('Fields to summarise (leave empty to use all fields)'),
                                                      parentLayerParameterName=self.JOIN,
                                                      allowMultiple=True, optional=True))
        self.addParameter(QgsProcessingParameterEnum(self.SUMMARIES,
                                                     self.tr(
                                                         'Summaries to calculate (leave empty to use all available)'),
                                                     options=[p[1] for p in self.statistics],
                                                     allowMultiple=True, optional=True))
        self.addParameter(QgsProcessingParameterBoolean(self.DISCARD_NONMATCHING,
                                                        self.tr('Discard records which could not be joined'),
                                                        defaultValue=False))
        self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT,
                                                            self.tr('Joined layer')))

    def name(self):
        """
        Returns the algorithm's identifier.
        """
        return 'joinbylocationsummary'

    def displayName(self):
        """
        Returns the translated, user-visible name of the algorithm.
        """
        return self.tr('Join attributes by location (summary)')

    def tags(self):
        """
        Returns the list of search tags for the algorithm.
        """
        return self.tr(
            "summary,aggregate,join,intersects,intersecting,touching,within,contains,overlaps,relation,spatial").split(
            ',')

    def processAlgorithm(self, parameters, context, feedback):
        """
        Runs the join.

        Input features are written to the sink with one extra attribute per
        (joined field, selected summary) combination. Features without
        geometry, or without any matching join feature, are written
        unchanged unless DISCARD_NONMATCHING is set, in which case they are
        skipped.

        Returns a dictionary mapping OUTPUT to the sink's destination id.
        """
        source = self.parameterAsSource(parameters, self.INPUT, context)
        join_source = self.parameterAsSource(parameters, self.JOIN, context)
        join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
        discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
        summaries = [self.statistics[i][0] for i in
                     sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))]

        if not summaries:
            # none selected, so use all
            summaries = [s[0] for s in self.statistics]

        source_fields = source.fields()
        fields_to_join = QgsFields()
        join_field_indexes = []
        if not join_fields:
            # no fields selected, use all
            join_fields = [join_source.fields().at(i).name() for i in range(len(join_source.fields()))]

        def addFieldKeepType(original, stat):
            """
            Adds a field to the output, keeping the same data type as the original
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            fields_to_join.append(field)

        def addField(original, stat, field_type):
            """
            Adds a field to the output, with a specified type
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            field.setType(field_type)
            if field_type == QVariant.Double:
                field.setLength(20)
                field.setPrecision(6)
            fields_to_join.append(field)

        # Summary tables, one per kind of joined field. Each entry is
        # (summary key, output field type or None to keep the joined field's
        # own type, name of the statistics-object method giving the value).
        # A method name of None marks a value which is computed explicitly
        # in the summarising code further down.
        numeric_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'variety'),
            ('min', QVariant.Double, 'min'),
            ('max', QVariant.Double, 'max'),
            ('range', QVariant.Double, 'range'),
            ('sum', QVariant.Double, 'sum'),
            ('mean', QVariant.Double, 'mean'),
            ('median', QVariant.Double, 'median'),
            ('stddev', QVariant.Double, 'stDev'),
            ('minority', QVariant.Double, 'minority'),
            ('majority', QVariant.Double, 'majority'),
            ('q1', QVariant.Double, 'firstQuartile'),
            ('q3', QVariant.Double, 'thirdQuartile'),
            ('iqr', QVariant.Double, 'interQuartileRange')
        )

        datetime_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int, None),
            ('min', None, None),
            ('max', None, None)
        )

        string_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int, None),
            ('min', None, 'min'),
            ('max', None, 'max'),
            ('min_length', QVariant.Int, 'minLength'),
            ('max_length', QVariant.Int, 'maxLength'),
            ('mean_length', QVariant.Double, 'meanLength')
        )

        # field_types[i] is the kind ('numeric', 'datetime' or 'string') of
        # the field at join_field_indexes[i]; both lists grow together
        field_types = []
        for field_name in join_fields:
            idx = join_source.fields().lookupField(field_name)
            if idx < 0:
                # unknown field name, nothing to summarise
                continue

            join_field_indexes.append(idx)
            join_field = join_source.fields().at(idx)
            if join_field.isNumeric():
                field_types.append('numeric')
                field_list = numeric_fields
            elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
                field_types.append('datetime')
                field_list = datetime_fields
            else:
                field_types.append('string')
                field_list = string_fields

            for key, out_type, _getter in field_list:
                if key not in summaries:
                    continue
                if out_type is not None:
                    addField(join_field, key, out_type)
                else:
                    addFieldKeepType(join_field, key)

        out_fields = QgsProcessingUtils.combineFields(source_fields, fields_to_join)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                               out_fields, source.wkbType(), source.sourceCrs())

        # do the join
        predicates = [self.predicates[i][0] for i in self.parameterAsEnums(parameters, self.PREDICATE, context)]

        features = source.getFeatures()
        total = 100.0 / source.featureCount() if source.featureCount() else 0

        # bounding box transform
        bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs())

        for current, feature in enumerate(features):
            if feedback.isCanceled():
                break

            if not feature.hasGeometry():
                if not discard_nomatch:
                    sink.addFeature(feature, QgsFeatureSink.FastInsert)
                continue

            bbox = bbox_transform.transformBoundingBox(feature.geometry().boundingBox())
            # the geometry engine is only created once a candidate is found
            engine = None

            # one list of joined attribute values per matching join feature
            values = []

            request = (QgsFeatureRequest()
                       .setFilterRect(bbox)
                       .setSubsetOfAttributes(join_field_indexes)
                       .setDestinationCrs(source.sourceCrs()))
            for test_feat in join_source.getFeatures(request):
                if feedback.isCanceled():
                    break

                if engine is None:
                    engine = QgsGeometry.createGeometryEngine(feature.geometry().constGet())
                    engine.prepareGeometry()

                test_geometry = test_feat.geometry()
                # a join feature matches as soon as any selected predicate holds
                if any(getattr(engine, predicate)(test_geometry.constGet()) for predicate in predicates):
                    # attributes are only fetched for actual matches
                    test_attributes = test_feat.attributes()
                    values.append([test_attributes[a] for a in join_field_indexes])

            feedback.setProgress(int(current * total))

            if not values:
                if not discard_nomatch:
                    sink.addFeature(feature, QgsFeatureSink.FastInsert)
                continue

            attrs = feature.attributes()
            # summary values are appended in the same order in which the
            # corresponding output fields were created above
            for i, field_type in enumerate(field_types):
                attribute_values = [v[i] for v in values]
                if field_type == 'numeric':
                    stat = QgsStatisticalSummary()
                    for v in attribute_values:
                        stat.addVariant(v)
                    stat.finalize()
                    for key, _out_type, getter in numeric_fields:
                        if key in summaries:
                            attrs.append(getattr(stat, getter)())
                elif field_type == 'datetime':
                    stat = QgsDateTimeStatisticalSummary()
                    stat.calculate(attribute_values)
                    for key, _out_type, getter in datetime_fields:
                        if key not in summaries:
                            continue
                        if key == 'filled':
                            attrs.append(stat.count() - stat.countMissing())
                        elif key == 'min':
                            attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
                        elif key == 'max':
                            attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
                        else:
                            attrs.append(getattr(stat, getter)())
                else:
                    stat = QgsStringStatisticalSummary()
                    for v in attribute_values:
                        # NULL values are passed to the summary as empty strings
                        if v == NULL:
                            stat.addString('')
                        else:
                            stat.addString(str(v))
                    stat.finalize()
                    for key, _out_type, getter in string_fields:
                        if key not in summaries:
                            continue
                        if key == 'filled':
                            attrs.append(stat.count() - stat.countMissing())
                        else:
                            attrs.append(getattr(stat, getter)())

            feature.setAttributes(attrs)
            sink.addFeature(feature, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}