mirror of
https://github.com/qgis/QGIS.git
synced 2025-02-28 00:17:30 -05:00
Prevent changes to files that weren't changed between releases. This eases review of the changes between releases significantly.
149 lines
5.9 KiB
Python
149 lines
5.9 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
***************************************************************************
|
|
DeleteDuplicateGeometries.py
|
|
---------------------
|
|
Date : May 2010
|
|
Copyright : (C) 2010 by Michael Minn
|
|
Email : pyqgis at michaelminn dot com
|
|
***************************************************************************
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU General Public License as published by *
|
|
* the Free Software Foundation; either version 2 of the License, or *
|
|
* (at your option) any later version. *
|
|
* *
|
|
***************************************************************************
|
|
"""
|
|
|
|
__author__ = 'Michael Minn'
|
|
__date__ = 'May 2010'
|
|
__copyright__ = '(C) 2010, Michael Minn'
|
|
|
|
from qgis.core import (QgsFeatureRequest,
|
|
QgsProcessingException,
|
|
QgsFeatureSink,
|
|
QgsSpatialIndex,
|
|
QgsProcessingParameterFeatureSource,
|
|
QgsProcessingParameterFeatureSink,
|
|
QgsProcessingOutputNumber)
|
|
from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm
|
|
|
|
|
|
class DeleteDuplicateGeometries(QgisAlgorithm):
|
|
|
|
INPUT = 'INPUT'
|
|
OUTPUT = 'OUTPUT'
|
|
RETAINED_COUNT = 'RETAINED_COUNT'
|
|
DUPLICATE_COUNT = 'DUPLICATE_COUNT'
|
|
|
|
def group(self):
|
|
return self.tr('Vector general')
|
|
|
|
def groupId(self):
|
|
return 'vectorgeneral'
|
|
|
|
def tags(self):
|
|
return self.tr('drop,remove,same,points,coincident,overlapping,filter').split(',')
|
|
|
|
def __init__(self):
|
|
super().__init__()
|
|
|
|
def initAlgorithm(self, config=None):
|
|
self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT,
|
|
self.tr('Input layer')))
|
|
self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT, self.tr('Cleaned')))
|
|
|
|
self.addOutput(QgsProcessingOutputNumber(self.RETAINED_COUNT, self.tr('Count of retained records')))
|
|
self.addOutput(QgsProcessingOutputNumber(self.DUPLICATE_COUNT, self.tr('Count of discarded duplicate records')))
|
|
|
|
def name(self):
|
|
return 'deleteduplicategeometries'
|
|
|
|
def displayName(self):
|
|
return self.tr('Delete duplicate geometries')
|
|
|
|
def processAlgorithm(self, parameters, context, feedback):
|
|
source = self.parameterAsSource(parameters, self.INPUT, context)
|
|
if source is None:
|
|
raise QgsProcessingException(self.invalidSourceError(parameters, self.INPUT))
|
|
|
|
(sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
|
|
source.fields(), source.wkbType(), source.sourceCrs())
|
|
if sink is None:
|
|
raise QgsProcessingException(self.invalidSinkError(parameters, self.OUTPUT))
|
|
|
|
features = source.getFeatures(QgsFeatureRequest().setSubsetOfAttributes([]))
|
|
|
|
total = 100.0 / source.featureCount() if source.featureCount() else 0
|
|
geoms = dict()
|
|
null_geom_features = set()
|
|
index = QgsSpatialIndex()
|
|
for current, f in enumerate(features):
|
|
if feedback.isCanceled():
|
|
break
|
|
|
|
if not f.hasGeometry():
|
|
null_geom_features.add(f.id())
|
|
continue
|
|
|
|
geoms[f.id()] = f.geometry()
|
|
index.addFeature(f)
|
|
|
|
feedback.setProgress(int(0.10 * current * total)) # takes about 10% of time
|
|
|
|
# start by assuming everything is unique, and chop away at this list
|
|
unique_features = dict(geoms)
|
|
|
|
current = 0
|
|
removed = 0
|
|
for feature_id, geometry in geoms.items():
|
|
if feedback.isCanceled():
|
|
break
|
|
|
|
if feature_id not in unique_features:
|
|
# feature was already marked as a duplicate
|
|
continue
|
|
|
|
candidates = index.intersects(geometry.boundingBox())
|
|
candidates.remove(feature_id)
|
|
|
|
for candidate_id in candidates:
|
|
if candidate_id not in unique_features:
|
|
# candidate already marked as a duplicate (not sure if this is possible,
|
|
# since it would mean the current feature would also have to be a duplicate!
|
|
# but let's be safe!)
|
|
continue
|
|
|
|
if geometry.isGeosEqual(geoms[candidate_id]):
|
|
# candidate is a duplicate of feature
|
|
del unique_features[candidate_id]
|
|
removed += 1
|
|
|
|
current += 1
|
|
feedback.setProgress(int(0.80 * current * total) + 10) # takes about 80% of time
|
|
|
|
# now, fetch all the feature attributes for the unique features only
|
|
# be super-smart and don't re-fetch geometries
|
|
distinct_geoms = set(unique_features.keys())
|
|
output_feature_ids = distinct_geoms.union(null_geom_features)
|
|
total = 100.0 / len(output_feature_ids) if output_feature_ids else 1
|
|
|
|
request = QgsFeatureRequest().setFilterFids(list(output_feature_ids)).setFlags(QgsFeatureRequest.NoGeometry)
|
|
for current, f in enumerate(source.getFeatures(request)):
|
|
if feedback.isCanceled():
|
|
break
|
|
|
|
# use already fetched geometry
|
|
if f.id() not in null_geom_features:
|
|
f.setGeometry(unique_features[f.id()])
|
|
sink.addFeature(f, QgsFeatureSink.FastInsert)
|
|
|
|
feedback.setProgress(int(0.10 * current * total) + 90) # takes about 10% of time
|
|
|
|
feedback.pushInfo(self.tr('{} duplicate features removed'.format(removed)))
|
|
return {self.OUTPUT: dest_id,
|
|
self.DUPLICATE_COUNT: removed,
|
|
self.RETAINED_COUNT: len(output_feature_ids)}
|