mirror of
				https://github.com/qgis/QGIS.git
				synced 2025-10-31 00:06:02 -04:00 
			
		
		
		
	Prevent changes to files that weren't changed between releases. This eases review of the changes between releases significantly.
		
			
				
	
	
		
			149 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			149 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| """
 | |
| ***************************************************************************
 | |
|     DeleteDuplicateGeometries.py
 | |
|     ---------------------
 | |
|     Date                 : May 2010
 | |
|     Copyright            : (C) 2010 by Michael Minn
 | |
|     Email                : pyqgis at michaelminn dot com
 | |
| ***************************************************************************
 | |
| *                                                                         *
 | |
| *   This program is free software; you can redistribute it and/or modify  *
 | |
| *   it under the terms of the GNU General Public License as published by  *
 | |
| *   the Free Software Foundation; either version 2 of the License, or     *
 | |
| *   (at your option) any later version.                                   *
 | |
| *                                                                         *
 | |
| ***************************************************************************
 | |
| """
 | |
| 
 | |
| __author__ = 'Michael Minn'
 | |
| __date__ = 'May 2010'
 | |
| __copyright__ = '(C) 2010, Michael Minn'
 | |
| 
 | |
| from qgis.core import (QgsFeatureRequest,
 | |
|                        QgsProcessingException,
 | |
|                        QgsFeatureSink,
 | |
|                        QgsSpatialIndex,
 | |
|                        QgsProcessingParameterFeatureSource,
 | |
|                        QgsProcessingParameterFeatureSink,
 | |
|                        QgsProcessingOutputNumber)
 | |
| from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm
 | |
| 
 | |
| 
 | |
| class DeleteDuplicateGeometries(QgisAlgorithm):
 | |
| 
 | |
|     INPUT = 'INPUT'
 | |
|     OUTPUT = 'OUTPUT'
 | |
|     RETAINED_COUNT = 'RETAINED_COUNT'
 | |
|     DUPLICATE_COUNT = 'DUPLICATE_COUNT'
 | |
| 
 | |
|     def group(self):
 | |
|         return self.tr('Vector general')
 | |
| 
 | |
|     def groupId(self):
 | |
|         return 'vectorgeneral'
 | |
| 
 | |
|     def tags(self):
 | |
|         return self.tr('drop,remove,same,points,coincident,overlapping,filter').split(',')
 | |
| 
 | |
|     def __init__(self):
 | |
|         super().__init__()
 | |
| 
 | |
|     def initAlgorithm(self, config=None):
 | |
|         self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT,
 | |
|                                                               self.tr('Input layer')))
 | |
|         self.addParameter(QgsProcessingParameterFeatureSink(self.OUTPUT, self.tr('Cleaned')))
 | |
| 
 | |
|         self.addOutput(QgsProcessingOutputNumber(self.RETAINED_COUNT, self.tr('Count of retained records')))
 | |
|         self.addOutput(QgsProcessingOutputNumber(self.DUPLICATE_COUNT, self.tr('Count of discarded duplicate records')))
 | |
| 
 | |
|     def name(self):
 | |
|         return 'deleteduplicategeometries'
 | |
| 
 | |
|     def displayName(self):
 | |
|         return self.tr('Delete duplicate geometries')
 | |
| 
 | |
|     def processAlgorithm(self, parameters, context, feedback):
 | |
|         source = self.parameterAsSource(parameters, self.INPUT, context)
 | |
|         if source is None:
 | |
|             raise QgsProcessingException(self.invalidSourceError(parameters, self.INPUT))
 | |
| 
 | |
|         (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
 | |
|                                                source.fields(), source.wkbType(), source.sourceCrs())
 | |
|         if sink is None:
 | |
|             raise QgsProcessingException(self.invalidSinkError(parameters, self.OUTPUT))
 | |
| 
 | |
|         features = source.getFeatures(QgsFeatureRequest().setSubsetOfAttributes([]))
 | |
| 
 | |
|         total = 100.0 / source.featureCount() if source.featureCount() else 0
 | |
|         geoms = dict()
 | |
|         null_geom_features = set()
 | |
|         index = QgsSpatialIndex()
 | |
|         for current, f in enumerate(features):
 | |
|             if feedback.isCanceled():
 | |
|                 break
 | |
| 
 | |
|             if not f.hasGeometry():
 | |
|                 null_geom_features.add(f.id())
 | |
|                 continue
 | |
| 
 | |
|             geoms[f.id()] = f.geometry()
 | |
|             index.addFeature(f)
 | |
| 
 | |
|             feedback.setProgress(int(0.10 * current * total)) # takes about 10% of time
 | |
| 
 | |
|         # start by assuming everything is unique, and chop away at this list
 | |
|         unique_features = dict(geoms)
 | |
| 
 | |
|         current = 0
 | |
|         removed = 0
 | |
|         for feature_id, geometry in geoms.items():
 | |
|             if feedback.isCanceled():
 | |
|                 break
 | |
| 
 | |
|             if feature_id not in unique_features:
 | |
|                 # feature was already marked as a duplicate
 | |
|                 continue
 | |
| 
 | |
|             candidates = index.intersects(geometry.boundingBox())
 | |
|             candidates.remove(feature_id)
 | |
| 
 | |
|             for candidate_id in candidates:
 | |
|                 if candidate_id not in unique_features:
 | |
|                     # candidate already marked as a duplicate (not sure if this is possible,
 | |
|                     # since it would mean the current feature would also have to be a duplicate!
 | |
|                     # but let's be safe!)
 | |
|                     continue
 | |
| 
 | |
|                 if geometry.isGeosEqual(geoms[candidate_id]):
 | |
|                     # candidate is a duplicate of feature
 | |
|                     del unique_features[candidate_id]
 | |
|                     removed += 1
 | |
| 
 | |
|             current += 1
 | |
|             feedback.setProgress(int(0.80 * current * total) + 10)  # takes about 80% of time
 | |
| 
 | |
|         # now, fetch all the feature attributes for the unique features only
 | |
|         # be super-smart and don't re-fetch geometries
 | |
|         distinct_geoms = set(unique_features.keys())
 | |
|         output_feature_ids = distinct_geoms.union(null_geom_features)
 | |
|         total = 100.0 / len(output_feature_ids) if output_feature_ids else 1
 | |
| 
 | |
|         request = QgsFeatureRequest().setFilterFids(list(output_feature_ids)).setFlags(QgsFeatureRequest.NoGeometry)
 | |
|         for current, f in enumerate(source.getFeatures(request)):
 | |
|             if feedback.isCanceled():
 | |
|                 break
 | |
| 
 | |
|             # use already fetched geometry
 | |
|             if f.id() not in null_geom_features:
 | |
|                 f.setGeometry(unique_features[f.id()])
 | |
|             sink.addFeature(f, QgsFeatureSink.FastInsert)
 | |
| 
 | |
|             feedback.setProgress(int(0.10 * current * total) + 90) # takes about 10% of time
 | |
| 
 | |
|         feedback.pushInfo(self.tr('{} duplicate features removed'.format(removed)))
 | |
|         return {self.OUTPUT: dest_id,
 | |
|                 self.DUPLICATE_COUNT: removed,
 | |
|                 self.RETAINED_COUNT: len(output_feature_ids)}
 |