mirror of
https://github.com/qgis/QGIS.git
synced 2025-04-17 00:04:02 -04:00
[processing] Fix inefficiencies in Delete Duplicate Geometries algorithm
...and make the progress bar more accurate. Use a spatial index to avoid comparing every feature to every other feature; only features with intersecting bounding boxes are compared instead. Also optimise feature requests and loop logic. Benchmarks: point layer, 6k features: before 30 seconds, after 0.15 seconds; point layer, 45k features: before > 10 minutes, after 7 seconds. Fixes #19973.
This commit is contained in:
parent
6110931f8a
commit
9698444f4a
@ -70,6 +70,7 @@ class DeleteDuplicateGeometries(QgisAlgorithm):
|
|||||||
raise QgsProcessingException(self.invalidSinkError(parameters, self.OUTPUT))
|
raise QgsProcessingException(self.invalidSinkError(parameters, self.OUTPUT))
|
||||||
|
|
||||||
features = source.getFeatures(QgsFeatureRequest().setSubsetOfAttributes([]))
|
features = source.getFeatures(QgsFeatureRequest().setSubsetOfAttributes([]))
|
||||||
|
|
||||||
total = 100.0 / source.featureCount() if source.featureCount() else 0
|
total = 100.0 / source.featureCount() if source.featureCount() else 0
|
||||||
geoms = dict()
|
geoms = dict()
|
||||||
index = QgsSpatialIndex()
|
index = QgsSpatialIndex()
|
||||||
@ -78,28 +79,52 @@ class DeleteDuplicateGeometries(QgisAlgorithm):
|
|||||||
break
|
break
|
||||||
|
|
||||||
geoms[f.id()] = f.geometry()
|
geoms[f.id()] = f.geometry()
|
||||||
#index.insertFeature
|
index.addFeature(f)
|
||||||
feedback.setProgress(int(current * total))
|
|
||||||
|
|
||||||
cleaned = dict(geoms)
|
feedback.setProgress(int(0.10 * current * total)) # takes about 10% of time
|
||||||
|
|
||||||
for i, g in list(geoms.items()):
|
# start by assuming everything is unique, and chop away at this list
|
||||||
|
unique_features = dict(geoms)
|
||||||
|
|
||||||
|
current = 0
|
||||||
|
for feature_id, geometry in geoms.items():
|
||||||
if feedback.isCanceled():
|
if feedback.isCanceled():
|
||||||
break
|
break
|
||||||
|
|
||||||
for j in list(cleaned.keys()):
|
if feature_id not in unique_features:
|
||||||
if i == j or i not in cleaned:
|
# feature was already marked as a duplicate
|
||||||
continue
|
continue
|
||||||
if g.isGeosEqual(cleaned[j]):
|
|
||||||
del cleaned[j]
|
|
||||||
|
|
||||||
total = 100.0 / len(cleaned) if cleaned else 1
|
candidates = index.intersects(geometry.boundingBox())
|
||||||
request = QgsFeatureRequest().setFilterFids(list(cleaned.keys()))
|
candidates.remove(feature_id)
|
||||||
|
|
||||||
|
for candidate_id in candidates:
|
||||||
|
if candidate_id not in unique_features:
|
||||||
|
# candidate already marked as a duplicate (not sure if this is possible,
|
||||||
|
# since it would mean the current feature would also have to be a duplicate!
|
||||||
|
# but let's be safe!)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if geometry.isGeosEqual(geoms[candidate_id]):
|
||||||
|
# candidate is a duplicate of feature
|
||||||
|
del unique_features[candidate_id]
|
||||||
|
|
||||||
|
current += 1
|
||||||
|
feedback.setProgress(int(0.80 * current * total) + 10) # takes about 80% of time
|
||||||
|
|
||||||
|
total = 100.0 / len(unique_features) if unique_features else 1
|
||||||
|
|
||||||
|
# now, fetch all the feature attributes for the unique features only
|
||||||
|
# be super-smart and don't re-fetch geometries
|
||||||
|
request = QgsFeatureRequest().setFilterFids(list(unique_features.keys())).setFlags(QgsFeatureRequest.NoGeometry)
|
||||||
for current, f in enumerate(source.getFeatures(request)):
|
for current, f in enumerate(source.getFeatures(request)):
|
||||||
if feedback.isCanceled():
|
if feedback.isCanceled():
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# use already fetched geometry
|
||||||
|
f.setGeometry(unique_features[f.id()])
|
||||||
sink.addFeature(f, QgsFeatureSink.FastInsert)
|
sink.addFeature(f, QgsFeatureSink.FastInsert)
|
||||||
feedback.setProgress(int(current * total))
|
|
||||||
|
feedback.setProgress(int(0.10 * current * total) + 90) # takes about 10% of time
|
||||||
|
|
||||||
return {self.OUTPUT: dest_id}
|
return {self.OUTPUT: dest_id}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user