diff --git a/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.gml b/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.gml
new file mode 100644
index 00000000000..a3475767570
--- /dev/null
+++ b/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.gml
@@ -0,0 +1,104 @@
+
+
+
+
+ 0-5
+ 83
+
+
+
+
+
+ 0,-1
+ 9
+ 0
+ 2017/08/13
+ 1
+ 2
+
+
+
+
+ 3,3
+ 2
+ 1
+ 2017/09/13
+ 1
+ 2
+
+
+
+
+ 1,1
+ 1
+ 2
+
+
+
+
+
+
+
+ 5,2
+ 4
+ 2
+ 2001/01/01
+ 2
+ 1
+
+
+
+
+ 2,2
+ 3
+ 0
+ 2005/09/13
+ 3
+ 1
+
+
+
+
+ 0,-5
+ 6
+ 0
+
+
+
+
+
+
+
+ 4,1
+ 5
+ 1
+ 2014/03/13
+ 4
+ 3
+
+
+
+
+ 7,-1
+ 8
+ 0
+ 2011/09/13
+ 4
+ 3
+
+
+
+
+ 8,-1
+ 7
+ 0
+ 2010/10/10
+ 4
+ 3
+
+
+
diff --git a/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.xsd b/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.xsd
new file mode 100644
index 00000000000..c19cc252774
--- /dev/null
+++ b/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.xsd
@@ -0,0 +1,53 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/python/plugins/processing/tests/testdata/qgis_algorithm_tests4.yaml b/python/plugins/processing/tests/testdata/qgis_algorithm_tests4.yaml
index 6773d317e72..907a1285bef 100644
--- a/python/plugins/processing/tests/testdata/qgis_algorithm_tests4.yaml
+++ b/python/plugins/processing/tests/testdata/qgis_algorithm_tests4.yaml
@@ -878,6 +878,23 @@ tests:
name: expected/dbscan_multiple_clusters.gml
type: vector
+ - algorithm: native:stdbscanclustering # regression test for the ST-DBSCAN clustering algorithm
+ name: ST-DBScan multiple clusters
+ params:
+ DATETIME_FIELD: date # attribute of the input layer that supplies the timestamps
+ DBSCAN*: false
+ EPS: 5.0 # maximum distance between clustered points
+ EPS2: 1000.0 # maximum date/time interval between clustered points, in days
+ FIELD_NAME: CLUSTER_ID
+ INPUT:
+ name: custom/points_with_date.shp
+ type: vector
+ MIN_SIZE: 1
+ results:
+ OUTPUT:
+ name: expected/stdbscan_multiple_clusters.gml # expected output fixture
+ type: vector
+
- algorithm: qgis:rastersampling
name: Single band raster
params:
diff --git a/src/analysis/CMakeLists.txt b/src/analysis/CMakeLists.txt
index e7cab14c652..7c13f945287 100644
--- a/src/analysis/CMakeLists.txt
+++ b/src/analysis/CMakeLists.txt
@@ -198,6 +198,7 @@ set(QGIS_ANALYSIS_SRCS
processing/qgsalgorithmsplitlinesbylength.cpp
processing/qgsalgorithmsplitvectorlayer.cpp
processing/qgsalgorithmsplitwithlines.cpp
+ processing/qgsalgorithmstdbscanclustering.cpp
processing/qgsalgorithmstringconcatenation.cpp
processing/qgsalgorithmswapxy.cpp
processing/qgsalgorithmsubdivide.cpp
diff --git a/src/analysis/processing/qgsalgorithmdbscanclustering.cpp b/src/analysis/processing/qgsalgorithmdbscanclustering.cpp
index f867c0b9f47..e5d0b7a8935 100644
--- a/src/analysis/processing/qgsalgorithmdbscanclustering.cpp
+++ b/src/analysis/processing/qgsalgorithmdbscanclustering.cpp
@@ -38,7 +38,7 @@ QString QgsDbscanClusteringAlgorithm::shortDescription() const
QStringList QgsDbscanClusteringAlgorithm::tags() const
{
- return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' );
+ return QObject::tr( "clustering,clusters,density,based,points,distance" ).split( ',' );
}
QString QgsDbscanClusteringAlgorithm::group() const
@@ -113,7 +113,8 @@ QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
- const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
+ const double eps1 = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
+ const double eps2 = parameterAsDouble( parameters, QStringLiteral( "EPS2" ), context ) * 24 * 60 * 60;
const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral( "DBSCAN*" ), context );
QgsFields outputFields = source->fields();
@@ -135,13 +136,31 @@ QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p
if ( feedback->isCanceled() )
return QVariantMap();
- // dbscan!
+ std::unordered_map< QgsFeatureId, QDateTime> idToDateTime;
+ const QString dateTimeFieldName = parameterAsString( parameters, QStringLiteral( "DATETIME_FIELD" ), context );
+ if ( !dateTimeFieldName.isEmpty() )
+ {
+ const int dateTimefieldIndex = source->fields().lookupField( dateTimeFieldName );
+ if ( dateTimefieldIndex == -1 )
+ throw QgsProcessingException( QObject::tr( "Datetime field missing" ) );
+
+ // fetch temporal values
+ feedback->pushInfo( QObject::tr( "Fetching temporal values" ) );
+ QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() << dateTimefieldIndex ), QgsProcessingFeatureSource::FlagSkipGeometryValidityChecks );
+ QgsFeature feature;
+ while ( features.nextFeature( feature ) )
+ {
+ idToDateTime[ feature.id() ] = feature.attributes().at( dateTimefieldIndex ).toDateTime();
+ }
+ }
+
+ // stdbscan!
feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
std::unordered_map< QgsFeatureId, int> idToCluster;
idToCluster.reserve( index.size() );
- QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
const long featureCount = source->featureCount();
- dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, feedback );
+ QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
+ stdbscan( minSize, eps1, eps2, borderPointsAreNoise, featureCount, features, index, idToCluster, idToDateTime, feedback );
// cluster size
std::unordered_map< int, int> clusterSize;
@@ -182,14 +201,15 @@ QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p
return outputs;
}
-
-void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
- const double eps,
+void QgsDbscanClusteringAlgorithm::stdbscan( const std::size_t minSize,
+ const double eps1,
+ const double eps2,
const bool borderPointsAreNoise,
const long featureCount,
QgsFeatureIterator features,
QgsSpatialIndexKDBush &index,
std::unordered_map< QgsFeatureId, int> &idToCluster,
+ std::unordered_map< QgsFeatureId, QDateTime> &idToDateTime,
QgsProcessingFeedback *feedback )
{
const double step = featureCount > 0 ? 90.0 / featureCount : 1;
@@ -231,13 +251,22 @@ void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
continue;
}
+ if ( !idToDateTime.empty() && !idToDateTime[ feat.id() ].isValid() )
+ {
+ // timestamps were loaded but this feature has no valid one: report it (the message only has a %1 placeholder, so only the id is substituted), advance progress and skip the feature
+ feedback->reportError( QObject::tr( "Feature %1 is missing a valid datetime value." ).arg( feat.id() ) );
+ feedback->setProgress( ++i * step );
+ continue;
+ }
+
std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
if ( minSize > 1 )
{
- index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data )
+ index.within( point, eps1, [&within, pointId = feat.id(), &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data )
{
- within.insert( data );
+ if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ pointId ].secsTo( idToDateTime[ data.id ] ) ) <= eps2 ) )
+ within.insert( data );
} );
if ( within.size() < minSize )
continue;
@@ -278,10 +307,12 @@ void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
QgsPointXY point2 = j.point();
std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
- index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data )
+ index.within( point2, eps1, [&within2, point2Id = j.id, &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data )
{
- within2.insert( data );
+ if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ point2Id ].secsTo( idToDateTime[ data.id ] ) ) <= eps2 ) )
+ within2.insert( data );
} );
+
if ( within2.size() >= minSize )
{
// expand neighbourhood
diff --git a/src/analysis/processing/qgsalgorithmdbscanclustering.h b/src/analysis/processing/qgsalgorithmdbscanclustering.h
index 3ce8e70a0a9..4c1aec68286 100644
--- a/src/analysis/processing/qgsalgorithmdbscanclustering.h
+++ b/src/analysis/processing/qgsalgorithmdbscanclustering.h
@@ -54,14 +54,18 @@ class ANALYSIS_EXPORT QgsDbscanClusteringAlgorithm : public QgsProcessingAlgorit
QVariantMap processAlgorithm( const QVariantMap ¶meters,
QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
private:
- static void dbscan( std::size_t minSize,
- double eps,
- bool borderPointsAreNoise,
- long featureCount,
- QgsFeatureIterator features,
- QgsSpatialIndexKDBush &index,
- std::unordered_map< QgsFeatureId, int> &idToCluster,
- QgsProcessingFeedback *feedback );
+
+ static void stdbscan( std::size_t minSize, // neighbourhood size threshold that the number of neighbours found is compared against
+ const double eps1, // spatial search radius handed to the index's within() query
+ const double eps2, // largest allowed absolute time gap between neighbours, in seconds (compared with QDateTime::secsTo)
+ bool borderPointsAreNoise, // value of the "DBSCAN*" parameter
+ long featureCount, // used to scale progress reporting
+ QgsFeatureIterator features, // features to cluster
+ QgsSpatialIndexKDBush &index, // spatial index queried for neighbours
+ std::unordered_map< QgsFeatureId, int> &idToCluster, // feature id -> cluster id; presumably filled in by this call
+ std::unordered_map< QgsFeatureId, QDateTime> &idToDateTime, // feature id -> timestamp; when empty no temporal filtering is applied
+ QgsProcessingFeedback *feedback ); // receives progress updates and error reports
+
};
///@endcond PRIVATE
diff --git a/src/analysis/processing/qgsalgorithmstdbscanclustering.cpp b/src/analysis/processing/qgsalgorithmstdbscanclustering.cpp
new file mode 100644
index 00000000000..6b12c9f867b
--- /dev/null
+++ b/src/analysis/processing/qgsalgorithmstdbscanclustering.cpp
@@ -0,0 +1,102 @@
+/***************************************************************************
+ qgsalgorithmstdbscanclustering.cpp
+ ---------------------
+ begin : July 2018
+ copyright : (C) 2018 by Nyall Dawson
+ email : nyall dot dawson at gmail dot com
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include "qgsalgorithmstdbscanclustering.h"
+
+///@cond PRIVATE
+
+QString QgsStDbscanClusteringAlgorithm::name() const
+{
+ return QStringLiteral( "stdbscanclustering" ); // untranslated id; the algorithm test definition refers to it as native:stdbscanclustering
+}
+
+QString QgsStDbscanClusteringAlgorithm::displayName() const
+{
+ return QObject::tr( "ST-DBSCAN clustering" ); // translatable display name
+}
+
+QString QgsStDbscanClusteringAlgorithm::shortDescription() const
+{
+ return QObject::tr( "Clusters spatiotemporal point features using a time and density based scan algorithm." ); // translatable one-sentence summary
+}
+
+QStringList QgsStDbscanClusteringAlgorithm::tags() const
+{
+ return QObject::tr( "clustering,clusters,density,based,points,temporal,time,interval,distance" ).split( ',' ); // the translated comma-separated list is split into individual tags
+}
+
+QString QgsStDbscanClusteringAlgorithm::group() const
+{
+ return QObject::tr( "Vector analysis" ); // translatable group label
+}
+
+QString QgsStDbscanClusteringAlgorithm::groupId() const
+{
+ return QStringLiteral( "vectoranalysis" ); // untranslated group identifier
+}
+
+void QgsStDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
+{
+  // Declares the parameters and outputs of the algorithm; the configuration map is not used.
+  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) );
+  addParameter( new QgsProcessingParameterField( QStringLiteral( "DATETIME_FIELD" ),
+                QObject::tr( "Date/time field" ), QVariant(),
+                QStringLiteral( "INPUT" ), QgsProcessingParameterField::DateTime, false, false ) );
+  // Clustering thresholds. EPS2 is entered in days; the inherited processAlgorithm() multiplies it by 24 * 60 * 60 to compare in seconds.
+  addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ),
+                QgsProcessingParameterNumber::Integer, 5, false, 1 ) );
+  addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ),
+                QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) );
+  addParameter( new QgsProcessingParameterNumber( QStringLiteral( "EPS2" ),
+                QObject::tr( "Maximum date/time interval (in days unit) between clustered points" ), QgsProcessingParameterNumber::Double, 1, false, 0 ) );
+  // The remaining input parameters are flagged as advanced before being released to addParameter().
+  auto dbscanStarParam = std::make_unique< QgsProcessingParameterBoolean >( QStringLiteral( "DBSCAN*" ),
+                         QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true );
+  dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
+  addParameter( dbscanStarParam.release() );
+  // Names of the two attributes written to the output (cluster id and cluster size).
+  auto fieldNameParam = std::make_unique< QgsProcessingParameterString >( QStringLiteral( "FIELD_NAME" ),
+                        QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
+  fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
+  addParameter( fieldNameParam.release() );
+  auto sizeFieldNameParam = std::make_unique< QgsProcessingParameterString >( QStringLiteral( "SIZE_FIELD_NAME" ),
+                            QObject::tr( "Cluster size field name" ), QStringLiteral( "CLUSTER_SIZE" ) );
+  sizeFieldNameParam->setFlags( sizeFieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
+  addParameter( sizeFieldNameParam.release() );
+  // Point sink receiving the clustered features.
+  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) );
+  // Numeric output: number of clusters.
+  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NUM_CLUSTERS" ), QObject::tr( "Number of clusters" ) ) );
+}
+
+QString QgsStDbscanClusteringAlgorithm::shortHelpString() const // translatable help text: a one-line summary followed by literature references
+{
+ return QObject::tr( "Clusters point features based on a 2D implementation of spatiotemporal density-based clustering of applications with noise (ST-DBSCAN) algorithm.\n\n"
+ "For more details, please see the following papers:\n"
+ "* Ester, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise\". In: Proceedings of the 2nd International Conference on Knowledge Discovery and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n"
+ "* Birant, Derya, and Alp Kut. \"ST-DBSCAN: An algorithm for clustering spatial–temporal data.\" Data & Knowledge Engineering 60.1 (2007): 208-221.\n"
+ "* Peca, I., Fuchs, G., Vrotsou, K., Andrienko, N. V., & Andrienko, G. L. (2012). Scalable Cluster Analysis of Spatial Events. In EuroVA@ EuroVis." );
+}
+
+QgsStDbscanClusteringAlgorithm *QgsStDbscanClusteringAlgorithm::createInstance() const
+{
+ return new QgsStDbscanClusteringAlgorithm(); // fresh default-constructed instance; declared SIP_FACTORY in the header, so presumably the caller takes ownership
+}
+
+///@endcond
+
+
diff --git a/src/analysis/processing/qgsalgorithmstdbscanclustering.h b/src/analysis/processing/qgsalgorithmstdbscanclustering.h
new file mode 100644
index 00000000000..385f715c910
--- /dev/null
+++ b/src/analysis/processing/qgsalgorithmstdbscanclustering.h
@@ -0,0 +1,56 @@
+/***************************************************************************
+ qgsalgorithmstdbscanclustering.h
+ ---------------------
+ begin : July 2021
+ copyright : (C) 2021 by Mathieu Pellerin
+ email : nirvn dot asia at gmail dot com
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#ifndef QGSALGORITHMSTDBSCANCLUSTERING_H
+#define QGSALGORITHMSTDBSCANCLUSTERING_H
+
+#define SIP_NO_FILE
+
+#include "qgis_sip.h"
+#include "qgis_analysis.h"
+#include "qgsprocessingalgorithm.h"
+#include "qgsalgorithmdbscanclustering.h"
+
+///@cond PRIVATE
+
+/**
+ * Native ST-DBSCAN density based scan with noise clustering algorithm.
+ *
+ * Derives from QgsDbscanClusteringAlgorithm and only overrides the metadata, the parameter declarations and the factory method.
+ */
+class ANALYSIS_EXPORT QgsStDbscanClusteringAlgorithm : public QgsDbscanClusteringAlgorithm
+{
+
+  public:
+
+    QgsStDbscanClusteringAlgorithm() = default;
+    void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
+    QString name() const override;
+    QString displayName() const override;
+    QString shortDescription() const override;
+    QStringList tags() const override;
+    QString group() const override;
+    QString groupId() const override;
+    QString shortHelpString() const override;
+    QgsStDbscanClusteringAlgorithm *createInstance() const override SIP_FACTORY;
+
+};
+
+///@endcond PRIVATE
+
+#endif // QGSALGORITHMSTDBSCANCLUSTERING_H
+
+
diff --git a/src/analysis/processing/qgsnativealgorithms.cpp b/src/analysis/processing/qgsnativealgorithms.cpp
index 9261630ca86..c8a502ba4d1 100644
--- a/src/analysis/processing/qgsnativealgorithms.cpp
+++ b/src/analysis/processing/qgsnativealgorithms.cpp
@@ -186,6 +186,7 @@
#include "qgsalgorithmsplitlinesbylength.h"
#include "qgsalgorithmsplitvectorlayer.h"
#include "qgsalgorithmsplitwithlines.h"
+#include "qgsalgorithmstdbscanclustering.h"
#include "qgsalgorithmstringconcatenation.h"
#include "qgsalgorithmsubdivide.h"
#include "qgsalgorithmsumlinelength.h"
@@ -458,6 +459,7 @@ void QgsNativeAlgorithms::loadAlgorithms()
addAlgorithm( new QgsSplitLinesByLengthAlgorithm() );
addAlgorithm( new QgsSplitVectorLayerAlgorithm() );
addAlgorithm( new QgsSplitWithLinesAlgorithm() );
+ addAlgorithm( new QgsStDbscanClusteringAlgorithm() );
addAlgorithm( new QgsStringConcatenationAlgorithm() );
addAlgorithm( new QgsStyleFromProjectAlgorithm() );
addAlgorithm( new QgsSubdivideAlgorithm() );