diff --git a/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.gml b/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.gml new file mode 100644 index 00000000000..a3475767570 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.gml @@ -0,0 +1,104 @@ + + + + + 0-5 + 83 + + + + + + 0,-1 + 9 + 0 + 2017/08/13 + 1 + 2 + + + + + 3,3 + 2 + 1 + 2017/09/13 + 1 + 2 + + + + + 1,1 + 1 + 2 + + + + + + + + 5,2 + 4 + 2 + 2001/01/01 + 2 + 1 + + + + + 2,2 + 3 + 0 + 2005/09/13 + 3 + 1 + + + + + 0,-5 + 6 + 0 + + + + + + + + 4,1 + 5 + 1 + 2014/03/13 + 4 + 3 + + + + + 7,-1 + 8 + 0 + 2011/09/13 + 4 + 3 + + + + + 8,-1 + 7 + 0 + 2010/10/10 + 4 + 3 + + + diff --git a/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.xsd b/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.xsd new file mode 100644 index 00000000000..c19cc252774 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/stdbscan_multiple_clusters.xsd @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/python/plugins/processing/tests/testdata/qgis_algorithm_tests4.yaml b/python/plugins/processing/tests/testdata/qgis_algorithm_tests4.yaml index 6773d317e72..907a1285bef 100644 --- a/python/plugins/processing/tests/testdata/qgis_algorithm_tests4.yaml +++ b/python/plugins/processing/tests/testdata/qgis_algorithm_tests4.yaml @@ -878,6 +878,23 @@ tests: name: expected/dbscan_multiple_clusters.gml type: vector + - algorithm: native:stdbscanclustering + name: ST-DBScan multiple clusters + params: + DATETIME_FIELD: date + DBSCAN*: false + EPS: 5.0 + EPS2: 1000.0 + FIELD_NAME: CLUSTER_ID + INPUT: + name: custom/points_with_date.shp + type: vector + MIN_SIZE: 1 + results: + OUTPUT: + name: expected/stdbscan_multiple_clusters.gml + type: vector + - algorithm: qgis:rastersampling name: Single band 
raster params: diff --git a/src/analysis/CMakeLists.txt b/src/analysis/CMakeLists.txt index e7cab14c652..7c13f945287 100644 --- a/src/analysis/CMakeLists.txt +++ b/src/analysis/CMakeLists.txt @@ -198,6 +198,7 @@ set(QGIS_ANALYSIS_SRCS processing/qgsalgorithmsplitlinesbylength.cpp processing/qgsalgorithmsplitvectorlayer.cpp processing/qgsalgorithmsplitwithlines.cpp + processing/qgsalgorithmstdbscanclustering.cpp processing/qgsalgorithmstringconcatenation.cpp processing/qgsalgorithmswapxy.cpp processing/qgsalgorithmsubdivide.cpp diff --git a/src/analysis/processing/qgsalgorithmdbscanclustering.cpp b/src/analysis/processing/qgsalgorithmdbscanclustering.cpp index f867c0b9f47..e5d0b7a8935 100644 --- a/src/analysis/processing/qgsalgorithmdbscanclustering.cpp +++ b/src/analysis/processing/qgsalgorithmdbscanclustering.cpp @@ -38,7 +38,7 @@ QString QgsDbscanClusteringAlgorithm::shortDescription() const QStringList QgsDbscanClusteringAlgorithm::tags() const { - return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' ); + return QObject::tr( "clustering,clusters,density,based,points,distance" ).split( ',' ); } QString QgsDbscanClusteringAlgorithm::group() const @@ -113,7 +113,8 @@ QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) ); const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) ); - const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context ); + const double eps1 = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context ); + const double eps2 = parameterAsDouble( parameters, QStringLiteral( "EPS2" ), context ) * 24 * 60 * 60; const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral( "DBSCAN*" ), context ); QgsFields outputFields = source->fields(); @@ -135,13 +136,31 @@ QVariantMap 
QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p if ( feedback->isCanceled() ) return QVariantMap(); - // dbscan! + std::unordered_map< QgsFeatureId, QDateTime> idToDateTime; + const QString dateTimeFieldName = parameterAsString( parameters, QStringLiteral( "DATETIME_FIELD" ), context ); + if ( !dateTimeFieldName.isEmpty() ) + { + const int dateTimefieldIndex = source->fields().lookupField( dateTimeFieldName ); + if ( dateTimefieldIndex == -1 ) + throw QgsProcessingException( QObject::tr( "Datetime field missing" ) ); + + // fetch temporal values + feedback->pushInfo( QObject::tr( "Fetching temporal values" ) ); + QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() << dateTimefieldIndex ), QgsProcessingFeatureSource::FlagSkipGeometryValidityChecks ); + QgsFeature feature; + while ( features.nextFeature( feature ) ) + { + idToDateTime[ feature.id() ] = feature.attributes().at( dateTimefieldIndex ).toDateTime(); + } + } + + // stdbscan! 
feedback->pushInfo( QObject::tr( "Analysing clusters" ) ); std::unordered_map< QgsFeatureId, int> idToCluster; idToCluster.reserve( index.size() ); - QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() ); const long featureCount = source->featureCount(); - dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, feedback ); + QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() ); + stdbscan( minSize, eps1, eps2, borderPointsAreNoise, featureCount, features, index, idToCluster, idToDateTime, feedback ); // cluster size std::unordered_map< int, int> clusterSize; @@ -182,14 +201,15 @@ QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &p return outputs; } - -void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize, - const double eps, +void QgsDbscanClusteringAlgorithm::stdbscan( const std::size_t minSize, + const double eps1, + const double eps2, const bool borderPointsAreNoise, const long featureCount, QgsFeatureIterator features, QgsSpatialIndexKDBush &index, std::unordered_map< QgsFeatureId, int> &idToCluster, + std::unordered_map< QgsFeatureId, QDateTime> &idToDateTime, QgsProcessingFeedback *feedback ) { const double step = featureCount > 0 ? 90.0 / featureCount : 1; @@ -231,13 +251,22 @@ void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize, continue; } + if ( !idToDateTime.empty() && !idToDateTime[ feat.id() ].isValid() ) + { + // missing datetime value + feedback->reportError( QObject::tr( "Feature %1 is missing a valid datetime value." 
).arg( feat.id() ) ); + feedback->setProgress( ++i * step ); + continue; + } + std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within; if ( minSize > 1 ) { - index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data ) + index.within( point, eps1, [&within, pointId = feat.id(), &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data ) { - within.insert( data ); + if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ pointId ].secsTo( idToDateTime[ data.id ] ) ) <= eps2 ) ) + within.insert( data ); } ); if ( within.size() < minSize ) continue; @@ -278,10 +307,12 @@ void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize, QgsPointXY point2 = j.point(); std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2; - index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data ) + index.within( point2, eps1, [&within2, point2Id = j.id, &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data ) { - within2.insert( data ); + if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ point2Id ].secsTo( idToDateTime[ data.id ] ) ) <= eps2 ) ) + within2.insert( data ); } ); + if ( within2.size() >= minSize ) { // expand neighbourhood diff --git a/src/analysis/processing/qgsalgorithmdbscanclustering.h b/src/analysis/processing/qgsalgorithmdbscanclustering.h index 3ce8e70a0a9..4c1aec68286 100644 --- a/src/analysis/processing/qgsalgorithmdbscanclustering.h +++ b/src/analysis/processing/qgsalgorithmdbscanclustering.h @@ -54,14 +54,18 @@ class ANALYSIS_EXPORT QgsDbscanClusteringAlgorithm : public QgsProcessingAlgorit QVariantMap processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override; private: - static void dbscan( std::size_t minSize, - double eps, - 
bool borderPointsAreNoise, - long featureCount, - QgsFeatureIterator features, - QgsSpatialIndexKDBush &index, - std::unordered_map< QgsFeatureId, int> &idToCluster, - QgsProcessingFeedback *feedback ); + + static void stdbscan( std::size_t minSize, + const double eps1, + const double eps2, + bool borderPointsAreNoise, + long featureCount, + QgsFeatureIterator features, + QgsSpatialIndexKDBush &index, + std::unordered_map< QgsFeatureId, int> &idToCluster, + std::unordered_map< QgsFeatureId, QDateTime> &idToDateTime, + QgsProcessingFeedback *feedback ); + }; ///@endcond PRIVATE diff --git a/src/analysis/processing/qgsalgorithmstdbscanclustering.cpp b/src/analysis/processing/qgsalgorithmstdbscanclustering.cpp new file mode 100644 index 00000000000..6b12c9f867b --- /dev/null +++ b/src/analysis/processing/qgsalgorithmstdbscanclustering.cpp @@ -0,0 +1,102 @@ +/*************************************************************************** + qgsalgorithmstdbscanclustering.cpp + --------------------- + begin : July 2018 + copyright : (C) 2018 by Nyall Dawson + email : nyall dot dawson at gmail dot com + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "qgsalgorithmstdbscanclustering.h" + +///@cond PRIVATE + +QString QgsStDbscanClusteringAlgorithm::name() const +{ + return QStringLiteral( "stdbscanclustering" ); +} + +QString QgsStDbscanClusteringAlgorithm::displayName() const +{ + return QObject::tr( "ST-DBSCAN clustering" ); +} + +QString QgsStDbscanClusteringAlgorithm::shortDescription() const +{ + return QObject::tr( "Clusters spatiotemporal point features using a time and density based scan algorithm." ); +} + +QStringList QgsStDbscanClusteringAlgorithm::tags() const +{ + return QObject::tr( "clustering,clusters,density,based,points,temporal,time,interval,distance" ).split( ',' ); +} + +QString QgsStDbscanClusteringAlgorithm::group() const +{ + return QObject::tr( "Vector analysis" ); +} + +QString QgsStDbscanClusteringAlgorithm::groupId() const +{ + return QStringLiteral( "vectoranalysis" ); +} + +void QgsStDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & ) +{ + addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), + QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) ); + addParameter( new QgsProcessingParameterField( QStringLiteral( "DATETIME_FIELD" ), + QObject::tr( "Date/time field" ), QVariant(), + QStringLiteral( "INPUT" ), QgsProcessingParameterField::DateTime, false, false ) ); + + addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ), + QgsProcessingParameterNumber::Integer, 5, false, 1 ) ); + addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ), + QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) ); + addParameter( new QgsProcessingParameterNumber( QStringLiteral( "EPS2" ), + QObject::tr( "Maximum date/time interval (in days unit) between clustered points" ), QgsProcessingParameterNumber::Double, 1, 
false, 0 ) ); + + auto dbscanStarParam = std::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ), + QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true ); + dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced ); + addParameter( dbscanStarParam.release() ); + + auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ), + QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) ); + fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced ); + addParameter( fieldNameParam.release() ); + auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "SIZE_FIELD_NAME" ), + QObject::tr( "Cluster size field name" ), QStringLiteral( "CLUSTER_SIZE" ) ); + sizeFieldNameParam->setFlags( sizeFieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced ); + addParameter( sizeFieldNameParam.release() ); + + addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) ); + + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NUM_CLUSTERS" ), QObject::tr( "Number of clusters" ) ) ); +} + +QString QgsStDbscanClusteringAlgorithm::shortHelpString() const +{ + return QObject::tr( "Clusters point features based on a 2D implementation of spatiotemporal density-based clustering of applications with noise (ST-DBSCAN) algorithm.\n\n" + "For more details, please see the following papers:\n" + "* Ester, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise\". In: Proceedings of the 2nd International Conference on Knowledge Discovery and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n" + "* Birant, Derya, and Alp Kut. \"ST-DBSCAN: An algorithm for clustering spatial–temporal data.\" Data & Knowledge Engineering 60.1 (2007): 208-221.\n" + "* Peca, I., Fuchs, G., Vrotsou, K., Andrienko, N. 
V., & Andrienko, G. L. (2012). Scalable Cluster Analysis of Spatial Events. In EuroVA@ EuroVis." ); +} + +QgsStDbscanClusteringAlgorithm *QgsStDbscanClusteringAlgorithm::createInstance() const +{ + return new QgsStDbscanClusteringAlgorithm(); +} + +///@endcond + + diff --git a/src/analysis/processing/qgsalgorithmstdbscanclustering.h b/src/analysis/processing/qgsalgorithmstdbscanclustering.h new file mode 100644 index 00000000000..385f715c910 --- /dev/null +++ b/src/analysis/processing/qgsalgorithmstdbscanclustering.h @@ -0,0 +1,56 @@ +/*************************************************************************** + qgsalgorithmstdbscanclustering.h + --------------------- + begin : July 2021 + copyright : (C) 2021 by Mathieu Pellerin + email : nirvn dot asia at gmail dot com + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef QGSALGORITHMSTDBSCANCLUSTERING_H +#define QGSALGORITHMSTDBSCANCLUSTERING_H + +#define SIP_NO_FILE + +#include "qgis_sip.h" +#include "qgis_analysis.h" +#include "qgsprocessingalgorithm.h" +#include "qgsalgorithmdbscanclustering.h" + +///@cond PRIVATE + + +/** + * Native ST-DBSCAN density based scan with noise clustering algorithm. 
+ */ +class ANALYSIS_EXPORT QgsStDbscanClusteringAlgorithm : public QgsDbscanClusteringAlgorithm +{ + + public: + + QgsStDbscanClusteringAlgorithm() = default; + void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override; + QString name() const override; + QString displayName() const override; + QString shortDescription() const override; + QStringList tags() const override; + QString group() const override; + QString groupId() const override; + QString shortHelpString() const override; + QgsStDbscanClusteringAlgorithm *createInstance() const override SIP_FACTORY; + +}; + +///@endcond PRIVATE + +#endif // QGSALGORITHMSTDBSCANCLUSTERING_H + + diff --git a/src/analysis/processing/qgsnativealgorithms.cpp b/src/analysis/processing/qgsnativealgorithms.cpp index 9261630ca86..c8a502ba4d1 100644 --- a/src/analysis/processing/qgsnativealgorithms.cpp +++ b/src/analysis/processing/qgsnativealgorithms.cpp @@ -186,6 +186,7 @@ #include "qgsalgorithmsplitlinesbylength.h" #include "qgsalgorithmsplitvectorlayer.h" #include "qgsalgorithmsplitwithlines.h" +#include "qgsalgorithmstdbscanclustering.h" #include "qgsalgorithmstringconcatenation.h" #include "qgsalgorithmsubdivide.h" #include "qgsalgorithmsumlinelength.h" @@ -458,6 +459,7 @@ void QgsNativeAlgorithms::loadAlgorithms() addAlgorithm( new QgsSplitLinesByLengthAlgorithm() ); addAlgorithm( new QgsSplitVectorLayerAlgorithm() ); addAlgorithm( new QgsSplitWithLinesAlgorithm() ); + addAlgorithm( new QgsStDbscanClusteringAlgorithm() ); addAlgorithm( new QgsStringConcatenationAlgorithm() ); addAlgorithm( new QgsStyleFromProjectAlgorithm() ); addAlgorithm( new QgsSubdivideAlgorithm() );