Compare commits

...

8 Commits

Author SHA1 Message Date
Viper MiniQ
db9b117f37
Merge 296f0c1939697dfec7c3823e864aa6eeeadee58d into b927df884feb840b67724af5c82c8088a9d20bfe 2025-07-02 08:26:12 +00:00
Loïc Bartoletti
b927df884f
Merge pull request #62484 from jef-n/fix-62478
fix #62478 (followup eac401c009)
2025-07-02 06:09:23 +02:00
Juergen E. Fischer
6fd32d92fd fix #62478 (followup eac401c009) 2025-07-01 17:54:58 +02:00
github-actions[bot]
296f0c1939 auto-fix pre-commit issues 2025-05-31 16:40:57 +00:00
viperminiq
795bea680b override processAlgorithm 2025-05-31 18:39:52 +02:00
viperminiq
6e53077db0 include as native algorithm 2025-05-31 13:23:09 +02:00
viperminiq
3f082b1688 add kmeans from seed layer 2025-05-31 13:18:48 +02:00
viperminiq
43e1e61a1c refactor (create base algorithm) 2025-05-30 22:14:52 +02:00
4 changed files with 323 additions and 104 deletions

View File

@ -658,6 +658,9 @@ class Repositories(QObject):
.text() .text()
.strip() .strip()
) )
supports_qt6 = pluginNodes.item(i).firstChildElement(
"supports_qt6"
).text().strip().upper() in ["TRUE", "YES"]
if not qgisMaximumVersion: if not qgisMaximumVersion:
if qgisMinimumVersion[0] == "3" and supports_qt6: if qgisMinimumVersion[0] == "3" and supports_qt6:
qgisMaximumVersion = "4.99" qgisMaximumVersion = "4.99"

View File

@ -23,6 +23,113 @@
constexpr uint KMEANS_MAX_ITERATIONS = 1000; constexpr uint KMEANS_MAX_ITERATIONS = 1000;
//
// QgsKmeansClusteringAlgorithmBase
//
// Returns the search tags shared by the k-means algorithms: the translated,
// comma-separated tag list split into its individual entries.
QStringList QgsKMeansClusteringAlgorithmBase::tags() const
{
  const QString translatedTagList = QObject::tr( "clustering,clusters,kmeans,points" );
  return translatedTagList.split( ',' );
}
// Returns the translated, user-visible name of the group the k-means algorithms belong to.
QString QgsKMeansClusteringAlgorithmBase::group() const
{
  QString groupName = QObject::tr( "Vector analysis" );
  return groupName;
}
// Returns the untranslated identifier of the group the k-means algorithms belong to.
QString QgsKMeansClusteringAlgorithmBase::groupId() const
{
  QString identifier = QStringLiteral( "vectoranalysis" );
  return identifier;
}
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithmBase::updateMeans( const std::vector<Feature> &points, std::vector<QgsPointXY> &centers, std::vector<uint> &weights, const int k )
{
const uint n = points.size();
std::fill( weights.begin(), weights.end(), 0 );
for ( int i = 0; i < k; i++ )
{
centers[i].setX( 0.0 );
centers[i].setY( 0.0 );
}
for ( uint i = 0; i < n; i++ )
{
const int cluster = points[i].cluster;
centers[cluster] += QgsVector( points[i].point.x(), points[i].point.y() );
weights[cluster] += 1;
}
for ( int i = 0; i < k; i++ )
{
centers[i] /= weights[i];
}
}
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithmBase::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points, const std::vector<QgsPointXY> &centers, const int k, bool &changed )
{
changed = false;
const std::size_t n = points.size();
for ( std::size_t i = 0; i < n; i++ )
{
Feature &point = points[i];
// Initialize with distance to first cluster
double currentDistance = point.point.sqrDist( centers[0] );
int currentCluster = 0;
// Check all other cluster centers and find the nearest
for ( int cluster = 1; cluster < k; cluster++ )
{
const double distance = point.point.sqrDist( centers[cluster] );
if ( distance < currentDistance )
{
currentDistance = distance;
currentCluster = cluster;
}
}
// Store the nearest cluster this object is in
if ( point.cluster != currentCluster )
{
changed = true;
point.cluster = currentCluster;
}
}
}
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithmBase::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback )
{
int converged = false;
bool changed = false;
// avoid reallocating weights array for every iteration
std::vector<uint> weights( k );
uint i = 0;
for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
{
if ( feedback && feedback->isCanceled() )
break;
findNearest( objs, centers, k, changed );
updateMeans( objs, centers, weights, k );
converged = !changed;
}
if ( !converged && feedback )
feedback->reportError( QObject::tr( "Clustering did not converge after %n iteration(s)", nullptr, static_cast<int>( i ) ) );
else if ( feedback )
feedback->pushInfo( QObject::tr( "Clustering converged after %n iteration(s)", nullptr, static_cast<int>( i ) ) );
}
//
// QgsKmeansClusteringAlgorithm
//
QString QgsKMeansClusteringAlgorithm::name() const QString QgsKMeansClusteringAlgorithm::name() const
{ {
return QStringLiteral( "kmeansclustering" ); return QStringLiteral( "kmeansclustering" );
@ -33,21 +140,6 @@ QString QgsKMeansClusteringAlgorithm::displayName() const
return QObject::tr( "K-means clustering" ); return QObject::tr( "K-means clustering" );
} }
// Returns the search tags for the algorithm: the translated, comma-separated
// tag list split into its individual entries.
QStringList QgsKMeansClusteringAlgorithm::tags() const
{
  const QString translatedTagList = QObject::tr( "clustering,clusters,kmeans,points" );
  return translatedTagList.split( ',' );
}
// Returns the translated, user-visible name of the group the algorithm belongs to.
QString QgsKMeansClusteringAlgorithm::group() const
{
  QString groupName = QObject::tr( "Vector analysis" );
  return groupName;
}
// Returns the untranslated identifier of the group the algorithm belongs to.
QString QgsKMeansClusteringAlgorithm::groupId() const
{
  QString identifier = QStringLiteral( "vectoranalysis" );
  return identifier;
}
void QgsKMeansClusteringAlgorithm::initAlgorithm( const QVariantMap & ) void QgsKMeansClusteringAlgorithm::initAlgorithm( const QVariantMap & )
{ {
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::VectorAnyGeometry ) ) ); addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::VectorAnyGeometry ) ) );
@ -459,89 +551,193 @@ void QgsKMeansClusteringAlgorithm::initClustersPlusPlus( std::vector<Feature> &p
} }
} }
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c //
// QgsKMeansClusteringFromSeedLayerAlgorithm
//
void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback ) QString QgsKMeansClusteringFromSeedLayerAlgorithm::name() const
{ {
int converged = false; return QStringLiteral( "kmeansclusteringfromseedlayer" );
bool changed = false; }
// avoid reallocating weights array for every iteration QString QgsKMeansClusteringFromSeedLayerAlgorithm::displayName() const
std::vector<uint> weights( k ); {
return QObject::tr( "K-means clustering (from seed layer)" );
}
uint i = 0; void QgsKMeansClusteringFromSeedLayerAlgorithm::initAlgorithm( const QVariantMap & )
for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ ) {
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::VectorAnyGeometry ) ) );
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "SEED" ), QObject::tr( "Seed layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::VectorPoint ) ) );
auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ), QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
fieldNameParam->setFlags( fieldNameParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
addParameter( fieldNameParam.release() );
auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "SIZE_FIELD_NAME" ), QObject::tr( "Cluster size field name" ), QStringLiteral( "CLUSTER_SIZE" ) );
sizeFieldNameParam->setFlags( sizeFieldNameParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
addParameter( sizeFieldNameParam.release() );
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), Qgis::ProcessingSourceType::VectorAnyGeometry ) );
}
// Returns the translated help text shown for the algorithm. Unlike the plain
// k-means algorithm, the text has to explain where the starting centers and
// the number of clusters come from, since there is no cluster count parameter.
QString QgsKMeansClusteringFromSeedLayerAlgorithm::shortHelpString() const
{
  return QObject::tr( "This algorithm calculates the 2D distance based k-means cluster number for each input feature.\n\n"
                      "If input geometries are lines or polygons, the clustering is based on the centroid of the feature.\n\n"
                      "The initial cluster centers are taken from the point features of the seed layer, "
                      "so the number of seed points determines the number of clusters." );
}
// Returns the translated two-line summary of the algorithm.
QString QgsKMeansClusteringFromSeedLayerAlgorithm::shortDescription() const
{
  QString summary = QObject::tr( "Calculates the 2D distance based k-means cluster number for each input feature.\n"
                                 "Uses the provided seed layer for starting centers." );
  return summary;
}
// Creates a new, default-constructed instance of the algorithm; the returned
// object is allocated with new and handed to the caller.
QgsKMeansClusteringFromSeedLayerAlgorithm *QgsKMeansClusteringFromSeedLayerAlgorithm::createInstance() const
{
  QgsKMeansClusteringFromSeedLayerAlgorithm *freshInstance = new QgsKMeansClusteringFromSeedLayerAlgorithm();
  return freshInstance;
}
QVariantMap QgsKMeansClusteringFromSeedLayerAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
{
std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
if ( !source )
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
std::unique_ptr<QgsProcessingFeatureSource> seedSource( parameterAsSource( parameters, QStringLiteral( "SEED" ), context ) );
if ( !seedSource )
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "SEED" ) ) );
QgsFields outputFields = source->fields();
QgsFields newFields;
const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
newFields.append( QgsField( clusterFieldName, QMetaType::Type::Int ) );
const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral( "SIZE_FIELD_NAME" ), context );
newFields.append( QgsField( clusterSizeFieldName, QMetaType::Type::Int ) );
outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
QString dest;
std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
if ( !sink )
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
// build list of point inputs - if it's already a point, use that. If not, take the centroid.
feedback->pushInfo( QObject::tr( "Collecting input points" ) );
const double step = source->featureCount() + seedSource->featureCount() > 0 ? 50.0 / static_cast< double >( source->featureCount() + seedSource->featureCount() ) : 1;
int i = 0;
int n = 0;
int featureWithGeometryCount = 0;
QgsFeature feat;
std::vector<Feature> clusterFeatures;
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
QHash<QgsFeatureId, std::size_t> idToObj;
while ( features.nextFeature( feat ) )
{ {
if ( feedback && feedback->isCanceled() ) i++;
if ( feedback->isCanceled() )
{
break; break;
findNearest( objs, centers, k, changed );
updateMeans( objs, centers, weights, k );
converged = !changed;
}
if ( !converged && feedback )
feedback->reportError( QObject::tr( "Clustering did not converge after %n iteration(s)", nullptr, static_cast<int>( i ) ) );
else if ( feedback )
feedback->pushInfo( QObject::tr( "Clustering converged after %n iteration(s)", nullptr, static_cast<int>( i ) ) );
}
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points, const std::vector<QgsPointXY> &centers, const int k, bool &changed )
{
changed = false;
const std::size_t n = points.size();
for ( std::size_t i = 0; i < n; i++ )
{
Feature &point = points[i];
// Initialize with distance to first cluster
double currentDistance = point.point.sqrDist( centers[0] );
int currentCluster = 0;
// Check all other cluster centers and find the nearest
for ( int cluster = 1; cluster < k; cluster++ )
{
const double distance = point.point.sqrDist( centers[cluster] );
if ( distance < currentDistance )
{
currentDistance = distance;
currentCluster = cluster;
}
} }
// Store the nearest cluster this object is in feedback->setProgress( i * step );
if ( point.cluster != currentCluster ) if ( !feat.hasGeometry() )
continue;
featureWithGeometryCount++;
QgsPointXY point;
if ( QgsWkbTypes::flatType( feat.geometry().wkbType() ) == Qgis::WkbType::Point )
point = QgsPointXY( *qgsgeometry_cast<const QgsPoint *>( feat.geometry().constGet() ) );
else
{ {
changed = true; const QgsGeometry centroid = feat.geometry().centroid();
point.cluster = currentCluster; if ( centroid.isNull() )
continue; // centroid failed, e.g. empty linestring
point = QgsPointXY( *qgsgeometry_cast<const QgsPoint *>( centroid.constGet() ) );
} }
n++;
idToObj[feat.id()] = clusterFeatures.size();
clusterFeatures.emplace_back( Feature( point ) );
} }
feedback->pushInfo( QObject::tr( "Collecting seed points" ) );
std::vector<QgsPointXY> centers;
QgsFeatureIterator seedFeatures = seedSource->getFeatures( QgsFeatureRequest().setNoAttributes() );
while ( seedFeatures.nextFeature( feat ) )
{
i++;
if ( feedback->isCanceled() )
{
break;
}
feedback->setProgress( i * step );
if ( !feat.hasGeometry() )
continue;
centers.emplace_back( QgsPointXY( *qgsgeometry_cast<const QgsPoint *>( feat.geometry().constGet() ) ) );
}
int k = centers.size();
if ( n < k )
{
feedback->reportError( QObject::tr( "Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
k = n;
}
if ( k > 1 )
{
feedback->pushInfo( QObject::tr( "Calculating clusters" ) );
calculateKMeans( clusterFeatures, centers, k, feedback );
}
// cluster size
std::unordered_map<int, int> clusterSize;
for ( auto it = idToObj.constBegin(); it != idToObj.constEnd(); ++it )
{
clusterSize[clusterFeatures[it.value()].cluster]++;
}
features = source->getFeatures();
i = 0;
while ( features.nextFeature( feat ) )
{
i++;
if ( feedback->isCanceled() )
{
break;
}
feedback->setProgress( 50 + i * step );
QgsAttributes attr = feat.attributes();
const auto obj = idToObj.find( feat.id() );
if ( !feat.hasGeometry() || obj == idToObj.end() )
{
attr << QVariant() << QVariant();
}
else if ( k <= 1 )
{
attr << 0 << featureWithGeometryCount;
}
else
{
const int cluster = clusterFeatures[*obj].cluster;
attr << cluster << clusterSize[cluster];
}
feat.setAttributes( attr );
if ( !sink->addFeature( feat, QgsFeatureSink::FastInsert ) )
throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
}
sink->finalize();
QVariantMap outputs;
outputs.insert( QStringLiteral( "OUTPUT" ), dest );
return outputs;
} }
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithm::updateMeans( const std::vector<Feature> &points, std::vector<QgsPointXY> &centers, std::vector<uint> &weights, const int k )
{
const uint n = points.size();
std::fill( weights.begin(), weights.end(), 0 );
for ( int i = 0; i < k; i++ )
{
centers[i].setX( 0.0 );
centers[i].setY( 0.0 );
}
for ( uint i = 0; i < n; i++ )
{
const int cluster = points[i].cluster;
centers[cluster] += QgsVector( points[i].point.x(), points[i].point.y() );
weights[cluster] += 1;
}
for ( int i = 0; i < k; i++ )
{
centers[i] /= weights[i];
}
}
///@endcond ///@endcond

View File

@ -26,28 +26,14 @@
///@cond PRIVATE ///@cond PRIVATE
class ANALYSIS_EXPORT QgsKMeansClusteringAlgorithmBase : public QgsProcessingAlgorithm
/**
* Native k-means clustering algorithm.
*/
class ANALYSIS_EXPORT QgsKMeansClusteringAlgorithm : public QgsProcessingAlgorithm
{ {
public: public:
QgsKMeansClusteringAlgorithm() = default;
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
QString name() const override;
QString displayName() const override;
QStringList tags() const override; QStringList tags() const override;
QString group() const override; QString group() const override;
QString groupId() const override; QString groupId() const override;
QString shortHelpString() const override;
QString shortDescription() const override;
QgsKMeansClusteringAlgorithm *createInstance() const override SIP_FACTORY;
protected: protected:
QVariantMap processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
private:
struct Feature struct Feature
{ {
Feature( QgsPointXY point ) Feature( QgsPointXY point )
@ -58,13 +44,46 @@ class ANALYSIS_EXPORT QgsKMeansClusteringAlgorithm : public QgsProcessingAlgorit
int cluster = -1; int cluster = -1;
}; };
static void initClustersFarthestPoints( std::vector<Feature> &points, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback );
static void initClustersPlusPlus( std::vector<Feature> &points, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback );
static void calculateKMeans( std::vector<Feature> &points, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback ); static void calculateKMeans( std::vector<Feature> &points, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback );
static void findNearest( std::vector<Feature> &points, const std::vector<QgsPointXY> &centers, int k, bool &changed ); static void findNearest( std::vector<Feature> &points, const std::vector<QgsPointXY> &centers, int k, bool &changed );
static void updateMeans( const std::vector<Feature> &points, std::vector<QgsPointXY> &centers, std::vector<uint> &weights, int k ); static void updateMeans( const std::vector<Feature> &points, std::vector<QgsPointXY> &centers, std::vector<uint> &weights, int k );
};
/**
 * Native k-means clustering algorithm.
 *
 * The group, tags and the clustering helpers (calculateKMeans(), findNearest(),
 * updateMeans()) are inherited from QgsKMeansClusteringAlgorithmBase; this class
 * adds its own parameters, help texts and cluster initialization helpers.
 */
class ANALYSIS_EXPORT QgsKMeansClusteringAlgorithm : public QgsKMeansClusteringAlgorithmBase
{
public:
QgsKMeansClusteringAlgorithm() = default;
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
QString name() const override;
QString displayName() const override;
QString shortHelpString() const override;
QString shortDescription() const override;
QgsKMeansClusteringAlgorithm *createInstance() const override SIP_FACTORY;
private:
// Helpers that fill `centers` with k starting centers from `points`.
// NOTE(review): the strategy of each is inferred from its name only (farthest
// points / k-means++) - confirm against the definitions in the .cpp file.
static void initClustersFarthestPoints( std::vector<Feature> &points, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback );
static void initClustersPlusPlus( std::vector<Feature> &points, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback );
// grants the processing algorithm test class access to the private helpers
friend class TestQgsProcessingAlgsPt1;
protected:
QVariantMap processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
};
class ANALYSIS_EXPORT QgsKMeansClusteringFromSeedLayerAlgorithm : public QgsKMeansClusteringAlgorithmBase
{
public:
QgsKMeansClusteringFromSeedLayerAlgorithm() = default;
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
QString name() const override;
QString displayName() const override;
QString shortHelpString() const override;
QString shortDescription() const override;
QgsKMeansClusteringFromSeedLayerAlgorithm *createInstance() const override SIP_FACTORY;
protected:
QVariantMap processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
}; };
///@endcond PRIVATE ///@endcond PRIVATE

View File

@ -483,6 +483,7 @@ void QgsNativeAlgorithms::loadAlgorithms()
addAlgorithm( new QgsJoinWithLinesAlgorithm() ); addAlgorithm( new QgsJoinWithLinesAlgorithm() );
addAlgorithm( new QgsKeepNBiggestPartsAlgorithm() ); addAlgorithm( new QgsKeepNBiggestPartsAlgorithm() );
addAlgorithm( new QgsKMeansClusteringAlgorithm() ); addAlgorithm( new QgsKMeansClusteringAlgorithm() );
addAlgorithm( new QgsKMeansClusteringFromSeedLayerAlgorithm() );
addAlgorithm( new QgsLayerToBookmarksAlgorithm() ); addAlgorithm( new QgsLayerToBookmarksAlgorithm() );
addAlgorithm( new QgsLayoutMapExtentToLayerAlgorithm() ); addAlgorithm( new QgsLayoutMapExtentToLayerAlgorithm() );
addAlgorithm( new QgsLayoutAtlasToImageAlgorithm() ); addAlgorithm( new QgsLayoutAtlasToImageAlgorithm() );