mirror of
https://github.com/qgis/QGIS.git
synced 2025-10-06 00:07:29 -04:00
[FEATURE][processing] K Means clustering algorithm
Adds a native k-means clustering algorithm. Based on a port of PostGIS' ST_ClusterKMeans function, this new algorithm adds a new cluster ID field to a set of input features identify the feature's cluster based on the k-means clustering approach. If non-point geometries are used as input, the clustering is based off the centroid of the input geometries.
This commit is contained in:
parent
16f3781ab7
commit
34b9d39b27
Binary file not shown.
55
python/plugins/processing/tests/testdata/expected/kmeans_lines.gml
vendored
Normal file
55
python/plugins/processing/tests/testdata/expected/kmeans_lines.gml
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<ogr:FeatureCollection
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_lines.xsd"
|
||||
xmlns:ogr="http://ogr.maptools.org/"
|
||||
xmlns:gml="http://www.opengis.net/gml">
|
||||
<gml:boundedBy>
|
||||
<gml:Box>
|
||||
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
|
||||
<gml:coord><gml:X>11</gml:X><gml:Y>5</gml:Y></gml:coord>
|
||||
</gml:Box>
|
||||
</gml:boundedBy>
|
||||
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_lines fid="lines.0">
|
||||
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,2 9,2 9,3 11,5</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_lines>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_lines fid="lines.1">
|
||||
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>-1,-1 1,-1</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_lines>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_lines fid="lines.2">
|
||||
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>2,0 2,2 3,2 3,3</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_lines>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_lines fid="lines.3">
|
||||
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>3,1 5,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_lines>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_lines fid="lines.4">
|
||||
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>7,-3 10,-3</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_lines>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_lines fid="lines.5">
|
||||
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,-3 10,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_lines>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_lines fid="lines.6">
|
||||
<ogr:CLUSTER_ID xsi:nil="true"/>
|
||||
</ogr:kmeans_lines>
|
||||
</gml:featureMember>
|
||||
</ogr:FeatureCollection>
|
30
python/plugins/processing/tests/testdata/expected/kmeans_lines.xsd
vendored
Normal file
30
python/plugins/processing/tests/testdata/expected/kmeans_lines.xsd
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
|
||||
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
|
||||
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
|
||||
<xs:complexType name="FeatureCollectionType">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="gml:AbstractFeatureCollectionType">
|
||||
<xs:attribute name="lockId" type="xs:string" use="optional"/>
|
||||
<xs:attribute name="scope" type="xs:string" use="optional"/>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
<xs:element name="kmeans_lines" type="ogr:kmeans_lines_Type" substitutionGroup="gml:_Feature"/>
|
||||
<xs:complexType name="kmeans_lines_Type">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="gml:AbstractFeatureType">
|
||||
<xs:sequence>
|
||||
<xs:element name="geometryProperty" type="gml:LineStringPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
|
||||
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
</xs:schema>
|
86
python/plugins/processing/tests/testdata/expected/kmeans_points_3.gml
vendored
Normal file
86
python/plugins/processing/tests/testdata/expected/kmeans_points_3.gml
vendored
Normal file
@ -0,0 +1,86 @@
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<ogr:FeatureCollection
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_3.xsd"
|
||||
xmlns:ogr="http://ogr.maptools.org/"
|
||||
xmlns:gml="http://www.opengis.net/gml">
|
||||
<gml:boundedBy>
|
||||
<gml:Box>
|
||||
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
|
||||
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
|
||||
</gml:Box>
|
||||
</gml:boundedBy>
|
||||
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.0">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>1</ogr:id>
|
||||
<ogr:id2>2</ogr:id2>
|
||||
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.1">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>2</ogr:id>
|
||||
<ogr:id2>1</ogr:id2>
|
||||
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.2">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>3</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.3">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>4</ogr:id>
|
||||
<ogr:id2>2</ogr:id2>
|
||||
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.4">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>5</ogr:id>
|
||||
<ogr:id2>1</ogr:id2>
|
||||
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.5">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>6</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.6">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>7</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.7">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>8</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_3 fid="points.8">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>9</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_points_3>
|
||||
</gml:featureMember>
|
||||
</ogr:FeatureCollection>
|
44
python/plugins/processing/tests/testdata/expected/kmeans_points_3.xsd
vendored
Normal file
44
python/plugins/processing/tests/testdata/expected/kmeans_points_3.xsd
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
|
||||
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
|
||||
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
|
||||
<xs:complexType name="FeatureCollectionType">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="gml:AbstractFeatureCollectionType">
|
||||
<xs:attribute name="lockId" type="xs:string" use="optional"/>
|
||||
<xs:attribute name="scope" type="xs:string" use="optional"/>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
<xs:element name="kmeans_points_3" type="ogr:kmeans_points_3_Type" substitutionGroup="gml:_Feature"/>
|
||||
<xs:complexType name="kmeans_points_3_Type">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="gml:AbstractFeatureType">
|
||||
<xs:sequence>
|
||||
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
|
||||
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
</xs:schema>
|
86
python/plugins/processing/tests/testdata/expected/kmeans_points_5.gml
vendored
Normal file
86
python/plugins/processing/tests/testdata/expected/kmeans_points_5.gml
vendored
Normal file
@ -0,0 +1,86 @@
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<ogr:FeatureCollection
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_5.xsd"
|
||||
xmlns:ogr="http://ogr.maptools.org/"
|
||||
xmlns:gml="http://www.opengis.net/gml">
|
||||
<gml:boundedBy>
|
||||
<gml:Box>
|
||||
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
|
||||
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
|
||||
</gml:Box>
|
||||
</gml:boundedBy>
|
||||
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.0">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>1</ogr:id>
|
||||
<ogr:id2>2</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.1">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>2</ogr:id>
|
||||
<ogr:id2>1</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.2">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>3</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.3">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>4</ogr:id>
|
||||
<ogr:id2>2</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.4">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>5</ogr:id>
|
||||
<ogr:id2>1</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.5">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>6</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>1</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.6">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>7</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.7">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>8</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_points_5 fid="points.8">
|
||||
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||
<ogr:id>9</ogr:id>
|
||||
<ogr:id2>0</ogr:id2>
|
||||
<ogr:CLUSTER_ID5>3</ogr:CLUSTER_ID5>
|
||||
</ogr:kmeans_points_5>
|
||||
</gml:featureMember>
|
||||
</ogr:FeatureCollection>
|
44
python/plugins/processing/tests/testdata/expected/kmeans_points_5.xsd
vendored
Normal file
44
python/plugins/processing/tests/testdata/expected/kmeans_points_5.xsd
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
|
||||
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
|
||||
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
|
||||
<xs:complexType name="FeatureCollectionType">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="gml:AbstractFeatureCollectionType">
|
||||
<xs:attribute name="lockId" type="xs:string" use="optional"/>
|
||||
<xs:attribute name="scope" type="xs:string" use="optional"/>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
<xs:element name="kmeans_points_5" type="ogr:kmeans_points_5_Type" substitutionGroup="gml:_Feature"/>
|
||||
<xs:complexType name="kmeans_points_5_Type">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="gml:AbstractFeatureType">
|
||||
<xs:sequence>
|
||||
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
|
||||
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
<xs:element name="CLUSTER_ID5" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
</xs:schema>
|
67
python/plugins/processing/tests/testdata/expected/kmeans_polys.gml
vendored
Normal file
67
python/plugins/processing/tests/testdata/expected/kmeans_polys.gml
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<ogr:FeatureCollection
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_polys.xsd"
|
||||
xmlns:ogr="http://ogr.maptools.org/"
|
||||
xmlns:gml="http://www.opengis.net/gml">
|
||||
<gml:boundedBy>
|
||||
<gml:Box>
|
||||
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
|
||||
<gml:coord><gml:X>10</gml:X><gml:Y>6</gml:Y></gml:coord>
|
||||
</gml:Box>
|
||||
</gml:boundedBy>
|
||||
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_polys fid="polys.0">
|
||||
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>-1,-1 -1,3 3,3 3,2 2,2 2,-1 -1,-1</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||
<ogr:name>aaaaa</ogr:name>
|
||||
<ogr:intval>33</ogr:intval>
|
||||
<ogr:floatval>44.123456</ogr:floatval>
|
||||
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_polys>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_polys fid="polys.1">
|
||||
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>5,5 6,4 4,4 5,5</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||
<ogr:name>Aaaaa</ogr:name>
|
||||
<ogr:intval>-33</ogr:intval>
|
||||
<ogr:floatval>0</ogr:floatval>
|
||||
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_polys>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_polys fid="polys.2">
|
||||
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>2,5 2,6 3,6 3,5 2,5</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||
<ogr:name>bbaaa</ogr:name>
|
||||
<ogr:intval xsi:nil="true"/>
|
||||
<ogr:floatval>0.123</ogr:floatval>
|
||||
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_polys>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_polys fid="polys.3">
|
||||
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>6,1 10,1 10,-3 6,-3 6,1</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs><gml:innerBoundaryIs><gml:LinearRing><gml:coordinates>7,0 7,-2 9,-2 9,0 7,0</gml:coordinates></gml:LinearRing></gml:innerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||
<ogr:name>ASDF</ogr:name>
|
||||
<ogr:intval>0</ogr:intval>
|
||||
<ogr:floatval xsi:nil="true"/>
|
||||
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_polys>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_polys fid="polys.4">
|
||||
<ogr:name xsi:nil="true"/>
|
||||
<ogr:intval>120</ogr:intval>
|
||||
<ogr:floatval>-100291.43213</ogr:floatval>
|
||||
<ogr:CLUSTER_ID xsi:nil="true"/>
|
||||
</ogr:kmeans_polys>
|
||||
</gml:featureMember>
|
||||
<gml:featureMember>
|
||||
<ogr:kmeans_polys fid="polys.5">
|
||||
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>3,2 6,1 6,-3 2,-1 2,2 3,2</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||
<ogr:name>elim</ogr:name>
|
||||
<ogr:intval>2</ogr:intval>
|
||||
<ogr:floatval>3.33</ogr:floatval>
|
||||
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||
</ogr:kmeans_polys>
|
||||
</gml:featureMember>
|
||||
</ogr:FeatureCollection>
|
50
python/plugins/processing/tests/testdata/expected/kmeans_polys.xsd
vendored
Normal file
50
python/plugins/processing/tests/testdata/expected/kmeans_polys.xsd
vendored
Normal file
@ -0,0 +1,50 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
|
||||
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
|
||||
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
|
||||
<xs:complexType name="FeatureCollectionType">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="gml:AbstractFeatureCollectionType">
|
||||
<xs:attribute name="lockId" type="xs:string" use="optional"/>
|
||||
<xs:attribute name="scope" type="xs:string" use="optional"/>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
<xs:element name="kmeans_polys" type="ogr:kmeans_polys_Type" substitutionGroup="gml:_Feature"/>
|
||||
<xs:complexType name="kmeans_polys_Type">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="gml:AbstractFeatureType">
|
||||
<xs:sequence>
|
||||
<xs:element name="geometryProperty" type="gml:PolygonPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
|
||||
<xs:element name="name" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:string">
|
||||
<xs:maxLength value="5"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
<xs:element name="intval" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
<xs:element name="floatval" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:decimal">
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
|
||||
<xs:simpleType>
|
||||
<xs:restriction base="xs:integer">
|
||||
<xs:totalDigits value="10"/>
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
</xs:element>
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
</xs:schema>
|
@ -5660,4 +5660,56 @@ tests:
|
||||
name: expected/vectorize.gml
|
||||
type: vector
|
||||
|
||||
- algorithm: native:kmeansclustering
|
||||
name: K means, points, 3 clusters
|
||||
params:
|
||||
CLUSTERS: 3
|
||||
FIELD_NAME: CLUSTER_ID
|
||||
INPUT:
|
||||
name: points.gml
|
||||
type: vector
|
||||
results:
|
||||
OUTPUT:
|
||||
name: expected/kmeans_points_3.gml
|
||||
type: vector
|
||||
|
||||
- algorithm: native:kmeansclustering
|
||||
name: K means, points, 5 clusters
|
||||
params:
|
||||
CLUSTERS: 5
|
||||
FIELD_NAME: CLUSTER_ID5
|
||||
INPUT:
|
||||
name: points.gml
|
||||
type: vector
|
||||
results:
|
||||
OUTPUT:
|
||||
name: expected/kmeans_points_5.gml
|
||||
type: vector
|
||||
|
||||
- algorithm: native:kmeansclustering
|
||||
name: K means, lines
|
||||
params:
|
||||
CLUSTERS: 2
|
||||
FIELD_NAME: CLUSTER_ID
|
||||
INPUT:
|
||||
name: lines.gml
|
||||
type: vector
|
||||
results:
|
||||
OUTPUT:
|
||||
name: expected/kmeans_lines.gml
|
||||
type: vector
|
||||
|
||||
- algorithm: native:kmeansclustering
|
||||
name: K means, polys
|
||||
params:
|
||||
CLUSTERS: 2
|
||||
FIELD_NAME: CLUSTER_ID
|
||||
INPUT:
|
||||
name: polys.gml
|
||||
type: vector
|
||||
results:
|
||||
OUTPUT:
|
||||
name: expected/kmeans_polys.gml
|
||||
type: vector
|
||||
|
||||
# See ../README.md for a description of the file format
|
||||
|
@ -41,10 +41,11 @@ SET(QGIS_ANALYSIS_SRCS
|
||||
processing/qgsalgorithmfiledownloader.cpp
|
||||
processing/qgsalgorithmfilter.cpp
|
||||
processing/qgsalgorithmfixgeometries.cpp
|
||||
processing/qgsalgorithmimportphotos.cpp
|
||||
processing/qgsalgorithmintersection.cpp
|
||||
processing/qgsalgorithmjoinbyattribute.cpp
|
||||
processing/qgsalgorithmjoinwithlines.cpp
|
||||
processing/qgsalgorithmimportphotos.cpp
|
||||
processing/qgsalgorithmkmeansclustering.cpp
|
||||
processing/qgsalgorithmlineintersection.cpp
|
||||
processing/qgsalgorithmloadlayer.cpp
|
||||
processing/qgsalgorithmmeancoordinates.cpp
|
||||
|
371
src/analysis/processing/qgsalgorithmkmeansclustering.cpp
Normal file
371
src/analysis/processing/qgsalgorithmkmeansclustering.cpp
Normal file
@ -0,0 +1,371 @@
|
||||
/***************************************************************************
|
||||
qgsalgorithmkmeansclustering.cpp
|
||||
---------------------
|
||||
begin : June 2018
|
||||
copyright : (C) 2018 by Nyall Dawson
|
||||
email : nyall dot dawson at gmail dot com
|
||||
***************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU General Public License as published by *
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
***************************************************************************/
|
||||
|
||||
#include "qgsalgorithmkmeansclustering.h"
|
||||
|
||||
///@cond PRIVATE
|
||||
|
||||
const int KMEANS_MAX_ITERATIONS = 1000;
|
||||
|
||||
QString QgsKMeansClusteringAlgorithm::name() const
|
||||
{
|
||||
return QStringLiteral( "kmeansclustering" );
|
||||
}
|
||||
|
||||
QString QgsKMeansClusteringAlgorithm::displayName() const
|
||||
{
|
||||
return QObject::tr( "K-means clustering" );
|
||||
}
|
||||
|
||||
QStringList QgsKMeansClusteringAlgorithm::tags() const
|
||||
{
|
||||
return QObject::tr( "clustering,clusters,kmeans,points" ).split( ',' );
|
||||
}
|
||||
|
||||
QString QgsKMeansClusteringAlgorithm::group() const
|
||||
{
|
||||
return QObject::tr( "Vector analysis" );
|
||||
}
|
||||
|
||||
QString QgsKMeansClusteringAlgorithm::groupId() const
|
||||
{
|
||||
return QStringLiteral( "vectoranalysis" );
|
||||
}
|
||||
|
||||
void QgsKMeansClusteringAlgorithm::initAlgorithm( const QVariantMap & )
|
||||
{
|
||||
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
|
||||
QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorAnyGeometry ) );
|
||||
addParameter( new QgsProcessingParameterNumber( QStringLiteral( "CLUSTERS" ), QObject::tr( "Number of clusters" ),
|
||||
QgsProcessingParameterNumber::Integer, 5, false, 1 ) );
|
||||
addParameter( new QgsProcessingParameterString( QStringLiteral( "FIELD_NAME" ),
|
||||
QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) ) );
|
||||
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorAnyGeometry ) );
|
||||
}
|
||||
|
||||
QString QgsKMeansClusteringAlgorithm::shortHelpString() const
|
||||
{
|
||||
return QObject::tr( "Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
|
||||
"If input geometries are line or polygons, the clustering is based on the centroid of the feature." );
|
||||
}
|
||||
|
||||
QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance() const
|
||||
{
|
||||
return new QgsKMeansClusteringAlgorithm();
|
||||
}
|
||||
|
||||
QVariantMap QgsKMeansClusteringAlgorithm::processAlgorithm( const QVariantMap ¶meters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
|
||||
{
|
||||
std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
|
||||
if ( !source )
|
||||
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
|
||||
|
||||
int k = parameterAsInt( parameters, QStringLiteral( "CLUSTERS" ), context );
|
||||
|
||||
QgsFields outputFields = source->fields();
|
||||
const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
|
||||
QgsFields newFields;
|
||||
newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
|
||||
outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
|
||||
|
||||
QString dest;
|
||||
std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
|
||||
if ( !sink )
|
||||
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
|
||||
|
||||
// build list of point inputs - if it's already a point, use that. If not, take the centroid.
|
||||
feedback->pushInfo( QObject::tr( "Collecting input points" ) );
|
||||
double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
|
||||
int i = 0;
|
||||
int n = 0;
|
||||
QgsFeature feat;
|
||||
|
||||
std::vector< Feature > clusterFeatures;
|
||||
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
|
||||
QHash< QgsFeatureId, int > idToObj;
|
||||
while ( features.nextFeature( feat ) )
|
||||
{
|
||||
i++;
|
||||
if ( feedback->isCanceled() )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
feedback->setProgress( i * step );
|
||||
if ( !feat.hasGeometry() )
|
||||
continue;
|
||||
|
||||
n++;
|
||||
|
||||
QgsPointXY point;
|
||||
if ( QgsWkbTypes::flatType( feat.geometry().wkbType() ) == QgsWkbTypes::Point )
|
||||
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
|
||||
else
|
||||
{
|
||||
QgsGeometry centroid = feat.geometry().centroid();
|
||||
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( centroid.constGet() ) );
|
||||
}
|
||||
|
||||
idToObj.insert( feat.id(), clusterFeatures.size() );
|
||||
clusterFeatures.emplace_back( Feature( point ) );
|
||||
}
|
||||
|
||||
if ( n < k )
|
||||
{
|
||||
feedback->reportError( QObject::tr( "Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
|
||||
k = n;
|
||||
}
|
||||
|
||||
if ( k > 1 )
|
||||
{
|
||||
feedback->pushInfo( QObject::tr( "Calculating clusters" ) );
|
||||
|
||||
// cluster centers
|
||||
std::vector< QgsPointXY > centers( k );
|
||||
|
||||
initClusters( clusterFeatures, centers, k, feedback );
|
||||
calculateKMeans( clusterFeatures, centers, k, feedback );
|
||||
}
|
||||
|
||||
features = source->getFeatures();
|
||||
i = 0;
|
||||
while ( features.nextFeature( feat ) )
|
||||
{
|
||||
i++;
|
||||
if ( feedback->isCanceled() )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
feedback->setProgress( 50 + i * step );
|
||||
QgsAttributes attr = feat.attributes();
|
||||
if ( !feat.hasGeometry() )
|
||||
{
|
||||
attr << QVariant();
|
||||
}
|
||||
else if ( k <= 1 )
|
||||
{
|
||||
attr << 0;
|
||||
}
|
||||
else if ( !idToObj.contains( feat.id() ) )
|
||||
{
|
||||
attr << QVariant();
|
||||
}
|
||||
else
|
||||
{
|
||||
attr << clusterFeatures[ idToObj.value( feat.id() ) ].cluster;
|
||||
}
|
||||
feat.setAttributes( attr );
|
||||
sink->addFeature( feat, QgsFeatureSink::FastInsert );
|
||||
}
|
||||
|
||||
QVariantMap outputs;
|
||||
outputs.insert( QStringLiteral( "OUTPUT" ), dest );
|
||||
return outputs;
|
||||
}
|
||||
|
||||
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
|
||||
|
||||
void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, const int k, QgsProcessingFeedback *feedback )
|
||||
{
|
||||
ulong n = points.size();
|
||||
if ( n == 0 )
|
||||
return;
|
||||
|
||||
if ( n == 1 )
|
||||
{
|
||||
for ( int i = 0; i < k; i++ )
|
||||
centers[ i ] = points[ 0 ].point;
|
||||
return;
|
||||
}
|
||||
|
||||
long duplicateCount = 1;
|
||||
// initially scan for two most distance points from each other, p1 and p2
|
||||
ulong p1 = 0;
|
||||
ulong p2 = 0;
|
||||
double distanceP1 = 0;
|
||||
double distanceP2 = 0;
|
||||
double maxDistance = -1;
|
||||
for ( ulong i = 1; i < n; i++ )
|
||||
{
|
||||
distanceP1 = points[i].point.sqrDist( points[p1].point );
|
||||
distanceP2 = points[i].point.sqrDist( points[p2].point );
|
||||
|
||||
// if this point is further then existing candidates, replace our choice
|
||||
if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
|
||||
{
|
||||
maxDistance = std::max( distanceP1, distanceP2 );
|
||||
if ( distanceP1 > distanceP2 )
|
||||
p2 = i;
|
||||
else
|
||||
p1 = i;
|
||||
}
|
||||
|
||||
// also record count of duplicate points
|
||||
if ( qgsDoubleNear( distanceP1, 0 ) || qgsDoubleNear( distanceP2, 0 ) )
|
||||
duplicateCount++;
|
||||
}
|
||||
|
||||
if ( feedback && duplicateCount > 1 )
|
||||
{
|
||||
feedback->pushInfo( QObject::tr( "There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
|
||||
}
|
||||
|
||||
// By now two points should be found and be not the same
|
||||
Q_ASSERT( p1 != p2 && maxDistance >= 0 );
|
||||
|
||||
// Accept these two points as our initial centers
|
||||
centers[0] = points[p1].point;
|
||||
centers[1] = points[p2].point;
|
||||
|
||||
if ( k > 2 )
|
||||
{
|
||||
// array of minimum distance to a point from accepted cluster centers
|
||||
std::vector< double > distances( n );
|
||||
|
||||
// initialize array with distance to first object
|
||||
for ( ulong j = 0; j < n; j++ )
|
||||
{
|
||||
distances[j] = points[j].point.sqrDist( centers[0] );
|
||||
}
|
||||
distances[p1] = -1;
|
||||
distances[p2] = -1;
|
||||
|
||||
// loop i on clusters, skip 0 and 1 as found already
|
||||
for ( int i = 2; i < k; i++ )
|
||||
{
|
||||
ulong candidateCenter = 0;
|
||||
double maxDistance = std::numeric_limits<double>::lowest();
|
||||
|
||||
// loop j on points
|
||||
for ( ulong j = 0; j < n; j++ )
|
||||
{
|
||||
// accepted clusters are already marked with distance = -1
|
||||
if ( distances[j] < 0 )
|
||||
continue;
|
||||
|
||||
// update minimal distance with previously accepted cluster
|
||||
distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
|
||||
|
||||
// greedily take a point that's farthest from any of accepted clusters
|
||||
if ( distances[j] > maxDistance )
|
||||
{
|
||||
candidateCenter = j;
|
||||
maxDistance = distances[j];
|
||||
}
|
||||
}
|
||||
|
||||
// checked earlier by counting entries on input, just in case
|
||||
Q_ASSERT( maxDistance >= 0 );
|
||||
|
||||
// accept candidate to centers
|
||||
distances[candidateCenter] = -1;
|
||||
// copy the point coordinates into the initial centers array
|
||||
centers[i] = points[candidateCenter].point;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
|
||||
|
||||
void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers, int k, QgsProcessingFeedback *feedback )
|
||||
{
|
||||
int converged = false;
|
||||
bool changed = false;
|
||||
|
||||
// avoid reallocating weights array for every iteration
|
||||
std::vector< uint > weights( k );
|
||||
|
||||
uint i = 0;
|
||||
for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
|
||||
{
|
||||
if ( feedback && feedback->isCanceled() )
|
||||
break;
|
||||
|
||||
findNearest( objs, centers, k, changed );
|
||||
updateMeans( objs, centers, weights, k );
|
||||
converged = !changed;
|
||||
}
|
||||
|
||||
if ( !converged && feedback )
|
||||
feedback->reportError( QObject::tr( "Clustering did not converge after %1 iterations" ).arg( i ) );
|
||||
else if ( feedback )
|
||||
feedback->pushInfo( QObject::tr( "Clustering converged after %1 iterations" ).arg( i ) );
|
||||
}
|
||||
|
||||
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
|
||||
|
||||
void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points, const std::vector<QgsPointXY> ¢ers, const int k, bool &changed )
|
||||
{
|
||||
changed = false;
|
||||
ulong n = points.size();
|
||||
for ( ulong i = 0; i < n; i++ )
|
||||
{
|
||||
Feature &point = points[i];
|
||||
|
||||
// Initialize with distance to first cluster
|
||||
double currentDistance = point.point.sqrDist( centers[0] );
|
||||
int currentCluster = 0;
|
||||
|
||||
// Check all other cluster centers and find the nearest
|
||||
for ( int cluster = 1; cluster < k; cluster++ )
|
||||
{
|
||||
const double distance = point.point.sqrDist( centers[cluster] );
|
||||
if ( distance < currentDistance )
|
||||
{
|
||||
currentDistance = distance;
|
||||
currentCluster = cluster;
|
||||
}
|
||||
}
|
||||
|
||||
// Store the nearest cluster this object is in
|
||||
if ( point.cluster != currentCluster )
|
||||
{
|
||||
changed = true;
|
||||
point.cluster = currentCluster;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
|
||||
|
||||
void QgsKMeansClusteringAlgorithm::updateMeans( const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights, const int k )
|
||||
{
|
||||
uint n = points.size();
|
||||
std::fill( weights.begin(), weights.end(), 0 );
|
||||
for ( int i = 0; i < k; i++ )
|
||||
{
|
||||
centers[i].setX( 0.0 );
|
||||
centers[i].setY( 0.0 );
|
||||
}
|
||||
for ( uint i = 0; i < n; i++ )
|
||||
{
|
||||
int cluster = points[i].cluster;
|
||||
centers[cluster] += QgsVector( points[i].point.x(),
|
||||
points[i].point.y() );
|
||||
weights[cluster] += 1;
|
||||
}
|
||||
for ( int i = 0; i < k; i++ )
|
||||
{
|
||||
centers[i] /= weights[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///@endcond
|
||||
|
||||
|
77
src/analysis/processing/qgsalgorithmkmeansclustering.h
Normal file
77
src/analysis/processing/qgsalgorithmkmeansclustering.h
Normal file
@ -0,0 +1,77 @@
|
||||
/***************************************************************************
|
||||
qgsalgorithmkmeansclustering.h
|
||||
---------------------
|
||||
begin : June 2018
|
||||
copyright : (C) 2018 by Nyall Dawson
|
||||
email : nyall dot dawson at gmail dot com
|
||||
***************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU General Public License as published by *
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef QGSALGORITHMKMEANSCLUSTERING_H
|
||||
#define QGSALGORITHMKMEANSCLUSTERING_H
|
||||
|
||||
#define SIP_NO_FILE
|
||||
|
||||
#include "qgis.h"
|
||||
#include "qgis_analysis.h"
|
||||
#include "qgsprocessingalgorithm.h"
|
||||
|
||||
///@cond PRIVATE
|
||||
|
||||
|
||||
/**
|
||||
* Native k-means clustering algorithm.
|
||||
*/
|
||||
class ANALYSIS_EXPORT QgsKMeansClusteringAlgorithm : public QgsProcessingAlgorithm
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
QgsKMeansClusteringAlgorithm() = default;
|
||||
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
|
||||
QString name() const override;
|
||||
QString displayName() const override;
|
||||
QStringList tags() const override;
|
||||
QString group() const override;
|
||||
QString groupId() const override;
|
||||
QString shortHelpString() const override;
|
||||
QgsKMeansClusteringAlgorithm *createInstance() const override SIP_FACTORY;
|
||||
|
||||
protected:
|
||||
|
||||
QVariantMap processAlgorithm( const QVariantMap ¶meters,
|
||||
QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
|
||||
|
||||
private:
|
||||
|
||||
struct Feature
|
||||
{
|
||||
Feature( QgsPointXY point )
|
||||
: point( point )
|
||||
{}
|
||||
|
||||
QgsPointXY point;
|
||||
int cluster = -1;
|
||||
};
|
||||
|
||||
static void initClusters( std::vector< Feature > &points, std::vector< QgsPointXY > ¢ers, int k, QgsProcessingFeedback *feedback );
|
||||
static void calculateKMeans( std::vector< Feature > &points, std::vector< QgsPointXY > ¢ers, int k, QgsProcessingFeedback *feedback );
|
||||
static void findNearest( std::vector< Feature > &points, const std::vector< QgsPointXY > ¢ers, int k, bool &changed );
|
||||
static void updateMeans( const std::vector< Feature > &points, std::vector< QgsPointXY > ¢ers, std::vector< uint > &weights, int k );
|
||||
|
||||
friend class TestQgsProcessingAlgs;
|
||||
};
|
||||
|
||||
///@endcond PRIVATE
|
||||
|
||||
#endif // QGSALGORITHMKMEANSCLUSTERING_H
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include "qgsalgorithmjoinwithlines.h"
|
||||
#include "qgsalgorithmimportphotos.h"
|
||||
#include "qgsalgorithmintersection.h"
|
||||
#include "qgsalgorithmkmeansclustering.h"
|
||||
#include "qgsalgorithmlineintersection.h"
|
||||
#include "qgsalgorithmloadlayer.h"
|
||||
#include "qgsalgorithmmeancoordinates.h"
|
||||
@ -150,6 +151,7 @@ void QgsNativeAlgorithms::loadAlgorithms()
|
||||
addAlgorithm( new QgsIntersectionAlgorithm() );
|
||||
addAlgorithm( new QgsJoinByAttributeAlgorithm() );
|
||||
addAlgorithm( new QgsJoinWithLinesAlgorithm() );
|
||||
addAlgorithm( new QgsKMeansClusteringAlgorithm() );
|
||||
addAlgorithm( new QgsLineIntersectionAlgorithm() );
|
||||
addAlgorithm( new QgsLoadLayerAlgorithm() );
|
||||
addAlgorithm( new QgsMeanCoordinatesAlgorithm() );
|
||||
|
@ -1870,6 +1870,12 @@ QgsGeometry QgsGeometry::centroid() const
|
||||
return QgsGeometry();
|
||||
}
|
||||
|
||||
// avoid calling geos for trivial point centroids
|
||||
if ( QgsWkbTypes::flatType( d->geometry->wkbType() ) == QgsWkbTypes::Point )
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
|
||||
QgsGeos geos( d->geometry.get() );
|
||||
|
||||
mLastError.clear();
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "qgsnativealgorithms.h"
|
||||
#include "qgsalgorithmimportphotos.h"
|
||||
#include "qgsalgorithmtransform.h"
|
||||
#include "qgsalgorithmkmeansclustering.h"
|
||||
|
||||
class TestQgsProcessingAlgs: public QObject
|
||||
{
|
||||
@ -41,6 +42,7 @@ class TestQgsProcessingAlgs: public QObject
|
||||
void parseGeoTags();
|
||||
void featureFilterAlg();
|
||||
void transformAlg();
|
||||
void kmeansCluster();
|
||||
|
||||
private:
|
||||
|
||||
@ -435,6 +437,64 @@ void TestQgsProcessingAlgs::transformAlg()
|
||||
QVERIFY( ok );
|
||||
}
|
||||
|
||||
void TestQgsProcessingAlgs::kmeansCluster()
|
||||
{
|
||||
// make some features
|
||||
std::vector< QgsKMeansClusteringAlgorithm::Feature > features;
|
||||
std::vector< QgsPointXY > centers( 2 );
|
||||
|
||||
// no features, no crash
|
||||
int k = 2;
|
||||
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||
|
||||
// features < clusters
|
||||
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
|
||||
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||
QCOMPARE( features[ 0 ].cluster, 0 );
|
||||
|
||||
// features == clusters
|
||||
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 11, 5 ) ) );
|
||||
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||
QCOMPARE( features[ 0 ].cluster, 1 );
|
||||
QCOMPARE( features[ 1 ].cluster, 0 );
|
||||
|
||||
// features > clusters
|
||||
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 13, 3 ) ) );
|
||||
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 13, 13 ) ) );
|
||||
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 23, 6 ) ) );
|
||||
k = 2;
|
||||
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||
QCOMPARE( features[ 0 ].cluster, 1 );
|
||||
QCOMPARE( features[ 1 ].cluster, 1 );
|
||||
QCOMPARE( features[ 2 ].cluster, 0 );
|
||||
QCOMPARE( features[ 3 ].cluster, 0 );
|
||||
QCOMPARE( features[ 4 ].cluster, 0 );
|
||||
|
||||
// repeat above, with 3 clusters
|
||||
k = 3;
|
||||
centers.resize( 3 );
|
||||
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||
QCOMPARE( features[ 0 ].cluster, 1 );
|
||||
QCOMPARE( features[ 1 ].cluster, 2 );
|
||||
QCOMPARE( features[ 2 ].cluster, 2 );
|
||||
QCOMPARE( features[ 3 ].cluster, 2 );
|
||||
QCOMPARE( features[ 4 ].cluster, 0 );
|
||||
|
||||
// with identical points
|
||||
features.clear();
|
||||
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
|
||||
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
|
||||
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
|
||||
QCOMPARE( features[ 0 ].cluster, -1 );
|
||||
QCOMPARE( features[ 1 ].cluster, -1 );
|
||||
QCOMPARE( features[ 2 ].cluster, -1 );
|
||||
}
|
||||
|
||||
|
||||
QGSTEST_MAIN( TestQgsProcessingAlgs )
|
||||
#include "testqgsprocessingalgs.moc"
|
||||
|
Loading…
x
Reference in New Issue
Block a user