mirror of
https://github.com/qgis/QGIS.git
synced 2025-10-15 00:02:52 -04:00
[FEATURE][processing] K Means clustering algorithm
Adds a native k-means clustering algorithm. Based on a port of PostGIS' ST_ClusterKMeans function, this new algorithm adds a new cluster ID field to a set of input features identify the feature's cluster based on the k-means clustering approach. If non-point geometries are used as input, the clustering is based off the centroid of the input geometries.
This commit is contained in:
parent
16f3781ab7
commit
34b9d39b27
Binary file not shown.
55
python/plugins/processing/tests/testdata/expected/kmeans_lines.gml
vendored
Normal file
55
python/plugins/processing/tests/testdata/expected/kmeans_lines.gml
vendored
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" ?>
|
||||||
|
<ogr:FeatureCollection
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_lines.xsd"
|
||||||
|
xmlns:ogr="http://ogr.maptools.org/"
|
||||||
|
xmlns:gml="http://www.opengis.net/gml">
|
||||||
|
<gml:boundedBy>
|
||||||
|
<gml:Box>
|
||||||
|
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
|
||||||
|
<gml:coord><gml:X>11</gml:X><gml:Y>5</gml:Y></gml:coord>
|
||||||
|
</gml:Box>
|
||||||
|
</gml:boundedBy>
|
||||||
|
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_lines fid="lines.0">
|
||||||
|
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,2 9,2 9,3 11,5</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||||
|
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_lines>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_lines fid="lines.1">
|
||||||
|
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>-1,-1 1,-1</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||||
|
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_lines>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_lines fid="lines.2">
|
||||||
|
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>2,0 2,2 3,2 3,3</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||||
|
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_lines>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_lines fid="lines.3">
|
||||||
|
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>3,1 5,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||||
|
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_lines>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_lines fid="lines.4">
|
||||||
|
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>7,-3 10,-3</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||||
|
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_lines>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_lines fid="lines.5">
|
||||||
|
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,-3 10,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
|
||||||
|
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_lines>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_lines fid="lines.6">
|
||||||
|
<ogr:CLUSTER_ID xsi:nil="true"/>
|
||||||
|
</ogr:kmeans_lines>
|
||||||
|
</gml:featureMember>
|
||||||
|
</ogr:FeatureCollection>
|
30
python/plugins/processing/tests/testdata/expected/kmeans_lines.xsd
vendored
Normal file
30
python/plugins/processing/tests/testdata/expected/kmeans_lines.xsd
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
|
||||||
|
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
|
||||||
|
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
|
||||||
|
<xs:complexType name="FeatureCollectionType">
|
||||||
|
<xs:complexContent>
|
||||||
|
<xs:extension base="gml:AbstractFeatureCollectionType">
|
||||||
|
<xs:attribute name="lockId" type="xs:string" use="optional"/>
|
||||||
|
<xs:attribute name="scope" type="xs:string" use="optional"/>
|
||||||
|
</xs:extension>
|
||||||
|
</xs:complexContent>
|
||||||
|
</xs:complexType>
|
||||||
|
<xs:element name="kmeans_lines" type="ogr:kmeans_lines_Type" substitutionGroup="gml:_Feature"/>
|
||||||
|
<xs:complexType name="kmeans_lines_Type">
|
||||||
|
<xs:complexContent>
|
||||||
|
<xs:extension base="gml:AbstractFeatureType">
|
||||||
|
<xs:sequence>
|
||||||
|
<xs:element name="geometryProperty" type="gml:LineStringPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
|
||||||
|
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
</xs:sequence>
|
||||||
|
</xs:extension>
|
||||||
|
</xs:complexContent>
|
||||||
|
</xs:complexType>
|
||||||
|
</xs:schema>
|
86
python/plugins/processing/tests/testdata/expected/kmeans_points_3.gml
vendored
Normal file
86
python/plugins/processing/tests/testdata/expected/kmeans_points_3.gml
vendored
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" ?>
|
||||||
|
<ogr:FeatureCollection
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_3.xsd"
|
||||||
|
xmlns:ogr="http://ogr.maptools.org/"
|
||||||
|
xmlns:gml="http://www.opengis.net/gml">
|
||||||
|
<gml:boundedBy>
|
||||||
|
<gml:Box>
|
||||||
|
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
|
||||||
|
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
|
||||||
|
</gml:Box>
|
||||||
|
</gml:boundedBy>
|
||||||
|
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.0">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>1</ogr:id>
|
||||||
|
<ogr:id2>2</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.1">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>2</ogr:id>
|
||||||
|
<ogr:id2>1</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.2">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>3</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.3">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>4</ogr:id>
|
||||||
|
<ogr:id2>2</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.4">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>5</ogr:id>
|
||||||
|
<ogr:id2>1</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.5">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>6</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.6">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>7</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.7">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>8</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_3 fid="points.8">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>9</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_points_3>
|
||||||
|
</gml:featureMember>
|
||||||
|
</ogr:FeatureCollection>
|
44
python/plugins/processing/tests/testdata/expected/kmeans_points_3.xsd
vendored
Normal file
44
python/plugins/processing/tests/testdata/expected/kmeans_points_3.xsd
vendored
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
|
||||||
|
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
|
||||||
|
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
|
||||||
|
<xs:complexType name="FeatureCollectionType">
|
||||||
|
<xs:complexContent>
|
||||||
|
<xs:extension base="gml:AbstractFeatureCollectionType">
|
||||||
|
<xs:attribute name="lockId" type="xs:string" use="optional"/>
|
||||||
|
<xs:attribute name="scope" type="xs:string" use="optional"/>
|
||||||
|
</xs:extension>
|
||||||
|
</xs:complexContent>
|
||||||
|
</xs:complexType>
|
||||||
|
<xs:element name="kmeans_points_3" type="ogr:kmeans_points_3_Type" substitutionGroup="gml:_Feature"/>
|
||||||
|
<xs:complexType name="kmeans_points_3_Type">
|
||||||
|
<xs:complexContent>
|
||||||
|
<xs:extension base="gml:AbstractFeatureType">
|
||||||
|
<xs:sequence>
|
||||||
|
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
|
||||||
|
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
</xs:sequence>
|
||||||
|
</xs:extension>
|
||||||
|
</xs:complexContent>
|
||||||
|
</xs:complexType>
|
||||||
|
</xs:schema>
|
86
python/plugins/processing/tests/testdata/expected/kmeans_points_5.gml
vendored
Normal file
86
python/plugins/processing/tests/testdata/expected/kmeans_points_5.gml
vendored
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" ?>
|
||||||
|
<ogr:FeatureCollection
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_5.xsd"
|
||||||
|
xmlns:ogr="http://ogr.maptools.org/"
|
||||||
|
xmlns:gml="http://www.opengis.net/gml">
|
||||||
|
<gml:boundedBy>
|
||||||
|
<gml:Box>
|
||||||
|
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
|
||||||
|
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
|
||||||
|
</gml:Box>
|
||||||
|
</gml:boundedBy>
|
||||||
|
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.0">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>1</ogr:id>
|
||||||
|
<ogr:id2>2</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.1">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>2</ogr:id>
|
||||||
|
<ogr:id2>1</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.2">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>3</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.3">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>4</ogr:id>
|
||||||
|
<ogr:id2>2</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.4">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>5</ogr:id>
|
||||||
|
<ogr:id2>1</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.5">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>6</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>1</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.6">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>7</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.7">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>8</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_points_5 fid="points.8">
|
||||||
|
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
|
||||||
|
<ogr:id>9</ogr:id>
|
||||||
|
<ogr:id2>0</ogr:id2>
|
||||||
|
<ogr:CLUSTER_ID5>3</ogr:CLUSTER_ID5>
|
||||||
|
</ogr:kmeans_points_5>
|
||||||
|
</gml:featureMember>
|
||||||
|
</ogr:FeatureCollection>
|
44
python/plugins/processing/tests/testdata/expected/kmeans_points_5.xsd
vendored
Normal file
44
python/plugins/processing/tests/testdata/expected/kmeans_points_5.xsd
vendored
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
|
||||||
|
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
|
||||||
|
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
|
||||||
|
<xs:complexType name="FeatureCollectionType">
|
||||||
|
<xs:complexContent>
|
||||||
|
<xs:extension base="gml:AbstractFeatureCollectionType">
|
||||||
|
<xs:attribute name="lockId" type="xs:string" use="optional"/>
|
||||||
|
<xs:attribute name="scope" type="xs:string" use="optional"/>
|
||||||
|
</xs:extension>
|
||||||
|
</xs:complexContent>
|
||||||
|
</xs:complexType>
|
||||||
|
<xs:element name="kmeans_points_5" type="ogr:kmeans_points_5_Type" substitutionGroup="gml:_Feature"/>
|
||||||
|
<xs:complexType name="kmeans_points_5_Type">
|
||||||
|
<xs:complexContent>
|
||||||
|
<xs:extension base="gml:AbstractFeatureType">
|
||||||
|
<xs:sequence>
|
||||||
|
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
|
||||||
|
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
<xs:element name="CLUSTER_ID5" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
</xs:sequence>
|
||||||
|
</xs:extension>
|
||||||
|
</xs:complexContent>
|
||||||
|
</xs:complexType>
|
||||||
|
</xs:schema>
|
67
python/plugins/processing/tests/testdata/expected/kmeans_polys.gml
vendored
Normal file
67
python/plugins/processing/tests/testdata/expected/kmeans_polys.gml
vendored
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" ?>
|
||||||
|
<ogr:FeatureCollection
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_polys.xsd"
|
||||||
|
xmlns:ogr="http://ogr.maptools.org/"
|
||||||
|
xmlns:gml="http://www.opengis.net/gml">
|
||||||
|
<gml:boundedBy>
|
||||||
|
<gml:Box>
|
||||||
|
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
|
||||||
|
<gml:coord><gml:X>10</gml:X><gml:Y>6</gml:Y></gml:coord>
|
||||||
|
</gml:Box>
|
||||||
|
</gml:boundedBy>
|
||||||
|
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_polys fid="polys.0">
|
||||||
|
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>-1,-1 -1,3 3,3 3,2 2,2 2,-1 -1,-1</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||||
|
<ogr:name>aaaaa</ogr:name>
|
||||||
|
<ogr:intval>33</ogr:intval>
|
||||||
|
<ogr:floatval>44.123456</ogr:floatval>
|
||||||
|
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_polys>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_polys fid="polys.1">
|
||||||
|
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>5,5 6,4 4,4 5,5</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||||
|
<ogr:name>Aaaaa</ogr:name>
|
||||||
|
<ogr:intval>-33</ogr:intval>
|
||||||
|
<ogr:floatval>0</ogr:floatval>
|
||||||
|
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_polys>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_polys fid="polys.2">
|
||||||
|
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>2,5 2,6 3,6 3,5 2,5</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||||
|
<ogr:name>bbaaa</ogr:name>
|
||||||
|
<ogr:intval xsi:nil="true"/>
|
||||||
|
<ogr:floatval>0.123</ogr:floatval>
|
||||||
|
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_polys>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_polys fid="polys.3">
|
||||||
|
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>6,1 10,1 10,-3 6,-3 6,1</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs><gml:innerBoundaryIs><gml:LinearRing><gml:coordinates>7,0 7,-2 9,-2 9,0 7,0</gml:coordinates></gml:LinearRing></gml:innerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||||
|
<ogr:name>ASDF</ogr:name>
|
||||||
|
<ogr:intval>0</ogr:intval>
|
||||||
|
<ogr:floatval xsi:nil="true"/>
|
||||||
|
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_polys>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_polys fid="polys.4">
|
||||||
|
<ogr:name xsi:nil="true"/>
|
||||||
|
<ogr:intval>120</ogr:intval>
|
||||||
|
<ogr:floatval>-100291.43213</ogr:floatval>
|
||||||
|
<ogr:CLUSTER_ID xsi:nil="true"/>
|
||||||
|
</ogr:kmeans_polys>
|
||||||
|
</gml:featureMember>
|
||||||
|
<gml:featureMember>
|
||||||
|
<ogr:kmeans_polys fid="polys.5">
|
||||||
|
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>3,2 6,1 6,-3 2,-1 2,2 3,2</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
|
||||||
|
<ogr:name>elim</ogr:name>
|
||||||
|
<ogr:intval>2</ogr:intval>
|
||||||
|
<ogr:floatval>3.33</ogr:floatval>
|
||||||
|
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
|
||||||
|
</ogr:kmeans_polys>
|
||||||
|
</gml:featureMember>
|
||||||
|
</ogr:FeatureCollection>
|
50
python/plugins/processing/tests/testdata/expected/kmeans_polys.xsd
vendored
Normal file
50
python/plugins/processing/tests/testdata/expected/kmeans_polys.xsd
vendored
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
|
||||||
|
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
|
||||||
|
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
|
||||||
|
<xs:complexType name="FeatureCollectionType">
|
||||||
|
<xs:complexContent>
|
||||||
|
<xs:extension base="gml:AbstractFeatureCollectionType">
|
||||||
|
<xs:attribute name="lockId" type="xs:string" use="optional"/>
|
||||||
|
<xs:attribute name="scope" type="xs:string" use="optional"/>
|
||||||
|
</xs:extension>
|
||||||
|
</xs:complexContent>
|
||||||
|
</xs:complexType>
|
||||||
|
<xs:element name="kmeans_polys" type="ogr:kmeans_polys_Type" substitutionGroup="gml:_Feature"/>
|
||||||
|
<xs:complexType name="kmeans_polys_Type">
|
||||||
|
<xs:complexContent>
|
||||||
|
<xs:extension base="gml:AbstractFeatureType">
|
||||||
|
<xs:sequence>
|
||||||
|
<xs:element name="geometryProperty" type="gml:PolygonPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
|
||||||
|
<xs:element name="name" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:string">
|
||||||
|
<xs:maxLength value="5"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
<xs:element name="intval" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
<xs:element name="floatval" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:decimal">
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:integer">
|
||||||
|
<xs:totalDigits value="10"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:element>
|
||||||
|
</xs:sequence>
|
||||||
|
</xs:extension>
|
||||||
|
</xs:complexContent>
|
||||||
|
</xs:complexType>
|
||||||
|
</xs:schema>
|
@ -5660,4 +5660,56 @@ tests:
|
|||||||
name: expected/vectorize.gml
|
name: expected/vectorize.gml
|
||||||
type: vector
|
type: vector
|
||||||
|
|
||||||
|
- algorithm: native:kmeansclustering
|
||||||
|
name: K means, points, 3 clusters
|
||||||
|
params:
|
||||||
|
CLUSTERS: 3
|
||||||
|
FIELD_NAME: CLUSTER_ID
|
||||||
|
INPUT:
|
||||||
|
name: points.gml
|
||||||
|
type: vector
|
||||||
|
results:
|
||||||
|
OUTPUT:
|
||||||
|
name: expected/kmeans_points_3.gml
|
||||||
|
type: vector
|
||||||
|
|
||||||
|
- algorithm: native:kmeansclustering
|
||||||
|
name: K means, points, 5 clusters
|
||||||
|
params:
|
||||||
|
CLUSTERS: 5
|
||||||
|
FIELD_NAME: CLUSTER_ID5
|
||||||
|
INPUT:
|
||||||
|
name: points.gml
|
||||||
|
type: vector
|
||||||
|
results:
|
||||||
|
OUTPUT:
|
||||||
|
name: expected/kmeans_points_5.gml
|
||||||
|
type: vector
|
||||||
|
|
||||||
|
- algorithm: native:kmeansclustering
|
||||||
|
name: K means, lines
|
||||||
|
params:
|
||||||
|
CLUSTERS: 2
|
||||||
|
FIELD_NAME: CLUSTER_ID
|
||||||
|
INPUT:
|
||||||
|
name: lines.gml
|
||||||
|
type: vector
|
||||||
|
results:
|
||||||
|
OUTPUT:
|
||||||
|
name: expected/kmeans_lines.gml
|
||||||
|
type: vector
|
||||||
|
|
||||||
|
- algorithm: native:kmeansclustering
|
||||||
|
name: K means, polys
|
||||||
|
params:
|
||||||
|
CLUSTERS: 2
|
||||||
|
FIELD_NAME: CLUSTER_ID
|
||||||
|
INPUT:
|
||||||
|
name: polys.gml
|
||||||
|
type: vector
|
||||||
|
results:
|
||||||
|
OUTPUT:
|
||||||
|
name: expected/kmeans_polys.gml
|
||||||
|
type: vector
|
||||||
|
|
||||||
# See ../README.md for a description of the file format
|
# See ../README.md for a description of the file format
|
||||||
|
@ -41,10 +41,11 @@ SET(QGIS_ANALYSIS_SRCS
|
|||||||
processing/qgsalgorithmfiledownloader.cpp
|
processing/qgsalgorithmfiledownloader.cpp
|
||||||
processing/qgsalgorithmfilter.cpp
|
processing/qgsalgorithmfilter.cpp
|
||||||
processing/qgsalgorithmfixgeometries.cpp
|
processing/qgsalgorithmfixgeometries.cpp
|
||||||
|
processing/qgsalgorithmimportphotos.cpp
|
||||||
processing/qgsalgorithmintersection.cpp
|
processing/qgsalgorithmintersection.cpp
|
||||||
processing/qgsalgorithmjoinbyattribute.cpp
|
processing/qgsalgorithmjoinbyattribute.cpp
|
||||||
processing/qgsalgorithmjoinwithlines.cpp
|
processing/qgsalgorithmjoinwithlines.cpp
|
||||||
processing/qgsalgorithmimportphotos.cpp
|
processing/qgsalgorithmkmeansclustering.cpp
|
||||||
processing/qgsalgorithmlineintersection.cpp
|
processing/qgsalgorithmlineintersection.cpp
|
||||||
processing/qgsalgorithmloadlayer.cpp
|
processing/qgsalgorithmloadlayer.cpp
|
||||||
processing/qgsalgorithmmeancoordinates.cpp
|
processing/qgsalgorithmmeancoordinates.cpp
|
||||||
|
371
src/analysis/processing/qgsalgorithmkmeansclustering.cpp
Normal file
371
src/analysis/processing/qgsalgorithmkmeansclustering.cpp
Normal file
@ -0,0 +1,371 @@
|
|||||||
|
/***************************************************************************
|
||||||
|
qgsalgorithmkmeansclustering.cpp
|
||||||
|
---------------------
|
||||||
|
begin : June 2018
|
||||||
|
copyright : (C) 2018 by Nyall Dawson
|
||||||
|
email : nyall dot dawson at gmail dot com
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* *
|
||||||
|
* This program is free software; you can redistribute it and/or modify *
|
||||||
|
* it under the terms of the GNU General Public License as published by *
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or *
|
||||||
|
* (at your option) any later version. *
|
||||||
|
* *
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#include "qgsalgorithmkmeansclustering.h"
|
||||||
|
|
||||||
|
///@cond PRIVATE
|
||||||
|
|
||||||
|
const int KMEANS_MAX_ITERATIONS = 1000;
|
||||||
|
|
||||||
|
QString QgsKMeansClusteringAlgorithm::name() const
|
||||||
|
{
|
||||||
|
return QStringLiteral( "kmeansclustering" );
|
||||||
|
}
|
||||||
|
|
||||||
|
QString QgsKMeansClusteringAlgorithm::displayName() const
|
||||||
|
{
|
||||||
|
return QObject::tr( "K-means clustering" );
|
||||||
|
}
|
||||||
|
|
||||||
|
QStringList QgsKMeansClusteringAlgorithm::tags() const
|
||||||
|
{
|
||||||
|
return QObject::tr( "clustering,clusters,kmeans,points" ).split( ',' );
|
||||||
|
}
|
||||||
|
|
||||||
|
QString QgsKMeansClusteringAlgorithm::group() const
|
||||||
|
{
|
||||||
|
return QObject::tr( "Vector analysis" );
|
||||||
|
}
|
||||||
|
|
||||||
|
QString QgsKMeansClusteringAlgorithm::groupId() const
|
||||||
|
{
|
||||||
|
return QStringLiteral( "vectoranalysis" );
|
||||||
|
}
|
||||||
|
|
||||||
|
void QgsKMeansClusteringAlgorithm::initAlgorithm( const QVariantMap & )
|
||||||
|
{
|
||||||
|
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
|
||||||
|
QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorAnyGeometry ) );
|
||||||
|
addParameter( new QgsProcessingParameterNumber( QStringLiteral( "CLUSTERS" ), QObject::tr( "Number of clusters" ),
|
||||||
|
QgsProcessingParameterNumber::Integer, 5, false, 1 ) );
|
||||||
|
addParameter( new QgsProcessingParameterString( QStringLiteral( "FIELD_NAME" ),
|
||||||
|
QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) ) );
|
||||||
|
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorAnyGeometry ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
QString QgsKMeansClusteringAlgorithm::shortHelpString() const
|
||||||
|
{
|
||||||
|
return QObject::tr( "Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
|
||||||
|
"If input geometries are line or polygons, the clustering is based on the centroid of the feature." );
|
||||||
|
}
|
||||||
|
|
||||||
|
QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance() const
|
||||||
|
{
|
||||||
|
return new QgsKMeansClusteringAlgorithm();
|
||||||
|
}
|
||||||
|
|
||||||
|
QVariantMap QgsKMeansClusteringAlgorithm::processAlgorithm( const QVariantMap ¶meters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
|
||||||
|
{
|
||||||
|
std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
|
||||||
|
if ( !source )
|
||||||
|
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
|
||||||
|
|
||||||
|
int k = parameterAsInt( parameters, QStringLiteral( "CLUSTERS" ), context );
|
||||||
|
|
||||||
|
QgsFields outputFields = source->fields();
|
||||||
|
const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
|
||||||
|
QgsFields newFields;
|
||||||
|
newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
|
||||||
|
outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
|
||||||
|
|
||||||
|
QString dest;
|
||||||
|
std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
|
||||||
|
if ( !sink )
|
||||||
|
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
|
||||||
|
|
||||||
|
// build list of point inputs - if it's already a point, use that. If not, take the centroid.
|
||||||
|
feedback->pushInfo( QObject::tr( "Collecting input points" ) );
|
||||||
|
double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
|
||||||
|
int i = 0;
|
||||||
|
int n = 0;
|
||||||
|
QgsFeature feat;
|
||||||
|
|
||||||
|
std::vector< Feature > clusterFeatures;
|
||||||
|
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
|
||||||
|
QHash< QgsFeatureId, int > idToObj;
|
||||||
|
while ( features.nextFeature( feat ) )
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
if ( feedback->isCanceled() )
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
feedback->setProgress( i * step );
|
||||||
|
if ( !feat.hasGeometry() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
n++;
|
||||||
|
|
||||||
|
QgsPointXY point;
|
||||||
|
if ( QgsWkbTypes::flatType( feat.geometry().wkbType() ) == QgsWkbTypes::Point )
|
||||||
|
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
QgsGeometry centroid = feat.geometry().centroid();
|
||||||
|
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( centroid.constGet() ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
idToObj.insert( feat.id(), clusterFeatures.size() );
|
||||||
|
clusterFeatures.emplace_back( Feature( point ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( n < k )
|
||||||
|
{
|
||||||
|
feedback->reportError( QObject::tr( "Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
|
||||||
|
k = n;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( k > 1 )
|
||||||
|
{
|
||||||
|
feedback->pushInfo( QObject::tr( "Calculating clusters" ) );
|
||||||
|
|
||||||
|
// cluster centers
|
||||||
|
std::vector< QgsPointXY > centers( k );
|
||||||
|
|
||||||
|
initClusters( clusterFeatures, centers, k, feedback );
|
||||||
|
calculateKMeans( clusterFeatures, centers, k, feedback );
|
||||||
|
}
|
||||||
|
|
||||||
|
features = source->getFeatures();
|
||||||
|
i = 0;
|
||||||
|
while ( features.nextFeature( feat ) )
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
if ( feedback->isCanceled() )
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
feedback->setProgress( 50 + i * step );
|
||||||
|
QgsAttributes attr = feat.attributes();
|
||||||
|
if ( !feat.hasGeometry() )
|
||||||
|
{
|
||||||
|
attr << QVariant();
|
||||||
|
}
|
||||||
|
else if ( k <= 1 )
|
||||||
|
{
|
||||||
|
attr << 0;
|
||||||
|
}
|
||||||
|
else if ( !idToObj.contains( feat.id() ) )
|
||||||
|
{
|
||||||
|
attr << QVariant();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
attr << clusterFeatures[ idToObj.value( feat.id() ) ].cluster;
|
||||||
|
}
|
||||||
|
feat.setAttributes( attr );
|
||||||
|
sink->addFeature( feat, QgsFeatureSink::FastInsert );
|
||||||
|
}
|
||||||
|
|
||||||
|
QVariantMap outputs;
|
||||||
|
outputs.insert( QStringLiteral( "OUTPUT" ), dest );
|
||||||
|
return outputs;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
|
||||||
|
|
||||||
|
void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, const int k, QgsProcessingFeedback *feedback )
|
||||||
|
{
|
||||||
|
ulong n = points.size();
|
||||||
|
if ( n == 0 )
|
||||||
|
return;
|
||||||
|
|
||||||
|
if ( n == 1 )
|
||||||
|
{
|
||||||
|
for ( int i = 0; i < k; i++ )
|
||||||
|
centers[ i ] = points[ 0 ].point;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
long duplicateCount = 1;
|
||||||
|
// initially scan for two most distance points from each other, p1 and p2
|
||||||
|
ulong p1 = 0;
|
||||||
|
ulong p2 = 0;
|
||||||
|
double distanceP1 = 0;
|
||||||
|
double distanceP2 = 0;
|
||||||
|
double maxDistance = -1;
|
||||||
|
for ( ulong i = 1; i < n; i++ )
|
||||||
|
{
|
||||||
|
distanceP1 = points[i].point.sqrDist( points[p1].point );
|
||||||
|
distanceP2 = points[i].point.sqrDist( points[p2].point );
|
||||||
|
|
||||||
|
// if this point is further then existing candidates, replace our choice
|
||||||
|
if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
|
||||||
|
{
|
||||||
|
maxDistance = std::max( distanceP1, distanceP2 );
|
||||||
|
if ( distanceP1 > distanceP2 )
|
||||||
|
p2 = i;
|
||||||
|
else
|
||||||
|
p1 = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// also record count of duplicate points
|
||||||
|
if ( qgsDoubleNear( distanceP1, 0 ) || qgsDoubleNear( distanceP2, 0 ) )
|
||||||
|
duplicateCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( feedback && duplicateCount > 1 )
|
||||||
|
{
|
||||||
|
feedback->pushInfo( QObject::tr( "There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
// By now two points should be found and be not the same
|
||||||
|
Q_ASSERT( p1 != p2 && maxDistance >= 0 );
|
||||||
|
|
||||||
|
// Accept these two points as our initial centers
|
||||||
|
centers[0] = points[p1].point;
|
||||||
|
centers[1] = points[p2].point;
|
||||||
|
|
||||||
|
if ( k > 2 )
|
||||||
|
{
|
||||||
|
// array of minimum distance to a point from accepted cluster centers
|
||||||
|
std::vector< double > distances( n );
|
||||||
|
|
||||||
|
// initialize array with distance to first object
|
||||||
|
for ( ulong j = 0; j < n; j++ )
|
||||||
|
{
|
||||||
|
distances[j] = points[j].point.sqrDist( centers[0] );
|
||||||
|
}
|
||||||
|
distances[p1] = -1;
|
||||||
|
distances[p2] = -1;
|
||||||
|
|
||||||
|
// loop i on clusters, skip 0 and 1 as found already
|
||||||
|
for ( int i = 2; i < k; i++ )
|
||||||
|
{
|
||||||
|
ulong candidateCenter = 0;
|
||||||
|
double maxDistance = std::numeric_limits<double>::lowest();
|
||||||
|
|
||||||
|
// loop j on points
|
||||||
|
for ( ulong j = 0; j < n; j++ )
|
||||||
|
{
|
||||||
|
// accepted clusters are already marked with distance = -1
|
||||||
|
if ( distances[j] < 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// update minimal distance with previously accepted cluster
|
||||||
|
distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
|
||||||
|
|
||||||
|
// greedily take a point that's farthest from any of accepted clusters
|
||||||
|
if ( distances[j] > maxDistance )
|
||||||
|
{
|
||||||
|
candidateCenter = j;
|
||||||
|
maxDistance = distances[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// checked earlier by counting entries on input, just in case
|
||||||
|
Q_ASSERT( maxDistance >= 0 );
|
||||||
|
|
||||||
|
// accept candidate to centers
|
||||||
|
distances[candidateCenter] = -1;
|
||||||
|
// copy the point coordinates into the initial centers array
|
||||||
|
centers[i] = points[candidateCenter].point;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
|
||||||
|
|
||||||
|
void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers, int k, QgsProcessingFeedback *feedback )
|
||||||
|
{
|
||||||
|
int converged = false;
|
||||||
|
bool changed = false;
|
||||||
|
|
||||||
|
// avoid reallocating weights array for every iteration
|
||||||
|
std::vector< uint > weights( k );
|
||||||
|
|
||||||
|
uint i = 0;
|
||||||
|
for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
|
||||||
|
{
|
||||||
|
if ( feedback && feedback->isCanceled() )
|
||||||
|
break;
|
||||||
|
|
||||||
|
findNearest( objs, centers, k, changed );
|
||||||
|
updateMeans( objs, centers, weights, k );
|
||||||
|
converged = !changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( !converged && feedback )
|
||||||
|
feedback->reportError( QObject::tr( "Clustering did not converge after %1 iterations" ).arg( i ) );
|
||||||
|
else if ( feedback )
|
||||||
|
feedback->pushInfo( QObject::tr( "Clustering converged after %1 iterations" ).arg( i ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
|
||||||
|
|
||||||
|
void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points, const std::vector<QgsPointXY> ¢ers, const int k, bool &changed )
|
||||||
|
{
|
||||||
|
changed = false;
|
||||||
|
ulong n = points.size();
|
||||||
|
for ( ulong i = 0; i < n; i++ )
|
||||||
|
{
|
||||||
|
Feature &point = points[i];
|
||||||
|
|
||||||
|
// Initialize with distance to first cluster
|
||||||
|
double currentDistance = point.point.sqrDist( centers[0] );
|
||||||
|
int currentCluster = 0;
|
||||||
|
|
||||||
|
// Check all other cluster centers and find the nearest
|
||||||
|
for ( int cluster = 1; cluster < k; cluster++ )
|
||||||
|
{
|
||||||
|
const double distance = point.point.sqrDist( centers[cluster] );
|
||||||
|
if ( distance < currentDistance )
|
||||||
|
{
|
||||||
|
currentDistance = distance;
|
||||||
|
currentCluster = cluster;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the nearest cluster this object is in
|
||||||
|
if ( point.cluster != currentCluster )
|
||||||
|
{
|
||||||
|
changed = true;
|
||||||
|
point.cluster = currentCluster;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
|
||||||
|
|
||||||
|
void QgsKMeansClusteringAlgorithm::updateMeans( const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights, const int k )
|
||||||
|
{
|
||||||
|
uint n = points.size();
|
||||||
|
std::fill( weights.begin(), weights.end(), 0 );
|
||||||
|
for ( int i = 0; i < k; i++ )
|
||||||
|
{
|
||||||
|
centers[i].setX( 0.0 );
|
||||||
|
centers[i].setY( 0.0 );
|
||||||
|
}
|
||||||
|
for ( uint i = 0; i < n; i++ )
|
||||||
|
{
|
||||||
|
int cluster = points[i].cluster;
|
||||||
|
centers[cluster] += QgsVector( points[i].point.x(),
|
||||||
|
points[i].point.y() );
|
||||||
|
weights[cluster] += 1;
|
||||||
|
}
|
||||||
|
for ( int i = 0; i < k; i++ )
|
||||||
|
{
|
||||||
|
centers[i] /= weights[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///@endcond
|
||||||
|
|
||||||
|
|
77
src/analysis/processing/qgsalgorithmkmeansclustering.h
Normal file
77
src/analysis/processing/qgsalgorithmkmeansclustering.h
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/***************************************************************************
|
||||||
|
qgsalgorithmkmeansclustering.h
|
||||||
|
---------------------
|
||||||
|
begin : June 2018
|
||||||
|
copyright : (C) 2018 by Nyall Dawson
|
||||||
|
email : nyall dot dawson at gmail dot com
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* *
|
||||||
|
* This program is free software; you can redistribute it and/or modify *
|
||||||
|
* it under the terms of the GNU General Public License as published by *
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or *
|
||||||
|
* (at your option) any later version. *
|
||||||
|
* *
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#ifndef QGSALGORITHMKMEANSCLUSTERING_H
|
||||||
|
#define QGSALGORITHMKMEANSCLUSTERING_H
|
||||||
|
|
||||||
|
#define SIP_NO_FILE
|
||||||
|
|
||||||
|
#include "qgis.h"
|
||||||
|
#include "qgis_analysis.h"
|
||||||
|
#include "qgsprocessingalgorithm.h"
|
||||||
|
|
||||||
|
///@cond PRIVATE
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Native k-means clustering algorithm.
|
||||||
|
*/
|
||||||
|
class ANALYSIS_EXPORT QgsKMeansClusteringAlgorithm : public QgsProcessingAlgorithm
|
||||||
|
{
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
QgsKMeansClusteringAlgorithm() = default;
|
||||||
|
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
|
||||||
|
QString name() const override;
|
||||||
|
QString displayName() const override;
|
||||||
|
QStringList tags() const override;
|
||||||
|
QString group() const override;
|
||||||
|
QString groupId() const override;
|
||||||
|
QString shortHelpString() const override;
|
||||||
|
QgsKMeansClusteringAlgorithm *createInstance() const override SIP_FACTORY;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
QVariantMap processAlgorithm( const QVariantMap ¶meters,
|
||||||
|
QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
struct Feature
|
||||||
|
{
|
||||||
|
Feature( QgsPointXY point )
|
||||||
|
: point( point )
|
||||||
|
{}
|
||||||
|
|
||||||
|
QgsPointXY point;
|
||||||
|
int cluster = -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void initClusters( std::vector< Feature > &points, std::vector< QgsPointXY > ¢ers, int k, QgsProcessingFeedback *feedback );
|
||||||
|
static void calculateKMeans( std::vector< Feature > &points, std::vector< QgsPointXY > ¢ers, int k, QgsProcessingFeedback *feedback );
|
||||||
|
static void findNearest( std::vector< Feature > &points, const std::vector< QgsPointXY > ¢ers, int k, bool &changed );
|
||||||
|
static void updateMeans( const std::vector< Feature > &points, std::vector< QgsPointXY > ¢ers, std::vector< uint > &weights, int k );
|
||||||
|
|
||||||
|
friend class TestQgsProcessingAlgs;
|
||||||
|
};
|
||||||
|
|
||||||
|
///@endcond PRIVATE
|
||||||
|
|
||||||
|
#endif // QGSALGORITHMKMEANSCLUSTERING_H
|
||||||
|
|
||||||
|
|
@ -42,6 +42,7 @@
|
|||||||
#include "qgsalgorithmjoinwithlines.h"
|
#include "qgsalgorithmjoinwithlines.h"
|
||||||
#include "qgsalgorithmimportphotos.h"
|
#include "qgsalgorithmimportphotos.h"
|
||||||
#include "qgsalgorithmintersection.h"
|
#include "qgsalgorithmintersection.h"
|
||||||
|
#include "qgsalgorithmkmeansclustering.h"
|
||||||
#include "qgsalgorithmlineintersection.h"
|
#include "qgsalgorithmlineintersection.h"
|
||||||
#include "qgsalgorithmloadlayer.h"
|
#include "qgsalgorithmloadlayer.h"
|
||||||
#include "qgsalgorithmmeancoordinates.h"
|
#include "qgsalgorithmmeancoordinates.h"
|
||||||
@ -150,6 +151,7 @@ void QgsNativeAlgorithms::loadAlgorithms()
|
|||||||
addAlgorithm( new QgsIntersectionAlgorithm() );
|
addAlgorithm( new QgsIntersectionAlgorithm() );
|
||||||
addAlgorithm( new QgsJoinByAttributeAlgorithm() );
|
addAlgorithm( new QgsJoinByAttributeAlgorithm() );
|
||||||
addAlgorithm( new QgsJoinWithLinesAlgorithm() );
|
addAlgorithm( new QgsJoinWithLinesAlgorithm() );
|
||||||
|
addAlgorithm( new QgsKMeansClusteringAlgorithm() );
|
||||||
addAlgorithm( new QgsLineIntersectionAlgorithm() );
|
addAlgorithm( new QgsLineIntersectionAlgorithm() );
|
||||||
addAlgorithm( new QgsLoadLayerAlgorithm() );
|
addAlgorithm( new QgsLoadLayerAlgorithm() );
|
||||||
addAlgorithm( new QgsMeanCoordinatesAlgorithm() );
|
addAlgorithm( new QgsMeanCoordinatesAlgorithm() );
|
||||||
|
@ -1870,6 +1870,12 @@ QgsGeometry QgsGeometry::centroid() const
|
|||||||
return QgsGeometry();
|
return QgsGeometry();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// avoid calling geos for trivial point centroids
|
||||||
|
if ( QgsWkbTypes::flatType( d->geometry->wkbType() ) == QgsWkbTypes::Point )
|
||||||
|
{
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
QgsGeos geos( d->geometry.get() );
|
QgsGeos geos( d->geometry.get() );
|
||||||
|
|
||||||
mLastError.clear();
|
mLastError.clear();
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
#include "qgsnativealgorithms.h"
|
#include "qgsnativealgorithms.h"
|
||||||
#include "qgsalgorithmimportphotos.h"
|
#include "qgsalgorithmimportphotos.h"
|
||||||
#include "qgsalgorithmtransform.h"
|
#include "qgsalgorithmtransform.h"
|
||||||
|
#include "qgsalgorithmkmeansclustering.h"
|
||||||
|
|
||||||
class TestQgsProcessingAlgs: public QObject
|
class TestQgsProcessingAlgs: public QObject
|
||||||
{
|
{
|
||||||
@ -41,6 +42,7 @@ class TestQgsProcessingAlgs: public QObject
|
|||||||
void parseGeoTags();
|
void parseGeoTags();
|
||||||
void featureFilterAlg();
|
void featureFilterAlg();
|
||||||
void transformAlg();
|
void transformAlg();
|
||||||
|
void kmeansCluster();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@ -435,6 +437,64 @@ void TestQgsProcessingAlgs::transformAlg()
|
|||||||
QVERIFY( ok );
|
QVERIFY( ok );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TestQgsProcessingAlgs::kmeansCluster()
|
||||||
|
{
|
||||||
|
// make some features
|
||||||
|
std::vector< QgsKMeansClusteringAlgorithm::Feature > features;
|
||||||
|
std::vector< QgsPointXY > centers( 2 );
|
||||||
|
|
||||||
|
// no features, no crash
|
||||||
|
int k = 2;
|
||||||
|
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||||
|
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||||
|
|
||||||
|
// features < clusters
|
||||||
|
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
|
||||||
|
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||||
|
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||||
|
QCOMPARE( features[ 0 ].cluster, 0 );
|
||||||
|
|
||||||
|
// features == clusters
|
||||||
|
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 11, 5 ) ) );
|
||||||
|
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||||
|
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||||
|
QCOMPARE( features[ 0 ].cluster, 1 );
|
||||||
|
QCOMPARE( features[ 1 ].cluster, 0 );
|
||||||
|
|
||||||
|
// features > clusters
|
||||||
|
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 13, 3 ) ) );
|
||||||
|
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 13, 13 ) ) );
|
||||||
|
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 23, 6 ) ) );
|
||||||
|
k = 2;
|
||||||
|
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||||
|
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||||
|
QCOMPARE( features[ 0 ].cluster, 1 );
|
||||||
|
QCOMPARE( features[ 1 ].cluster, 1 );
|
||||||
|
QCOMPARE( features[ 2 ].cluster, 0 );
|
||||||
|
QCOMPARE( features[ 3 ].cluster, 0 );
|
||||||
|
QCOMPARE( features[ 4 ].cluster, 0 );
|
||||||
|
|
||||||
|
// repeat above, with 3 clusters
|
||||||
|
k = 3;
|
||||||
|
centers.resize( 3 );
|
||||||
|
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
|
||||||
|
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
|
||||||
|
QCOMPARE( features[ 0 ].cluster, 1 );
|
||||||
|
QCOMPARE( features[ 1 ].cluster, 2 );
|
||||||
|
QCOMPARE( features[ 2 ].cluster, 2 );
|
||||||
|
QCOMPARE( features[ 3 ].cluster, 2 );
|
||||||
|
QCOMPARE( features[ 4 ].cluster, 0 );
|
||||||
|
|
||||||
|
// with identical points
|
||||||
|
features.clear();
|
||||||
|
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
|
||||||
|
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
|
||||||
|
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
|
||||||
|
QCOMPARE( features[ 0 ].cluster, -1 );
|
||||||
|
QCOMPARE( features[ 1 ].cluster, -1 );
|
||||||
|
QCOMPARE( features[ 2 ].cluster, -1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
QGSTEST_MAIN( TestQgsProcessingAlgs )
|
QGSTEST_MAIN( TestQgsProcessingAlgs )
|
||||||
#include "testqgsprocessingalgs.moc"
|
#include "testqgsprocessingalgs.moc"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user