[FEATURE][processing] K Means clustering algorithm

Adds a native k-means clustering algorithm.

Based on a port of PostGIS' ST_ClusterKMeans function, this
new algorithm adds a new cluster ID field to a set of input
features identify the feature's cluster based on the k-means
clustering approach. If non-point geometries are used as input,
the clustering is based off the centroid of the input geometries.
This commit is contained in:
Nyall Dawson 2018-06-26 19:21:51 +10:00
parent 16f3781ab7
commit 34b9d39b27
16 changed files with 1032 additions and 1 deletions

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_lines.xsd"
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy>
<gml:Box>
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
<gml:coord><gml:X>11</gml:X><gml:Y>5</gml:Y></gml:coord>
</gml:Box>
</gml:boundedBy>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.0">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,2 9,2 9,3 11,5</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.1">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>-1,-1 1,-1</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.2">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>2,0 2,2 3,2 3,3</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.3">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>3,1 5,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.4">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>7,-3 10,-3</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.5">
<ogr:geometryProperty><gml:LineString srsName="EPSG:4326"><gml:coordinates>6,-3 10,1</gml:coordinates></gml:LineString></ogr:geometryProperty>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_lines>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_lines fid="lines.6">
<ogr:CLUSTER_ID xsi:nil="true"/>
</ogr:kmeans_lines>
</gml:featureMember>
</ogr:FeatureCollection>

View File

@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
<xs:complexType name="FeatureCollectionType">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureCollectionType">
<xs:attribute name="lockId" type="xs:string" use="optional"/>
<xs:attribute name="scope" type="xs:string" use="optional"/>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:element name="kmeans_lines" type="ogr:kmeans_lines_Type" substitutionGroup="gml:_Feature"/>
<xs:complexType name="kmeans_lines_Type">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureType">
<xs:sequence>
<xs:element name="geometryProperty" type="gml:LineStringPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
</xs:schema>

View File

@ -0,0 +1,86 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_3.xsd"
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy>
<gml:Box>
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
</gml:Box>
</gml:boundedBy>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.0">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>1</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.1">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>2</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.2">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>3</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.3">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>4</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.4">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>5</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.5">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>6</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.6">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>7</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.7">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>8</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_3 fid="points.8">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>9</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID>2</ogr:CLUSTER_ID>
</ogr:kmeans_points_3>
</gml:featureMember>
</ogr:FeatureCollection>

View File

@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
<xs:complexType name="FeatureCollectionType">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureCollectionType">
<xs:attribute name="lockId" type="xs:string" use="optional"/>
<xs:attribute name="scope" type="xs:string" use="optional"/>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:element name="kmeans_points_3" type="ogr:kmeans_points_3_Type" substitutionGroup="gml:_Feature"/>
<xs:complexType name="kmeans_points_3_Type">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureType">
<xs:sequence>
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
</xs:schema>

View File

@ -0,0 +1,86 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_points_5.xsd"
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy>
<gml:Box>
<gml:coord><gml:X>0</gml:X><gml:Y>-5</gml:Y></gml:coord>
<gml:coord><gml:X>8</gml:X><gml:Y>3</gml:Y></gml:coord>
</gml:Box>
</gml:boundedBy>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.0">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>1,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>1</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.1">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>3,3</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>2</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.2">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>2,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>3</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>2</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.3">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>5,2</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>4</ogr:id>
<ogr:id2>2</ogr:id2>
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.4">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>4,1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>5</ogr:id>
<ogr:id2>1</ogr:id2>
<ogr:CLUSTER_ID5>4</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.5">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-5</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>6</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>1</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.6">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>8,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>7</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.7">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>7,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>8</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>0</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_points_5 fid="points.8">
<ogr:geometryProperty><gml:Point srsName="EPSG:4326"><gml:coordinates>0,-1</gml:coordinates></gml:Point></ogr:geometryProperty>
<ogr:id>9</ogr:id>
<ogr:id2>0</ogr:id2>
<ogr:CLUSTER_ID5>3</ogr:CLUSTER_ID5>
</ogr:kmeans_points_5>
</gml:featureMember>
</ogr:FeatureCollection>

View File

@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
<xs:complexType name="FeatureCollectionType">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureCollectionType">
<xs:attribute name="lockId" type="xs:string" use="optional"/>
<xs:attribute name="scope" type="xs:string" use="optional"/>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:element name="kmeans_points_5" type="ogr:kmeans_points_5_Type" substitutionGroup="gml:_Feature"/>
<xs:complexType name="kmeans_points_5_Type">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureType">
<xs:sequence>
<xs:element name="geometryProperty" type="gml:PointPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
<xs:element name="id" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="id2" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="CLUSTER_ID5" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
</xs:schema>

View File

@ -0,0 +1,67 @@
<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://ogr.maptools.org/ kmeans_polys.xsd"
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:boundedBy>
<gml:Box>
<gml:coord><gml:X>-1</gml:X><gml:Y>-3</gml:Y></gml:coord>
<gml:coord><gml:X>10</gml:X><gml:Y>6</gml:Y></gml:coord>
</gml:Box>
</gml:boundedBy>
<gml:featureMember>
<ogr:kmeans_polys fid="polys.0">
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>-1,-1 -1,3 3,3 3,2 2,2 2,-1 -1,-1</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
<ogr:name>aaaaa</ogr:name>
<ogr:intval>33</ogr:intval>
<ogr:floatval>44.123456</ogr:floatval>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_polys>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_polys fid="polys.1">
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>5,5 6,4 4,4 5,5</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
<ogr:name>Aaaaa</ogr:name>
<ogr:intval>-33</ogr:intval>
<ogr:floatval>0</ogr:floatval>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_polys>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_polys fid="polys.2">
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>2,5 2,6 3,6 3,5 2,5</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
<ogr:name>bbaaa</ogr:name>
<ogr:intval xsi:nil="true"/>
<ogr:floatval>0.123</ogr:floatval>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_polys>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_polys fid="polys.3">
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>6,1 10,1 10,-3 6,-3 6,1</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs><gml:innerBoundaryIs><gml:LinearRing><gml:coordinates>7,0 7,-2 9,-2 9,0 7,0</gml:coordinates></gml:LinearRing></gml:innerBoundaryIs></gml:Polygon></ogr:geometryProperty>
<ogr:name>ASDF</ogr:name>
<ogr:intval>0</ogr:intval>
<ogr:floatval xsi:nil="true"/>
<ogr:CLUSTER_ID>0</ogr:CLUSTER_ID>
</ogr:kmeans_polys>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_polys fid="polys.4">
<ogr:name xsi:nil="true"/>
<ogr:intval>120</ogr:intval>
<ogr:floatval>-100291.43213</ogr:floatval>
<ogr:CLUSTER_ID xsi:nil="true"/>
</ogr:kmeans_polys>
</gml:featureMember>
<gml:featureMember>
<ogr:kmeans_polys fid="polys.5">
<ogr:geometryProperty><gml:Polygon srsName="EPSG:4326"><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>3,2 6,1 6,-3 2,-1 2,2 3,2</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></ogr:geometryProperty>
<ogr:name>elim</ogr:name>
<ogr:intval>2</ogr:intval>
<ogr:floatval>3.33</ogr:floatval>
<ogr:CLUSTER_ID>1</ogr:CLUSTER_ID>
</ogr:kmeans_polys>
</gml:featureMember>
</ogr:FeatureCollection>

View File

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema targetNamespace="http://ogr.maptools.org/" xmlns:ogr="http://ogr.maptools.org/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:gml="http://www.opengis.net/gml" elementFormDefault="qualified" version="1.0">
<xs:import namespace="http://www.opengis.net/gml" schemaLocation="http://schemas.opengis.net/gml/2.1.2/feature.xsd"/>
<xs:element name="FeatureCollection" type="ogr:FeatureCollectionType" substitutionGroup="gml:_FeatureCollection"/>
<xs:complexType name="FeatureCollectionType">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureCollectionType">
<xs:attribute name="lockId" type="xs:string" use="optional"/>
<xs:attribute name="scope" type="xs:string" use="optional"/>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:element name="kmeans_polys" type="ogr:kmeans_polys_Type" substitutionGroup="gml:_Feature"/>
<xs:complexType name="kmeans_polys_Type">
<xs:complexContent>
<xs:extension base="gml:AbstractFeatureType">
<xs:sequence>
<xs:element name="geometryProperty" type="gml:PolygonPropertyType" nillable="true" minOccurs="0" maxOccurs="1"/>
<xs:element name="name" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:string">
<xs:maxLength value="5"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="intval" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="floatval" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:decimal">
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="CLUSTER_ID" nillable="true" minOccurs="0" maxOccurs="1">
<xs:simpleType>
<xs:restriction base="xs:integer">
<xs:totalDigits value="10"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
</xs:schema>

View File

@ -5660,4 +5660,56 @@ tests:
name: expected/vectorize.gml
type: vector
- algorithm: native:kmeansclustering
name: K means, points, 3 clusters
params:
CLUSTERS: 3
FIELD_NAME: CLUSTER_ID
INPUT:
name: points.gml
type: vector
results:
OUTPUT:
name: expected/kmeans_points_3.gml
type: vector
- algorithm: native:kmeansclustering
name: K means, points, 5 clusters
params:
CLUSTERS: 5
FIELD_NAME: CLUSTER_ID5
INPUT:
name: points.gml
type: vector
results:
OUTPUT:
name: expected/kmeans_points_5.gml
type: vector
- algorithm: native:kmeansclustering
name: K means, lines
params:
CLUSTERS: 2
FIELD_NAME: CLUSTER_ID
INPUT:
name: lines.gml
type: vector
results:
OUTPUT:
name: expected/kmeans_lines.gml
type: vector
- algorithm: native:kmeansclustering
name: K means, polys
params:
CLUSTERS: 2
FIELD_NAME: CLUSTER_ID
INPUT:
name: polys.gml
type: vector
results:
OUTPUT:
name: expected/kmeans_polys.gml
type: vector
# See ../README.md for a description of the file format

View File

@ -41,10 +41,11 @@ SET(QGIS_ANALYSIS_SRCS
processing/qgsalgorithmfiledownloader.cpp
processing/qgsalgorithmfilter.cpp
processing/qgsalgorithmfixgeometries.cpp
processing/qgsalgorithmimportphotos.cpp
processing/qgsalgorithmintersection.cpp
processing/qgsalgorithmjoinbyattribute.cpp
processing/qgsalgorithmjoinwithlines.cpp
processing/qgsalgorithmimportphotos.cpp
processing/qgsalgorithmkmeansclustering.cpp
processing/qgsalgorithmlineintersection.cpp
processing/qgsalgorithmloadlayer.cpp
processing/qgsalgorithmmeancoordinates.cpp

View File

@ -0,0 +1,371 @@
/***************************************************************************
qgsalgorithmkmeansclustering.cpp
---------------------
begin : June 2018
copyright : (C) 2018 by Nyall Dawson
email : nyall dot dawson at gmail dot com
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#include "qgsalgorithmkmeansclustering.h"
///@cond PRIVATE
const int KMEANS_MAX_ITERATIONS = 1000;
QString QgsKMeansClusteringAlgorithm::name() const
{
return QStringLiteral( "kmeansclustering" );
}
QString QgsKMeansClusteringAlgorithm::displayName() const
{
return QObject::tr( "K-means clustering" );
}
QStringList QgsKMeansClusteringAlgorithm::tags() const
{
return QObject::tr( "clustering,clusters,kmeans,points" ).split( ',' );
}
QString QgsKMeansClusteringAlgorithm::group() const
{
return QObject::tr( "Vector analysis" );
}
QString QgsKMeansClusteringAlgorithm::groupId() const
{
return QStringLiteral( "vectoranalysis" );
}
void QgsKMeansClusteringAlgorithm::initAlgorithm( const QVariantMap & )
{
addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorAnyGeometry ) );
addParameter( new QgsProcessingParameterNumber( QStringLiteral( "CLUSTERS" ), QObject::tr( "Number of clusters" ),
QgsProcessingParameterNumber::Integer, 5, false, 1 ) );
addParameter( new QgsProcessingParameterString( QStringLiteral( "FIELD_NAME" ),
QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) ) );
addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorAnyGeometry ) );
}
QString QgsKMeansClusteringAlgorithm::shortHelpString() const
{
return QObject::tr( "Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
"If input geometries are line or polygons, the clustering is based on the centroid of the feature." );
}
QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance() const
{
return new QgsKMeansClusteringAlgorithm();
}
QVariantMap QgsKMeansClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
{
std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
if ( !source )
throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
int k = parameterAsInt( parameters, QStringLiteral( "CLUSTERS" ), context );
QgsFields outputFields = source->fields();
const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
QgsFields newFields;
newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
QString dest;
std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
if ( !sink )
throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
// build list of point inputs - if it's already a point, use that. If not, take the centroid.
feedback->pushInfo( QObject::tr( "Collecting input points" ) );
double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
int i = 0;
int n = 0;
QgsFeature feat;
std::vector< Feature > clusterFeatures;
QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
QHash< QgsFeatureId, int > idToObj;
while ( features.nextFeature( feat ) )
{
i++;
if ( feedback->isCanceled() )
{
break;
}
feedback->setProgress( i * step );
if ( !feat.hasGeometry() )
continue;
n++;
QgsPointXY point;
if ( QgsWkbTypes::flatType( feat.geometry().wkbType() ) == QgsWkbTypes::Point )
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
else
{
QgsGeometry centroid = feat.geometry().centroid();
point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( centroid.constGet() ) );
}
idToObj.insert( feat.id(), clusterFeatures.size() );
clusterFeatures.emplace_back( Feature( point ) );
}
if ( n < k )
{
feedback->reportError( QObject::tr( "Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
k = n;
}
if ( k > 1 )
{
feedback->pushInfo( QObject::tr( "Calculating clusters" ) );
// cluster centers
std::vector< QgsPointXY > centers( k );
initClusters( clusterFeatures, centers, k, feedback );
calculateKMeans( clusterFeatures, centers, k, feedback );
}
features = source->getFeatures();
i = 0;
while ( features.nextFeature( feat ) )
{
i++;
if ( feedback->isCanceled() )
{
break;
}
feedback->setProgress( 50 + i * step );
QgsAttributes attr = feat.attributes();
if ( !feat.hasGeometry() )
{
attr << QVariant();
}
else if ( k <= 1 )
{
attr << 0;
}
else if ( !idToObj.contains( feat.id() ) )
{
attr << QVariant();
}
else
{
attr << clusterFeatures[ idToObj.value( feat.id() ) ].cluster;
}
feat.setAttributes( attr );
sink->addFeature( feat, QgsFeatureSink::FastInsert );
}
QVariantMap outputs;
outputs.insert( QStringLiteral( "OUTPUT" ), dest );
return outputs;
}
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> &centers, const int k, QgsProcessingFeedback *feedback )
{
ulong n = points.size();
if ( n == 0 )
return;
if ( n == 1 )
{
for ( int i = 0; i < k; i++ )
centers[ i ] = points[ 0 ].point;
return;
}
long duplicateCount = 1;
// initially scan for two most distance points from each other, p1 and p2
ulong p1 = 0;
ulong p2 = 0;
double distanceP1 = 0;
double distanceP2 = 0;
double maxDistance = -1;
for ( ulong i = 1; i < n; i++ )
{
distanceP1 = points[i].point.sqrDist( points[p1].point );
distanceP2 = points[i].point.sqrDist( points[p2].point );
// if this point is further then existing candidates, replace our choice
if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
{
maxDistance = std::max( distanceP1, distanceP2 );
if ( distanceP1 > distanceP2 )
p2 = i;
else
p1 = i;
}
// also record count of duplicate points
if ( qgsDoubleNear( distanceP1, 0 ) || qgsDoubleNear( distanceP2, 0 ) )
duplicateCount++;
}
if ( feedback && duplicateCount > 1 )
{
feedback->pushInfo( QObject::tr( "There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
}
// By now two points should be found and be not the same
Q_ASSERT( p1 != p2 && maxDistance >= 0 );
// Accept these two points as our initial centers
centers[0] = points[p1].point;
centers[1] = points[p2].point;
if ( k > 2 )
{
// array of minimum distance to a point from accepted cluster centers
std::vector< double > distances( n );
// initialize array with distance to first object
for ( ulong j = 0; j < n; j++ )
{
distances[j] = points[j].point.sqrDist( centers[0] );
}
distances[p1] = -1;
distances[p2] = -1;
// loop i on clusters, skip 0 and 1 as found already
for ( int i = 2; i < k; i++ )
{
ulong candidateCenter = 0;
double maxDistance = std::numeric_limits<double>::lowest();
// loop j on points
for ( ulong j = 0; j < n; j++ )
{
// accepted clusters are already marked with distance = -1
if ( distances[j] < 0 )
continue;
// update minimal distance with previously accepted cluster
distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
// greedily take a point that's farthest from any of accepted clusters
if ( distances[j] > maxDistance )
{
candidateCenter = j;
maxDistance = distances[j];
}
}
// checked earlier by counting entries on input, just in case
Q_ASSERT( maxDistance >= 0 );
// accept candidate to centers
distances[candidateCenter] = -1;
// copy the point coordinates into the initial centers array
centers[i] = points[candidateCenter].point;
}
}
}
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> &centers, int k, QgsProcessingFeedback *feedback )
{
int converged = false;
bool changed = false;
// avoid reallocating weights array for every iteration
std::vector< uint > weights( k );
uint i = 0;
for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
{
if ( feedback && feedback->isCanceled() )
break;
findNearest( objs, centers, k, changed );
updateMeans( objs, centers, weights, k );
converged = !changed;
}
if ( !converged && feedback )
feedback->reportError( QObject::tr( "Clustering did not converge after %1 iterations" ).arg( i ) );
else if ( feedback )
feedback->pushInfo( QObject::tr( "Clustering converged after %1 iterations" ).arg( i ) );
}
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points, const std::vector<QgsPointXY> &centers, const int k, bool &changed )
{
changed = false;
ulong n = points.size();
for ( ulong i = 0; i < n; i++ )
{
Feature &point = points[i];
// Initialize with distance to first cluster
double currentDistance = point.point.sqrDist( centers[0] );
int currentCluster = 0;
// Check all other cluster centers and find the nearest
for ( int cluster = 1; cluster < k; cluster++ )
{
const double distance = point.point.sqrDist( centers[cluster] );
if ( distance < currentDistance )
{
currentDistance = distance;
currentCluster = cluster;
}
}
// Store the nearest cluster this object is in
if ( point.cluster != currentCluster )
{
changed = true;
point.cluster = currentCluster;
}
}
}
// ported from https://github.com/postgis/postgis/blob/svn-trunk/liblwgeom/lwkmeans.c
void QgsKMeansClusteringAlgorithm::updateMeans( const std::vector<Feature> &points, std::vector<QgsPointXY> &centers, std::vector<uint> &weights, const int k )
{
uint n = points.size();
std::fill( weights.begin(), weights.end(), 0 );
for ( int i = 0; i < k; i++ )
{
centers[i].setX( 0.0 );
centers[i].setY( 0.0 );
}
for ( uint i = 0; i < n; i++ )
{
int cluster = points[i].cluster;
centers[cluster] += QgsVector( points[i].point.x(),
points[i].point.y() );
weights[cluster] += 1;
}
for ( int i = 0; i < k; i++ )
{
centers[i] /= weights[i];
}
}
///@endcond

View File

@ -0,0 +1,77 @@
/***************************************************************************
qgsalgorithmkmeansclustering.h
---------------------
begin : June 2018
copyright : (C) 2018 by Nyall Dawson
email : nyall dot dawson at gmail dot com
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#ifndef QGSALGORITHMKMEANSCLUSTERING_H
#define QGSALGORITHMKMEANSCLUSTERING_H
#define SIP_NO_FILE
#include "qgis.h"
#include "qgis_analysis.h"
#include "qgsprocessingalgorithm.h"
///@cond PRIVATE
/**
* Native k-means clustering algorithm.
*/
class ANALYSIS_EXPORT QgsKMeansClusteringAlgorithm : public QgsProcessingAlgorithm
{
public:
QgsKMeansClusteringAlgorithm() = default;
void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override;
QString name() const override;
QString displayName() const override;
QStringList tags() const override;
QString group() const override;
QString groupId() const override;
QString shortHelpString() const override;
QgsKMeansClusteringAlgorithm *createInstance() const override SIP_FACTORY;
protected:
QVariantMap processAlgorithm( const QVariantMap &parameters,
QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override;
private:
struct Feature
{
Feature( QgsPointXY point )
: point( point )
{}
QgsPointXY point;
int cluster = -1;
};
static void initClusters( std::vector< Feature > &points, std::vector< QgsPointXY > &centers, int k, QgsProcessingFeedback *feedback );
static void calculateKMeans( std::vector< Feature > &points, std::vector< QgsPointXY > &centers, int k, QgsProcessingFeedback *feedback );
static void findNearest( std::vector< Feature > &points, const std::vector< QgsPointXY > &centers, int k, bool &changed );
static void updateMeans( const std::vector< Feature > &points, std::vector< QgsPointXY > &centers, std::vector< uint > &weights, int k );
friend class TestQgsProcessingAlgs;
};
///@endcond PRIVATE
#endif // QGSALGORITHMKMEANSCLUSTERING_H

View File

@ -42,6 +42,7 @@
#include "qgsalgorithmjoinwithlines.h"
#include "qgsalgorithmimportphotos.h"
#include "qgsalgorithmintersection.h"
#include "qgsalgorithmkmeansclustering.h"
#include "qgsalgorithmlineintersection.h"
#include "qgsalgorithmloadlayer.h"
#include "qgsalgorithmmeancoordinates.h"
@ -150,6 +151,7 @@ void QgsNativeAlgorithms::loadAlgorithms()
addAlgorithm( new QgsIntersectionAlgorithm() );
addAlgorithm( new QgsJoinByAttributeAlgorithm() );
addAlgorithm( new QgsJoinWithLinesAlgorithm() );
addAlgorithm( new QgsKMeansClusteringAlgorithm() );
addAlgorithm( new QgsLineIntersectionAlgorithm() );
addAlgorithm( new QgsLoadLayerAlgorithm() );
addAlgorithm( new QgsMeanCoordinatesAlgorithm() );

View File

@ -1870,6 +1870,12 @@ QgsGeometry QgsGeometry::centroid() const
return QgsGeometry();
}
// avoid calling geos for trivial point centroids
if ( QgsWkbTypes::flatType( d->geometry->wkbType() ) == QgsWkbTypes::Point )
{
return *this;
}
QgsGeos geos( d->geometry.get() );
mLastError.clear();

View File

@ -25,6 +25,7 @@
#include "qgsnativealgorithms.h"
#include "qgsalgorithmimportphotos.h"
#include "qgsalgorithmtransform.h"
#include "qgsalgorithmkmeansclustering.h"
class TestQgsProcessingAlgs: public QObject
{
@ -41,6 +42,7 @@ class TestQgsProcessingAlgs: public QObject
void parseGeoTags();
void featureFilterAlg();
void transformAlg();
void kmeansCluster();
private:
@ -435,6 +437,64 @@ void TestQgsProcessingAlgs::transformAlg()
QVERIFY( ok );
}
void TestQgsProcessingAlgs::kmeansCluster()
{
// make some features
std::vector< QgsKMeansClusteringAlgorithm::Feature > features;
std::vector< QgsPointXY > centers( 2 );
// no features, no crash
int k = 2;
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
// features < clusters
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
QCOMPARE( features[ 0 ].cluster, 0 );
// features == clusters
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 11, 5 ) ) );
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
QCOMPARE( features[ 0 ].cluster, 1 );
QCOMPARE( features[ 1 ].cluster, 0 );
// features > clusters
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 13, 3 ) ) );
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 13, 13 ) ) );
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 23, 6 ) ) );
k = 2;
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
QCOMPARE( features[ 0 ].cluster, 1 );
QCOMPARE( features[ 1 ].cluster, 1 );
QCOMPARE( features[ 2 ].cluster, 0 );
QCOMPARE( features[ 3 ].cluster, 0 );
QCOMPARE( features[ 4 ].cluster, 0 );
// repeat above, with 3 clusters
k = 3;
centers.resize( 3 );
QgsKMeansClusteringAlgorithm::initClusters( features, centers, k, nullptr );
QgsKMeansClusteringAlgorithm::calculateKMeans( features, centers, k, nullptr );
QCOMPARE( features[ 0 ].cluster, 1 );
QCOMPARE( features[ 1 ].cluster, 2 );
QCOMPARE( features[ 2 ].cluster, 2 );
QCOMPARE( features[ 3 ].cluster, 2 );
QCOMPARE( features[ 4 ].cluster, 0 );
// with identical points
features.clear();
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
features.emplace_back( QgsKMeansClusteringAlgorithm::Feature( QgsPointXY( 1, 5 ) ) );
QCOMPARE( features[ 0 ].cluster, -1 );
QCOMPARE( features[ 1 ].cluster, -1 );
QCOMPARE( features[ 2 ].cluster, -1 );
}
QGSTEST_MAIN( TestQgsProcessingAlgs )
#include "testqgsprocessingalgs.moc"