19#include <unordered_map>
23const int KMEANS_MAX_ITERATIONS = 1000;
25QString QgsKMeansClusteringAlgorithm::name()
const
27 return QStringLiteral(
"kmeansclustering" );
30QString QgsKMeansClusteringAlgorithm::displayName()
const
32 return QObject::tr(
"K-means clustering" );
35QStringList QgsKMeansClusteringAlgorithm::tags()
const
37 return QObject::tr(
"clustering,clusters,kmeans,points" ).split(
',' );
40QString QgsKMeansClusteringAlgorithm::group()
const
42 return QObject::tr(
"Vector analysis" );
45QString QgsKMeansClusteringAlgorithm::groupId()
const
47 return QStringLiteral(
"vectoranalysis" );
50void QgsKMeansClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
57 auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"FIELD_NAME" ),
58 QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) );
60 addParameter( fieldNameParam.release() );
61 auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"SIZE_FIELD_NAME" ),
62 QObject::tr(
"Cluster size field name" ), QStringLiteral(
"CLUSTER_SIZE" ) );
64 addParameter( sizeFieldNameParam.release() );
69QString QgsKMeansClusteringAlgorithm::shortHelpString()
const
71 return QObject::tr(
"Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
72 "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
75QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
const
77 return new QgsKMeansClusteringAlgorithm();
82 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
86 int k = parameterAsInt( parameters, QStringLiteral(
"CLUSTERS" ), context );
88 QgsFields outputFields = source->fields();
90 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
91 newFields.
append(
QgsField( clusterFieldName, QMetaType::Type::Int ) );
92 const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral(
"SIZE_FIELD_NAME" ), context );
93 newFields.
append(
QgsField( clusterSizeFieldName, QMetaType::Type::Int ) );
97 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
102 feedback->
pushInfo( QObject::tr(
"Collecting input points" ) );
103 const double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
106 int featureWithGeometryCount = 0;
109 std::vector< Feature > clusterFeatures;
111 QHash< QgsFeatureId, int > idToObj;
123 featureWithGeometryCount++;
131 if ( centroid.isNull() )
134 point =
QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( centroid.constGet() ) );
139 idToObj[ feat.
id() ] = clusterFeatures.size();
140 clusterFeatures.emplace_back( Feature( point ) );
145 feedback->
reportError( QObject::tr(
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
151 feedback->
pushInfo( QObject::tr(
"Calculating clusters" ) );
154 std::vector< QgsPointXY > centers( k );
156 initClusters( clusterFeatures, centers, k, feedback );
157 calculateKMeans( clusterFeatures, centers, k, feedback );
161 std::unordered_map< int, int> clusterSize;
162 for (
const int obj : idToObj )
163 clusterSize[ clusterFeatures[ obj ].cluster ]++;
165 features = source->getFeatures();
177 const auto obj = idToObj.find( feat.
id() );
180 attr << QVariant() << QVariant();
184 attr << 0 << featureWithGeometryCount;
188 const int cluster = clusterFeatures[ *obj ].cluster;
189 attr << cluster << clusterSize[ cluster ];
199 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
205void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers,
const int k,
QgsProcessingFeedback *feedback )
207 const std::size_t n = points.size();
213 for (
int i = 0; i < k; i++ )
214 centers[ i ] = points[ 0 ].point;
218 long duplicateCount = 1;
222 double distanceP1 = 0;
223 double distanceP2 = 0;
224 double maxDistance = -1;
225 for ( std::size_t i = 1; i < n; i++ )
227 distanceP1 = points[i].point.sqrDist( points[p1].point );
228 distanceP2 = points[i].point.sqrDist( points[p2].point );
231 if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
233 maxDistance = std::max( distanceP1, distanceP2 );
234 if ( distanceP1 > distanceP2 )
245 if ( feedback && duplicateCount > 1 )
247 feedback->
pushInfo( QObject::tr(
"There are at least %n duplicate input(s), the number of output clusters may be less than was requested",
nullptr, duplicateCount ) );
254 centers[0] = points[p1].point;
255 centers[1] = points[p2].point;
260 std::vector< double > distances( n );
263 for ( std::size_t j = 0; j < n; j++ )
265 distances[j] = points[j].point.sqrDist( centers[0] );
271 for (
int i = 2; i < k; i++ )
273 std::size_t candidateCenter = 0;
274 double maxDistance = std::numeric_limits<double>::lowest();
277 for ( std::size_t j = 0; j < n; j++ )
280 if ( distances[j] < 0 )
284 distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
287 if ( distances[j] > maxDistance )
290 maxDistance = distances[j];
295 Q_ASSERT( maxDistance >= 0 );
298 distances[candidateCenter] = -1;
300 centers[i] = points[candidateCenter].point;
307void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers,
int k,
QgsProcessingFeedback *feedback )
309 int converged =
false;
310 bool changed =
false;
313 std::vector< uint > weights( k );
316 for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
321 findNearest( objs, centers, k, changed );
322 updateMeans( objs, centers, weights, k );
323 converged = !changed;
326 if ( !converged && feedback )
327 feedback->
reportError( QObject::tr(
"Clustering did not converge after %n iteration(s)",
nullptr, i ) );
329 feedback->
pushInfo( QObject::tr(
"Clustering converged after %n iteration(s)",
nullptr, i ) );
334void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points,
const std::vector<QgsPointXY> ¢ers,
const int k,
bool &changed )
337 const std::size_t n = points.size();
338 for ( std::size_t i = 0; i < n; i++ )
340 Feature &point = points[i];
343 double currentDistance = point.point.sqrDist( centers[0] );
344 int currentCluster = 0;
347 for (
int cluster = 1; cluster < k; cluster++ )
349 const double distance = point.point.sqrDist( centers[cluster] );
350 if ( distance < currentDistance )
352 currentDistance = distance;
353 currentCluster = cluster;
358 if ( point.cluster != currentCluster )
361 point.cluster = currentCluster;
368void QgsKMeansClusteringAlgorithm::updateMeans(
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights,
const int k )
370 const uint n = points.size();
371 std::fill( weights.begin(), weights.end(), 0 );
372 for (
int i = 0; i < k; i++ )
374 centers[i].setX( 0.0 );
375 centers[i].setY( 0.0 );
377 for ( uint i = 0; i < n; i++ )
379 const int cluster = points[i].cluster;
380 centers[cluster] +=
QgsVector( points[i].point.x(),
381 points[i].point.y() );
382 weights[cluster] += 1;
384 for (
int i = 0; i < k; i++ )
386 centers[i] /= weights[i];
@ VectorAnyGeometry
Any vector layer with geometry.
@ Advanced
Parameter is an advanced parameter which should be hidden from users by default.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
Container of fields for a vector layer.
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
A geometry is the spatial representation of a feature.
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsGeometry centroid() const
Returns the center of mass of a geometry.
Qgis::WkbType wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
A class to represent a 2D point.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
A class to represent a vector.
static Qgis::WkbType flatType(Qgis::WkbType type)
Returns the flat type for a WKB type.
bool qgsDoubleNear(double a, double b, double epsilon=4 *std::numeric_limits< double >::epsilon())
Compare two doubles (but allow some difference)