PandaRoot
PndMvaCluster.h
Go to the documentation of this file.
1 //****************************************************************************
2 //* This file is part of PandaRoot. *
3 //* *
4 //* PandaRoot is distributed under the terms of the *
5 //* GNU General Public License (GPL) version 3, *
6 //* copied verbatim in the file "LICENSE". *
7 //* *
8 //* Copyright (C) 2006 - 2024 FAIR GmbH and copyright holders of PandaRoot *
9 //* The copyright holders are listed in the file "COPYRIGHTHOLDERS". *
10 //* The authors are listed in the file "AUTHORS". *
11 //****************************************************************************
12 
13 /* ***************************************
14  * Clustering algorithms *
15  * Author: M.Babai@rug.nl *
16  * Version: *
17  * LICENSE: *
18  * ***************************************
19  */
20 //#pragma once
21 #ifndef PND_MVA_CLUSTER_H
22 #define PND_MVA_CLUSTER_H
23 
24 #include <iostream>
25 #include <cassert>
26 #include <vector>
27 #include <set>
28 #include <limits>
29 
30 // Local includes
31 #include "PndMvaUtil.h"
32 
33 #define PNDMVA_CLUSTER_DEBUG 0
34 
/**
 * Data structure of the space points and the cluster centers.
 *
 * Each element pairs a std::string with a pointer to a std::vector<float>.
 * Both the input point set (m_PointSet) and the centroids (m_Centroids) of
 * PndMvaCluster are stored in this type.
 *
 * NOTE(review): the string is presumably a label/name and the vector the
 * coordinates of the point; ownership of the pointed-to vector is not visible
 * from this header -- confirm against the implementation file.
 */
typedef std::vector<std::pair<std::string, std::vector<float> *>> DataPoints;
37 
39 typedef enum ClusteringType {
41  KMEANS_SOFT = 1 // Not Implemented yet.
43 
44 //---------------- Class definition --------------
46  //--------------------------------------------
47  // -------------- public members -------------
48  public:
54  explicit PndMvaCluster(DataPoints const &InputData, size_t nCluster);
55 
63  explicit PndMvaCluster(DataPoints const &InputData, size_t nCluster, bool const prune, bool const forceLabels);
64 
68  virtual ~PndMvaCluster();
69 
75  virtual DataPoints *Cluster(ClusteringType const ClType = KMEANS_HARD);
76 
84  virtual DataPoints *ClusterAndLabel(ClusteringType const ClType, std::vector<std::string> const &labels);
85 
86  //------- Getters
91  inline size_t GetNumberOfClusters() const;
92 
97  inline size_t GetClusterDimension() const;
98 
99  //------- Setters
104  inline void SetNumberOfClusters(size_t val);
105 
111  inline void Setprune(bool const prune = false);
112 
119  inline void SetForceToLabel(bool const forceLabel = false);
120 
121  //__________________ DEBUG FUNCTIONS ______________
122 #if (PNDMVA_CLUSTER_DEBUG > 0)
123 
126  void printStructs();
127 #endif
128  //--------------------------------------------
129  // protected:
130  // -------------- private members ------------
131  private:
133  PndMvaCluster(const PndMvaCluster &other);
134  PndMvaCluster &operator=(const PndMvaCluster &other);
135 
136  // Functions & Procedures
137  // Performs the actual hard K-Means clustering.
138  DataPoints *K_Means();
139 
140  // Initialize the centroids before clustering.
141  void InitCentroids();
142 
143  // Partitions the data points among the current cluster centroids.
144  void InitialPartition();
145 
146  // Compute (modify) the coordinates of centroids.
147  void ComputeCentroids();
148 
149  // Set all dimensions to zero
150  void ResetCenteroids();
151 
152  // Clear the currently used data structures.
153  void ClearStructures();
154 
155  // Init empty Centroid to the furthest point.
156  void ReInitEmptyCenter(size_t centerIdx);
157 
158  // Variables
159  size_t m_num_Cluster;
160  DataPoints m_PointSet;
161  size_t m_dimension;
162 
163  // Container to hold the centroid.
164  DataPoints m_Centroids;
165 
166  // Connection of each point to a centroid.
167  std::vector<size_t> m_PointsToClusters;
168 
169  // Responsibility list of each centroid.
170  std::vector<std::set<size_t> *> m_ClustersToPoints;
171 
172  bool m_prune; // If prune the current cluster.
173  bool m_forceToLabel; // Force to label the current mean
174 };
175 
176 //__________________ Inlines ____________
177 
179 {
180  return m_num_Cluster;
181 };
182 
184 {
185  return m_dimension;
186 };
187 
188 inline void PndMvaCluster::SetNumberOfClusters(size_t val)
189 {
190  m_num_Cluster = val;
191 };
192 
193 inline void PndMvaCluster::Setprune(bool const prune)
194 {
195  m_prune = prune;
196 };
197 inline void PndMvaCluster::SetForceToLabel(bool const label)
198 {
199  m_forceToLabel = label;
200 };
201 #endif // End interface
void SetForceToLabel(bool const forceLabel=false)
PndMvaCluster(DataPoints const &InputData, size_t nCluster)
size_t GetClusterDimension() const
virtual DataPoints * Cluster(ClusteringType const ClType=KMEANS_HARD)
std::vector< std::pair< std::string, std::vector< float > * > > DataPoints
Data structure of the space points and the cluster centers.
Definition: PndMvaCluster.h:36
virtual DataPoints * ClusterAndLabel(ClusteringType const ClType, std::vector< std::string > const &labels)
virtual ~PndMvaCluster()
void SetNumberOfClusters(size_t val)
ClusteringType
Clustering types.
Definition: PndMvaCluster.h:39
void Setprune(bool const prune=false)
size_t GetNumberOfClusters() const