PandaRoot
PndMvaCluster.h
Go to the documentation of this file.
1 /* ***************************************
2  * Clustering algorithms *
3  * Author: M.Babai@rug.nl *
4  * Version: *
5  * LICENSE: *
6  * ***************************************
7  */
8 //#pragma once
9 #ifndef PND_MVA_CLUSTER_H
10 #define PND_MVA_CLUSTER_H
11 
12 #include <iostream>
13 #include <cassert>
14 #include <vector>
15 #include <set>
16 #include <limits>
17 
18 // Local includes
19 #include "PndMvaUtil.h"
20 
// Compile-time debug switch. When set to a value greater than 0 the
// declaration of PndMvaCluster::printStructs() below is compiled in.
21 #define PNDMVA_CLUSTER_DEBUG 0
22 
// Data structure of the space points and the cluster centers: a sequence of
// (std::string, pointer to std::vector<float>) pairs.
// NOTE(review): the string is presumably the point's label/name and the
// vector its coordinates; ownership of the pointed-to vectors is not visible
// in this header -- confirm against the implementation.
24 using DataPoints = std::vector<std::pair<std::string, std::vector<float> *>>;
25 
27 typedef enum ClusteringType {
29  KMEANS_SOFT = 1 // Not Implemented yet.
31 
32 //---------------- Class definition --------------
34  //--------------------------------------------
35  // -------------- public members -------------
36  public:
42  explicit PndMvaCluster(DataPoints const &InputData, size_t nCluster);
43 
51  explicit PndMvaCluster(DataPoints const &InputData, size_t nCluster, bool const prune, bool const forceLabels);
52 
56  virtual ~PndMvaCluster();
57 
63  virtual DataPoints *Cluster(ClusteringType const ClType = KMEANS_HARD);
64 
72  virtual DataPoints *ClusterAndLabel(ClusteringType const ClType, std::vector<std::string> const &labels);
73 
74  //------- Getters
79  inline size_t GetNumberOfClusters() const;
80 
85  inline size_t GetClusterDimension() const;
86 
87  //------- Setters
92  inline void SetNumberOfClusters(size_t val);
93 
99  inline void Setprune(bool const prune = false);
100 
107  inline void SetForceToLabel(bool const forceLabel = false);
108 
109  //__________________ DEBUG FUNCTIONS ______________
110 #if (PNDMVA_CLUSTER_DEBUG > 0)
111 
114  void printStructs();
115 #endif
116  //--------------------------------------------
117  // protected:
118  // -------------- private members ------------
119  private:
121  PndMvaCluster(const PndMvaCluster &other);
122  PndMvaCluster &operator=(const PndMvaCluster &other);
123 
124  // Functions & Procedures
125  // Performs the actual hard K-Means clustering.
126  DataPoints *K_Means();
127 
128  // Initialize the centroids before clustering.
129  void InitCentroids();
130 
131  // Partitions the data points among the current cluster centroids.
132  void InitialPartition();
133 
134  // Compute (modify) the coordinates of centroids.
135  void ComputeCentroids();
136 
137  // Set all dimensions to zero
138  void ResetCenteroids();
139 
140  // Clear the currently used data structures.
141  void ClearStructures();
142 
143  // Init empty Centroid to the furthest point.
144  void ReInitEmptyCenter(size_t centerIdx);
145 
146  // Variables
147  size_t m_num_Cluster;
148  DataPoints m_PointSet;
149  size_t m_dimension;
150 
151  // Container to hold the centroid.
152  DataPoints m_Centroids;
153 
154  // Connection of each point to a centroid.
155  std::vector<size_t> m_PointsToClusters;
156 
157  // Responsibility list of each centroid.
158  std::vector<std::set<size_t> *> m_ClustersToPoints;
159 
160  bool m_prune; // If prune the current cluster.
161  bool m_forceToLabel; // Force to label the current mean
162 };
163 
164 //__________________ Inlines ____________
165 
// Returns the currently configured number of clusters (m_num_Cluster).
166 inline size_t PndMvaCluster::GetNumberOfClusters() const
167 {
168  return m_num_Cluster;
169 }
170 
// Returns the stored dimension value (m_dimension).
171 inline size_t PndMvaCluster::GetClusterDimension() const
172 {
173  return m_dimension;
174 }
175 
// Overwrites the configured number of clusters with val. No validation is
// performed here; the value is stored as given.
176 inline void PndMvaCluster::SetNumberOfClusters(size_t val)
177 {
178  m_num_Cluster = val;
179 }
180 
// Stores the given value in the m_prune flag. The default argument (false)
// is supplied by the in-class declaration.
181 inline void PndMvaCluster::Setprune(bool const prune)
182 {
183  m_prune = prune;
184 }
// Stores the given value in the m_forceToLabel flag. The default argument
// (false) is supplied by the in-class declaration, whose parameter name
// (forceLabel) is used here as well.
185 inline void PndMvaCluster::SetForceToLabel(bool const forceLabel)
186 {
187  m_forceToLabel = forceLabel;
188 }
189 #endif // End interface
void SetForceToLabel(bool const forceLabel=false)
PndMvaCluster(DataPoints const &InputData, size_t nCluster)
size_t GetClusterDimension() const
virtual DataPoints * Cluster(ClusteringType const ClType=KMEANS_HARD)
std::vector< std::pair< std::string, std::vector< float > * > > DataPoints
Data structure of the space points and the cluster centers.
Definition: PndMvaCluster.h:24
virtual DataPoints * ClusterAndLabel(ClusteringType const ClType, std::vector< std::string > const &labels)
virtual ~PndMvaCluster()
void SetNumberOfClusters(size_t val)
ClusteringType
Clustering types.
Definition: PndMvaCluster.h:27
void Setprune(bool const prune=false)
size_t GetNumberOfClusters() const