/*
 GestureRecognitionToolkit  Version: 1.0  Revision: 04-03-15

 The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++
 machine learning library for real-time gesture recognition.

 DecisionTreeClusterNode.cpp
*/

#include "DecisionTreeClusterNode.h"
using namespace GRT;

//Register the DecisionTreeClusterNode module with the Node base class; this
//static instance lets the toolkit create this node type dynamically by its
//string name (e.g. when a saved decision tree is loaded from file)
RegisterNode< DecisionTreeClusterNode > DecisionTreeClusterNode::registerModule("DecisionTreeClusterNode");
8 
10  nodeType = "DecisionTreeClusterNode";
11  parent = NULL;
12  leftChild = NULL;
13  rightChild = NULL;
14  clear();
15 }
16 
18  clear();
19 }
20 
21 bool DecisionTreeClusterNode::predict(const VectorDouble &x) {
22 
23  if( x[ featureIndex ] >= threshold ) return true;
24 
25  return false;
26 }
27 
29 
30  //Call the base class clear function
32 
33  featureIndex = 0;
34  threshold = 0;
35 
36  return true;
37 }
38 
40 
41  ostringstream stream;
42 
43  if( getModel( stream ) ){
44  cout << stream.str();
45  return true;
46  }
47 
48  return false;
49 }
50 
51 bool DecisionTreeClusterNode::getModel(ostream &stream) const{
52 
53  string tab = "";
54  for(UINT i=0; i<depth; i++) tab += "\t";
55 
56  stream << tab << "depth: " << depth;
57  stream << " nodeSize: " << nodeSize;
58  stream << " featureIndex: " << featureIndex;
59  stream << " threshold " << threshold;
60  stream << " isLeafNode: " << isLeafNode << endl;
61 
62  stream << tab << "ClassProbabilities: ";
63  for(UINT i=0; i<classProbabilities.size(); i++){
64  stream << classProbabilities[i] << "\t";
65  }
66  stream << endl;
67 
68  if( leftChild != NULL ){
69  stream << tab << "LeftChild: " << endl;
70  leftChild->getModel( stream );
71  }
72 
73  if( rightChild != NULL ){
74  stream << tab << "RightChild: " << endl;
75  rightChild->getModel( stream );
76  }
77 
78  return true;
79 }
80 
82 
84 
85  if( node == NULL ){
86  return NULL;
87  }
88 
89  //Copy this node into the node
90  node->depth = depth;
91  node->isLeafNode = isLeafNode;
92  node->nodeID = nodeID;
93  node->predictedNodeID = predictedNodeID;
94  node->nodeSize = nodeSize;
95  node->featureIndex = featureIndex;
96  node->threshold = threshold;
97  node->classProbabilities = classProbabilities;
98 
99  //Recursively deep copy the left child
100  if( leftChild ){
101  node->leftChild = leftChild->deepCopyNode();
102  node->leftChild->setParent( node );
103  }
104 
105  //Recursively deep copy the right child
106  if( rightChild ){
107  node->rightChild = rightChild->deepCopyNode();
108  node->rightChild->setParent( node );
109  }
110 
111  return dynamic_cast< DecisionTreeClusterNode* >( node );
112 }
113 
115  return dynamic_cast< DecisionTreeClusterNode* >( deepCopyNode() );
116 }
117 
119  return featureIndex;
120 }
121 
123  return threshold;
124 }
125 
126 bool DecisionTreeClusterNode::set(const UINT nodeSize,const UINT featureIndex,const double threshold,const VectorDouble &classProbabilities){
127  this->nodeSize = nodeSize;
128  this->featureIndex = featureIndex;
129  this->threshold = threshold;
130  this->classProbabilities = classProbabilities;
131  return true;
132 }
133 
134 bool DecisionTreeClusterNode::computeBestSpiltBestIterativeSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const vector< UINT > &features, const vector< UINT > &classLabels, UINT &featureIndex, double &minError ){
135 
136  return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
137 }
138 
139 bool DecisionTreeClusterNode::computeBestSpiltBestRandomSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const vector< UINT > &features, const vector< UINT > &classLabels, UINT &featureIndex, double &minError ){
140 
141  return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
142 }
143 
//Searches for the best split for this node: for each of a random subset of
//candidate features it trains a 2-cluster KMeans on that single feature
//dimension, places the threshold at the midpoint of the two cluster centres,
//and scores the resulting partition by the size-weighted Gini index. The
//lowest-error feature/threshold pair is stored on the node via set().
//@param numSplittingSteps caps how many randomly chosen features are tried
//@param trainingData the samples that reached this node
//@param features the candidate feature indices to choose from
//@param classLabels the class labels present in the training data
//@param featureIndex output: the best feature index found
//@param minError output: the Gini error of the best split found
//@return true on success, false if no features were given or KMeans failed
bool DecisionTreeClusterNode::computeBestSpilt( const UINT &numSplittingSteps, const ClassificationData &trainingData, const vector< UINT > &features, const vector< UINT > &classLabels, UINT &featureIndex, double &minError ){

    const UINT M = trainingData.getNumSamples();
    const UINT N = (UINT)features.size();
    const UINT K = (UINT)classLabels.size();

    //Nothing to split on if there are no candidate features
    if( N == 0 ) return false;

    minError = numeric_limits<double>::max();
    Random random;
    UINT bestFeatureIndex = 0;
    double bestThreshold = 0;
    double error = 0;
    double giniIndexL = 0;
    double giniIndexR = 0;
    double weightL = 0;
    double weightR = 0;
    vector< UINT > groupIndex(M);
    VectorDouble groupCounter(2,0);
    vector< MinMax > ranges = trainingData.getRanges(); //NOTE(review): ranges appears unused in this function — candidate for removal upstream
    MatrixDouble classProbabilities(K,2);
    MatrixDouble data(M,1); //This will store our temporary data for each dimension

    //Randomly select which features we want to use (at most numSplittingSteps of them)
    UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
    vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures );

    //Loop over each random feature and try and find the best split point
    for(UINT n=0; n<numRandomFeatures; n++){

        featureIndex = features[ randomFeatures[n] ];

        //Use the data in this feature dimension to create a sub dataset (one column)
        for(UINT i=0; i<M; i++){
            data[i][0] = trainingData[i][featureIndex];
        }

        //Use this data to train a KMeans cluster with 2 clusters
        KMeans kmeans;
        kmeans.setNumClusters( 2 );
        kmeans.setComputeTheta( true );
        kmeans.setMinChange( 1.0e-5 );
        kmeans.setMinNumEpochs( 1 );
        kmeans.setMaxNumEpochs( 100 );

        //If clustering fails the whole split search is aborted
        if( !kmeans.train( data ) ){
            errorLog << "computeBestSpilt() - Failed to train KMeans model for feature: " << featureIndex << endl;
            return false;
        }

        //Set the split threshold as the mid point between the two clusters
        MatrixDouble clusters = kmeans.getClusters();
        threshold = 0;
        for(UINT i=0; i<clusters.getNumRows(); i++){
            threshold += clusters[i][0];
        }
        threshold /= clusters.getNumRows();

        //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
        groupCounter[0] = groupCounter[1] = 0;
        classProbabilities.setAllValues(0);
        for(UINT i=0; i<M; i++){
            groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
            groupCounter[ groupIndex[i] ]++;
            classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
        }

        //Compute the class probabilities for the lhs group and rhs group
        for(UINT k=0; k<K; k++){
            classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
            classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
        }

        //Compute the Gini index for the lhs and rhs groups
        giniIndexL = giniIndexR = 0;
        for(UINT k=0; k<K; k++){
            giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
            giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
        }
        //Weight each side's Gini index by the fraction of samples it received
        weightL = groupCounter[0]/M;
        weightR = groupCounter[1]/M;
        error = (giniIndexL*weightL) + (giniIndexR*weightR);

        //Store the best threshold and feature index
        if( error < minError ){
            minError = error;
            bestThreshold = threshold;
            bestFeatureIndex = featureIndex;
        }
    }

    //Set the best feature index that will be returned to the DecisionTree that called this function
    featureIndex = bestFeatureIndex;

    //Store the node size, feature index, best threshold and class probabilities for this node
    set(M,featureIndex,bestThreshold,trainingData.getClassProbabilities(classLabels));

    return true;
}
243 
245 
246  if( !file.is_open() )
247  {
248  errorLog << "saveParametersToFile(fstream &file) - File is not open!" << endl;
249  return false;
250  }
251 
252  //Save the DecisionTreeNode parameters
254  errorLog << "saveParametersToFile(fstream &file) - Failed to save DecisionTreeNode parameters to file!" << endl;
255  return false;
256  }
257 
258  //Save the custom DecisionTreeThresholdNode parameters
259  file << "FeatureIndex: " << featureIndex << endl;
260  file << "Threshold: " << threshold << endl;
261 
262  return true;
263 }
264 
266 
267  if(!file.is_open())
268  {
269  errorLog << "loadParametersFromFile(fstream &file) - File is not open!" << endl;
270  return false;
271  }
272 
273  //Load the DecisionTreeNode parameters
275  errorLog << "loadParametersFromFile(fstream &file) - Failed to load DecisionTreeNode parameters from file!" << endl;
276  return false;
277  }
278 
279  string word;
280  //Load the custom DecisionTreeThresholdNode Parameters
281  file >> word;
282  if( word != "FeatureIndex:" ){
283  errorLog << "loadParametersFromFile(fstream &file) - Failed to find FeatureIndex header!" << endl;
284  return false;
285  }
286  file >> featureIndex;
287 
288  file >> word;
289  if( word != "Threshold:" ){
290  errorLog << "loadParametersFromFile(fstream &file) - Failed to find Threshold header!" << endl;
291  return false;
292  }
293  file >> threshold;
294 
295  return true;
296 }
297 
/*
 NOTE(review): the text below is doxygen cross-reference residue that was
 appended to this file during extraction — it is not part of the original
 translation unit. It is preserved inside a comment so the file remains
 parseable; it should be deleted once the original source is restored.

 virtual Node * deepCopyNode() const                          — Node.cpp:267
 bool setMaxNumEpochs(const UINT maxNumEpochs)                — MLBase.cpp:237
 virtual bool saveParametersToFile(fstream &file) const       — AdaBoost.cpp:25
 std::vector< unsigned int > getRandomSubset(const unsigned int startRange, const unsigned int endRange, const unsigned int subsetSize) — Random.h:267
 DecisionTreeClusterNode * deepCopy() const
 virtual bool train(ClassificationData trainingData)          — MLBase.cpp:80
 virtual bool getModel(ostream &stream) const                 — Node.cpp:111
 bool setMinChange(const double minChange)                    — MLBase.cpp:251
 bool setNumClusters(const UINT numClusters)                  — Clusterer.cpp:263
 This file implements a DecisionTreeClusterNode, which is a specific type of node used for a DecisionTree.
 bool set(const UINT nodeSize, const UINT featureIndex, const double threshold, const VectorDouble &classProbabilities)
 virtual bool loadParametersFromFile(fstream &file)
 unsigned int getNumRows() const                              — Matrix.h:531
 vector< MinMax > getRanges() const
 virtual bool predict(const VectorDouble &x)                  — Node.h:37
 bool setMinNumEpochs(const UINT minNumEpochs)                — MLBase.cpp:246
*/