GestureRecognitionToolkit  Version: 1.0  Revision: 04-03-15
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source C++ machine learning library for real-time gesture recognition.

This file implements the K-Nearest Neighbors (KNN) classification algorithm (http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm). KNN is a simple but powerful classifier, based on finding the closest K training examples in the feature space for a new input vector. The KNN algorithm is amongst the simplest of all machine learning algorithms: an object is classified by a majority vote of its neighbors, with the object being assigned to the class most common amongst its K nearest neighbors (K is a positive integer, typically small). If K = 1, then the object is simply assigned to the class of its nearest neighbor.

KNN.cpp
/*
GRT MIT License
Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>

Permission is hereby granted, free of charge, to any person obtaining a copy of this software
and associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include "KNN.h"

using namespace std;

namespace GRT{

//Register the KNN module with the Classifier base class
RegisterClassifierModule< KNN > KNN::registerModule("KNN");

KNN::KNN(unsigned int K,bool useScaling,bool useNullRejection,double nullRejectionCoeff,bool searchForBestKValue,UINT minKSearchValue,UINT maxKSearchValue){
    this->K = K;
    this->distanceMethod = EUCLIDEAN_DISTANCE;
    this->useScaling = useScaling;
    this->useNullRejection = useNullRejection;
    this->nullRejectionCoeff = nullRejectionCoeff;
    this->searchForBestKValue = searchForBestKValue;
    this->minKSearchValue = minKSearchValue;
    this->maxKSearchValue = maxKSearchValue;
    supportsNullRejection = true;
    classType = "KNN";
    classifierType = classType;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    debugLog.setProceedingText("[DEBUG KNN]");
    errorLog.setProceedingText("[ERROR KNN]");
    trainingLog.setProceedingText("[TRAINING KNN]");
    warningLog.setProceedingText("[WARNING KNN]");
}

KNN::KNN(const KNN &rhs){
    classType = "KNN";
    classifierType = classType;
    classifierMode = STANDARD_CLASSIFIER_MODE;
    debugLog.setProceedingText("[DEBUG KNN]");
    errorLog.setProceedingText("[ERROR KNN]");
    trainingLog.setProceedingText("[TRAINING KNN]");
    warningLog.setProceedingText("[WARNING KNN]");
    *this = rhs;
}

KNN::~KNN(void)
{
}

KNN& KNN::operator=(const KNN &rhs){
    if( this != &rhs ){
        //KNN variables
        this->K = rhs.K;
        this->distanceMethod = rhs.distanceMethod;
        this->searchForBestKValue = rhs.searchForBestKValue;
        this->minKSearchValue = rhs.minKSearchValue;
        this->maxKSearchValue = rhs.maxKSearchValue;
        this->trainingData = rhs.trainingData;
        this->trainingMu = rhs.trainingMu;
        this->trainingSigma = rhs.trainingSigma;

        //Classifier variables
        copyBaseVariables( (Classifier*)&rhs );
    }
    return *this;
}

bool KNN::deepCopyFrom(const Classifier *classifier){

    if( classifier == NULL ) return false;

    if( this->getClassifierType() == classifier->getClassifierType() ){
        //Get a pointer to the KNN copy instance
        KNN *ptr = (KNN*)classifier;

        this->K = ptr->K;
        this->distanceMethod = ptr->distanceMethod;
        this->searchForBestKValue = ptr->searchForBestKValue;
        this->minKSearchValue = ptr->minKSearchValue;
        this->maxKSearchValue = ptr->maxKSearchValue;
        this->trainingData = ptr->trainingData;
        this->trainingMu = ptr->trainingMu;
        this->trainingSigma = ptr->trainingSigma;

        //Classifier variables
        return copyBaseVariables( classifier );
    }
    return false;
}

bool KNN::train_(ClassificationData &trainingData){

    //Clear any previous models
    clear();

    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }

    //Get the ranges of the data
    ranges = trainingData.getRanges();
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //Store the number of features, classes and the training data
    this->numInputDimensions = trainingData.getNumDimensions();
    this->numClasses = trainingData.getNumClasses();

    //TODO: In the future need to build a kdtree from the training data to allow better realtime prediction
    this->trainingData = trainingData;

    //Set the class labels
    classLabels.resize( numClasses );
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
    }

    //If we do not need to search for the best K value, then call the sub training function and return the result
    if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    //If we have got this far then we are going to search for the best K value
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data, using 80% to train the algorithm and 20% to test it
        ClassificationData trainingSet(trainingData);
        ClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{

            //Compute the classification accuracy
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){

                VectorDouble sample = testSet[i].getSample();

                if( !predict( sample, k ) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );

            trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }
        }
    }

    if( bestAccuracy > 0 ){
        //Sort the training log by accuracy
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value, so this finds the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

        trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index, this should give us the best accuracy with the minimum K value
        //We now need to train the model again to make sure all the training metrics are computed correctly
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}

bool KNN::train_(const ClassificationData &trainingData,const UINT K){

    //Set the number of neighbours to use for prediction
    this->K = K;

    //Flag that the algorithm has been trained so we can compute the rejection thresholds
    trained = true;

    //If null rejection is enabled then compute the null rejection thresholds
    if( useNullRejection ){

        //Temporarily disable null rejection so we can compute the values for it (it is re-enabled below)
        useNullRejection = false;
        nullRejectionThresholds.clear();

        //Compute the rejection thresholds for each class
        VectorDouble counter(numClasses,0);
        trainingMu.resize( numClasses, 0 );
        trainingSigma.resize( numClasses, 0 );
        nullRejectionThresholds.resize( numClasses, 0 );

        //Compute Mu for each of the classes
        const unsigned int numTrainingExamples = trainingData.getNumSamples();
        vector< IndexedDouble > predictionResults( numTrainingExamples );
        for(UINT i=0; i<numTrainingExamples; i++){
            predict( trainingData[i].getSample(), K );

            UINT classLabelIndex = 0;
            for(UINT k=0; k<numClasses; k++){
                if( predictedClassLabel == classLabels[k] ){
                    classLabelIndex = k;
                    break;
                }
            }

            predictionResults[ i ].index = classLabelIndex;
            predictionResults[ i ].value = classDistances[ classLabelIndex ];

            trainingMu[ classLabelIndex ] += predictionResults[ i ].value;
            counter[ classLabelIndex ]++;
        }

        for(UINT j=0; j<numClasses; j++){
            trainingMu[j] /= counter[j];
        }

        //Compute Sigma for each of the classes
        for(UINT i=0; i<numTrainingExamples; i++){
            trainingSigma[predictionResults[i].index] += SQR(predictionResults[i].value - trainingMu[predictionResults[i].index]);
        }

        for(UINT j=0; j<numClasses; j++){
            double count = counter[j];
            if( count > 1 ){
                trainingSigma[ j ] = sqrt( trainingSigma[j] / (count-1) );
            }else{
                trainingSigma[ j ] = 1.0;
            }
        }

        //Check to see if any of the mu or sigma values are zero or NaN
        bool errorFound = false;
        for(UINT j=0; j<numClasses; j++){
            if( trainingMu[j] == 0 ){
                warningLog << "TrainingMu[ " << j << " ] is zero for a K value of " << K << endl;
            }
            if( trainingSigma[j] == 0 ){
                warningLog << "TrainingSigma[ " << j << " ] is zero for a K value of " << K << endl;
            }
            if( grt_isnan( trainingMu[j] ) ){
                errorLog << "TrainingMu[ " << j << " ] is NAN for a K value of " << K << endl;
                errorFound = true;
            }
            if( grt_isnan( trainingSigma[j] ) ){
                errorLog << "TrainingSigma[ " << j << " ] is NAN for a K value of " << K << endl;
                errorFound = true;
            }
        }

        if( errorFound ){
            trained = false;
            return false;
        }

        //Compute the rejection thresholds
        for(unsigned int j=0; j<numClasses; j++){
            nullRejectionThresholds[j] = trainingMu[j] + (trainingSigma[j]*nullRejectionCoeff);
        }

        //Re-enable null rejection
        useNullRejection = true;

    }else{
        //Resize the rejection thresholds but set the values to 0
        nullRejectionThresholds.clear();
        nullRejectionThresholds.resize( numClasses, 0 );
    }

    return true;
}
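
//Worked example of the rejection rule above (the numbers are illustrative, not taken from GRT):
//if a class has trainingMu = 0.4, trainingSigma = 0.1, and nullRejectionCoeff = 2, then its
//threshold is 0.4 + (0.1 * 2) = 0.6. At prediction time, a sample whose average distance to
//that class exceeds 0.6 is rejected and labelled with the null class (see predict below).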

bool KNN::predict_(VectorDouble &inputVector){

    if( !trained ){
        errorLog << "predict_(VectorDouble &inputVector) - KNN model has not been trained" << endl;
        return false;
    }

    if( inputVector.size() != numInputDimensions ){
        errorLog << "predict_(VectorDouble &inputVector) - the size of the input vector " << inputVector.size() << " does not match the number of features " << numInputDimensions << endl;
        return false;
    }

    //Scale the input vector if needed
    if( useScaling ){
        for(UINT i=0; i<numInputDimensions; i++){
            inputVector[i] = scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
        }
    }

    //Run the prediction
    return predict(inputVector,K);
}

bool KNN::predict(const VectorDouble &inputVector,const UINT K){

    if( !trained ){
        errorLog << "predict(VectorDouble inputVector,UINT K) - KNN model has not been trained" << endl;
        return false;
    }

    if( inputVector.size() != numInputDimensions ){
        errorLog << "predict(VectorDouble inputVector) - the size of the input vector " << inputVector.size() << " does not match the number of features " << numInputDimensions << endl;
        return false;
    }

    if( K > trainingData.getNumSamples() ){
        errorLog << "predict(VectorDouble inputVector,UINT K) - K is greater than the number of training samples" << endl;
        return false;
    }

    //TODO - need to build a kdtree of the training data to allow better realtime prediction
    const UINT M = trainingData.getNumSamples();
    vector< IndexedDouble > neighbours;

    for(UINT i=0; i<M; i++){
        double dist = 0;
        UINT classLabel = trainingData[i].getClassLabel();
        VectorDouble trainingSample = trainingData[i].getSample();

        switch( distanceMethod ){
            case EUCLIDEAN_DISTANCE:
                dist = computeEuclideanDistance(inputVector,trainingSample);
                break;
            case COSINE_DISTANCE:
                dist = computeCosineDistance(inputVector,trainingSample);
                break;
            case MANHATTAN_DISTANCE:
                dist = computeManhattanDistance(inputVector, trainingSample);
                break;
            default:
                errorLog << "predict(vector< double > inputVector) - unknown distance measure!" << endl;
                return false;
                break;
        }

        if( neighbours.size() < K ){
            neighbours.push_back( IndexedDouble(classLabel,dist) );
        }else{
            //Find the maximum value in the neighbours buffer
            double maxValue = neighbours[0].value;
            UINT maxIndex = 0;
            for(UINT n=1; n<neighbours.size(); n++){
                if( neighbours[n].value > maxValue ){
                    maxValue = neighbours[n].value;
                    maxIndex = n;
                }
            }

            //If the dist is less than the maximum value in the buffer, then replace that value with the new dist
            if( dist < maxValue ){
                neighbours[ maxIndex ] = IndexedDouble(classLabel,dist);
            }
        }
    }

    //Predict the class ID using the labels of the K nearest neighbours
    if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses);
    if( classDistances.size() != numClasses ) classDistances.resize(numClasses);

    std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
    std::fill(classDistances.begin(),classDistances.end(),0);

    //Count the classes
    for(UINT k=0; k<neighbours.size(); k++){
        UINT classLabel = neighbours[k].index;
        if( classLabel == 0 ){
            errorLog << "predict(VectorDouble inputVector) - Class label of training example can not be zero!" << endl;
            return false;
        }

        //Find the index of the classLabel
        UINT classLabelIndex = 0;
        for(UINT j=0; j<numClasses; j++){
            if( classLabel == classLabels[j] ){
                classLabelIndex = j;
                break;
            }
        }
        classLikelihoods[ classLabelIndex ] += 1;
        classDistances[ classLabelIndex ] += neighbours[k].value;
    }

    //Get the max count
    double maxCount = classLikelihoods[0];
    UINT maxIndex = 0;
    for(UINT i=1; i<classLikelihoods.size(); i++){
        if( classLikelihoods[i] > maxCount ){
            maxCount = classLikelihoods[i];
            maxIndex = i;
        }
    }

    //Compute the average distance per class
    for(UINT i=0; i<numClasses; i++){
        if( classLikelihoods[i] > 0 ) classDistances[i] /= classLikelihoods[i];
        else classDistances[i] = BIG_DISTANCE;
    }

    //Normalize the likelihoods
    for(UINT i=0; i<numClasses; i++){
        classLikelihoods[i] /= double( neighbours.size() );
    }

    //Set the maximum likelihood value
    maxLikelihood = classLikelihoods[ maxIndex ];

    if( useNullRejection ){
        if( classDistances[ maxIndex ] <= nullRejectionThresholds[ maxIndex ] ){
            predictedClassLabel = classLabels[maxIndex];
        }else{
            predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; //Set the gesture label as the null label
        }
    }else{
        predictedClassLabel = classLabels[maxIndex];
    }

    return true;
}
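
//Note on the neighbour search above: the neighbours buffer is kept at a fixed size of K, and a
//new training sample only replaces the current worst entry if it is closer. Each prediction
//therefore performs M distance computations plus O(M*K) buffer comparisons for M stored training
//samples, using O(K) extra memory. This is the linear scan that the kd-tree TODO above refers to.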

bool KNN::clear(){

    //Clear the Classifier variables
    Classifier::clear();

    //Clear the KNN model
    trainingData.clear();
    trainingMu.clear();
    trainingSigma.clear();

    return true;
}

bool KNN::saveModelToFile(fstream &file) const{

    if(!file.is_open())
    {
        errorLog << "saveModelToFile(fstream &file) - Could not open file to save model!" << endl;
        return false;
    }

    //Write the header info
    file << "GRT_KNN_MODEL_FILE_V2.0\n";

    //Write the classifier settings to the file
    if( !Classifier::saveBaseSettingsToFile(file) ){
        errorLog << "saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << endl;
        return false;
    }

    file << "K: " << K << endl;
    file << "DistanceMethod: " << distanceMethod << endl;
    file << "SearchForBestKValue: " << searchForBestKValue << endl;
    file << "MinKSearchValue: " << minKSearchValue << endl;
    file << "MaxKSearchValue: " << maxKSearchValue << endl;

    if( trained ){
        if( useNullRejection ){
            file << "TrainingMu: ";
            for(UINT j=0; j<trainingMu.size(); j++){
                file << trainingMu[j] << "\t";
            }
            file << endl;

            file << "TrainingSigma: ";
            for(UINT j=0; j<trainingSigma.size(); j++){
                file << trainingSigma[j] << "\t";
            }
            file << endl;
        }

        file << "NumTrainingSamples: " << trainingData.getNumSamples() << endl;
        file << "TrainingData: \n";

        //Write out each of the training samples
        for(UINT i=0; i<trainingData.getNumSamples(); i++){
            file << trainingData[i].getClassLabel() << "\t";

            for(UINT j=0; j<numInputDimensions; j++){
                file << trainingData[i][j] << "\t";
            }
            file << endl;
        }
    }

    return true;
}
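
//Sketch of the resulting model file layout (the field values shown are illustrative; the base
//settings block is written by Classifier::saveBaseSettingsToFile and is omitted here):
//  GRT_KNN_MODEL_FILE_V2.0
//  ...base classifier settings...
//  K: 3
//  DistanceMethod: 0
//  SearchForBestKValue: 0
//  MinKSearchValue: 1
//  MaxKSearchValue: 10
//  TrainingMu: ...        (only written when trained with null rejection enabled)
//  TrainingSigma: ...     (only written when trained with null rejection enabled)
//  NumTrainingSamples: 4
//  TrainingData:
//  <classLabel> <tab-separated feature values>, one line per training sample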

bool KNN::loadModelFromFile(fstream &file){

    if(!file.is_open())
    {
        errorLog << "loadModelFromFile(fstream &file) - Could not open file to load model!" << endl;
        return false;
    }

    std::string word;

    file >> word;

    //Check to see if we should load a legacy file
    if( word == "GRT_KNN_MODEL_FILE_V1.0" ){
        return loadLegacyModelFromFile( file );
    }

    //Find the file type header
    if(word != "GRT_KNN_MODEL_FILE_V2.0"){
        errorLog << "loadModelFromFile(fstream &file) - Could not find Model File Header!" << endl;
        return false;
    }

    //Load the base settings from the file
    if( !Classifier::loadBaseSettingsFromFile(file) ){
        errorLog << "loadModelFromFile(fstream &file) - Failed to load base settings from file!" << endl;
        return false;
    }

    file >> word;
    if(word != "K:"){
        errorLog << "loadModelFromFile(fstream &file) - Could not find K!" << endl;
        return false;
    }
    file >> K;

    file >> word;
    if(word != "DistanceMethod:"){
        errorLog << "loadModelFromFile(fstream &file) - Could not find DistanceMethod!" << endl;
        return false;
    }
    file >> distanceMethod;

    file >> word;
    if(word != "SearchForBestKValue:"){
        errorLog << "loadModelFromFile(fstream &file) - Could not find SearchForBestKValue!" << endl;
        return false;
    }
    file >> searchForBestKValue;

    file >> word;
    if(word != "MinKSearchValue:"){
        errorLog << "loadModelFromFile(fstream &file) - Could not find MinKSearchValue!" << endl;
        return false;
    }
    file >> minKSearchValue;

    file >> word;
    if(word != "MaxKSearchValue:"){
        errorLog << "loadModelFromFile(fstream &file) - Could not find MaxKSearchValue!" << endl;
        return false;
    }
    file >> maxKSearchValue;

    if( trained ){

        //Resize the buffers
        trainingMu.resize(numClasses,0);
        trainingSigma.resize(numClasses,0);

        if( useNullRejection ){
            file >> word;
            if(word != "TrainingMu:"){
                errorLog << "loadModelFromFile(fstream &file) - Could not find TrainingMu!" << endl;
                return false;
            }

            //Load the trainingMu data
            for(UINT j=0; j<numClasses; j++){
                file >> trainingMu[j];
            }

            file >> word;
            if(word != "TrainingSigma:"){
                errorLog << "loadModelFromFile(fstream &file) - Could not find TrainingSigma!" << endl;
                return false;
            }

            //Load the trainingSigma data
            for(UINT j=0; j<numClasses; j++){
                file >> trainingSigma[j];
            }
        }

        file >> word;
        if(word != "NumTrainingSamples:"){
            errorLog << "loadModelFromFile(fstream &file) - Could not find NumTrainingSamples!" << endl;
            return false;
        }
        unsigned int numTrainingSamples = 0;
        file >> numTrainingSamples;

        file >> word;
        if(word != "TrainingData:"){
            errorLog << "loadModelFromFile(fstream &file) - Could not find TrainingData!" << endl;
            return false;
        }

        //Load the training data
        trainingData.setNumDimensions(numInputDimensions);
        unsigned int classLabel = 0;
        vector< double > sample(numInputDimensions,0);
        for(UINT i=0; i<numTrainingSamples; i++){
            //Read the class label
            file >> classLabel;

            //Read the feature vector
            for(UINT j=0; j<numInputDimensions; j++){
                file >> sample[j];
            }

            //Add it to the training data
            trainingData.addSample(classLabel, sample);
        }

        maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
        bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
        classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
        classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
    }

    return true;
}

bool KNN::recomputeNullRejectionThresholds(){

    if( !trained ){
        return false;
    }

    nullRejectionThresholds.resize(numClasses,0);

    if( trainingMu.size() != numClasses || trainingSigma.size() != numClasses ){
        return false;
    }

    for(unsigned int j=0; j<numClasses; j++){
        nullRejectionThresholds[j] = trainingMu[j] + (trainingSigma[j]*nullRejectionCoeff);
    }

    return true;
}

bool KNN::setK(UINT K){
    if( K > 0 ){
        this->K = K;
        return true;
    }
    return false;
}

bool KNN::setMinKSearchValue(UINT minKSearchValue){
    this->minKSearchValue = minKSearchValue;
    return true;
}

bool KNN::setMaxKSearchValue(UINT maxKSearchValue){
    this->maxKSearchValue = maxKSearchValue;
    return true;
}

bool KNN::enableBestKValueSearch(bool searchForBestKValue){
    this->searchForBestKValue = searchForBestKValue;
    return true;
}

bool KNN::setNullRejectionCoeff(double nullRejectionCoeff){
    if( nullRejectionCoeff > 0 ){
        this->nullRejectionCoeff = nullRejectionCoeff;
        recomputeNullRejectionThresholds();
        return true;
    }
    return false;
}

bool KNN::setDistanceMethod(UINT distanceMethod){
    if( distanceMethod == EUCLIDEAN_DISTANCE || distanceMethod == COSINE_DISTANCE || distanceMethod == MANHATTAN_DISTANCE ){
        this->distanceMethod = distanceMethod;
        return true;
    }
    return false;
}

double KNN::computeEuclideanDistance(const VectorDouble &a,const VectorDouble &b){
    double dist = 0;
    for(UINT j=0; j<numInputDimensions; j++){
        dist += SQR( a[j] - b[j] );
    }
    return sqrt( dist );
}

double KNN::computeCosineDistance(const VectorDouble &a,const VectorDouble &b){
    double dist = 0;

    double dotAB = 0;
    double magA = 0;
    double magB = 0;

    for(UINT j=0; j<numInputDimensions; j++){
        dotAB += a[j] * b[j];
        magA += SQR(a[j]);
        magB += SQR(b[j]);
    }

    //dotAB / (|a||b|) is the cosine similarity, which is largest for the most similar vectors;
    //convert it to a distance (1 - similarity) so that, like the other distance methods,
    //smaller values mean closer neighbours
    dist = 1.0 - ( dotAB / (sqrt(magA) * sqrt(magB)) );

    return dist;
}

double KNN::computeManhattanDistance(const VectorDouble &a,const VectorDouble &b){
    double dist = 0;

    for(UINT j=0; j<numInputDimensions; j++){
        dist += fabs( a[j] - b[j] );
    }

    return dist;
}

bool KNN::loadLegacyModelFromFile( fstream &file ){

    string word;

    //Find the file type header
    file >> word;
    if(word != "NumFeatures:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumFeatures!" << endl;
        return false;
    }
    file >> numInputDimensions;

    file >> word;
    if(word != "NumClasses:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumClasses!" << endl;
        return false;
    }
    file >> numClasses;

    file >> word;
    if(word != "K:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find K!" << endl;
        return false;
    }
    file >> K;

    file >> word;
    if(word != "DistanceMethod:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find DistanceMethod!" << endl;
        return false;
    }
    file >> distanceMethod;

    file >> word;
    if(word != "SearchForBestKValue:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find SearchForBestKValue!" << endl;
        return false;
    }
    file >> searchForBestKValue;

    file >> word;
    if(word != "MinKSearchValue:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find MinKSearchValue!" << endl;
        return false;
    }
    file >> minKSearchValue;

    file >> word;
    if(word != "MaxKSearchValue:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find MaxKSearchValue!" << endl;
        return false;
    }
    file >> maxKSearchValue;

    file >> word;
    if(word != "UseScaling:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find UseScaling!" << endl;
        return false;
    }
    file >> useScaling;

    file >> word;
    if(word != "UseNullRejection:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find UseNullRejection!" << endl;
        return false;
    }
    file >> useNullRejection;

    file >> word;
    if(word != "NullRejectionCoeff:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NullRejectionCoeff!" << endl;
        return false;
    }
    file >> nullRejectionCoeff;

    if( useScaling ){
        //Resize the ranges buffer
        ranges.resize( numInputDimensions );

        file >> word;
        if(word != "Ranges:"){
            errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find Ranges! Found: " << word << endl;
            return false;
        }
        for(UINT n=0; n<ranges.size(); n++){
            file >> ranges[n].minValue;
            file >> ranges[n].maxValue;
        }
    }

    //Resize the buffers
    trainingMu.resize(numClasses,0);
    trainingSigma.resize(numClasses,0);

    file >> word;
    if(word != "TrainingMu:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingMu!" << endl;
        return false;
    }

    //Load the trainingMu data
    for(UINT j=0; j<numClasses; j++){
        file >> trainingMu[j];
    }

    file >> word;
    if(word != "TrainingSigma:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingSigma!" << endl;
        return false;
    }

    //Load the trainingSigma data
    for(UINT j=0; j<numClasses; j++){
        file >> trainingSigma[j];
    }

    file >> word;
    if(word != "NumTrainingSamples:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find NumTrainingSamples!" << endl;
        return false;
    }
    unsigned int numTrainingSamples = 0;
    file >> numTrainingSamples;

    file >> word;
    if(word != "TrainingData:"){
        errorLog << "loadLegacyModelFromFile(fstream &file) - Could not find TrainingData!" << endl;
        return false;
    }

    //Load the training data
    trainingData.setNumDimensions(numInputDimensions);
    unsigned int classLabel = 0;
    vector< double > sample(numInputDimensions,0);
    for(UINT i=0; i<numTrainingSamples; i++){
        //Read the class label
        file >> classLabel;

        //Read the feature vector
        for(UINT j=0; j<numInputDimensions; j++){
            file >> sample[j];
        }

        //Add it to the training data
        trainingData.addSample(classLabel, sample);
    }

    //Flag that the model has been trained
    trained = true;

    //Compute the null rejection thresholds
    recomputeNullRejectionThresholds();

    return true;
}

} //End of namespace GRT
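
The listing above defines the full training and prediction API. Below is a minimal usage sketch, written for this page rather than taken from the GRT examples: it builds a tiny two-class dataset with made-up feature values, trains a KNN classifier, and classifies a new sample. It assumes the default constructor arguments declared in KNN.h and the getPredictedClassLabel() getter provided by the GRT Classifier base class.

#include <iostream>
#include "KNN.h"
using namespace GRT;

int main(){

    //Build a small two-class training set (the feature values are made up for this sketch)
    ClassificationData trainingData;
    trainingData.setNumDimensions( 2 );

    VectorDouble sample( 2 );
    sample[0] = 0.1; sample[1] = 0.2; trainingData.addSample( 1, sample );
    sample[0] = 0.2; sample[1] = 0.1; trainingData.addSample( 1, sample );
    sample[0] = 0.8; sample[1] = 0.9; trainingData.addSample( 2, sample );
    sample[0] = 0.9; sample[1] = 0.8; trainingData.addSample( 2, sample );

    //Train the classifier with K = 3 neighbours
    KNN knn;
    knn.setK( 3 );
    if( !knn.train_( trainingData ) ){
        std::cout << "Failed to train the KNN classifier" << std::endl;
        return 1;
    }

    //Classify a new sample; it lies near the class 1 examples, so with K = 3 the
    //majority vote should return class label 1
    VectorDouble testSample( 2 );
    testSample[0] = 0.15; testSample[1] = 0.15;
    if( knn.predict_( testSample ) ){
        std::cout << "Predicted class label: " << knn.getPredictedClassLabel() << std::endl;
    }

    return 0;
}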