GestureRecognitionToolkit  Version: 1.0 Revision: 04-03-15
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, C++ machine learning library for real-time gesture recognition.
TimeSeriesClassificationData.cpp
1 /*
2 GRT MIT License
3 Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5 Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6 and associated documentation files (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial
12 portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 */
20 
22 
23 namespace GRT{
24 
25 TimeSeriesClassificationData::TimeSeriesClassificationData(UINT numDimensions,string datasetName,string infoText){
26  debugLog.setProceedingText("[DEBUG TSCD]");
27  errorLog.setProceedingText("[ERROR TSCD]");
28  warningLog.setProceedingText("[WARNING TSCD]");
29 
30  this->numDimensions = numDimensions;
31  this->datasetName = datasetName;
32  this->infoText = infoText;
33  totalNumSamples = 0;
34  crossValidationSetup = false;
35  useExternalRanges = false;
36  allowNullGestureClass = true;
37  if( numDimensions > 0 ){
38  setNumDimensions(numDimensions);
39  }
40 }
41 
43 
44  debugLog.setProceedingText("[DEBUG TSCD]");
45  errorLog.setProceedingText("[ERROR TSCD]");
46  warningLog.setProceedingText("[WARNING TSCD]");
47 
48  *this = rhs;
49 }
50 
52 
54  if( this != &rhs){
55  this->datasetName = rhs.datasetName;
56  this->infoText = rhs.infoText;
57  this->numDimensions = rhs.numDimensions;
62  this->totalNumSamples = rhs.totalNumSamples;
63  this->data = rhs.data;
64  this->classTracker = rhs.classTracker;
65  this->externalRanges = rhs.externalRanges;
66  this->debugLog = rhs.debugLog;
67  this->errorLog = rhs.errorLog;
68  this->warningLog = rhs.warningLog;
69  }
70  return *this;
71 }
72 
74  totalNumSamples = 0;
75  data.clear();
76  classTracker.clear();
77 }
78 
79 bool TimeSeriesClassificationData::setNumDimensions(const UINT numDimensions){
80  if( numDimensions > 0 ){
81  //Clear any previous training data
82  clear();
83 
84  //Set the dimensionality of the training data
85  this->numDimensions = numDimensions;
86 
87  useExternalRanges = false;
88  externalRanges.clear();
89 
90  return true;
91  }
92 
93  errorLog << "setNumDimensions(UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << endl;
94  return false;
95 }
96 
97 bool TimeSeriesClassificationData::setDatasetName(const string datasetName){
98 
99  //Make sure there are no spaces in the string
100  if( datasetName.find(" ") == string::npos ){
101  this->datasetName = datasetName;
102  return true;
103  }
104 
105  errorLog << "setDatasetName(string datasetName) - The dataset name cannot contain any spaces!" << endl;
106  return false;
107 }
108 
109 bool TimeSeriesClassificationData::setInfoText(const string infoText){
110  this->infoText = infoText;
111  return true;
112 }
113 
114 bool TimeSeriesClassificationData::setClassNameForCorrespondingClassLabel(const string className,const UINT classLabel){
115 
116  for(UINT i=0; i<classTracker.size(); i++){
117  if( classTracker[i].classLabel == classLabel ){
118  classTracker[i].className = className;
119  return true;
120  }
121  }
122 
123  return false;
124 }
125 
126 bool TimeSeriesClassificationData::setAllowNullGestureClass(const bool allowNullGestureClass){
127  this->allowNullGestureClass = allowNullGestureClass;
128  return true;
129 }
130 
131 bool TimeSeriesClassificationData::addSample(const UINT classLabel,const MatrixDouble &trainingSample){
132 
133  if( trainingSample.getNumCols() != numDimensions ){
134  errorLog << "addSample(UINT classLabel, MatrixDouble trainingSample) - The dimensionality of the training sample (" << trainingSample.getNumCols() << ") does not match that of the dataset (" << numDimensions << ")" << endl;
135  return false;
136  }
137 
138  //The class label must be greater than zero (as zero is used for the null rejection class label
139  if( classLabel == GRT_DEFAULT_NULL_CLASS_LABEL && !allowNullGestureClass ){
140  errorLog << "addSample(UINT classLabel, MatrixDouble sample) - the class label can not be 0!" << endl;
141  return false;
142  }
143 
144  TimeSeriesClassificationSample newSample(classLabel,trainingSample);
145  data.push_back( newSample );
146  totalNumSamples++;
147 
148  if( classTracker.size() == 0 ){
149  ClassTracker tracker(classLabel,1);
150  classTracker.push_back(tracker);
151  }else{
152  bool labelFound = false;
153  for(UINT i=0; i<classTracker.size(); i++){
154  if( classLabel == classTracker[i].classLabel ){
155  classTracker[i].counter++;
156  labelFound = true;
157  break;
158  }
159  }
160  if( !labelFound ){
161  ClassTracker tracker(classLabel,1);
162  classTracker.push_back(tracker);
163  }
164  }
165  return true;
166 }
167 
169  UINT numExamplesRemoved = 0;
170  UINT numExamplesToRemove = 0;
171 
172  //Find out how many training examples we need to remove
173  for(UINT i=0; i<classTracker.size(); i++){
174  if( classTracker[i].classLabel == classLabel ){
175  numExamplesToRemove = classTracker[i].counter;
176  classTracker.erase(classTracker.begin()+i);
177  break;
178  }
179  }
180 
181  //Remove the samples with the matching class ID
182  if( numExamplesToRemove > 0 ){
183  UINT i=0;
184  while( numExamplesRemoved < numExamplesToRemove ){
185  if( data[i].getClassLabel() == classLabel ){
186  data.erase(data.begin()+i);
187  numExamplesRemoved++;
188  }else if( ++i == data.size() ) break;
189  }
190  }
191 
192  totalNumSamples = (UINT)data.size();
193 
194  return numExamplesRemoved;
195 }
196 
198 
199  if( totalNumSamples > 0 ){
200 
201  //Find the corresponding class ID for the last training example
202  UINT classLabel = data[ totalNumSamples-1 ].getClassLabel();
203 
204  //Remove the training example from the buffer
205  data.erase(data.end()-1);
206 
207  totalNumSamples = (UINT)data.size();
208 
209  //Remove the value from the counter
210  for(UINT i=0; i<classTracker.size(); i++){
211  if( classTracker[i].classLabel == classLabel ){
212  classTracker[i].counter--;
213  break;
214  }
215  }
216 
217  return true;
218 
219  }else return false;
220 
221 }
222 
223 bool TimeSeriesClassificationData::relabelAllSamplesWithClassLabel(const UINT oldClassLabel,const UINT newClassLabel){
224  bool oldClassLabelFound = false;
225  bool newClassLabelAllReadyExists = false;
226  UINT indexOfOldClassLabel = 0;
227  UINT indexOfNewClassLabel = 0;
228 
229  //Find out how many training examples we need to relabel
230  for(UINT i=0; i<classTracker.size(); i++){
231  if( classTracker[i].classLabel == oldClassLabel ){
232  indexOfOldClassLabel = i;
233  oldClassLabelFound = true;
234  }
235  if( classTracker[i].classLabel == newClassLabel ){
236  indexOfNewClassLabel = i;
237  newClassLabelAllReadyExists = true;
238  }
239  }
240 
241  //If the old class label was not found then we can't do anything
242  if( !oldClassLabelFound ){
243  return false;
244  }
245 
246  //Relabel the old class labels
247  for(UINT i=0; i<totalNumSamples; i++){
248  if( data[i].getClassLabel() == oldClassLabel ){
249  data[i].setTrainingSample(newClassLabel, data[i].getData());
250  }
251  }
252 
253  //Update the class label counters
254  if( newClassLabelAllReadyExists ){
255  //Add the old sample count to the new sample count
256  classTracker[ indexOfNewClassLabel ].counter += classTracker[ indexOfOldClassLabel ].counter;
257 
258  //Erase the old class tracker
259  classTracker.erase( classTracker.begin() + indexOfOldClassLabel );
260  }else{
261  //Create a new class tracker
262  classTracker.push_back( ClassTracker(newClassLabel,classTracker[ indexOfOldClassLabel ].counter,classTracker[ indexOfOldClassLabel ].className) );
263  }
264 
265  return true;
266 }
267 
268 bool TimeSeriesClassificationData::setExternalRanges(const vector< MinMax > &externalRanges,const bool useExternalRanges){
269 
270  if( externalRanges.size() != numDimensions ) return false;
271 
272  this->externalRanges = externalRanges;
273  this->useExternalRanges = useExternalRanges;
274 
275  return true;
276 }
277 
279  if( externalRanges.size() == numDimensions ){
280  this->useExternalRanges = useExternalRanges;
281  return true;
282  }
283  return false;
284 }
285 
286 bool TimeSeriesClassificationData::scale(const double minTarget,const double maxTarget){
287  vector< MinMax > ranges = getRanges();
288  return scale(ranges,minTarget,maxTarget);
289 }
290 
291 bool TimeSeriesClassificationData::scale(const vector<MinMax> &ranges,const double minTarget,const double maxTarget){
292  if( ranges.size() != numDimensions ) return false;
293 
294  //Scale the training data
295  for(UINT i=0; i<totalNumSamples; i++){
296  for(UINT x=0; x<data[i].getLength(); x++){
297  for(UINT j=0; j<numDimensions; j++){
298  data[i][x][j] = Util::scale(data[i][x][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
299  }
300  }
301  }
302 
303  return true;
304 }
305 
306 bool TimeSeriesClassificationData::save(const string &filename) const{
307 
308  //Check if the file should be saved as a csv file
309  if( Util::stringEndsWith( filename, ".csv" ) ){
310  return saveDatasetToCSVFile( filename );
311  }
312 
313  //Otherwise save it as a custom GRT file
314  return saveDatasetToFile( filename );
315 }
316 
317 bool TimeSeriesClassificationData::load(const string &filename){
318 
319  //Check if the file should be loaded as a csv file
320  if( Util::stringEndsWith( filename, ".csv" ) ){
321  return loadDatasetFromCSVFile( filename );
322  }
323 
324  //Otherwise save it as a custom GRT file
325  return loadDatasetFromFile( filename );
326 }
327 
328 bool TimeSeriesClassificationData::saveDatasetToFile(const string fileName) const{
329 
330  std::fstream file;
331  file.open(fileName.c_str(), std::ios::out);
332 
333  if( !file.is_open() ){
334  errorLog << "saveDatasetToFile(string fileName) - Failed to open file!" << endl;
335  return false;
336  }
337 
338  file << "GRT_LABELLED_TIME_SERIES_CLASSIFICATION_DATA_FILE_V1.0\n";
339  file << "DatasetName: " << datasetName << endl;
340  file << "InfoText: " << infoText << endl;
341  file << "NumDimensions: "<<numDimensions<<endl;
342  file << "TotalNumTrainingExamples: "<<totalNumSamples<<endl;
343  file << "NumberOfClasses: "<<classTracker.size()<<endl;
344  file << "ClassIDsAndCounters: "<<endl;
345 
346  for(UINT i=0; i<classTracker.size(); i++){
347  file << classTracker[i].classLabel << "\t" << classTracker[i].counter << endl;
348  }
349 
350  file << "UseExternalRanges: " << useExternalRanges << endl;
351 
352  if( useExternalRanges ){
353  for(UINT i=0; i<externalRanges.size(); i++){
354  file << externalRanges[i].minValue << "\t" << externalRanges[i].maxValue << endl;
355  }
356  }
357 
358  file << "LabelledTimeSeriesTrainingData:\n";
359 
360  for(UINT x=0; x<totalNumSamples; x++){
361  file << "************TIME_SERIES************\n";
362  file << "ClassID: "<<data[x].getClassLabel() <<endl;
363  file << "TimeSeriesLength: "<<data[x].getLength()<<endl;
364  file << "TimeSeriesData: \n";
365  for(UINT i=0; i<data[x].getLength(); i++){
366  for(UINT j=0; j<numDimensions; j++){
367  file << data[x][i][j];
368  if( j<numDimensions-1 ) file << "\t";
369  }file << endl;
370  }
371  }
372 
373  file.close();
374  return true;
375 }
376 
378 
379  std::fstream file;
380  file.open(filename.c_str(), std::ios::in);
381  UINT numClasses = 0;
382  clear();
383 
384  if( !file.is_open() ){
385  errorLog << "loadDatasetFromFile(string filename) - FILE NOT OPEN!" << endl;
386  return false;
387  }
388 
389  string word;
390 
391  //Check to make sure this is a file with the Training File Format
392  file >> word;
393  if(word != "GRT_LABELLED_TIME_SERIES_CLASSIFICATION_DATA_FILE_V1.0"){
394  file.close();
395  clear();
396  errorLog << "loadDatasetFromFile(string filename) - Failed to find file header!" << endl;
397  return false;
398  }
399 
400  //Get the name of the dataset
401  file >> word;
402  if(word != "DatasetName:"){
403  errorLog << "loadDatasetFromFile(string filename) - failed to find DatasetName!" << endl;
404  file.close();
405  return false;
406  }
407  file >> datasetName;
408 
409  file >> word;
410  if(word != "InfoText:"){
411  errorLog << "loadDatasetFromFile(string filename) - failed to find InfoText!" << endl;
412  file.close();
413  return false;
414  }
415 
416  //Load the info text
417  file >> word;
418  infoText = "";
419  while( word != "NumDimensions:" ){
420  infoText += word + " ";
421  file >> word;
422  }
423 
424  //Get the number of dimensions in the training data
425  if(word != "NumDimensions:"){
426  file.close();
427  clear();
428  errorLog << "loadDatasetFromFile(string filename) - Failed to find NumDimensions!" << endl;
429  return false;
430  }
431  file >> numDimensions;
432 
433  //Get the total number of training examples in the training data
434  file >> word;
435  if(word != "TotalNumTrainingExamples:"){
436  file.close();
437  clear();
438  errorLog << "loadDatasetFromFile(string filename) - Failed to find TotalNumTrainingExamples!" << endl;
439  return false;
440  }
441  file >> totalNumSamples;
442 
443  //Get the total number of classes in the training data
444  file >> word;
445  if(word != "NumberOfClasses:"){
446  file.close();
447  clear();
448  errorLog << "loadDatasetFromFile(string filename) - Failed to find NumberOfClasses!" << endl;
449  return false;
450  }
451  file >> numClasses;
452 
453  //Resize the class counter buffer and load the counters
454  classTracker.resize(numClasses);
455 
456  //Get the total number of classes in the training data
457  file >> word;
458  if(word != "ClassIDsAndCounters:"){
459  file.close();
460  clear();
461  errorLog << "loadDatasetFromFile(string filename) - Failed to find ClassIDsAndCounters!" << endl;
462  return false;
463  }
464 
465  for(UINT i=0; i<classTracker.size(); i++){
466  file >> classTracker[i].classLabel;
467  file >> classTracker[i].counter;
468  }
469 
470  //Get the UseExternalRanges
471  file >> word;
472  if(word != "UseExternalRanges:"){
473  file.close();
474  clear();
475  errorLog << "loadDatasetFromFile(string filename) - Failed to find UseExternalRanges!" << endl;
476  return false;
477  }
478 
479  file >> useExternalRanges;
480 
481  if( useExternalRanges ){
482  externalRanges.resize(numDimensions);
483  for(UINT i=0; i<externalRanges.size(); i++){
484  file >> externalRanges[i].minValue;
485  file >> externalRanges[i].maxValue;
486  }
487  }
488 
489  //Get the main training data
490  file >> word;
491  if(word != "LabelledTimeSeriesTrainingData:"){
492  file.close();
493  clear();
494  errorLog << "loadDatasetFromFile(string filename) - Failed to find LabelledTimeSeriesTrainingData!" << endl;
495  return false;
496  }
497 
498  //Reset the memory
499  data.resize( totalNumSamples, TimeSeriesClassificationSample() );
500 
501  //Load each of the time series
502  for(UINT x=0; x<totalNumSamples; x++){
503  UINT classLabel = 0;
504  UINT timeSeriesLength = 0;
505 
506  file >> word;
507  if( word != "************TIME_SERIES************" ){
508  file.close();
509  clear();
510  errorLog << "loadDatasetFromFile(string filename) - Failed to find TimeSeries Header!" << endl;
511  return false;
512  }
513 
514  file >> word;
515  if( word != "ClassID:" ){
516  file.close();
517  clear();
518  errorLog << "loadDatasetFromFile(string filename) - Failed to find ClassID!" << endl;
519  return false;
520  }
521  file >> classLabel;
522 
523  file >> word;
524  if( word != "TimeSeriesLength:" ){
525  file.close();
526  clear();
527  errorLog << "loadDatasetFromFile(string filename) - Failed to find TimeSeriesLength!" << endl;
528  return false;
529  }
530  file >> timeSeriesLength;
531 
532  file >> word;
533  if( word != "TimeSeriesData:" ){
534  file.close();
535  clear();
536  errorLog << "loadDatasetFromFile(string filename) - Failed to find TimeSeriesData!" << endl;
537  return false;
538  }
539 
540  //Load the time series data
541  MatrixDouble trainingExample(timeSeriesLength,numDimensions);
542  for(UINT i=0; i<timeSeriesLength; i++){
543  for(UINT j=0; j<numDimensions; j++){
544  file >> trainingExample[i][j];
545  }
546  }
547 
548  data[x].setTrainingSample(classLabel,trainingExample);
549  }
550 
551  file.close();
552  return true;
553 }
554 
555 bool TimeSeriesClassificationData::saveDatasetToCSVFile(const string &filename) const{
556 
557  std::fstream file;
558  file.open(filename.c_str(), std::ios::out );
559 
560  if( !file.is_open() ){
561  return false;
562  }
563 
564  //Write the data to the CSV file
565  for(UINT x=0; x<totalNumSamples; x++){
566  for(UINT i=0; i<data[x].getLength(); i++){
567  file << x+1 << ",";
568  file << data[x].getClassLabel() << ",";
569  for(UINT j=0; j<numDimensions; j++){
570  file << data[x][i][j];
571  if( j+1 < numDimensions ){
572  file << ",";
573  }
574  }
575  file << endl;
576  }
577  }
578 
579  file.close();
580 
581  return true;
582 }
583 
585 
586  numDimensions = 0;
587  datasetName = "NOT_SET";
588  infoText = "";
589 
590  //Clear any previous data
591  clear();
592 
593  //Parse the CSV file
594  FileParser parser;
595 
596  if( !parser.parseCSVFile(filename,true) ){
597  errorLog << "loadDatasetFromCSVFile(const string &filename) - Failed to parse CSV file!" << endl;
598  return false;
599  }
600 
601  if( !parser.getConsistentColumnSize() ){
602  errorLog << "loadDatasetFromCSVFile(const string &filename) - The CSV file does not have a consistent number of columns!" << endl;
603  return false;
604  }
605 
606  if( parser.getColumnSize() <= 2 ){
607  errorLog << "loadDatasetFromCSVFile(const string &filename) - The CSV file does not have enough columns! It should contain at least three columns!" << endl;
608  return false;
609  }
610 
611  //Set the number of dimensions
612  numDimensions = parser.getColumnSize()-2;
613 
614  //Reserve the memory for the data
615  data.reserve( parser.getRowSize() );
616 
617  UINT sampleCounter = 0;
618  UINT lastSampleCounter = 0;
619  UINT classLabel = 0;
620  UINT j = 0;
621  UINT n = 0;
622  VectorDouble sample(numDimensions);
623  MatrixDouble timeseries;
624  for(UINT i=0; i<parser.getRowSize(); i++){
625 
626  sampleCounter = Util::stringToInt( parser[i][0] );
627 
628  //Check to see if a new timeseries has started, if so then add the previous time series as a sample and start recording the new time series
629  if( sampleCounter != lastSampleCounter && i != 0 ){
630  //Add the labelled sample to the dataset
631  if( !addSample(classLabel, timeseries) ){
632  warningLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i << " to the dataset!" << endl;
633  }
634  timeseries.clear();
635  }
636  lastSampleCounter = sampleCounter;
637 
638  //Get the class label
639  classLabel = Util::stringToInt( parser[i][1] );
640 
641  //Get the sample data
642  j=0;
643  n=2;
644  while( j != numDimensions ){
645  sample[j++] = Util::stringToDouble( parser[i][n] );
646  n++;
647  }
648 
649  //Add the sample to the timeseries
650  timeseries.push_back( sample );
651  }
652 
653  return true;
654 }
655 
657 
658  cout << getStatsAsString();
659 
660  return true;
661 }
662 
664 
665  string stats;
666 
667  stats += "DatasetName:\t" + datasetName + "\n";
668  stats += "DatasetInfo:\t" + infoText + "\n";
669  stats += "Number of Dimensions:\t" + Util::toString(numDimensions) + "\n";
670  stats += "Number of Samples:\t" + Util::toString(totalNumSamples) + "\n";
671  stats += "Number of Classes:\t" + Util::toString(getNumClasses()) + "\n";
672  stats += "ClassStats:\n";
673 
674  for(UINT k=0; k<getNumClasses(); k++){
675  stats += "ClassLabel:\t" + Util::toString(classTracker[k].classLabel);
676  stats += "\tNumber of Samples:\t" + Util::toString( classTracker[k].counter );
677  stats +="\tClassName:\t" + classTracker[k].className + "\n";
678  }
679 
680  vector< MinMax > ranges = getRanges();
681 
682  stats += "Dataset Ranges:\n";
683  for(UINT j=0; j<ranges.size(); j++){
684  stats += "[" + Util::toString( j+1 ) + "] Min:\t" + Util::toString( ranges[j].minValue ) + "\tMax: " + Util::toString( ranges[j].maxValue ) + "\n";
685  }
686 
687  stats += "Timeseries Lengths:\n";
688  UINT M = (UINT)data.size();
689  for(UINT j=0; j<M; j++){
690  stats += "ClassLabel: " + Util::toString( data[j].getClassLabel() ) + " Length:\t" + Util::toString( data[j].getLength() ) + "\n";
691  }
692 
693  return stats;
694 }
695 
696 TimeSeriesClassificationData TimeSeriesClassificationData::partition(const UINT trainingSizePercentage,const bool useStratifiedSampling){
697 
698  //Partitions the dataset into a training dataset (which is kept by this instance of the TimeSeriesClassificationData) and
699  //a testing/validation dataset (which is return as a new instance of the TimeSeriesClassificationData). The trainingSizePercentage
700  //therefore sets the size of the data which remains in this instance and the remaining percentage of data is then added to
701  //the testing/validation dataset
702 
703  //The dataset has changed so flag that any previous cross validation setup will now not work
704  crossValidationSetup = false;
705  crossValidationIndexs.clear();
706 
711  vector< UINT > indexs( totalNumSamples );
712 
713  //Create the random partion indexs
714  Random random;
715  UINT randomIndex = 0;
716 
717  if( useStratifiedSampling ){
718  //Break the data into seperate classes
719  vector< vector< UINT > > classData( getNumClasses() );
720 
721  //Add the indexs to their respective classes
722  for(UINT i=0; i<totalNumSamples; i++){
723  classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
724  }
725 
726  //Randomize the order of the indexs in each of the class index buffers
727  for(UINT k=0; k<getNumClasses(); k++){
728  UINT numSamples = (UINT)classData[k].size();
729  for(UINT x=0; x<numSamples; x++){
730  //Pick a random index
731  randomIndex = random.getRandomNumberInt(0,numSamples);
732 
733  //Swap the indexs
734  SWAP( classData[k][ x ] ,classData[k][ randomIndex ] );
735  }
736  }
737 
738  //Loop over each class and add the data to the trainingSet and testSet
739  for(UINT k=0; k<getNumClasses(); k++){
740  UINT numTrainingExamples = (UINT) floor( double(classData[k].size()) / 100.0 * double(trainingSizePercentage) );
741 
742  //Add the data to the training and test sets
743  for(UINT i=0; i<numTrainingExamples; i++){
744  trainingSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getData() );
745  }
746  for(UINT i=numTrainingExamples; i<classData[k].size(); i++){
747  testSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getData() );
748  }
749  }
750 
751  //Overwrite the training data in this instance with the training data of the trainingSet
752  data = trainingSet.getClassificationData();
753  totalNumSamples = trainingSet.getNumSamples();
754  }else{
755 
756  const UINT numTrainingExamples = (UINT) floor( double(totalNumSamples) / 100.0 * double(trainingSizePercentage) );
757  //Create the random partion indexs
758  Random random;
759  for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
760  for(UINT x=0; x<totalNumSamples; x++){
761  //Pick a random index
762  randomIndex = random.getRandomNumberInt(0,totalNumSamples);
763 
764  //Swap the indexs
765  SWAP( indexs[ x ] , indexs[ randomIndex ] );
766  }
767 
768  //Add the data to the training and test sets
769  for(UINT i=0; i<numTrainingExamples; i++){
770  trainingSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getData() );
771  }
772  for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
773  testSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getData() );
774  }
775 
776  //Overwrite the training data in this instance with the training data of the trainingSet
777  data = trainingSet.getClassificationData();
778  totalNumSamples = trainingSet.getNumSamples();
779  }
780 
781  return testSet;
782 }
783 
785 
786  if( labelledData.getNumDimensions() != numDimensions ){
787  errorLog << "merge(TimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl;
788  return false;
789  }
790 
791  //The dataset has changed so flag that any previous cross validation setup will now not work
792  crossValidationSetup = false;
793  crossValidationIndexs.clear();
794 
795  //Add the data from the labelledData to this instance
796  for(UINT i=0; i<labelledData.getNumSamples(); i++){
797  addSample(labelledData[i].getClassLabel(), labelledData[i].getData());
798  }
799 
800  //Set the class names from the dataset
801  vector< ClassTracker > classTracker = labelledData.getClassTracker();
802  for(UINT i=0; i<classTracker.size(); i++){
803  setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
804  }
805 
806  return true;
807 }
808 
809 bool TimeSeriesClassificationData::spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling){
810 
811  crossValidationSetup = false;
812  crossValidationIndexs.clear();
813 
814  //K can not be zero
815  if( K > totalNumSamples ){
816  errorLog << "spiltDataIntoKFolds(UINT K) - K can not be zero!" << endl;
817  return false;
818  }
819 
820  //K can not be larger than the number of examples
821  if( K > totalNumSamples ){
822  errorLog << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << endl;
823  return false;
824  }
825 
826  //K can not be larger than the number of examples in a specific class if the stratified sampling option is true
827  if( useStratifiedSampling ){
828  for(UINT c=0; c<classTracker.size(); c++){
829  if( K > classTracker[c].counter ){
830  errorLog << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << endl;
831  return false;
832  }
833  }
834  }
835 
836  //Setup the dataset for k-fold cross validation
837  kFoldValue = K;
838  vector< UINT > indexs( totalNumSamples );
839 
840  //Work out how many samples are in each fold, the last fold might have more samples than the others
841  UINT numSamplesPerFold = (UINT) floor( totalNumSamples/double(K) );
842 
843  //Resize the cross validation indexs buffer
844  crossValidationIndexs.resize( K );
845 
846  //Create the random partion indexs
847  Random random;
848  UINT randomIndex = 0;
849 
850  if( useStratifiedSampling ){
851  //Break the data into seperate classes
852  vector< vector< UINT > > classData( getNumClasses() );
853 
854  //Add the indexs to their respective classes
855  for(UINT i=0; i<totalNumSamples; i++){
856  classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
857  }
858 
859  //Randomize the order of the indexs in each of the class index buffers
860  for(UINT c=0; c<getNumClasses(); c++){
861  UINT numSamples = (UINT)classData[c].size();
862  for(UINT x=0; x<numSamples; x++){
863  //Pick a random index
864  randomIndex = random.getRandomNumberInt(0,numSamples);
865 
866  //Swap the indexs
867  SWAP( classData[c][ x ] , classData[c][ randomIndex ] );
868  }
869  }
870 
871  //Loop over each of the classes and add the data equally to each of the k folds until there is no data left
872  vector< UINT >::iterator iter;
873  for(UINT c=0; c<getNumClasses(); c++){
874  iter = classData[ c ].begin();
875  UINT k = 0;
876  while( iter != classData[c].end() ){
877  crossValidationIndexs[ k ].push_back( *iter );
878  iter++;
879  k++;
880  k = k % K;
881  }
882  }
883 
884  }else{
885  //Randomize the order of the data
886  for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
887  for(UINT x=0; x<totalNumSamples; x++){
888  //Pick a random index
889  randomIndex = random.getRandomNumberInt(0,totalNumSamples);
890 
891  //Swap the indexs
892  SWAP( indexs[ x ] , indexs[ randomIndex ] );
893  }
894 
895  UINT counter = 0;
896  UINT foldIndex = 0;
897  for(UINT i=0; i<totalNumSamples; i++){
898  //Add the index to the current fold
899  crossValidationIndexs[ foldIndex ].push_back( indexs[i] );
900 
901  //Move to the next fold if ready
902  if( ++counter == numSamplesPerFold && foldIndex < K-1 ){
903  foldIndex++;
904  counter = 0;
905  }
906  }
907  }
908 
909  crossValidationSetup = true;
910  return true;
911 
912 }
913 
915 
916  TimeSeriesClassificationData trainingData;
917 
918  if( !crossValidationSetup ){
919  errorLog << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
920  return trainingData;
921  }
922 
923  if( foldIndex >= kFoldValue ) return trainingData;
924 
925  trainingData.setNumDimensions( numDimensions );
926 
927  //Add the data to the training set, this will consist of all the data that is NOT in the foldIndex
928  UINT index = 0;
929  for(UINT k=0; k<kFoldValue; k++){
930  if( k != foldIndex ){
931  for(UINT i=0; i<crossValidationIndexs[k].size(); i++){
932 
933  index = crossValidationIndexs[k][i];
934  trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getData() );
935  }
936  }
937  }
938 
939  return trainingData;
940 }
941 
944 
945  if( !crossValidationSetup ) return testData;
946 
947  if( foldIndex >= kFoldValue ) return testData;
948 
949  //Add the data to the training
950  testData.setNumDimensions( numDimensions );
951 
952  UINT index = 0;
953  for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){
954 
955  index = crossValidationIndexs[ foldIndex ][i];
956  testData.addSample( data[ index ].getClassLabel(), data[ index ].getData() );
957  }
958 
959  return testData;
960 }
961 
964  for(UINT x=0; x<totalNumSamples; x++){
965  if( data[x].getClassLabel() == classLabel ){
966  classData.addSample( classLabel, data[x].getData() );
967  }
968  }
969  return classData;
970 }
971 
973 
974  UnlabelledData unlabelledData;
975 
976  if( totalNumSamples == 0 ){
977  return unlabelledData;
978  }
979 
980  unlabelledData.setNumDimensions( numDimensions );
981 
982  for(UINT i=0; i<totalNumSamples; i++){
983  for(UINT x=0; x<data[i].getLength(); x++){
984  unlabelledData.addSample( data[i].getData().getRowVector( x ) );
985  }
986  }
987 
988  return unlabelledData;
989 }
990 
992  UINT minClassLabel = 99999;
993 
994  for(UINT i=0; i<classTracker.size(); i++){
995  if( classTracker[i].classLabel < minClassLabel ){
996  minClassLabel = classTracker[i].classLabel;
997  }
998  }
999 
1000  return minClassLabel;
1001 }
1002 
1003 
1005  UINT maxClassLabel = 0;
1006 
1007  for(UINT i=0; i<classTracker.size(); i++){
1008  if( classTracker[i].classLabel > maxClassLabel ){
1009  maxClassLabel = classTracker[i].classLabel;
1010  }
1011  }
1012 
1013  return maxClassLabel;
1014 }
1015 
1017  for(UINT k=0; k<classTracker.size(); k++){
1018  if( classTracker[k].classLabel == classLabel ){
1019  return k;
1020  }
1021  }
1022  warningLog << "getClassLabelIndexValue(UINT classLabel) - Failed to find class label: " << classLabel << " in class tracker!" << endl;
1023  return 0;
1024 }
1025 
1027 
1028  for(UINT i=0; i<classTracker.size(); i++){
1029  if( classTracker[i].classLabel == classLabel ){
1030  return classTracker[i].className;
1031  }
1032  }
1033  return "CLASS_LABEL_NOT_FOUND";
1034 }
1035 
1037 
1038  if( useExternalRanges ) return externalRanges;
1039 
1040  vector<MinMax> ranges(numDimensions);
1041 
1042  if( totalNumSamples > 0 ){
1043  for(UINT j=0; j<numDimensions; j++){
1044  ranges[j].minValue = data[0][0][0];
1045  ranges[j].maxValue = data[0][0][0];
1046  for(UINT x=0; x<totalNumSamples; x++){
1047  for(UINT i=0; i<data[x].getLength(); i++){
1048  if( data[x][i][j] < ranges[j].minValue ){ ranges[j].minValue = data[x][i][j]; } //Search for the min value
1049  else if( data[x][i][j] > ranges[j].maxValue ){ ranges[j].maxValue = data[x][i][j]; } //Search for the max value
1050  }
1051  }
1052  }
1053  }
1054  return ranges;
1055 }
1056 
1058 
1059  //Count how many samples are in the entire dataset
1060  UINT M = 0;
1061  UINT index = 0;
1062  for(UINT x=0; x<totalNumSamples; x++){
1063  M += data[x].getLength();
1064  }
1065 
1066  if( M == 0 ) MatrixDouble();
1067 
1068  //Get all the data and concatenate it into 1 matrix
1069  MatrixDouble matrixData(M,numDimensions);
1070  for(UINT x=0; x<totalNumSamples; x++){
1071  for(UINT i=0; i<data[x].getLength(); i++){
1072  for(UINT j=0; j<numDimensions; j++){
1073  matrixData[index][j] = data[x][i][j];
1074  }
1075  index++;
1076  }
1077  }
1078  return matrixData;
1079 }
1080 
1081 } //End of namespace GRT
static std::string toString(const int &i)
Definition: Util.cpp:65
bool enableExternalRangeScaling(const bool useExternalRanges)
bool save(const string &filename) const
vector< ClassTracker > classTracker
A vector of ClassTracker, which keeps track of the number of samples of each class.
Definition: AdaBoost.cpp:25
bool push_back(const std::vector< T > &sample)
Definition: Matrix.h:390
static double scale(const double &x, const double &minSource, const double &maxSource, const double &minTarget, const double &maxTarget, const bool constrain=false)
Definition: Util.cpp:44
vector< TimeSeriesClassificationSample > getClassificationData() const
unsigned int getNumCols() const
Definition: Matrix.h:538
bool addSample(const VectorDouble &sample)
WarningLog warningLog
Default warning log.
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
TimeSeriesClassificationData partition(const UINT partitionPercentage, const bool useStratifiedSampling=false)
bool setNumDimensions(const UINT numDimensions)
bool addSample(const UINT classLabel, const MatrixDouble &trainingSample)
string infoText
Some infoText about the dataset.
string datasetName
The name of the dataset.
UINT getClassLabelIndexValue(const UINT classLabel) const
static double stringToDouble(const std::string &s)
Definition: Util.cpp:124
UINT totalNumSamples
The total number of samples in the dataset.
TimeSeriesClassificationData(UINT numDimensions=0, string datasetName="NOT_SET", string infoText="")
vector< TimeSeriesClassificationSample > data
The labelled time series classification data.
bool loadDatasetFromCSVFile(const string &filename)
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
The TimeSeriesClassificationData is the main data structure for recording, labeling, managing, saving, and loading training data for supervised temporal learning problems. Unlike the ClassificationData, in which each sample consists of one N-dimensional datum, a TimeSeriesClassificationData sample will consist of an N-dimensional time series of length M. The length of each time series sample (i.e. M) can be different for each datum in the dataset.
bool saveDatasetToFile(const string filename) const
bool saveDatasetToCSVFile(const string &filename) const
UINT kFoldValue
The number of folds the dataset has been split into for cross validation.
bool crossValidationSetup
A flag to show if the dataset is ready for cross validation.
bool setClassNameForCorrespondingClassLabel(const string className, const UINT classLabel)
bool allowNullGestureClass
A flag that enables/disables a user from adding new samples with a class label matching the default n...
bool scale(const double minTarget, const double maxTarget)
string getClassNameForCorrespondingClassLabel(const UINT classLabel) const
vector< ClassTracker > getClassTracker() const
int getRandomNumberInt(int minRange, int maxRange)
Definition: Random.h:87
void clear()
Definition: Matrix.h:511
TimeSeriesClassificationData & operator=(const TimeSeriesClassificationData &rhs)
DebugLog debugLog
Default debugging log.
bool merge(const TimeSeriesClassificationData &labelledData)
TimeSeriesClassificationData getClassData(const UINT classLabel) const
bool useExternalRanges
A flag to show if the dataset should be scaled using the externalRanges values.
static int stringToInt(const std::string &s)
Definition: Util.cpp:117
static bool stringEndsWith(const std::string &str, const std::string &ending)
Definition: Util.cpp:141
UINT numDimensions
The number of dimensions in the dataset.
bool setDatasetName(const string datasetName)
bool setExternalRanges(const vector< MinMax > &externalRanges, const bool useExternalRanges=false)
vector< MinMax > externalRanges
A vector containing a set of externalRanges set by the user.
TimeSeriesClassificationData getTrainingFoldData(const UINT foldIndex) const
vector< vector< UINT > > crossValidationIndexs
A vector to hold the indices of the dataset for the cross validation.
bool spiltDataIntoKFolds(const UINT K, const bool useStratifiedSampling=false)
bool setNumDimensions(const UINT numDimensions)
bool setAllowNullGestureClass(const bool allowNullGestureClass)
TimeSeriesClassificationData getTestFoldData(const UINT foldIndex) const