26 this->datasetName = datasetName;
27 this->numDimensions = numDimensions;
28 this->infoText = infoText;
30 crossValidationSetup =
false;
31 useExternalRanges =
false;
32 allowNullGestureClass =
true;
34 infoLog.setProceedingText(
"[ClassificationData]");
35 debugLog.setProceedingText(
"[DEBUG ClassificationData]");
36 errorLog.setProceedingText(
"[ERROR ClassificationData]");
37 warningLog.setProceedingText(
"[WARNING ClassificationData]");
49 this->datasetName = rhs.datasetName;
50 this->infoText = rhs.infoText;
51 this->numDimensions = rhs.numDimensions;
52 this->totalNumSamples = rhs.totalNumSamples;
53 this->kFoldValue = rhs.kFoldValue;
54 this->crossValidationSetup = rhs.crossValidationSetup;
55 this->useExternalRanges = rhs.useExternalRanges;
56 this->allowNullGestureClass = rhs.allowNullGestureClass;
57 this->externalRanges = rhs.externalRanges;
58 this->classTracker = rhs.classTracker;
59 this->data = rhs.data;
60 this->crossValidationIndexs = rhs.crossValidationIndexs;
61 this->infoLog = rhs.infoLog;
62 this->debugLog = rhs.debugLog;
63 this->errorLog = rhs.errorLog;
64 this->warningLog = rhs.warningLog;
73 crossValidationSetup =
false;
74 crossValidationIndexs.clear();
79 if( numDimensions > 0 ){
84 this->numDimensions = numDimensions;
87 useExternalRanges =
false;
88 externalRanges.clear();
93 errorLog <<
"setNumDimensions(const UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << endl;
100 if( datasetName.find(
" ") == string::npos ){
101 this->datasetName = datasetName;
105 errorLog <<
"setDatasetName(const string datasetName) - The dataset name cannot contain any spaces!" << endl;
110 this->infoText = infoText;
116 for(UINT i=0; i<classTracker.size(); i++){
117 if( classTracker[i].classLabel == classLabel ){
118 classTracker[i].className = className;
123 errorLog <<
"setClassNameForCorrespondingClassLabel(const string className,const UINT classLabel) - Failed to find class with label: " << classLabel << endl;
128 this->allowNullGestureClass = allowNullGestureClass;
134 if( sample.size() != numDimensions ){
135 errorLog <<
"addSample(const UINT classLabel, VectorDouble &sample) - the size of the new sample (" << sample.size() <<
") does not match the number of dimensions of the dataset (" << numDimensions <<
")" << endl;
140 if( classLabel == GRT_DEFAULT_NULL_CLASS_LABEL && !allowNullGestureClass ){
141 errorLog <<
"addSample(const UINT classLabel, VectorDouble &sample) - the class label can not be 0!" << endl;
146 crossValidationSetup =
false;
147 crossValidationIndexs.clear();
150 data.push_back( newSample );
153 if( classTracker.size() == 0 ){
155 classTracker.push_back(tracker);
157 bool labelFound =
false;
158 for(UINT i=0; i<classTracker.size(); i++){
159 if( classLabel == classTracker[i].classLabel ){
160 classTracker[i].counter++;
167 classTracker.push_back(tracker);
179 if( totalNumSamples == 0 ){
180 warningLog <<
"removeSample( const UINT index ) - Failed to remove sample, the training dataset is empty!" << endl;
184 if( index >= totalNumSamples ){
185 warningLog <<
"removeSample( const UINT index ) - Failed to remove sample, the index is out of bounds! Number of training samples: " << totalNumSamples <<
" index: " << index << endl;
190 crossValidationSetup =
false;
191 crossValidationIndexs.clear();
194 UINT classLabel = data[ index ].getClassLabel();
197 data.erase( data.begin()+index );
199 totalNumSamples = (UINT)data.size();
202 for(
size_t i=0; i<classTracker.size(); i++){
203 if( classTracker[i].classLabel == classLabel ){
204 classTracker[i].counter--;
214 if( totalNumSamples == 0 ){
215 warningLog <<
"removeLastSample() - Failed to remove sample, the training dataset is empty!" << endl;
226 if( data.capacity() >= N )
return true;
238 for(
size_t i=0; i<classTracker.size(); i++){
239 if( classTracker[i].classLabel == classLabel ){
240 warningLog <<
"addClass(const UINT classLabel,const std::string className) - Failed to add class, it already exists! Class label: " << classLabel << endl;
246 classTracker.push_back(
ClassTracker(classLabel,0,className) );
256 UINT numExamplesRemoved = 0;
257 UINT numExamplesToRemove = 0;
260 crossValidationSetup =
false;
261 crossValidationIndexs.clear();
264 for(UINT i=0; i<classTracker.size(); i++){
265 if( classTracker[i].classLabel == classLabel ){
266 numExamplesToRemove = classTracker[i].counter;
267 classTracker.erase(classTracker.begin()+i);
273 if( numExamplesToRemove > 0 ){
275 while( numExamplesRemoved < numExamplesToRemove ){
276 if( data[i].getClassLabel() == classLabel ){
277 data.erase(data.begin()+i);
278 numExamplesRemoved++;
279 }
else if( ++i == data.size() )
break;
283 totalNumSamples = (UINT)data.size();
285 return numExamplesRemoved;
289 bool oldClassLabelFound =
false;
290 bool newClassLabelAllReadyExists =
false;
291 UINT indexOfOldClassLabel = 0;
292 UINT indexOfNewClassLabel = 0;
295 for(UINT i=0; i<classTracker.size(); i++){
296 if( classTracker[i].classLabel == oldClassLabel ){
297 indexOfOldClassLabel = i;
298 oldClassLabelFound =
true;
300 if( classTracker[i].classLabel == newClassLabel ){
301 indexOfNewClassLabel = i;
302 newClassLabelAllReadyExists =
true;
307 if( !oldClassLabelFound ){
312 for(UINT i=0; i<totalNumSamples; i++){
313 if( data[i].getClassLabel() == oldClassLabel ){
314 data[i].setClassLabel(newClassLabel);
319 if( newClassLabelAllReadyExists ){
321 classTracker[ indexOfNewClassLabel ].counter += classTracker[ indexOfOldClassLabel ].counter;
324 classTracker.push_back(
ClassTracker(newClassLabel,classTracker[ indexOfOldClassLabel ].counter,classTracker[ indexOfOldClassLabel ].className) );
328 classTracker.erase( classTracker.begin() + indexOfOldClassLabel );
338 if( externalRanges.size() != numDimensions )
return false;
340 this->externalRanges = externalRanges;
341 this->useExternalRanges = useExternalRanges;
347 if( externalRanges.size() == numDimensions ){
348 this->useExternalRanges = useExternalRanges;
356 return scale(ranges,minTarget,maxTarget);
360 if( ranges.size() != numDimensions )
return false;
363 for(UINT i=0; i<totalNumSamples; i++){
364 for(UINT j=0; j<numDimensions; j++){
365 data[i][j] =
Util::scale(data[i][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
397 file.open(filename.c_str(), std::ios::out);
399 if( !file.is_open() ){
403 file <<
"GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0\n";
404 file <<
"DatasetName: " << datasetName << endl;
405 file <<
"InfoText: " << infoText << endl;
406 file <<
"NumDimensions: " << numDimensions << endl;
407 file <<
"TotalNumExamples: " << totalNumSamples << endl;
408 file <<
"NumberOfClasses: " << classTracker.size() << endl;
409 file <<
"ClassIDsAndCounters: " << endl;
411 for(UINT i=0; i<classTracker.size(); i++){
412 file << classTracker[i].classLabel <<
"\t" << classTracker[i].counter <<
"\t" << classTracker[i].className << endl;
415 file <<
"UseExternalRanges: " << useExternalRanges << endl;
417 if( useExternalRanges ){
418 for(UINT i=0; i<externalRanges.size(); i++){
419 file << externalRanges[i].minValue <<
"\t" << externalRanges[i].maxValue << endl;
425 for(UINT i=0; i<totalNumSamples; i++){
426 file << data[i].getClassLabel();
427 for(UINT j=0; j<numDimensions; j++){
428 file <<
"\t" << data[i][j];
440 file.open(filename.c_str(), std::ios::in);
444 if( !file.is_open() ){
445 errorLog <<
"loadDatasetFromFile(const string &filename) - could not open file!" << endl;
453 if(word !=
"GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0"){
454 errorLog <<
"loadDatasetFromFile(const string &filename) - could not find file header!" << endl;
461 if(word !=
"DatasetName:"){
462 errorLog <<
"loadDatasetFromFile(const string &filename) - failed to find DatasetName header!" << endl;
463 errorLog << word << endl;
470 if(word !=
"InfoText:"){
471 errorLog <<
"loadDatasetFromFile(const string &filename) - failed to find InfoText header!" << endl;
479 while( word !=
"NumDimensions:" ){
480 infoText += word +
" ";
485 if( word !=
"NumDimensions:" ){
486 errorLog <<
"loadDatasetFromFile(const string &filename) - failed to find NumDimensions header!" << endl;
490 file >> numDimensions;
494 if( word !=
"TotalNumTrainingExamples:" && word !=
"TotalNumExamples:" ){
495 errorLog <<
"loadDatasetFromFile(const string &filename) - failed to find TotalNumTrainingExamples header!" << endl;
499 file >> totalNumSamples;
503 if(word !=
"NumberOfClasses:"){
504 errorLog <<
"loadDatasetFromFile(string filename) - failed to find NumberOfClasses header!" << endl;
511 classTracker.resize(numClasses);
515 if(word !=
"ClassIDsAndCounters:"){
516 errorLog <<
"loadDatasetFromFile(const string &filename) - failed to find ClassIDsAndCounters header!" << endl;
521 for(UINT i=0; i<classTracker.size(); i++){
522 file >> classTracker[i].classLabel;
523 file >> classTracker[i].counter;
524 file >> classTracker[i].className;
529 if(word !=
"UseExternalRanges:"){
530 errorLog <<
"loadDatasetFromFile(const string &filename) - failed to find UseExternalRanges header!" << endl;
534 file >> useExternalRanges;
537 if( useExternalRanges ){
538 externalRanges.resize(numDimensions);
539 for(UINT i=0; i<externalRanges.size(); i++){
540 file >> externalRanges[i].minValue;
541 file >> externalRanges[i].maxValue;
547 if( word !=
"LabelledTrainingData:" && word !=
"Data:"){
548 errorLog <<
"loadDatasetFromFile(const string &filename) - failed to find LabelledTrainingData header!" << endl;
554 data.resize( totalNumSamples, tempSample );
556 for(UINT i=0; i<totalNumSamples; i++){
558 VectorDouble sample(numDimensions,0);
560 for(UINT j=0; j<numDimensions; j++){
563 data[i].set(classLabel, sample);
577 file.open(filename.c_str(), std::ios::out );
579 if( !file.is_open() ){
584 for(UINT i=0; i<totalNumSamples; i++){
585 file << data[i].getClassLabel();
586 for(UINT j=0; j<numDimensions; j++){
587 file <<
"," << data[i][j];
600 datasetName =
"NOT_SET";
609 if( !parser.parseCSVFile(filename,
true) ){
610 errorLog <<
"loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << endl;
614 if( !parser.getConsistentColumnSize() ){
615 errorLog <<
"loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << endl;
619 if( parser.getColumnSize() <= 1 ){
620 errorLog <<
"loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << endl;
625 numDimensions = parser.getColumnSize()-1;
628 reserve( parser.getRowSize() );
633 VectorDouble sample(numDimensions);
634 for(UINT i=0; i<parser.getRowSize(); i++){
641 while( j != numDimensions ){
642 if( n != classLabelColumnIndex ){
650 warningLog <<
"loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i <<
" to the dataset!" << endl;
669 sort(classTracker.begin(),classTracker.end(),ClassTracker::sortByClassLabelAscending);
682 crossValidationSetup =
false;
683 crossValidationIndexs.clear();
689 vector< UINT > indexs( totalNumSamples );
693 UINT randomIndex = 0;
695 if( useStratifiedSampling ){
700 for(UINT i=0; i<totalNumSamples; i++){
706 UINT numSamples = (UINT)classData[k].size();
707 for(UINT x=0; x<numSamples; x++){
712 SWAP(classData[k][ x ], classData[k][ randomIndex ]);
717 UINT numTrainingSamples = 0;
718 UINT numTestSamples = 0;
721 UINT numTrainingExamples = (UINT) floor(
double(classData[k].size()) / 100.0 *
double(trainingSizePercentage) );
722 UINT numTestExamples = ((UINT)classData[k].size())-numTrainingExamples;
723 numTrainingSamples += numTrainingExamples;
724 numTestSamples += numTestExamples;
727 trainingSet.
reserve( numTrainingSamples );
728 testSet.
reserve( numTestSamples );
732 UINT numTrainingExamples = (UINT) floor(
double(classData[k].size()) / 100.0 *
double(trainingSizePercentage) );
735 for(UINT i=0; i<numTrainingExamples; i++){
736 trainingSet.
addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
738 for(UINT i=numTrainingExamples; i<classData[k].size(); i++){
739 testSet.
addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
744 const UINT numTrainingExamples = (UINT) floor(
double(totalNumSamples) / 100.0 * double(trainingSizePercentage) );
747 UINT randomIndex = 0;
748 for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
749 for(UINT x=0; x<totalNumSamples; x++){
754 SWAP(indexs[ x ],indexs[ randomIndex ]);
758 trainingSet.
reserve( numTrainingExamples );
759 testSet.
reserve( totalNumSamples-numTrainingExamples );
762 for(UINT i=0; i<numTrainingExamples; i++){
763 trainingSet.
addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
765 for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
766 testSet.
addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
785 errorLog <<
"merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.
getNumDimensions() <<
") does not match the number of dimensions of this dataset (" << numDimensions <<
")" << endl;
790 crossValidationSetup =
false;
791 crossValidationIndexs.clear();
798 addSample(labelledData[i].getClassLabel(), labelledData[i].getSample());
803 for(UINT i=0; i<classTracker.size(); i++){
815 crossValidationSetup =
false;
816 crossValidationIndexs.clear();
// Reject K == 0 explicitly: the fold size below is computed as totalNumSamples/double(K).
// The "K larger than the dataset" case is handled by the check that follows.
819 if( K == 0 ){
820 errorLog << "spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be zero!" << endl;
825 if( K > totalNumSamples ){
826 errorLog <<
"spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << endl;
831 if( useStratifiedSampling ){
832 for(UINT c=0; c<classTracker.size(); c++){
833 if( K > classTracker[c].counter ){
834 errorLog <<
"spiltDataIntoKFolds(const UINT K,const bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << endl;
842 vector< UINT > indexs( totalNumSamples );
845 UINT numSamplesPerFold = (UINT) floor( totalNumSamples/
double(K) );
848 crossValidationIndexs.resize(K);
852 UINT randomIndex = 0;
854 if( useStratifiedSampling ){
859 for(UINT i=0; i<totalNumSamples; i++){
865 UINT numSamples = (UINT)classData[c].size();
866 for(UINT x=0; x<numSamples; x++){
871 SWAP(classData[c][ x ] , classData[c][ randomIndex ]);
876 vector< UINT >::iterator iter;
878 iter = classData[ c ].begin();
880 while( iter != classData[c].end() ){
881 crossValidationIndexs[ k ].push_back( *iter );
890 for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
891 for(UINT x=0; x<totalNumSamples; x++){
896 SWAP(indexs[ x ] , indexs[ randomIndex ]);
901 for(UINT i=0; i<totalNumSamples; i++){
903 crossValidationIndexs[ foldIndex ].push_back( indexs[i] );
906 if( ++counter == numSamplesPerFold && foldIndex < K-1 ){
913 crossValidationSetup =
true;
924 if( !crossValidationSetup ){
925 errorLog <<
"getTrainingFoldData(const UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
929 if( foldIndex >= kFoldValue )
return trainingData;
933 trainingData.
addClass( classTracker[k].classLabel, classTracker[k].className );
938 for(UINT k=0; k<kFoldValue; k++){
939 if( k != foldIndex ){
940 for(UINT i=0; i<crossValidationIndexs[k].size(); i++){
942 index = crossValidationIndexs[k][i];
943 trainingData.
addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
960 if( !crossValidationSetup )
return testData;
962 if( foldIndex >= kFoldValue )
return testData;
966 testData.
addClass( classTracker[k].classLabel, classTracker[k].className );
969 testData.
reserve( (UINT)crossValidationIndexs[ foldIndex ].size() );
973 for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){
975 index = crossValidationIndexs[ foldIndex ][i];
976 testData.
addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
992 for(UINT i=0; i<classTracker.size(); i++){
993 if( classTracker[i].classLabel == classLabel ){
994 classData.
reserve( classTracker[i].counter );
999 for(UINT i=0; i<totalNumSamples; i++){
1000 if( data[i].getClassLabel() == classLabel ){
1001 classData.
addSample(classLabel, data[i].getSample());
1016 if( numSamples == 0 ) numSamples = totalNumSamples;
1018 newDataset.
reserve( numSamples );
1022 newDataset.
addClass( classTracker[k].classLabel );
1027 for(UINT i=0; i<numSamples; i++){
1029 newDataset.
addSample(data[randomIndex].getClassLabel(), data[randomIndex].getSample());
1046 if( totalNumSamples == 0 ){
1047 return regressionData;
1050 const UINT numInputDimensions = numDimensions;
1054 for(UINT i=0; i<totalNumSamples; i++){
1055 VectorDouble targetVector(numTargetDimensions,0);
1058 UINT classLabel = data[i].getClassLabel();
1060 if( classLabel > 0 ){
1061 targetVector[ classLabel-1 ] = 1;
1063 regressionData.
clear();
1064 return regressionData;
1067 regressionData.
addSample(data[i].getSample(),targetVector);
1070 return regressionData;
1077 if( totalNumSamples == 0 ){
1078 return unlabelledData;
1083 for(UINT i=0; i<totalNumSamples; i++){
1084 unlabelledData.
addSample( data[i].getSample() );
1087 return unlabelledData;
1091 UINT minClassLabel = numeric_limits< UINT >::max();
1093 for(UINT i=0; i<classTracker.size(); i++){
1094 if( classTracker[i].classLabel < minClassLabel ){
1095 minClassLabel = classTracker[i].classLabel;
1099 return minClassLabel;
1104 UINT maxClassLabel = 0;
1106 for(UINT i=0; i<classTracker.size(); i++){
1107 if( classTracker[i].classLabel > maxClassLabel ){
1108 maxClassLabel = classTracker[i].classLabel;
1112 return maxClassLabel;
1116 for(UINT k=0; k<classTracker.size(); k++){
1117 if( classTracker[k].classLabel == classLabel ){
1121 warningLog <<
"getClassLabelIndexValue(UINT classLabel) - Failed to find class label: " << classLabel <<
" in class tracker!" << endl;
1127 for(UINT i=0; i<classTracker.size(); i++){
1128 if( classTracker[i].classLabel == classLabel ){
1129 return classTracker[i].className;
1133 return "CLASS_LABEL_NOT_FOUND";
1138 statsText +=
"DatasetName:\t" + datasetName +
"\n";
1139 statsText +=
"DatasetInfo:\t" + infoText +
"\n";
1140 statsText +=
"Number of Dimensions:\t" +
Util::toString( numDimensions ) +
"\n";
1141 statsText +=
"Number of Samples:\t" +
Util::toString( totalNumSamples ) +
"\n";
1143 statsText +=
"ClassStats:\n";
1146 statsText +=
"ClassLabel:\t" +
Util::toString( classTracker[k].classLabel );
1147 statsText +=
"\tNumber of Samples:\t" +
Util::toString(classTracker[k].counter);
1148 statsText +=
"\tClassName:\t" + classTracker[k].className +
"\n";
1153 statsText +=
"Dataset Ranges:\n";
1154 for(UINT j=0; j<ranges.size(); j++){
1164 if( useExternalRanges )
return externalRanges;
1166 vector< MinMax > ranges(numDimensions);
1169 if( totalNumSamples > 0 ){
1170 for(UINT j=0; j<numDimensions; j++){
// Seed the min/max of dimension j with the first sample's value in that same dimension,
// so the scan below compares like with like.
1171 ranges[j].minValue = data[0][j];
1172 ranges[j].maxValue = data[0][j];
1173 for(UINT i=0; i<totalNumSamples; i++){
1174 if( data[i][j] < ranges[j].minValue ){ ranges[j].minValue = data[i][j]; }
1175 else if( data[i][j] > ranges[j].maxValue ){ ranges[j].maxValue = data[i][j]; }
1188 classLabels[i] = classTracker[i].classLabel;
1200 classSampleCounts[i] = classTracker[i].counter;
1203 return classSampleCounts;
1208 VectorDouble mean(numDimensions,0);
1210 for(UINT j=0; j<numDimensions; j++){
1211 for(UINT i=0; i<totalNumSamples; i++){
1212 mean[j] += data[i][j];
1214 mean[j] /= double(totalNumSamples);
1222 VectorDouble mean =
getMean();
1223 VectorDouble stdDev(numDimensions,0);
1225 for(UINT j=0; j<numDimensions; j++){
1226 for(UINT i=0; i<totalNumSamples; i++){
1227 stdDev[j] += SQR(data[i][j]-mean[j]);
1229 stdDev[j] = sqrt( stdDev[j] /
double(totalNumSamples-1) );
1241 vector< double > binRange(N);
1242 for(UINT i=0; i<ranges.size(); i++){
1243 binRange[i] = (ranges[i].maxValue-ranges[i].minValue)/
double(numBins);
1250 for(UINT i=0; i<M; i++){
1251 if( data[i].getClassLabel() == classLabel ){
1252 for(UINT j=0; j<N; j++){
1254 bool binFound =
false;
1255 for(UINT k=0; k<numBins-1; k++){
// A value belongs to bin k of dimension j when it lies in
// [min_j + k*binRange_j, min_j + (k+1)*binRange_j); ranges is indexed by dimension, not by sample.
1256 if( data[i][j] >= ranges[j].minValue + (binRange[j]*k) && data[i][j] < ranges[j].minValue + (binRange[j]*(k+1)) ){
1262 if( !binFound ) binIndex = numBins-1;
1263 histData[j][binIndex]++;
1269 if( norm == 0 )
return histData;
1274 histData[i][j] /= norm;
1288 for(UINT i=0; i<totalNumSamples; i++){
1290 for(UINT j=0; j<numDimensions; j++){
1291 mean[classIndex][j] += data[i][j];
1293 counter[ classIndex ]++;
1297 for(UINT j=0; j<numDimensions; j++){
// Normalise by the number of samples counted for class k (counter is indexed by class, not by dimension);
// a class with no samples keeps a mean of 0.
1298 mean[k][j] = counter[k] > 0 ? mean[k][j]/counter[k] : 0;
1313 for(UINT i=0; i<totalNumSamples; i++){
1315 for(UINT j=0; j<numDimensions; j++){
1316 stdDev[classIndex][j] += SQR(data[i][j]-mean[classIndex][j]);
1318 counter[ classIndex ]++;
1322 for(UINT j=0; j<numDimensions; j++){
1323 stdDev[k][j] = sqrt( stdDev[k][j] /
double(counter[k]-1) );
1332 VectorDouble mean =
getMean();
1335 for(UINT j=0; j<numDimensions; j++){
1336 for(UINT k=0; k<numDimensions; k++){
1337 for(UINT i=0; i<totalNumSamples; i++){
1338 covariance[j][k] += (data[i][j]-mean[j]) * (data[i][k]-mean[k]) ;
1340 covariance[j][k] /= double(totalNumSamples-1);
1349 vector< MatrixDouble > histData(K);
1351 for(UINT k=0; k<K; k++){
1358 VectorDouble ClassificationData::getClassProbabilities()
const {
1362 VectorDouble ClassificationData::getClassProbabilities(
const vector< UINT > &classLabels )
const {
1363 const UINT K = (UINT)classLabels.size();
1366 VectorDouble x(K,0);
1367 for(UINT k=0; k<K; k++){
1368 for(UINT n=0; n<N; n++){
1369 if( classLabels[k] == classTracker[n].classLabel ){
1370 x[k] = classTracker[n].counter;
1371 sum += classTracker[n].counter;
1379 for(UINT k=0; k<K; k++){
1394 for(UINT k=0; k<K; k++){
1395 if( classTracker[k].classLabel == classLabel){
1396 N = classTracker[k].counter;
1402 vector< UINT > classIndexes(N);
1403 for(UINT i=0; i<M; i++){
1404 if( data[i].getClassLabel() == classLabel ){
1405 classIndexes[index++] = i;
1409 return classIndexes;
1418 for(UINT i=0; i<M; i++){
1419 for(UINT j=0; j<N; j++){
1420 d[i][j] = data[i][j];
1433 for(UINT k=0; k<numClasses; k++){
1434 for(UINT j=0; j<numDimensions; j++){
1443 for(UINT i=0; i<numSamples; i++){
1449 vector< double > sample( numDimensions );
1450 for(UINT j=0; j<numDimensions; j++){
1455 UINT classLabel = k + 1;
1462 return data.
save( filename );
VectorDouble getStdDev() const
double getRandomNumberGauss(double mu=0.0, double sigma=1.0)
bool save(const string &filename) const
static std::string toString(const int &i)
bool removeSample(const UINT index)
The ClassificationData is the main data structure for recording, labeling, managing, saving, and loading training data for supervised learning problems.
bool setAllowNullGestureClass(bool allowNullGestureClass)
bool setAllValues(const T &value)
VectorDouble getMean() const
bool reserve(const UINT N)
vector< UINT > getClassLabels() const
bool loadDatasetFromFile(const string &filename)
string getStatsAsString() const
UINT getNumDimensions() const
ClassificationData getTrainingFoldData(const UINT foldIndex) const
bool setInputAndTargetDimensions(const UINT numInputDimensions, const UINT numTargetDimensions)
static double scale(const double &x, const double &minSource, const double &maxSource, const double &minTarget, const double &maxTarget, const bool constrain=false)
bool setNumDimensions(UINT numDimensions)
ClassificationData & operator=(const ClassificationData &rhs)
unsigned int getNumCols() const
bool addSample(const VectorDouble &sample)
UINT getNumSamples() const
UINT removeClass(const UINT classLabel)
MatrixDouble getClassStdDev() const
bool merge(const ClassificationData &labelledData)
vector< ClassTracker > getClassTracker() const
UINT getNumClasses() const
static double stringToDouble(const std::string &s)
UINT getClassLabelIndexValue(const UINT classLabel) const
bool scale(const double minTarget, const double maxTarget)
static bool generateGaussDataset(const std::string filename, const UINT numSamples=10000, const UINT numClasses=10, const UINT numDimensions=3, const double range=10, const double sigma=1)
RegressionData reformatAsRegressionData() const
vector< UINT > getClassDataIndexes(const UINT classLabel) const
vector< UINT > getNumSamplesPerClass() const
ClassificationData(UINT numDimensions=0, string datasetName="NOT_SET", string infoText="")
bool saveDatasetToCSVFile(const string &filename) const
int getRandomNumberInt(int minRange, int maxRange)
double getRandomNumberUniform(double minRange=0.0, double maxRange=1.0)
MatrixDouble getDataAsMatrixDouble() const
UnlabelledData reformatAsUnlabelledData() const
MatrixDouble getCovarianceMatrix() const
UINT getMinimumClassLabel() const
bool setExternalRanges(const vector< MinMax > &externalRanges, const bool useExternalRanges=false)
unsigned int getNumRows() const
vector< MinMax > getRanges() const
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
static int stringToInt(const std::string &s)
bool addSample(const VectorDouble &inputVector, const VectorDouble &targetVector)
static bool stringEndsWith(const std::string &str, const std::string &ending)
bool enableExternalRangeScaling(const bool useExternalRanges)
vector< MatrixDouble > getHistogramData(const UINT numBins) const
bool setInfoText(string infoText)
bool loadDatasetFromCSVFile(const string &filename, const UINT classLabelColumnIndex=0)
ClassificationData getTestFoldData(const UINT foldIndex) const
string getClassNameForCorrespondingClassLabel(const UINT classLabel) const
ClassificationData partition(const UINT partitionPercentage, const bool useStratifiedSampling=false)
virtual ~ClassificationData()
bool spiltDataIntoKFolds(const UINT K, const bool useStratifiedSampling=false)
MatrixDouble getClassMean() const
bool addSample(UINT classLabel, const VectorDouble &sample)
ClassificationData getBootstrappedDataset(UINT numSamples=0) const
bool load(const string &filename)
UINT getMaximumClassLabel() const
bool setNumDimensions(const UINT numDimensions)
bool saveDatasetToFile(const string &filename) const
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
bool setClassNameForCorrespondingClassLabel(string className, UINT classLabel)
bool addClass(const UINT classLabel, const std::string className="NOT_SET")
MatrixDouble getClassHistogramData(const UINT classLabel, const UINT numBins) const
bool setDatasetName(string datasetName)
ClassificationData getClassData(const UINT classLabel) const