// Defines the static member MinDist::registerModule, constructed with the string "MinDist".
// NOTE(review): presumably this is what registers the class under that name with a
// classifier factory -- confirm against RegisterClassifierModule.
26 RegisterClassifierModule< MinDist > MinDist::registerModule(
"MinDist");
// Constructor. Copies the four arguments into the same-named members, flags the
// classifier as supporting null rejection, tags it with the type name "MinDist",
// selects STANDARD_CLASSIFIER_MODE and sets the prefix text on the four log streams.
// NOTE(review): the braces that open and close the body do not appear in this listing.
28 MinDist::MinDist(
bool useScaling,
bool useNullRejection,
double nullRejectionCoeff,UINT numClusters)
// `this->` is required because each parameter shadows the member of the same name.
30 this->useScaling = useScaling;
31 this->useNullRejection = useNullRejection;
32 this->nullRejectionCoeff = nullRejectionCoeff;
33 this->numClusters = numClusters;
34 supportsNullRejection =
true;
// classifierType is set from classType, so both hold "MinDist".
35 classType =
"MinDist";
36 classifierType = classType;
37 classifierMode = STANDARD_CLASSIFIER_MODE;
// Prefix text for each log stream.
38 debugLog.setProceedingText(
"[DEBUG MinDist]");
39 errorLog.setProceedingText(
"[ERROR MinDist]");
40 trainingLog.setProceedingText(
"[TRAINING MinDist]");
41 warningLog.setProceedingText(
"[WARNING MinDist]");
// Second copy of the type/mode/log-prefix initialisation performed by the constructor above.
// NOTE(review): the enclosing function header is not part of this listing; presumably this
// is the body of another constructor (e.g. a copy constructor) -- confirm against the full file.
45 classType =
"MinDist";
46 classifierType = classType;
47 classifierMode = STANDARD_CLASSIFIER_MODE;
48 debugLog.setProceedingText(
"[DEBUG MinDist]");
49 errorLog.setProceedingText(
"[ERROR MinDist]");
50 trainingLog.setProceedingText(
"[TRAINING MinDist]");
51 warningLog.setProceedingText(
"[WARNING MinDist]");
// Copies the MinDist-specific state (cluster count and per-class models) from `rhs`.
// NOTE(review): the function header and any self-assignment guard or base-variable copy
// are not shown here; presumably this belongs to operator=(const MinDist &rhs) -- confirm.
62 this->numClusters = rhs.numClusters;
63 this->models = rhs.models;
// Rejects a null source pointer by returning false.
73 if( classifier == NULL )
return false;
// Copies the cluster count and per-class models from `ptr`.
// NOTE(review): the definition of `ptr` (presumably `classifier` cast to a MinDist pointer)
// and the classifier-type check are not shown in this listing -- confirm against the full file.
79 this->numClusters = ptr->numClusters;
80 this->models = ptr->models;
// Fragment of the training routine. M, N, K, data, classData, classLabel, minChange and
// maxNumEpochs are defined on lines that are not part of this listing, as are the closing
// braces and return statements.
// NOTE(review): the log prefixes below are inconsistent with each other
// ("trainingData &..." vs "ClassificationData &...").
// Error reported for an empty training set (the guarding condition is not shown).
99 errorLog <<
"train_(trainingData &labelledTrainingData) - Training data has zero samples!" << endl;
// Training is refused unless M is strictly greater than the number of clusters.
// NOTE(review): M is presumably the total number of training samples -- confirm.
103 if( M <= numClusters ){
104 errorLog <<
"train_(trainingData &labelledTrainingData) - There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << endl;
// Record the input dimensionality and size the per-class bookkeeping vectors.
108 numInputDimensions = N;
111 classLabels.resize(K);
112 nullRejectionThresholds.resize(K);
// Rescale the training data with target bounds 0 and 1.
// NOTE(review): the condition guarding this call is not shown; presumably useScaling.
118 trainingData.
scale(0, 1);
// Train one model per class.
122 for(UINT k=0; k<numClasses; k++){
124 trainingLog <<
"Training model for class: " << trainingData.
getClassTracker()[k].classLabel << endl;
130 classLabels[k] = classLabel;
// Element-by-element copy of this class's samples from classData into data.
137 for(UINT i=0; i<data.getNumRows(); i++){
138 for(UINT j=0; j<data.getNumCols(); j++){
139 data[i][j] = classData[i][j];
// Gamma is set from the null-rejection coefficient before the model is trained.
144 models[k].setGamma( nullRejectionCoeff );
145 if( !models[k].
train(classLabel,data,numClusters,minChange,maxNumEpochs) ){
146 errorLog <<
"train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel;
// NOTE(review): the message below contains the typo "This is might be".
147 errorLog <<
". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << endl;
// Cache the rejection threshold reported by the trained model.
153 nullRejectionThresholds[k] = models[k].getRejectionThreshold();
// Fragment of the prediction routine. The function header, the declaration of `sum`,
// several guarding conditions, closing braces and return statements are not shown.
164 predictedClassLabel = 0;
// Error reported when no trained model is available (the guarding condition is not shown).
168 errorLog <<
"predict_(VectorDouble &inputVector) - MinDist Model Not Trained!" << endl;
// The input must have exactly numInputDimensions elements.
// NOTE(review): the message opens a "(" before numInputDimensions and never closes it.
172 if( inputVector.size() != numInputDimensions ){
173 errorLog <<
"predict_(VectorDouble &inputVector) - The size of the input vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << endl;
// Rescale each input element from its stored [minValue, maxValue] range to target bounds 0 and 1.
// This overwrites the elements of inputVector in place.
// NOTE(review): the condition guarding this loop is not shown; presumably useScaling.
178 for(UINT n=0; n<numInputDimensions; n++){
179 inputVector[n] =
scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0, 1);
// Make sure the per-class result vectors have one slot per class.
183 if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
184 if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);
// Find the model with the smallest distance to the input.
187 double minDist = numeric_limits<double>::max();
188 for(UINT k=0; k<numClasses; k++){
190 classDistances[k] = models[k].predict( inputVector );
// Until the end of the routine predictedClassLabel holds the model INDEX k, not a class label.
193 if( classDistances[k] < minDist ){
194 minDist = classDistances[k];
195 predictedClassLabel = k;
// Likelihood is the inverse distance; the 0.0001 offset avoids dividing by zero.
199 classLikelihoods[k] = 1.0 / (classDistances[k] + 0.0001);
200 sum += classLikelihoods[k];
// Normalise the likelihoods by their sum (the condition that guards this branch is not shown).
205 for(UINT k=0; k<numClasses; k++){
206 classLikelihoods[k] /= sum;
208 maxLikelihood = classLikelihoods[predictedClassLabel];
209 }
else maxLikelihood = classLikelihoods[predictedClassLabel];
// Convert the winning index into a class label. With null rejection enabled, the label is
// only accepted when the distance is within that model's rejection threshold; otherwise the
// null class label is reported.
211 if( useNullRejection ){
213 if( minDist <= models[predictedClassLabel].getRejectionThreshold() ) predictedClassLabel = models[predictedClassLabel].getClassLabel();
214 else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
215 }
else predictedClassLabel = models[predictedClassLabel].getClassLabel();
// For every class model: push the current null-rejection coefficient in as gamma, then ask
// the model to recompute its threshold value.
// NOTE(review): the function header, any trained-state check, the loop's closing brace and
// the return statement are not shown; the listing also does not show whether
// nullRejectionThresholds is refreshed afterwards -- confirm against the full file.
234 for(UINT k=0; k<numClasses; k++) {
235 models[k].setGamma( nullRejectionCoeff );
236 models[k].recomputeThresholdValue();
// The coefficient is stored only when it is strictly positive.
// NOTE(review): what happens after the assignment and on the rejected path (return values,
// threshold recomputation) is not shown in this listing.
245 if( nullRejectionCoeff > 0 ){
246 this->nullRejectionCoeff = nullRejectionCoeff;
// Fragment of the model writer. Guarding conditions, the definition of `clusters`,
// closing braces and return statements are not shown in this listing.
// Error reported when the stream is not open (the guarding condition is not shown).
265 errorLog <<
"saveModelToFile(fstream &file) - The file is not open!" << endl;
// File header identifying the format version.
270 file<<
"GRT_MINDIST_MODEL_FILE_V2.0\n";
// Error reported when writing the shared classifier settings fails.
274 errorLog <<
"saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << endl;
// One record per class: six labelled scalar lines followed by a "ClusterData:" marker.
281 for(UINT k=0; k<numClasses; k++){
282 file <<
"ClassLabel: " << models[k].getClassLabel() << endl;
283 file <<
"NumClusters: " << models[k].getNumClusters() << endl;
284 file <<
"RejectionThreshold: " << models[k].getRejectionThreshold() << endl;
285 file <<
"Gamma: " << models[k].getGamma() << endl;
286 file <<
"TrainingMu: " << models[k].getTrainingMu() << endl;
287 file <<
"TrainingSigma: " << models[k].getTrainingSigma() << endl;
288 file <<
"ClusterData:" << endl;
// Cluster values are written tab-separated, iterating clusters then features.
// NOTE(review): `clusters` is presumably fetched from models[k] on a line not shown.
290 for(UINT i=0; i<models[k].getNumClusters(); i++){
291 for(UINT j=0; j<models[k].getNumFeatures(); j++){
292 file << clusters[i][j] <<
"\t";
// Fragment of the model reader. The reads into `word`, the declarations of `gamma`,
// `trainingMu`, `numClusters` and `clusters`, plus closing braces and return statements,
// are on lines that are not part of this listing.
// NOTE(review): every log message names "loadModelFromFile(string filename)", while the
// declaration listed for this class takes `fstream &file`.
// Error reported when the stream cannot be used (the guarding condition is not shown).
309 errorLog <<
"loadModelFromFile(string filename) - Could not open file to load model" << endl;
// A V1.0 header is detected separately from the current format.
// NOTE(review): the body of this branch is not shown; presumably it hands off to the legacy loader.
319 if( word ==
"GRT_MINDIST_MODEL_FILE_V1.0" ){
// Any header other than V2.0 is an error.
324 if(word !=
"GRT_MINDIST_MODEL_FILE_V2.0"){
325 errorLog <<
"loadModelFromFile(string filename) - Could not find Model File Header" << endl;
// Error reported when the shared classifier settings cannot be read.
331 errorLog <<
"loadModelFromFile(string filename) - Failed to load base settings from file!" << endl;
// Size the per-class containers before reading the class records.
338 models.resize(numClasses);
339 classLabels.resize(numClasses);
// Each class record is a fixed sequence of "Token: value" pairs; a wrong token is an error.
342 for(UINT k=0; k<numClasses; k++){
343 double rejectionThreshold;
345 double trainingSigma;
349 if( word !=
"ClassLabel:" ){
350 errorLog <<
"loadModelFromFile(string filename) - Could not load the class label for class " << k << endl;
353 file >> classLabels[k];
356 if( word !=
"NumClusters:" ){
357 errorLog <<
"loadModelFromFile(string filename) - Could not load the NumClusters for class " << k << endl;
363 if( word !=
"RejectionThreshold:" ){
364 errorLog <<
"loadModelFromFile(string filename) - Could not load the RejectionThreshold for class " << k << endl;
367 file >> rejectionThreshold;
370 if( word !=
"Gamma:" ){
371 errorLog <<
"loadModelFromFile(string filename) - Could not load the Gamma for class " << k << endl;
377 if( word !=
"TrainingMu:" ){
378 errorLog <<
"loadModelFromFile(string filename) - Could not load the TrainingMu for class " << k << endl;
384 if( word !=
"TrainingSigma:" ){
385 errorLog <<
"loadModelFromFile(string filename) - Could not load the TrainingSigma for class " << k << endl;
388 file >> trainingSigma;
391 if( word !=
"ClusterData:" ){
392 errorLog <<
"loadModelFromFile(string filename) - Could not load the ClusterData for class " << k << endl;
// Read the cluster matrix, iterating clusters then input dimensions.
// NOTE(review): cannot tell from this listing whether `numClusters` here is a local read
// from the file or the class member -- confirm.
398 for(UINT i=0; i<numClusters; i++){
399 for(UINT j=0; j<numInputDimensions; j++){
400 file >> clusters[i][j];
// Push the values read for this class into its model.
404 models[k].setClassLabel( classLabels[k] );
405 models[k].setClusters( clusters );
406 models[k].setGamma( gamma );
407 models[k].setRejectionThreshold( rejectionThreshold );
408 models[k].setTrainingSigma( trainingSigma );
409 models[k].setTrainingMu( trainingMu );
// Reset the prediction outputs to their default "null" values, sized for the loaded class count.
416 maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
417 bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
418 classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
419 classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
// The cluster count is stored only when it is greater than zero.
// NOTE(review): the return statements for the accepted and rejected paths are not shown.
426 if( numClusters > 0 ){
427 this->numClusters = numClusters;
// Fragment of a second reader that parses the settings header inline (NumFeatures,
// NumClasses, UseScaling, UseNullRejection, Ranges) before the per-class records.
// NOTE(review): presumably the legacy (V1.0) loader, although its log messages say
// "loadModelFromFile(string filename)" -- confirm. The reads into `word`, several value
// reads, the declarations of `gamma`, `trainingMu`, `numClusters` and `clusters`, closing
// braces and return statements are not part of this listing.
438 if(word !=
"NumFeatures:"){
439 errorLog <<
"loadModelFromFile(string filename) - Could not find NumFeatures " << endl;
442 file >> numInputDimensions;
445 if(word !=
"NumClasses:"){
446 errorLog <<
"loadModelFromFile(string filename) - Could not find NumClasses" << endl;
452 if(word !=
"UseScaling:"){
453 errorLog <<
"loadModelFromFile(string filename) - Could not find UseScaling" << endl;
459 if(word !=
"UseNullRejection:"){
460 errorLog <<
"loadModelFromFile(string filename) - Could not find UseNullRejection" << endl;
463 file >> useNullRejection;
// One min/max pair per input dimension.
// NOTE(review): the condition guarding the range section is not shown; presumably useScaling.
468 ranges.resize(numInputDimensions);
471 if(word !=
"Ranges:"){
472 errorLog <<
"loadModelFromFile(string filename) - Could not find the Ranges" << endl;
475 for(UINT n=0; n<ranges.size(); n++){
476 file >> ranges[n].minValue;
477 file >> ranges[n].maxValue;
// Size the per-class containers before reading the class records.
482 models.resize(numClasses);
483 classLabels.resize(numClasses);
// Each class record is a fixed sequence of "Token: value" pairs; a wrong token is an error.
486 for(UINT k=0; k<numClasses; k++){
487 double rejectionThreshold;
489 double trainingSigma;
493 if( word !=
"ClassLabel:" ){
494 errorLog <<
"loadModelFromFile(string filename) - Could not load the class label for class " << k << endl;
497 file >> classLabels[k];
500 if( word !=
"NumClusters:" ){
501 errorLog <<
"loadModelFromFile(string filename) - Could not load the NumClusters for class " << k << endl;
507 if( word !=
"RejectionThreshold:" ){
508 errorLog <<
"loadModelFromFile(string filename) - Could not load the RejectionThreshold for class " << k << endl;
511 file >> rejectionThreshold;
514 if( word !=
"Gamma:" ){
515 errorLog <<
"loadModelFromFile(string filename) - Could not load the Gamma for class " << k << endl;
521 if( word !=
"TrainingMu:" ){
522 errorLog <<
"loadModelFromFile(string filename) - Could not load the TrainingMu for class " << k << endl;
528 if( word !=
"TrainingSigma:" ){
529 errorLog <<
"loadModelFromFile(string filename) - Could not load the TrainingSigma for class " << k << endl;
532 file >> trainingSigma;
535 if( word !=
"ClusterData:" ){
536 errorLog <<
"loadModelFromFile(string filename) - Could not load the ClusterData for class " << k << endl;
// Read the cluster matrix, iterating clusters then input dimensions.
// NOTE(review): cannot tell from this listing whether `numClusters` here is a local read
// from the file or the class member -- confirm.
542 for(UINT i=0; i<numClusters; i++){
543 for(UINT j=0; j<numInputDimensions; j++){
544 file >> clusters[i][j];
// Push the values read for this class into its model.
548 models[k].setClassLabel( classLabels[k] );
549 models[k].setClusters( clusters );
550 models[k].setGamma( gamma );
551 models[k].setRejectionThreshold( rejectionThreshold );
552 models[k].setTrainingSigma( trainingSigma );
553 models[k].setTrainingMu( trainingMu );
// Reset the prediction outputs to their default "null" values, sized for the loaded class count.
560 maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;
561 bestDistance = DEFAULT_NULL_DISTANCE_VALUE;
562 classLikelihoods.resize(numClasses,DEFAULT_NULL_LIKELIHOOD_VALUE);
563 classDistances.resize(numClasses,DEFAULT_NULL_DISTANCE_VALUE);
vector< MinDistModel > getModels() const
bool setNumClusters(UINT numClusters)
virtual bool deepCopyFrom(const Classifier *classifier)
virtual bool saveModelToFile(fstream &file) const
virtual bool setNullRejectionCoeff(double nullRejectionCoeff)
bool copyBaseVariables(const Classifier *classifier)
MinDist(bool useScaling=false, bool useNullRejection=false, double nullRejectionCoeff=10.0, UINT numClusters=10)
UINT getNumDimensions() const
bool loadBaseSettingsFromFile(fstream &file)
UINT getNumSamples() const
virtual bool predict_(VectorDouble &inputVector)
virtual bool train(ClassificationData trainingData)
vector< ClassTracker > getClassTracker() const
UINT getNumClasses() const
virtual bool train_(ClassificationData &trainingData)
bool saveBaseSettingsToFile(fstream &file) const
double scale(const double &x, const double &minSource, const double &maxSource, const double &minTarget, const double &maxTarget, const bool constrain=false)
MinDist & operator=(const MinDist &rhs)
bool scale(const double minTarget, const double maxTarget)
virtual bool recomputeNullRejectionThresholds()
vector< MinMax > getRanges() const
bool loadLegacyModelFromFile(fstream &file)
string getClassifierType() const
virtual bool loadModelFromFile(fstream &file)
UINT getNumClusters() const
This class implements the MinDist classifier algorithm.
ClassificationData getClassData(const UINT classLabel) const