// HMM constructor (partial listing: the embedded source line numbers skip,
// so parts of the body are not visible here).
// Parameters: hmmType, modelType, delta, useScaling, useNullRejection.
// The lines shown store modelType, useScaling and useNullRejection in the
// members of the same name; the handling of hmmType and delta is not visible.
28 HMM::HMM(
const UINT hmmType,
const UINT modelType,
const UINT delta,
const bool useScaling,
const bool useNullRejection)
31 this->modelType = modelType;
33 this->useScaling = useScaling;
34 this->useNullRejection = useNullRejection;
// Sigma auto-estimation is enabled by default.
46 autoEstimateSigma =
true;
// NOTE(review): supportsNullRejection is cleared here although
// useNullRejection is stored above and is consulted by the prediction code;
// confirm which flag callers are expected to rely on.
48 supportsNullRejection =
false;
// Select the time-series classifier mode.
49 classifierMode = TIMESERIES_CLASSIFIER_MODE;
51 classifierType = classType;
// Tag each log stream with an HMM-specific label (presumably prepended to
// every message written through that stream).
52 debugLog.setProceedingText(
"[DEBUG HMM]");
53 errorLog.setProceedingText(
"[ERROR HMM]");
54 warningLog.setProceedingText(
"[WARNING HMM]");
// Second initialisation sequence (embedded lines 58-63). The enclosing
// signature is not visible; presumably this is the body of another HMM
// constructor — TODO confirm.
// Selects the time-series classifier mode, sets the classifier type and tags
// the three log streams with HMM-specific labels.
58 classifierMode = TIMESERIES_CLASSIFIER_MODE;
60 classifierType = classType;
61 debugLog.setProceedingText(
"[DEBUG HMM]");
62 errorLog.setProceedingText(
"[ERROR HMM]");
63 warningLog.setProceedingText(
"[WARNING HMM]");
// Member-wise copy of the HMM settings and trained models from rhs.
// The enclosing signature is not visible; presumably this is the body of
// HMM::operator=(const HMM &rhs) — TODO confirm. Lines before 74 and after 83
// are not shown, so other members may be copied as well.
74 this->modelType = rhs.modelType;
75 this->delta = rhs.delta;
76 this->numStates = rhs.numStates;
77 this->numSymbols = rhs.numSymbols;
78 this->downsampleFactor = rhs.downsampleFactor;
79 this->committeeSize = rhs.committeeSize;
80 this->sigma = rhs.sigma;
81 this->autoEstimateSigma = rhs.autoEstimateSigma;
// The model containers are copied by value.
82 this->discreteModels = rhs.discreteModels;
83 this->continuousModels = rhs.continuousModels;
// Copies the HMM settings and models from another classifier instance given
// by pointer. The enclosing signature is not visible; presumably this is
// HMM::deepCopyFrom(const Classifier *classifier) — TODO confirm.
// A NULL pointer is rejected by returning false.
93 if( classifier == NULL )
return false;
// NOTE(review): C-style downcast to HMM*. The lines between 93 and 98 are not
// shown; confirm that the classifier type is verified before this cast.
98 HMM *ptr = (
HMM*)classifier;
// Member-wise copy of the settings, followed by the model containers.
102 this->modelType = ptr->modelType;
103 this->delta = ptr->delta;
104 this->numStates = ptr->numStates;
105 this->numSymbols = ptr->numSymbols;
106 this->downsampleFactor = ptr->downsampleFactor;
107 this->committeeSize = ptr->committeeSize;
108 this->sigma = ptr->sigma;
109 this->autoEstimateSigma = ptr->autoEstimateSigma;
110 this->discreteModels = ptr->discreteModels;
111 this->continuousModels = ptr->continuousModels;
// Fragment of train(ClassificationData trainingData): logs that the HMM must
// be trained through the TimeSeriesClassificationData overload instead.
// The return statement is not visible in this listing.
120 errorLog <<
"train(ClassificationData trainingData) - The HMM classifier should be trained using the train(TimeSeriesClassificationData &trainingData) method" << endl;
// Fragment of train_(TimeSeriesClassificationData &trainingData): forwards to
// the discrete or the continuous training routine and logs an error for an
// unknown HMM type. The statement that selects the branch is not visible.
129 return train_discrete( trainingData );
132 return train_continuous( trainingData );
136 errorLog <<
"train_(TimeSeriesClassificationData &trainingData) - Failed to train model, unknown HMM type!" << endl;
// Fragment of train_discrete(TimeSeriesClassificationData &trainingData):
// trains one discrete model per class and then derives a null-rejection
// threshold per class. The guarding conditions and return statements are not
// visible in this listing.
// Error path: no training samples.
146 errorLog <<
"train_discrete(TimeSeriesClassificationData &trainingData) - There are no training samples to train the HMM classifer!" << endl;
// Error path: the training data is not one-dimensional (see message).
151 errorLog <<
"train_discrete(TimeSeriesClassificationData &trainingData) - The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << endl;
// One model slot and one label slot per class.
158 discreteModels.resize( numClasses );
159 classLabels.resize( numClasses );
// Configure every per-class model with the shared HMM settings.
162 for(UINT k=0; k<numClasses; k++){
163 discreteModels[k].resetModel(numStates,numSymbols,modelType,delta);
164 discreteModels[k].setMaxNumEpochs( maxNumEpochs );
165 discreteModels[k].setMinChange( minChange );
// Training pass: record the class label, convert that class's data into
// observation sequences and train the class's model on them.
169 for(UINT k=0; k<numClasses; k++){
172 classLabels[k] = classID;
176 vector< vector< UINT > > observationSequences;
177 if( !convertDataToObservationSequence( classData, observationSequences ) ){
182 if( !discreteModels[k].
train( observationSequences ) ){
183 errorLog <<
"train_discrete(TimeSeriesClassificationData &trainingData) - Failed to train HMM for class " << classID << endl;
// Threshold pass: one null-rejection threshold per class.
189 nullRejectionThresholds.resize(numClasses);
191 for(UINT k=0; k<numClasses; k++){
194 classLabels[k] = classID;
198 vector< vector< UINT > > observationSequences;
199 if( !convertDataToObservationSequence( classData, observationSequences ) ){
// Accumulate the absolute value of the model's prediction for every training
// sequence of this class (the variable names suggest a log-likelihood).
204 double loglikelihood = 0;
205 double avgLoglikelihood = 0;
206 for(UINT i=0; i<observationSequences.size(); i++){
207 loglikelihood = discreteModels[k].predict( observationSequences[i] );
208 avgLoglikelihood += fabs( loglikelihood );
// The threshold is the negated mean of those absolute values, so it is never
// positive.
// NOTE(review): if observationSequences is empty this divides 0 by 0; no
// guard is visible in the lines shown.
210 nullRejectionThresholds[k] = -( avgLoglikelihood / double( observationSequences.size() ) );
// Fragment of train_continuous(TimeSeriesClassificationData &trainingData):
// builds one continuous model per training sample (not per class). The
// guarding conditions and return statements are not visible in this listing.
// Error path: no training samples.
225 errorLog <<
"train_continuous(TimeSeriesClassificationData &trainingData) - There are no training samples to train the CHMM classifer!" << endl;
232 classLabels.resize( numClasses );
233 for(UINT k=0; k<numClasses; k++){
// Rescale the training data to the range [0 1]. The condition guarding this
// call (if any) is not visible.
240 trainingData.
scale(0, 1);
// One continuous model is allocated for every training sample.
244 const UINT numTrainingSamples = trainingData.
getNumSamples();
245 continuousModels.resize( numTrainingSamples );
248 for(UINT k=0; k<numTrainingSamples; k++){
// Push the shared settings into the k-th model.
251 continuousModels[k].setDownsampleFactor( downsampleFactor );
252 continuousModels[k].setModelType( modelType );
253 continuousModels[k].setDelta( delta );
254 continuousModels[k].setSigma( sigma );
255 continuousModels[k].setAutoEstimateSigma( autoEstimateSigma );
// Scaling is switched off on the individual model; presumably because the
// data has already been scaled by this class — TODO confirm.
256 continuousModels[k].enableScaling(
false );
// Train the k-th model on the k-th training sample only.
259 if( !continuousModels[k].
train_( trainingData[k] ) ){
260 errorLog <<
"train_continuous(TimeSeriesClassificationData &trainingData) - Failed to train CHMM for sample " << k << endl;
// Warning path: committeeSize exceeds the number of training samples; per the
// message it is reduced to that number (the assignment is not visible).
267 warningLog <<
"train_continuous(TimeSeriesClassificationData &trainingData) - The committeeSize is larger than the number of training sample. Setting committeeSize to number of training samples: " << trainingData.
getNumSamples() << endl;
// With null rejection enabled, one threshold slot per class is allocated; how
// the thresholds are filled is not visible.
274 if( useNullRejection ){
276 nullRejectionThresholds.resize(numClasses);
// Fragment of predict_(VectorDouble &inputVector): forwards to the discrete or
// the continuous single-sample prediction and logs an error for an unknown
// HMM type. The statement that selects the branch is not visible.
286 return predict_discrete( inputVector );
289 return predict_continuous( inputVector );
293 errorLog <<
"predict_(VectorDouble &inputVector) - Failed to predict, unknown HMM type!" << endl;
// Single-observation prediction with the per-class discrete models (partial
// listing: the embedded source line numbers skip).
// inputVector[0] is used as the new observation symbol. On the success path
// predictedClassLabel, maxLikelihood, classLikelihoods and classDistances are
// updated.
298 bool HMM::predict_discrete( VectorDouble &inputVector ){
// Reset the outputs of any previous prediction.
300 predictedClassLabel = 0;
301 maxLikelihood = -10000;
// Error path: the classifier has not been trained.
304 errorLog <<
"predict_(VectorDouble &inputVector) - The HMM classifier has not been trained!" << endl;
// Reject inputs whose size differs from the model's input dimensions.
// NOTE(review): the message opens a "(" before numInputDimensions that is
// never closed.
308 if( inputVector.size() != numInputDimensions ){
309 errorLog <<
"predict_(VectorDouble &inputVector) - The size of the input vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << endl;
313 if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
314 if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);
// Start below any value a model can return so the first class always wins.
317 bestDistance = -99e+99;
// NOTE(review): the cast to UINT happens before the range check below; a
// negative input cannot be represented as UINT, so the check may not catch it
// reliably — confirm that callers only pass non-negative symbols.
319 UINT newObservation = (UINT)inputVector[0];
321 if( newObservation >= numSymbols ){
322 errorLog <<
"predict_(VectorDouble &inputVector) - The new observation is not a valid symbol! It should be in the range [0 numSymbols-1]" << endl;
// Score the observation with every class model: classDistances holds the raw
// model output, classLikelihoods its antilog.
326 for(UINT k=0; k<numClasses; k++){
327 classDistances[k] = discreteModels[k].predict( newObservation );
330 classLikelihoods[k] = antilog( classDistances[k] );
// Track the class with the largest raw score.
333 if( classDistances[k] > bestDistance ){
334 bestDistance = classDistances[k];
338 sum += classLikelihoods[k];
// Normalise the likelihoods by their sum.
// NOTE(review): no guard against sum == 0 is visible in the lines shown.
342 for(UINT k=0; k<numClasses; k++){
343 classLikelihoods[k] /= sum;
346 maxLikelihood = classLikelihoods[ bestIndex ];
347 predictedClassLabel = classLabels[ bestIndex ];
// Null rejection: keep the label only if the winning likelihood exceeds the
// class threshold, otherwise report the default null class.
// NOTE(review): train_discrete sets each threshold to a negated mean of
// fabs() values (never positive), while maxLikelihood here is a normalised
// likelihood; check that the two are on the same scale.
349 if( useNullRejection ){
350 if( maxLikelihood > nullRejectionThresholds[ bestIndex ] ){
351 predictedClassLabel = classLabels[ bestIndex ];
352 }
else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
// Single-sample prediction with the continuous models (partial listing: the
// embedded source line numbers skip). Every continuous model scores the
// input, the results are sorted, and the top committeeSize results are
// combined into per-class scores.
358 bool HMM::predict_continuous( VectorDouble &inputVector ){
// Error path: the classifier has not been trained.
361 errorLog <<
"predict_(VectorDouble &inputVector) - The HMM classifier has not been trained!" << endl;
// Reject inputs whose size differs from the model's input dimensions.
// NOTE(review): the message opens a "(" before numInputDimensions that is
// never closed.
365 if( inputVector.size() != numInputDimensions ){
366 errorLog <<
"predict_(VectorDouble &inputVector) - The size of the input vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << endl;
// Rescale each input dimension from its stored range to [0 1]. This modifies
// the caller's vector in place; the condition guarding the loop (if any) is
// not visible.
372 for(UINT i=0; i<numInputDimensions; i++){
373 inputVector[i] =
scale(inputVector[i], ranges[i].minValue, ranges[i].maxValue, 0, 1);
// Size the per-class outputs and zero them.
377 if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
378 if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);
380 std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
381 std::fill(classDistances.begin(),classDistances.end(),0);
// Both trackers start at -1000.
383 bestDistance = -1000;
385 double minValue = -1000;
// Run every continuous model; each successful prediction stores the model's
// log-likelihood as the value and its class label as the index.
387 const UINT numModels = (UINT)continuousModels.size();
388 vector< IndexedDouble > results(numModels);
389 for(UINT i=0; i<numModels; i++){
392 if( continuousModels[i].
predict_( inputVector ) ){
393 results[i].value = continuousModels[i].getLoglikelihood();
394 results[i].index = continuousModels[i].getClassLabel();
396 errorLog <<
"predict_(VectorDouble &inputVector) - Prediction failed for model: " << i << endl;
// Track the smallest and largest non-NaN values.
400 if( results[i].value < minValue ){
401 if( !grt_isnan(results[i].value) ){
402 minValue = results[i].value;
406 if( results[i].value > bestDistance ){
407 if( !grt_isnan(results[i].value) ){
408 bestDistance = results[i].value;
// The phase is taken from the model at bestIndex.
417 phase = continuousModels[ bestIndex ].getPhase();
// Sort so that the largest values come first.
420 std::sort(results.begin(),results.end(),IndexedDouble::sortIndexedDoubleByValueDescending);
// Each of the first committeeSize results gets an equal weight; the loop body
// is not visible in this listing.
423 const double committeeWeight = 1.0 / committeeSize;
424 for(UINT i=0; i<committeeSize; i++){
// Normalise the per-class scores into likelihoods.
431 for(UINT k=0; k<numClasses; k++){
432 classLikelihoods[k] = classDistances[k] / sum;
// Find the class with the largest score.
436 for(UINT k=0; k<numClasses; k++){
437 if( classDistances[k] > bestDistance ){
438 bestDistance = classDistances[k];
443 maxLikelihood = classLikelihoods[ bestIndex ];
444 predictedClassLabel = classLabels[ bestIndex ];
// Alternative outcome: label 0 (the branch condition is not visible).
448 predictedClassLabel = 0;
// Fragment of the time-series overload of predict_: forwards to the discrete
// or the continuous time-series prediction and logs an error for an unknown
// HMM type. The statement that selects the branch is not visible.
// NOTE(review): the "×eries" in the message looks like a mangled
// "&timeseries" — confirm against the original source.
458 return predict_discrete( timeseries );
461 return predict_continuous( timeseries );
465 errorLog <<
"predict_(MatrixDouble ×eries) - Failed to predict, unknown HMM type!" << endl;
// Fragment of the time-series version of predict_discrete: converts a
// one-column time series into an observation sequence and scores it with
// every per-class discrete model. The signature and return statements are not
// visible in this listing.
// Error path: the classifier has not been trained.
// NOTE(review): this message names predict_continuous although the
// surrounding checks refer to predict_discrete — likely a copy-paste slip.
474 errorLog <<
"predict_continuous(MatrixDouble ×eries) - The HMM classifier has not been trained!" << endl;
// Error path: the input matrix must have exactly one column.
479 errorLog <<
"predict_discrete(MatrixDouble ×eries) The number of columns in the input matrix must be 1. It is: " << timeseries.
getNumCols() << endl;
// Build the observation sequence from column 0 (M is defined on a line that
// is not visible) and reject any symbol outside [0 numSymbols-1].
// NOTE(review): the cast to UINT precedes the range check, so a negative
// value may not be caught reliably.
485 vector<UINT> observationSequence( M );
487 for(UINT i=0; i<M; i++){
488 observationSequence[i] = (UINT)timeseries[i][0];
490 if( observationSequence[i] >= numSymbols ){
// NOTE(review): the message names the VectorDouble overload.
491 errorLog <<
"predict_discrete(VectorDouble &inputVector) - The new observation is not a valid symbol! It should be in the range [0 numSymbols-1]" << endl;
496 if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
497 if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);
// Start below any value a model can return so the first class always wins.
499 bestDistance = -99e+99;
// Score the whole sequence with every class model: classDistances holds the
// raw model output, classLikelihoods its antilog.
502 for(UINT k=0; k<numClasses; k++){
503 classDistances[k] = discreteModels[k].predict( observationSequence );
506 classLikelihoods[k] = antilog( classDistances[k] );
509 if( classDistances[k] > bestDistance ){
510 bestDistance = classDistances[k];
514 sum += classLikelihoods[k];
// Normalise the likelihoods by their sum.
// NOTE(review): no guard against sum == 0 is visible in the lines shown.
518 for(UINT k=0; k<numClasses; k++){
519 classLikelihoods[k] /= sum;
522 maxLikelihood = classLikelihoods[ bestIndex ];
523 predictedClassLabel = classLabels[ bestIndex ];
// Null rejection: keep the label only if the winning likelihood exceeds the
// class threshold, otherwise report the default null class.
525 if( useNullRejection ){
526 if( maxLikelihood > nullRejectionThresholds[ bestIndex ] ){
527 predictedClassLabel = classLabels[ bestIndex ];
528 }
else predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
// Fragment of the time-series version of predict_continuous: every continuous
// model scores the whole time series, the results are sorted, and the top
// committeeSize results are combined into per-class scores. The signature and
// return statements are not visible in this listing.
// Error path: the classifier has not been trained.
538 errorLog <<
"predict_continuous(MatrixDouble ×eries) - The HMM classifier has not been trained!" << endl;
// Reject inputs whose column count differs from the model's input dimensions.
// NOTE(review): the message opens a "(" before numInputDimensions that is
// never closed.
542 if( timeseries.
getNumCols() != numInputDimensions ){
543 errorLog <<
"predict_continuous(MatrixDouble ×eries) - The number of columns in the input matrix (" << timeseries.
getNumCols() <<
") does not match the num features in the model (" << numInputDimensions << endl;
// Rescale every element, column by column, from the stored range of that
// column to [0 1]. This modifies the caller's matrix in place; the condition
// guarding the loops (if any) is not visible.
549 const UINT timeseriesLength = timeseries.
getNumRows();
550 for(UINT j=0; j<numInputDimensions; j++){
551 for(UINT i=0; i<timeseriesLength; i++){
552 timeseries[i][j] =
scale(timeseries[i][j], ranges[j].minValue, ranges[j].maxValue, 0, 1);
// Size the per-class outputs and zero them.
557 if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
558 if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);
560 std::fill(classLikelihoods.begin(),classLikelihoods.end(),0);
561 std::fill(classDistances.begin(),classDistances.end(),0);
// Both trackers start at -1000.
563 bestDistance = -1000;
565 double minValue = -1000;
// Run every continuous model; each successful prediction stores the model's
// log-likelihood as the value and its class label as the index.
567 const UINT numModels = (UINT)continuousModels.size();
568 vector< IndexedDouble > results(numModels);
569 for(UINT i=0; i<numModels; i++){
572 if( continuousModels[i].
predict_( timeseries ) ){
573 results[i].value = continuousModels[i].getLoglikelihood();
574 results[i].index = continuousModels[i].getClassLabel();
// NOTE(review): the message names the VectorDouble overload of predict_.
576 errorLog <<
"predict_(VectorDouble &inputVector) - Prediction failed for model: " << i << endl;
// Track the smallest and largest values.
// NOTE(review): unlike the single-sample version, no grt_isnan() check
// protects these updates.
580 if( results[i].value < minValue ){
581 minValue = results[i].value;
584 if( results[i].value > bestDistance ){
585 bestDistance = results[i].value;
// The phase is taken from the model at bestIndex.
591 phase = continuousModels[ bestIndex ].getPhase();
// Sort so that the largest values come first.
594 std::sort(results.begin(),results.end(),IndexedDouble::sortIndexedDoubleByValueDescending);
// Each of the first committeeSize results gets an equal weight; the loop body
// is not visible in this listing.
597 const double committeeWeight = 1.0 / committeeSize;
598 for(UINT i=0; i<committeeSize; i++){
// Normalise the per-class scores into likelihoods.
605 for(UINT k=0; k<numClasses; k++){
606 classLikelihoods[k] = classDistances[k] / sum;
// Find the class with the largest score.
610 for(UINT k=0; k<numClasses; k++){
611 if( classDistances[k] > bestDistance ){
612 bestDistance = classDistances[k];
617 maxLikelihood = classLikelihoods[ bestIndex ];
618 predictedClassLabel = classLabels[ bestIndex ];
// Alternative outcome: label 0 (the branch condition is not visible).
622 predictedClassLabel = 0;
// Fragment (presumably HMM::reset — signature not visible): calls reset() on
// every discrete model and on every continuous model.
634 for(
size_t i=0; i<discreteModels.size(); i++){
635 discreteModels[i].reset();
639 for(
size_t i=0; i<continuousModels.size(); i++){
640 continuousModels[i].reset();
// Fragment (presumably HMM::clear — signature not visible): discards all
// discrete and continuous models.
653 discreteModels.clear();
654 continuousModels.clear();
// Fragment of the model print routine: writes the HMM settings and each model
// to standard output. The branch conditions separating the discrete and the
// continuous sections are not visible in this listing.
661 cout <<
"HMM Model\n";
// Settings shared by both HMM types.
664 cout <<
"HmmType: " <<
hmmType << endl;
665 cout <<
"ModelType: " << modelType << endl;
666 cout <<
"Delta: " << delta << endl;
// Discrete section: settings, then every discrete model.
671 cout <<
"NumStates: " << numStates << endl;
672 cout <<
"NumSymbols: " << numSymbols << endl;
673 cout <<
"NumRandomTrainingIterations: " << numRandomTrainingIterations << endl;
674 cout <<
"NumDiscreteModels: " << discreteModels.size() << endl;
675 cout <<
"DiscreteModels: " << endl;
676 for(
size_t i=0; i<discreteModels.size(); i++){
677 if( !discreteModels[i].
print() ){
// NOTE(review): the message names saveModelToFile and says "to file"
// although this code prints to cout — likely a copy-paste slip.
678 errorLog <<
"saveModelToFile(fstream &file) - Failed to print discrete model " << i <<
" to file!" << endl;
// Continuous section: settings, then every continuous model.
684 cout <<
"DownsampleFactor: " << downsampleFactor << endl;
685 cout <<
"CommitteeSize: " << committeeSize << endl;
686 cout <<
"Sigma: " << sigma << endl;
687 cout <<
"AutoEstimateSigma: " << autoEstimateSigma << endl;
688 cout <<
"NumContinuousModels: " << continuousModels.size() << endl;
689 cout <<
"ContinuousModels: " << endl;
690 for(
size_t i=0; i<continuousModels.size(); i++){
691 if( !continuousModels[i].
print() ){
// NOTE(review): same wording issue as for the discrete models.
692 errorLog <<
"saveModelToFile(fstream &file) - Failed to print continuous model " << i <<
" to file!" << endl;
// Fragment of saveModelToFile(fstream &file): writes the file header, the HMM
// settings and every model to the stream. The guarding conditions, the calls
// that save the individual models and the return statements are not visible
// in this listing.
// Error path: the stream is not open.
706 errorLog <<
"saveModelToFile( fstream &file ) - File is not open!" << endl;
// File format header; loadModelFromFile checks for this exact token.
711 file <<
"HMM_MODEL_FILE_V2.0\n";
// Error path: saving the classifier base settings failed.
715 errorLog <<
"saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << endl;
// Settings shared by both HMM types.
720 file <<
"HmmType: " <<
hmmType << endl;
721 file <<
"ModelType: " << modelType << endl;
722 file <<
"Delta: " << delta << endl;
// Discrete section: settings, then every discrete model.
727 file <<
"NumStates: " << numStates << endl;
728 file <<
"NumSymbols: " << numSymbols << endl;
729 file <<
"NumRandomTrainingIterations: " << numRandomTrainingIterations << endl;
730 file <<
"NumDiscreteModels: " << discreteModels.size() << endl;
731 file <<
"DiscreteModels: " << endl;
732 for(
size_t i=0; i<discreteModels.size(); i++){
734 errorLog <<
"saveModelToFile(fstream &file) - Failed to save discrete model " << i <<
" to file!" << endl;
// Continuous section: settings, then every continuous model.
// NOTE(review): AutoEstimateSigma is written by the print routine but is not
// among the fields written here (embedded lines 740-744 are consecutive), so
// the flag does not appear to be saved with the model — confirm.
740 file <<
"DownsampleFactor: " << downsampleFactor << endl;
741 file <<
"CommitteeSize: " << committeeSize << endl;
742 file <<
"Sigma: " << sigma << endl;
743 file <<
"NumContinuousModels: " << continuousModels.size() << endl;
744 file <<
"ContinuousModels: " << endl;
745 for(
size_t i=0; i<continuousModels.size(); i++){
747 errorLog <<
"saveModelToFile(fstream &file) - Failed to save continuous model " << i <<
" to file!" << endl;
// Fragment of loadModelFromFile(fstream &file): reads the stream token by
// token and checks every expected field label in the order written by
// saveModelToFile. Most of the statements that read the labels and values,
// the branch conditions and the return statements are not visible in this
// listing.
// Error path: the stream is not open.
763 errorLog <<
"loadModelFromFile( fstream &file ) - File is not open!" << endl;
// The first token must be the file format header.
773 if(word !=
"HMM_MODEL_FILE_V2.0"){
774 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find Model File Header!" << endl;
// Error path: loading the base settings failed.
// NOTE(review): this message names loadModelFromFile(string filename) while
// every other message here names the fstream overload.
780 errorLog <<
"loadModelFromFile(string filename) - Failed to load base settings from file!" << endl;
// Settings shared by both HMM types.
786 if(word !=
"HmmType:"){
787 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find HmmType." << endl;
793 if(word !=
"ModelType:"){
794 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find ModelType." << endl;
800 if(word !=
"Delta:"){
801 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find Delta." << endl;
// Discrete section.
811 if(word !=
"NumStates:"){
812 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find NumStates." << endl;
818 if(word !=
"NumSymbols:"){
819 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find NumSymbols." << endl;
825 if(word !=
"NumRandomTrainingIterations:"){
826 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find NumRandomTrainingIterations." << endl;
829 file >> numRandomTrainingIterations;
832 if(word !=
"NumDiscreteModels:"){
833 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find NumDiscreteModels." << endl;
839 if(word !=
"DiscreteModels:"){
840 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find DiscreteModels." << endl;
// Allocate numModels discrete models (numModels is read on a line that is not
// visible); the per-model load call is not visible either.
845 discreteModels.resize(numModels);
846 for(
size_t i=0; i<discreteModels.size(); i++){
848 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load discrete model " << i <<
" from file!" << endl;
// Continuous section.
857 if(word !=
"DownsampleFactor:"){
858 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find DownsampleFactor." << endl;
861 file >> downsampleFactor;
864 if(word !=
"CommitteeSize:"){
865 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find CommitteeSize." << endl;
// NOTE(review): committeeSize is read straight from the file, bypassing the
// "> 0" check applied by setCommitteeSize; the continuous prediction code
// divides by committeeSize, so a stored 0 would matter — confirm.
868 file >> committeeSize;
871 if(word !=
"Sigma:"){
872 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find Sigma." << endl;
878 if(word !=
"NumContinuousModels:"){
879 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find NumContinuousModels." << endl;
885 if(word !=
"ContinuousModels:"){
886 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find ContinuousModels." << endl;
// Allocate numModels continuous models; the per-model load call is not
// visible.
891 continuousModels.resize(numModels);
892 for(
size_t i=0; i<continuousModels.size(); i++){
894 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load continuous model " << i <<
" from file!" << endl;
// Fragment (presumably convertDataToObservationSequence, the helper called
// from train_discrete — signature not visible): copies column 0 of a time
// series into the i-th observation sequence as UINT symbols.
// One symbol per row of the time series.
912 observationSequences[i].resize( timeseries.
getNumRows() );
913 for(UINT j=0; j<timeseries.
getNumRows(); j++){
// Values at or above numSymbols are reported as errors before the cast.
// NOTE(review): only the upper bound is checked; a negative value would reach
// the UINT cast below — confirm inputs are always non-negative.
914 if( timeseries[j][0] >= numSymbols ){
915 errorLog <<
"train(TimeSeriesClassificationData &trainingData) - Found an observation sequence with a value outside of the symbol range! Value: " << timeseries[j][0] << endl;
918 observationSequences[i][j] = (UINT)timeseries[j][0];
// Return statements of three getters whose signatures are not visible here.
// Returns the configured number of random training iterations.
946 return numRandomTrainingIterations;
// Returns the discrete models (the declared return type is a vector by value,
// so the caller receives a copy).
950 return discreteModels;
// Returns the continuous models (likewise returned by value).
954 return continuousModels;
// Fragment of setHMMType(const UINT hmmType): only HMM_DISCRETE and
// HMM_CONTINUOUS pass the check; any other value is reported with a warning.
// The body of the accepting branch and the return statements are not visible.
961 if( hmmType == HMM_DISCRETE || hmmType == HMM_CONTINUOUS ){
966 warningLog <<
"setHMMType(const UINT hmmType) - Unknown HMM type!" << endl;
// Fragment of setModelType(const UINT modelType): stores the value when it is
// HMM_ERGODIC or HMM_LEFTRIGHT; any other value is reported with a warning.
// The return statements are not visible.
974 if( modelType == HMM_ERGODIC || modelType == HMM_LEFTRIGHT ){
975 this->modelType = modelType;
979 warningLog <<
"setModelType(const UINT modelType) - Unknown model type!" << endl;
// Fragment of setDelta(const UINT delta): only the warning for a rejected
// value is visible; per its text, delta must be greater than zero.
992 warningLog <<
"setDelta(const UINT delta) - Delta must be greater than zero!" << endl;
// Fragment (presumably setDownsampleFactor — signature not visible): stores
// the new downsample factor only when it is greater than zero. What happens
// otherwise is not visible.
999 if( downsampleFactor > 0 ){
1000 this->downsampleFactor = downsampleFactor;
// Fragment (presumably setCommitteeSize — signature not visible): stores the
// new committee size only when it is greater than zero. What happens
// otherwise is not visible.
1008 if( committeeSize > 0 ){
1009 this->committeeSize = committeeSize;
// Fragment of setNumStates(const UINT numStates): stores the value when it is
// greater than zero; otherwise a warning is logged. The return statements are
// not visible.
1020 if( numStates > 0 ){
1021 this->numStates = numStates;
1025 warningLog <<
"setNumStates(const UINT numStates) - Num states must be greater than zero!" << endl;
// Fragment of setNumSymbols: stores the value when it is greater than zero;
// otherwise a warning is logged. The return statements are not visible.
1033 if( numSymbols > 0 ){
1034 this->numSymbols = numSymbols;
1038 warningLog <<
"setNumSymbols(const UINT numSymbols) - Num symbols must be greater than zero!" << endl;
// Fragment (presumably setNumRandomTrainingIterations — signature not
// visible): stores the value when it is greater than zero; otherwise a
// warning is logged. The return statements are not visible.
1046 if( numRandomTrainingIterations > 0 ){
1047 this->numRandomTrainingIterations = numRandomTrainingIterations;
// NOTE(review): the warning names setMaxNumIterations(const UINT maxNumIter),
// which does not match the value being set here — likely a copy-paste slip.
1051 warningLog <<
"setMaxNumIterations(const UINT maxNumIter) - The number of random training iterations must be greater than zero!" << endl;
// Fragment (presumably setSigma — signature not visible): stores the new
// sigma and forwards it to every existing continuous model. Any validation of
// the value is not visible.
1057 this->sigma = sigma;
1058 for(
size_t i=0; i<continuousModels.size(); i++){
1059 continuousModels[i].setSigma( sigma );
// Sets whether sigma is estimated automatically (partial listing: the lines
// between the signature and the assignment, and the return statement, are not
// visible).
// The flag is stored on this object; train_continuous passes it on to each
// continuous model when the models are configured.
1066 bool HMM::setAutoEstimateSigma(
const bool autoEstimateSigma){
1070 this->autoEstimateSigma = autoEstimateSigma;
bool setCommitteeSize(const UINT committeeSize)
UINT getNumSymbols() const
UINT getNumSamples() const
virtual bool train(ClassificationData trainingData)
UINT getNumRandomTrainingIterations() const
bool setHMMType(const UINT hmmType)
bool copyBaseVariables(const Classifier *classifier)
UINT getModelType() const
UINT getNumDimensions() const
bool loadBaseSettingsFromFile(fstream &file)
static double scale(const double &x, const double &minSource, const double &maxSource, const double &minTarget, const double &maxTarget, const bool constrain=false)
unsigned int getNumCols() const
virtual bool loadModelFromFile(fstream &file)
vector< DiscreteHiddenMarkovModel > getDiscreteModels() const
bool setDownsampleFactor(const UINT downsampleFactor)
virtual bool predict_(VectorDouble &inputVector)
bool setSigma(const double sigma)
bool saveBaseSettingsToFile(fstream &file) const
double scale(const double &x, const double &minSource, const double &maxSource, const double &minTarget, const double &maxTarget, const bool constrain=false)
virtual bool deepCopyFrom(const Classifier *classifier)
UINT getNumStates() const
UINT getClassLabelIndexValue(UINT classLabel) const
bool setNumRandomTrainingIterations(const UINT numRandomTrainingIterations)
UINT getNumClasses() const
bool scale(const double minTarget, const double maxTarget)
vector< ClassTracker > getClassTracker() const
virtual bool train_(TimeSeriesClassificationData &trainingData)
bool setNumStates(const UINT numStates)
vector< ContinuousHiddenMarkovModel > getContinuousModels() const
HMM(const UINT hmmType=HMM_CONTINUOUS, const UINT modelType=HMM_LEFTRIGHT, const UINT delta=1, const bool useScaling=false, const bool useNullRejection=false)
TimeSeriesClassificationData getClassData(const UINT classLabel) const
unsigned int getNumRows() const
UINT hmmType
Controls whether this is an HMM_DISCRETE or an HMM_CONTINUOUS model.
HMM & operator=(const HMM &rhs)
virtual bool saveModelToFile(fstream &file) const
virtual bool print() const
string getClassifierType() const
bool setDelta(const UINT delta)
This class acts as the main interface for using a Hidden Markov Model.
static double sum(const std::vector< double > &x)
vector< MinMax > getRanges() const
bool setNumSymbols(const UINT numStates)
bool setModelType(const UINT modelType)