DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(){
    numRandomTrainingIterations = 5;
    modelType = HMM_LEFTRIGHT;
    debugLog.setProceedingText("[DEBUG DiscreteHiddenMarkovModel]");
    errorLog.setProceedingText("[ERROR DiscreteHiddenMarkovModel]");
    warningLog.setProceedingText("[WARNING DiscreteHiddenMarkovModel]");
    trainingLog.setProceedingText("[TRAINING DiscreteHiddenMarkovModel]");
}
DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const UINT numStates,const UINT numSymbols,const UINT modelType,const UINT delta){
    this->numStates = numStates;
    this->numSymbols = numSymbols;
    this->modelType = modelType;
    this->delta = delta;
    numRandomTrainingIterations = 5;
    
    debugLog.setProceedingText("[DEBUG DiscreteHiddenMarkovModel]");
    errorLog.setProceedingText("[ERROR DiscreteHiddenMarkovModel]");
    warningLog.setProceedingText("[WARNING DiscreteHiddenMarkovModel]");
    trainingLog.setProceedingText("[TRAINING DiscreteHiddenMarkovModel]");
    
    //Initialize a, b, and pi with random values for the given model size
    randomizeMatrices(numStates,numSymbols);
}
DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const MatrixDouble &a,const MatrixDouble &b,const VectorDouble &pi,const UINT modelType,const UINT delta){
    numRandomTrainingIterations = 5;
    
    debugLog.setProceedingText("[DEBUG DiscreteHiddenMarkovModel]");
    errorLog.setProceedingText("[ERROR DiscreteHiddenMarkovModel]");
    warningLog.setProceedingText("[WARNING DiscreteHiddenMarkovModel]");
    trainingLog.setProceedingText("[TRAINING DiscreteHiddenMarkovModel]");
    
    //a must be a square numStates x numStates matrix, b must have one row per state, and pi must have one entry per state
    if( a.getNumRows() == a.getNumCols() && a.getNumRows() == b.getNumRows() && a.getNumRows() == pi.size() ){
        this->a = a;
        this->b = b;
        this->pi = pi;
        this->modelType = modelType;
        this->delta = delta;
        numStates = b.getNumRows();
        numSymbols = b.getNumCols();
    }else{
        errorLog << "DiscreteHiddenMarkovModel(...) - The a,b,pi sizes are invalid!" << endl;
    }
}
DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const DiscreteHiddenMarkovModel &rhs){
    this->numStates = rhs.numStates;
    this->numSymbols = rhs.numSymbols;
    this->delta = rhs.delta;
    this->numRandomTrainingIterations = rhs.numRandomTrainingIterations;
    this->cThreshold = rhs.cThreshold;
    this->modelType = rhs.modelType;
    this->logLikelihood = rhs.logLikelihood;
    this->trainingLog = rhs.trainingLog;
    
    debugLog.setProceedingText("[DEBUG DiscreteHiddenMarkovModel]");
    errorLog.setProceedingText("[ERROR DiscreteHiddenMarkovModel]");
    warningLog.setProceedingText("[WARNING DiscreteHiddenMarkovModel]");
    trainingLog.setProceedingText("[TRAINING DiscreteHiddenMarkovModel]");
}
DiscreteHiddenMarkovModel::~DiscreteHiddenMarkovModel(){
    
}
bool DiscreteHiddenMarkovModel::resetModel(const UINT numStates,const UINT numSymbols,const UINT modelType,const UINT delta){
    this->numStates = numStates;
    this->numSymbols = numSymbols;
    this->modelType = modelType;
    this->delta = delta;
    return randomizeMatrices(numStates,numSymbols);
}
bool DiscreteHiddenMarkovModel::randomizeMatrices(const UINT numStates,const UINT numSymbols){
    
    this->numStates = numStates;
    this->numSymbols = numSymbols;
    a.resize(numStates,numStates);
    b.resize(numStates,numSymbols);
    pi.resize(numStates);
    
    //Fill a, b, and pi with random values in the range [0.9 1.0] before they are normalized
    Random random;
    for(UINT i=0; i<a.getNumRows(); i++)
        for(UINT j=0; j<a.getNumCols(); j++)
            a[i][j] = random.getRandomNumberUniform(0.9,1);
    
    for(UINT i=0; i<b.getNumRows(); i++)
        for(UINT j=0; j<b.getNumCols(); j++)
            b[i][j] = random.getRandomNumberUniform(0.9,1);
    
    for(UINT i=0; i<numStates; i++)
        pi[i] = random.getRandomNumberUniform(0.9,1);
    
    //Apply the model-type constraints
    switch( modelType ){
        case HMM_ERGODIC:
            //Nothing to do, all transitions are allowed
            break;
        case HMM_LEFTRIGHT:
            //Zero out transitions that move backwards or skip more than delta states ahead
            for(UINT i=0; i<numStates; i++)
                for(UINT j=0; j<numStates; j++)
                    if((j<i) || (j>i+delta)) a[i][j] = 0.0;
            
            //Force the model to start in the first state
            for(UINT i=0; i<numStates; i++){
                pi[i] = i==0 ? 1 : 0;
            }
            break;
        default:
            throw("HMM_ERROR: Unknown model type!");
            break;
    }
    
    //Normalize the rows of a and b, and pi, so that each is a valid probability distribution
    for (UINT i=0; i<numStates; i++) {
        double sum = 0.0;
        for (UINT j=0; j<numStates; j++) sum += a[i][j];
        for (UINT j=0; j<numStates; j++) a[i][j] /= sum;
    }
    for (UINT i=0; i<numStates; i++) {
        double sum = 0.0;
        for (UINT k=0; k<numSymbols; k++) sum += b[i][k];
        for (UINT k=0; k<numSymbols; k++) b[i][k] /= sum;
    }
    double sum = 0.0;
    for (UINT i=0; i<numStates; i++) sum += pi[i];
    for (UINT i=0; i<numStates; i++) pi[i] /= sum;
    
    return true;
}
double DiscreteHiddenMarkovModel::predict(const UINT newSample){
    
    //Add the new sample to the observation buffer and score the buffered sequence
    observationSequence.push_back( newSample );
    vector< UINT > obs = observationSequence.getDataAsVector();
    
    return predict( obs );
}

double DiscreteHiddenMarkovModel::predict(const vector<UINT> &obs){
    
    const int N = (int)numStates;
    const int T = (int)obs.size();
    int t,i,j;
    MatrixDouble alpha(T,numStates);
    VectorDouble c(T);
    
    //Scaled forward algorithm: initialization at t=0
    t = 0;
    c[t] = 0.0;
    for(i=0; i<N; i++){
        alpha[t][i] = pi[i]*b[i][ obs[t] ];
        c[t] += alpha[t][i];
    }
    c[t] = 1.0/c[t];
    for(i=0; i<N; i++) alpha[t][i] *= c[t];
    
    //Induction for t = 1..T-1
    for(t=1; t<T; t++){
        c[t] = 0.0;
        for(j=0; j<N; j++){
            alpha[t][j] = 0.0;
            for(i=0; i<N; i++) alpha[t][j] += alpha[t-1][i] * a[i][j];
            alpha[t][j] *= b[j][obs[t]];
            c[t] += alpha[t][j];
        }
        c[t] = 1.0/c[t];
        for(j=0; j<N; j++) alpha[t][j] *= c[t];
    }
    
    //Track the most likely (highest alpha) state at each time step
    if( int(estimatedStates.size()) != T ) estimatedStates.resize(T);
    for(t=0; t<T; t++){
        double maxValue = 0;
        for(i=0; i<N; i++){
            if( alpha[t][i] > maxValue ){
                maxValue = alpha[t][i];
                estimatedStates[t] = i;
            }
        }
    }
    
    //Return the sequence log-likelihood (the negative sum of the log scaling coefficients)
    double loglikelihood = 0.0;
    for(t=0; t<T; t++) loglikelihood += log( c[t] );
    return -loglikelihood;
}
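//predictLogLikelihood(...) scores a sequence with a Viterbi-style pass in negative log space:
//alpha[t][j] holds the smallest accumulated weight (negative log probability) of any state path
//ending in state j at time t. The smallest weight in the final column is converted back to a
//likelihood with exp(-minWeight) before being returned.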
double DiscreteHiddenMarkovModel::predictLogLikelihood(const vector<UINT> &obs){
    
    const UINT T = (unsigned int)obs.size();
    UINT t,i,j,minState = 0;
    MatrixDouble alpha(T,numStates);
    double minWeight = 0;
    double weight = 0;
    
    //Initialization at t=0
    t = 0;
    for(i=0; i<numStates; i++){
        alpha[t][i] = (-1.0 * log(pi[i])) - log(b[i][ obs[t] ]);
    }
    
    //Induction: for each state j, keep the minimum accumulated weight over all predecessor states i
    for (t=1; t<T; t++){
        for (j=0; j<numStates; j++){
            minState = 0;
            minWeight = alpha[t-1][0] - log(a[0][j]);
            
            for (i=1; i<numStates; i++){
                weight = alpha[t-1][i] - log(a[i][j]);
                if (weight < minWeight){
                    minState = i;
                    minWeight = weight;
                }
            }
            
            alpha[t][j] = minWeight - log(b[j][ obs[t] ]);
        }
    }
    
    //Termination: find the smallest weight in the final column
    minState = 0;
    minWeight = alpha[T-1][0];
    for(i=1; i<numStates; i++){
        if (alpha[T-1][i] < minWeight){
            minState = i;
            minWeight = alpha[T-1][i];
        }
    }
    
    //Convert the minimum negative log weight back to a likelihood
    return exp(-minWeight);
}
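//forwardBackward(...) fills the HMMTrainingObject with the scaled forward (alpha) and backward (beta)
//variables for a single observation sequence, stores the scaling coefficients in hmm.c, and accumulates
//the sum of their logs in hmm.pk. It returns false if hmm.pk becomes infinite, so train_() can abort
//the Baum-Welch updates on a degenerate sequence.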
bool DiscreteHiddenMarkovModel::forwardBackward(HMMTrainingObject &hmm,const vector<UINT> &obs){
    
    const int N = (int)numStates;
    const int T = (int)obs.size();
    int t,i,j;
    
    //Forward pass (scaled): initialization at t=0
    t = 0;
    hmm.c[t] = 0.0;
    for(i=0; i<N; i++){
        hmm.alpha[t][i] = pi[i]*b[i][ obs[t] ];
        hmm.c[t] += hmm.alpha[t][i];
    }
    hmm.c[t] = 1.0/hmm.c[t];
    for(i=0; i<N; i++) hmm.alpha[t][i] *= hmm.c[t];
    
    //Forward induction for t = 1..T-1
    for(t=1; t<T; t++){
        hmm.c[t] = 0.0;
        for(j=0; j<N; j++){
            hmm.alpha[t][j] = 0.0;
            for(i=0; i<N; i++) hmm.alpha[t][j] += hmm.alpha[t-1][i] * a[i][j];
            hmm.alpha[t][j] *= b[j][obs[t]];
            hmm.c[t] += hmm.alpha[t][j];
        }
        hmm.c[t] = 1.0/hmm.c[t];
        for(j=0; j<N; j++) hmm.alpha[t][j] *= hmm.c[t];
    }
    
    //Accumulate the sum of the log scaling coefficients
    hmm.pk = 0.0;
    for(t=0; t<T; t++) hmm.pk += log( hmm.c[t] );
    if( grt_isinf(hmm.pk) ){
        return false;
    }
    
    //Backward pass (scaled): initialization at t=T-1, reusing the forward scaling coefficients
    t = T-1;
    for(i=0; i<N; i++) hmm.beta[t][i] = 1.0;
    for(i=0; i<N; i++) hmm.beta[t][i] *= hmm.c[t];
    
    //Backward induction for t = T-2 down to 0
    for(t=T-2; t>=0; t--){
        for(i=0; i<N; i++){
            hmm.beta[t][i] = 0.0;
            for(j=0; j<N; j++) hmm.beta[t][i] += a[i][j] * b[j][ obs[t+1] ] * hmm.beta[t+1][j];
            hmm.beta[t][i] *= hmm.c[t];
        }
    }
    
    return true;
}
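//train(...) optionally restarts training from several random initializations: each candidate A/B pair
//is trained for a small number of epochs (at most 10), the pair that reaches the best log-likelihood
//is kept, and that pair is then trained to convergence with train_(). Finally the prediction buffers
//are resized to the average training-sequence length.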
bool DiscreteHiddenMarkovModel::train(const vector< vector<UINT> > &trainingData){
    
    //Clear any previous models
    observationSequence.clear();
    estimatedStates.clear();
    trainingIterationLog.clear();
    
    UINT n,currentIter, bestIndex = 0;
    double newLoglikelihood, bestLogValue = 0;
    
    if( numRandomTrainingIterations > 1 ){
        
        //A buffer to keep track of each A/B matrix pair and the log-likelihood it reached
        vector< MatrixDouble > aTracker( numRandomTrainingIterations );
        vector< MatrixDouble > bTracker( numRandomTrainingIterations );
        vector< double > loglikelihoodTracker( numRandomTrainingIterations );
        
        UINT maxNumTestIter = maxNumEpochs > 10 ? 10 : maxNumEpochs;
        
        //Try several random starting points, training each for a few epochs
        for(n=0; n<numRandomTrainingIterations; n++){
            //Reset the A, B, and Pi matrices to random values
            randomizeMatrices(numStates,numSymbols);
            
            if( !train_(trainingData,maxNumTestIter,currentIter,newLoglikelihood) ){
                return false;
            }
            aTracker[n] = a;
            bTracker[n] = b;
            loglikelihoodTracker[n] = newLoglikelihood;
        }
        
        //Find the trial that reached the best log-likelihood value
        bestIndex = 0;
        bestLogValue = loglikelihoodTracker[0];
        for(n=1; n<numRandomTrainingIterations; n++){
            if(bestLogValue < loglikelihoodTracker[n]){
                bestLogValue = loglikelihoodTracker[n];
                bestIndex = n;
            }
        }
        
        //Restore the best A and B matrices and finish training them below
        a = aTracker[bestIndex];
        b = bTracker[bestIndex];
    }else{
        randomizeMatrices(numStates,numSymbols);
    }
    
    if( !train_(trainingData,maxNumEpochs,currentIter,newLoglikelihood) ){
        return false;
    }
    
    //Resize the prediction buffers to the average observation length
    const UINT numObs = (unsigned int)trainingData.size();
    UINT k = 0;
    UINT averageObsLength = 0;
    for(k=0; k<numObs; k++){
        const UINT T = (unsigned int)trainingData[k].size();
        averageObsLength += T;
    }
    averageObsLength = (UINT)floor( averageObsLength/double(numObs) );
    observationSequence.resize( averageObsLength );
    estimatedStates.resize( averageObsLength );
    
    return true;
}
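//train_(...) is the Baum-Welch re-estimation loop. Each iteration runs forwardBackward(...) on every
//observation sequence, averages the resulting hmm.pk values into newLoglikelihood, and then
//re-estimates the model parameters from the scaled alpha/beta variables:
//
//  a[i][j] <- [ sum_k sum_t alpha_t(i) * a[i][j] * b[j](o_{t+1}) * beta_{t+1}(j) ]
//             / [ sum_k sum_t alpha_t(i) * beta_t(i) / c_t ]
//
//  b[i][j] <- [ sum_k sum_{t : o_t = j} alpha_t(i) * beta_t(i) / c_t ]
//             / [ sum_k sum_t alpha_t(i) * beta_t(i) / c_t ]
//
//For ergodic models pi is also re-estimated from gamma; left-right models keep pi fixed on the first
//state. Training stops when the change in log-likelihood drops below minChange or maxIter is reached.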
bool DiscreteHiddenMarkovModel::train_(const vector< vector<UINT> > &obs,const UINT maxIter,UINT &currentIter,double &newLoglikelihood){
    
    const UINT numObs = (unsigned int)obs.size();
    UINT i,j,k,t = 0;
    double num,denom,oldLoglikelihood = 0;
    bool keepTraining = true;
    trainingIterationLog.clear();
    
    //Create a training object for each observation sequence, plus the epsilon and gamma buffers
    vector< HMMTrainingObject > hmms( numObs );
    vector< vector< MatrixDouble > > epsilon( numObs );
    vector< MatrixDouble > gamma( numObs );
    
    //Resize the buffers to match each observation sequence length
    for(k=0; k<numObs; k++){
        const UINT T = (UINT)obs[k].size();
        gamma[k].resize(T,numStates);
        epsilon[k].resize(T);
        for(t=0; t<T; t++) epsilon[k][t].resize(numStates,numStates);
        
        hmms[k].alpha.resize(T,numStates);
        hmms[k].beta.resize(T,numStates);
        hmms[k].c.resize(T);
    }
    
    oldLoglikelihood = 0;
    newLoglikelihood = 0;
    currentIter = 0;
    
    do{
        newLoglikelihood = 0.0;
        
        //Run the forward-backward pass on every observation sequence
        for(k=0; k<numObs; k++){
            if( !forwardBackward(hmms[k],obs[k]) ){
                return false;
            }
            newLoglikelihood += hmms[k].pk;
        }
        
        //Set the new log-likelihood to the average over the observation sequences
        newLoglikelihood /= numObs;
        
        trainingIterationLog.push_back( newLoglikelihood );
        
        if( ++currentIter >= maxIter ){ keepTraining = false; trainingLog << "Max Iter Reached! Stopping Training" << endl; }
        if( fabs(newLoglikelihood-oldLoglikelihood) < minChange && currentIter > 1 ){ keepTraining = false; trainingLog << "Min Improvement Reached! Stopping Training" << endl; }
        
        trainingLog << "Iter: " << currentIter << " logLikelihood: " << newLoglikelihood << " change: " << oldLoglikelihood - newLoglikelihood << endl;
        
        oldLoglikelihood = newLoglikelihood;
        
        //Re-estimate the A matrix
        for(i=0; i<numStates; i++){
            
            //Compute the denominator of A (it is the same for all j)
            denom = 0;
            for(k=0; k<numObs; k++){
                for(t=0; t<obs[k].size()-1; t++){
                    denom += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                }
            }
            
            if( denom > 0 ){
                //Compute the numerator and update A[i][j]
                for(j=0; j<numStates; j++){
                    num = 0;
                    for(k=0; k<numObs; k++){
                        for(t=0; t<obs[k].size()-1; t++){
                            num += hmms[k].alpha[t][i] * a[i][j] * b[j][ obs[k][t+1] ] * hmms[k].beta[t+1][j];
                        }
                    }
                    a[i][j] = num/denom;
                }
            }else{
                errorLog << "Denom is zero for A!" << endl;
                return false;
            }
        }
        
        //Re-estimate the B matrix
        bool renormB = false;
        for(i=0; i<numStates; i++){
            for(j=0; j<numSymbols; j++){
                num = 0.0;
                denom = 0.0;
                for(k=0; k<numObs; k++){
                    const UINT T = (unsigned int)obs[k].size();
                    for(t=0; t<T; t++){
                        if( obs[k][t] == j ){
                            num += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                        }
                        denom += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                    }
                }
                
                if( denom == 0 ){
                    errorLog << "Denominator is zero for B!" << endl;
                    return false;
                }
                //If the numerator is zero the entry is zeroed and the row is re-normalized below
                if( num > 0 ) b[i][j] = denom > 0 ? num/denom : 1.0e-5;
                else{ b[i][j] = 0; renormB = true; }
            }
        }
        
        if( renormB ){
            //Spread a small amount of probability mass over every symbol and re-normalize each row of B
            for (UINT i=0; i<numStates; i++) {
                double sum = 0.0;
                for (UINT k=0; k<numSymbols; k++){
                    b[i][k] += 1.0/numSymbols;
                    sum += b[i][k];
                }
                for (UINT k=0; k<numSymbols; k++) b[i][k] /= sum;
            }
        }
        
        //Re-estimate Pi (ergodic models only; left-right models always start in the first state)
        if (modelType==HMM_ERGODIC ){
            for(k=0; k<numObs; k++){
                const UINT T = (unsigned int)obs[k].size();
                
                //Compute epsilon for every time step and state pair
                for(t=0; t<T-1; t++){
                    denom = 0.0;
                    for(i=0; i<numStates; i++){
                        for(j=0; j<numStates; j++){
                            epsilon[k][t][i][j] = hmms[k].alpha[t][i] * a[i][j] * b[j][ obs[k][t+1] ] * hmms[k].beta[t+1][j];
                            denom += epsilon[k][t][i][j];
                        }
                    }
                    //Normalize epsilon
                    for(i=0; i<numStates; i++){
                        for(j=0; j<numStates; j++){
                            if(denom!=0) epsilon[k][t][i][j] /= denom;
                            else{ epsilon[k][t][i][j] = 0; }
                        }
                    }
                }
                
                //Compute gamma by summing epsilon over the destination states
                for(t=0; t<T-1; t++){
                    for(i=0; i<numStates; i++){
                        gamma[k][t][i] = 0.0;
                        for(j=0; j<numStates; j++)
                            gamma[k][t][i] += epsilon[k][t][i][j];
                    }
                }
            }
            
            double sum = 0;
            for(i=0; i<numStates; i++){
                sum = 0.0;
                for(k=0; k<numObs; k++){
                    sum += gamma[k][0][i];
                }
                pi[i] = sum / numObs;
            }
        }
        
    }while( keepTraining );
    
    return true;
}
bool DiscreteHiddenMarkovModel::reset(){
    
    //Reset the observation buffer by filling it with zeros
    for(UINT i=0; i<observationSequence.getSize(); i++){
        observationSequence.push_back( 0 );
    }
    
    return true;
}
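//The save/load methods below serialize the model as a plain-text file: a version header
//(DISCRETE_HMM_MODEL_FILE_V1.0), the MLBase settings, the named scalar fields (NumStates, NumSymbols,
//ModelType, Delta, Threshold, NumRandomTrainingIterations), and then the A, B, and Pi matrices written
//row by row with tab-separated values.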
bool DiscreteHiddenMarkovModel::saveModelToFile(fstream &file) const{
    
    if( !file.is_open() ){
        errorLog << "saveModelToFile( fstream &file ) - File is not open!" << endl;
        return false;
    }
    
    //Write the header info
    file << "DISCRETE_HMM_MODEL_FILE_V1.0\n";
    
    //Write the base settings to the file
    if( !MLBase::saveBaseSettingsToFile(file) ){
        errorLog << "saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << endl;
        return false;
    }
    
    file << "NumStates: " << numStates << endl;
    file << "NumSymbols: " << numSymbols << endl;
    file << "ModelType: " << modelType << endl;
    file << "Delta: " << delta << endl;
    file << "Threshold: " << cThreshold << endl;
    file << "NumRandomTrainingIterations: " << numRandomTrainingIterations << endl;
    
    file << "A:\n";
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numStates; j++){
            file << a[i][j];
            if( j+1 < numStates ) file << "\t";
        }
        file << endl;
    }
    
    file << "B:\n";
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numSymbols; j++){
            file << b[i][j];
            if( j+1 < numSymbols ) file << "\t";
        }
        file << endl;
    }
    
    file << "Pi:\n";
    for(UINT i=0; i<numStates; i++){
        file << pi[i];
        if( i+1 < numStates ) file << "\t";
    }
    file << endl;
    
    return true;
}
bool DiscreteHiddenMarkovModel::loadModelFromFile(fstream &file){
    
    if( !file.is_open() ){
        errorLog << "loadModelFromFile( fstream &file ) - File is not open!" << endl;
        return false;
    }
    
    string word;
    
    //Find the file type header
    file >> word;
    if(word != "DISCRETE_HMM_MODEL_FILE_V1.0"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find Model File Header!" << endl;
        return false;
    }
    
    if( !MLBase::loadBaseSettingsFromFile(file) ){
        errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << endl;
        return false;
    }
    
    file >> word;
    if(word != "NumStates:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the NumStates header." << endl;
        return false;
    }
    file >> numStates;
    
    file >> word;
    if(word != "NumSymbols:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the NumSymbols header." << endl;
        return false;
    }
    file >> numSymbols;
    
    file >> word;
    if(word != "ModelType:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the modelType for the header." << endl;
        return false;
    }
    file >> modelType;
    
    file >> word;
    if(word != "Delta:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the Delta for the header." << endl;
        return false;
    }
    file >> delta;
    
    file >> word;
    if(word != "Threshold:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the Threshold for the header." << endl;
        return false;
    }
    file >> cThreshold;
    
    file >> word;
    if(word != "NumRandomTrainingIterations:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the numRandomTrainingIterations header." << endl;
        return false;
    }
    file >> numRandomTrainingIterations;
    
    a.resize(numStates,numStates);
    b.resize(numStates,numSymbols);
    pi.resize(numStates);
    
    //Load the A, B, and Pi matrices
    file >> word;
    if(word != "A:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the A matrix header." << endl;
        return false;
    }
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numStates; j++){
            file >> a[i][j];
        }
    }
    
    file >> word;
    if(word != "B:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the B matrix header." << endl;
        return false;
    }
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numSymbols; j++){
            file >> b[i][j];
        }
    }
    
    file >> word;
    if(word != "Pi:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the Pi matrix header." << endl;
        return false;
    }
    for(UINT i=0; i<numStates; i++){
        file >> pi[i];
    }
    
    return true;
}
bool DiscreteHiddenMarkovModel::print() const{
    
    trainingLog << "A: " << endl;
    for(UINT i=0; i<a.getNumRows(); i++){
        for(UINT j=0; j<a.getNumCols(); j++){
            trainingLog << a[i][j] << "\t";
        }
        trainingLog << endl;
    }
    
    trainingLog << "B: " << endl;
    for(UINT i=0; i<b.getNumRows(); i++){
        for(UINT j=0; j<b.getNumCols(); j++){
            trainingLog << b[i][j] << "\t";
        }
        trainingLog << endl;
    }
    
    trainingLog << "Pi: ";
    for(UINT i=0; i<pi.size(); i++){
        trainingLog << pi[i] << "\t";
    }
    trainingLog << endl;
    
    //Check that each row of A and B, and Pi, still sums to (approximately) 1
    for(UINT i=0; i<a.getNumRows(); i++){
        double sum = 0.0;
        for(UINT j=0; j<a.getNumCols(); j++) sum += a[i][j];
        if( sum <= 0.99 || sum >= 1.01 ) warningLog << "WARNING: A Row " << i << " Sum: " << sum << endl;
    }
    
    for(UINT i=0; i<b.getNumRows(); i++){
        double sum = 0.0;
        for(UINT j=0; j<b.getNumCols(); j++) sum += b[i][j];
        if( sum <= 0.99 || sum >= 1.01 ) warningLog << "WARNING: B Row " << i << " Sum: " << sum << endl;
    }
    
    return true;
}
VectorDouble DiscreteHiddenMarkovModel::getTrainingIterationLog() const{
    return trainingIterationLog;
}

This class implements a discrete Hidden Markov Model: observations are symbols drawn from a finite alphabet of numSymbols values, the topology can be ergodic (HMM_ERGODIC) or left-right (HMM_LEFTRIGHT), and the model is trained with Baum-Welch re-estimation over one or more discrete observation sequences.
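A minimal usage sketch against the API shown above. This assumes the usual GRT umbrella header and GRT namespace, that the HMM_LEFTRIGHT constant is visible at namespace scope (as it is used unqualified in the listing), and purely illustrative values for the state count, alphabet size, delta, and training symbols:

#include <GRT/GRT.h>
#include <iostream>
#include <vector>
using namespace GRT;
using namespace std;

int main(){

    //Hypothetical example values: 4 states, an alphabet of 8 symbols, left-right topology with delta = 1
    DiscreteHiddenMarkovModel hmm( 4, 8, HMM_LEFTRIGHT, 1 );

    //Train the model from a set of discrete observation sequences (every symbol value must be < numSymbols)
    vector< vector< UINT > > trainingData;
    trainingData.push_back( vector< UINT >{ 0, 1, 2, 3, 4, 5, 6, 7 } );
    trainingData.push_back( vector< UINT >{ 0, 0, 1, 2, 3, 3, 5, 7 } );
    if( !hmm.train( trainingData ) ) return 1;

    //Score a new sequence: predict(...) runs the scaled forward algorithm and returns the log-likelihood
    vector< UINT > testSequence{ 0, 1, 1, 2, 4, 5, 7, 7 };
    cout << "log-likelihood: " << hmm.predict( testSequence ) << endl;

    return 0;
}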