GestureRecognitionToolkit  Version: 1.0 Revision: 04-03-15
The Gesture Recognition Toolkit (GRT) is a cross-platform, open-source, c++ machine learning library for real-time gesture recognition.
DiscreteHiddenMarkovModel.cpp
1 /*
2  GRT MIT License
3  Copyright (c) <2012> <Nicholas Gillian, Media Lab, MIT>
4 
5  Permission is hereby granted, free of charge, to any person obtaining a copy of this software
6  and associated documentation files (the "Software"), to deal in the Software without restriction,
7  including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
9  subject to the following conditions:
10 
11  The above copyright notice and this permission notice shall be included in all copies or substantial
12  portions of the Software.
13 
14  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15  LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19  */
20 
22 
23 using namespace std;
24 
25 namespace GRT {
26 
27 //Default constructor
28 DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(){
29  numStates = 0;
30  numSymbols = 0;
31  delta = 1;
32  numRandomTrainingIterations = 5;
33  maxNumEpochs = 100;
34  cThreshold = -1000;
35  modelType = HMM_LEFTRIGHT;
36  logLikelihood = 0.0;
37  minChange = 1.0e-5;
38 
39  debugLog.setProceedingText("[DEBUG DiscreteHiddenMarkovModel]");
40  errorLog.setProceedingText("[ERROR DiscreteHiddenMarkovModel]");
41  warningLog.setProceedingText("[WARNING DiscreteHiddenMarkovModel]");
42  trainingLog.setProceedingText("[TRAINING DiscreteHiddenMarkovModel]");
43 }
44 
45 //Init the model with a set number of states and symbols
46 DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const UINT numStates,const UINT numSymbols,const UINT modelType,const UINT delta){
47  this->numStates = numStates;
48  this->numSymbols = numSymbols;
49  this->modelType = modelType;
50  this->delta = delta;
51  logLikelihood = 0.0;
52  numRandomTrainingIterations = 5;
53  cThreshold = -1000;
54  logLikelihood = 0.0;
55 
56  debugLog.setProceedingText("[DEBUG DiscreteHiddenMarkovModel]");
57  errorLog.setProceedingText("[ERROR DiscreteHiddenMarkovModel]");
58  warningLog.setProceedingText("[WARNING DiscreteHiddenMarkovModel]");
59  trainingLog.setProceedingText("[TRAINING DiscreteHiddenMarkovModel]");
60 
61  randomizeMatrices(numStates,numSymbols);
62 }
63 
64 //Init the model with a pre-trained a, b, and pi matrices
65 DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const MatrixDouble &a,const MatrixDouble &b,const VectorDouble &pi,const UINT modelType,const UINT delta){
66 
67  numStates = 0;
68  numSymbols = 0;
69  numRandomTrainingIterations = 5;
70  maxNumEpochs = 100;
71  cThreshold = -1000;
72  logLikelihood = 0.0;
73  minChange = 1.0e-5;
74 
75  debugLog.setProceedingText("[DEBUG DiscreteHiddenMarkovModel]");
76  errorLog.setProceedingText("[ERROR DiscreteHiddenMarkovModel]");
77  warningLog.setProceedingText("[WARNING DiscreteHiddenMarkovModel]");
78  trainingLog.setProceedingText("[TRAINING DiscreteHiddenMarkovModel]");
79 
80  if( a.getNumRows() == a.getNumRows() && a.getNumRows() == b.getNumRows() && a.getNumRows() == pi.size() ){
81  this->a = a;
82  this->b = b;
83  this->pi = pi;
84  this->modelType = modelType;
85  this->delta = delta;
86  numStates = b.getNumRows();
87  numSymbols = b.getNumCols();
88  trained = true;
89  }else{
90  errorLog << "DiscreteHiddenMarkovModel(...) - The a,b,pi sizes are invalid!" << endl;
91  }
92 }
93 
94 DiscreteHiddenMarkovModel::DiscreteHiddenMarkovModel(const DiscreteHiddenMarkovModel &rhs){
95  this->numStates = rhs.numStates;
96  this->numSymbols = rhs.numSymbols;
97  this->delta = rhs.delta;
98  this->numRandomTrainingIterations = rhs.numRandomTrainingIterations;
99  this->cThreshold = rhs.cThreshold;
100  this->modelType = rhs.modelType;
101  this->logLikelihood = rhs.logLikelihood;
102  this->a = rhs.a;
103  this->b = rhs.b;
104  this->pi = rhs.pi;
105  this->trainingLog = rhs.trainingLog;
106 
107  debugLog.setProceedingText("[DEBUG DiscreteHiddenMarkovModel]");
108  errorLog.setProceedingText("[ERROR DiscreteHiddenMarkovModel]");
109  warningLog.setProceedingText("[WARNING DiscreteHiddenMarkovModel]");
110  trainingLog.setProceedingText("[TRAINING DiscreteHiddenMarkovModel]");
111 }
112 
//Default destructor
DiscreteHiddenMarkovModel::~DiscreteHiddenMarkovModel(){
    //Nothing to release explicitly: all members clean themselves up via their own destructors
}
117 
118 //This can be called at any time to reset the entire model
119 bool DiscreteHiddenMarkovModel::resetModel(const UINT numStates,const UINT numSymbols,const UINT modelType,const UINT delta){
120  this->numStates = numStates;
121  this->numSymbols = numSymbols;
122  this->modelType = modelType;
123  this->delta = delta;
124  return randomizeMatrices(numStates,numSymbols);
125 }
126 
127 bool DiscreteHiddenMarkovModel::randomizeMatrices(const UINT numStates,const UINT numSymbols){
128 
129  //Set the model as untrained as everything will now be reset
130  trained = false;
131  logLikelihood = 0.0;
132 
133  //Set the new state and symbol size
134  this->numStates = numStates;
135  this->numSymbols = numSymbols;
136  a.resize(numStates,numStates);
137  b.resize(numStates,numSymbols);
138  pi.resize(numStates);
139 
140  //Fill Transition and Symbol Matrices randomly
141  //It's best to choose values in the range [0.9 1.1] rather than [0 1]
142  //That way, no single value will get too large or too small a weight when the values are normalized
143  Random random;
144  for(UINT i=0; i<a.getNumRows(); i++)
145  for(UINT j=0; j<a.getNumCols(); j++)
146  a[i][j] = random.getRandomNumberUniform(0.9,1);
147 
148  for(UINT i=0; i<b.getNumRows(); i++)
149  for(UINT j=0; j<b.getNumCols(); j++)
150  b[i][j] = random.getRandomNumberUniform(0.9,1);
151 
152  //Randomise pi
153  for(UINT i=0; i<numStates; i++)
154  pi[i] = random.getRandomNumberUniform(0.9,1);
155 
156  //Set any constraints on the model
157  switch( modelType ){
158  case(HMM_ERGODIC):
159  //Don't need todo anything
160  break;
161  case(HMM_LEFTRIGHT):
162  //Set the state transitions constraints
163  for(UINT i=0; i<numStates; i++)
164  for(UINT j=0; j<numStates; j++)
165  if((j<i) || (j>i+delta)) a[i][j] = 0.0;
166 
167  //Set pi to start in state 0
168  for(UINT i=0; i<numStates; i++){
169  pi[i] = i==0 ? 1 : 0;
170  }
171  break;
172  default:
173  throw("HMM_ERROR: Unkown model type!");
174  return false;
175  break;
176  }
177 
178  //Normalize the matrices
179  double sum=0.0;
180  for (UINT i=0; i<numStates; i++) {
181  sum = 0.;
182  for (UINT j=0; j<numStates; j++) sum += a[i][j];
183  for (UINT j=0; j<numStates; j++) a[i][j] /= sum;
184  }
185  for (UINT i=0; i<numStates; i++) {
186  sum = 0.;
187  for (UINT k=0; k<numSymbols; k++) sum += b[i][k];
188  for (UINT k=0; k<numSymbols; k++) b[i][k] /= sum;
189  }
190 
191  //Normalise pi
192  sum = 0.0;
193  for (UINT i=0; i<numStates; i++) sum += pi[i];
194  for (UINT i=0; i<numStates; i++) pi[i] /= sum;
195 
196  return true;
197 }
198 
199 double DiscreteHiddenMarkovModel::predict(const UINT newSample){
200 
201  if( !trained ){
202  return 0;
203  }
204 
205  observationSequence.push_back( newSample );
206 
207  vector< UINT > obs = observationSequence.getDataAsVector();
208 
209  return predict(obs);
210 }
211 
/*double predict(const vector<UINT> &obs)
 - This method computes P(O|A,B,Pi) using the scaled forward algorithm.
 - It also fills estimatedStates with the most likely state at each timestep
   (argmax over the scaled forward variable alpha).
 - Returns the negative of the accumulated log scaling coefficients, i.e. the
   log likelihood of the observation sequence under the model.
 - NOTE(review): if any c[t] sums to zero (an impossible observation under the
   model) the scaling division below produces inf — confirm callers guard this.
 */
double DiscreteHiddenMarkovModel::predict(const vector<UINT> &obs){

    const int N = (int)numStates;
    const int T = (int)obs.size();
    int t,i,j = 0;
    MatrixDouble alpha(T,numStates);  //alpha[t][i]: scaled forward variable
    VectorDouble c(T);                //c[t]: scaling coefficient at time t

    //Step 1: Init at t=0, alpha[0][i] = pi_i * b_i(obs_0)
    t = 0;
    c[t] = 0.0;
    for(i=0; i<N; i++){
        alpha[t][i] = pi[i]*b[i][ obs[t] ];
        c[t] += alpha[t][i];
    }

    //Set the inital scaling coeff (reciprocal of the column sum)
    c[t] = 1.0/c[t];

    //Scale alpha so it sums to 1 across states
    for(i=0; i<N; i++) alpha[t][i] *= c[t];

    //Step 2: Induction - propagate alpha forward through the transition and emission matrices
    for(t=1; t<T; t++){
        c[t] = 0.0;
        for(j=0; j<N; j++){
            alpha[t][j] = 0.0;
            for(i=0; i<N; i++){
                alpha[t][j] += alpha[t-1][i] * a[i][j];
            }
            alpha[t][j] *= b[j][obs[t]];
            c[t] += alpha[t][j];
        }

        //Set the scaling coeff
        c[t] = 1.0/c[t];

        //Scale Alpha
        for(j=0; j<N; j++) alpha[t][j] *= c[t];
    }

    //Record the most likely state at each timestep (argmax of alpha[t])
    //NOTE(review): if every alpha[t][i] is exactly zero, the previous value of
    //estimatedStates[t] is left unchanged due to the strict > comparison
    if( int(estimatedStates.size()) != T ) estimatedStates.resize(T);
    for(t=0; t<T; t++){
        double maxValue = 0;
        for(i=0; i<N; i++){
            if( alpha[t][i] > maxValue ){
                maxValue = alpha[t][i];
                estimatedStates[t] = i;
            }
        }
    }

    //Termination: log P(O|model) = -sum_t log(c[t]) under the scaling scheme
    double loglikelihood = 0.0;
    for(t=0; t<T; t++) loglikelihood += log( c[t] );
    return -loglikelihood; //Return the negative log likelihood
}
273 
274 /*double predictLogLikelihood(Vector<UINT> &obs)
275 - This method computes P(O|A,B,Pi) using the forward algorithm
276 */
277 double DiscreteHiddenMarkovModel::predictLogLikelihood(const vector<UINT> &obs){
278 
279  const UINT T = (unsigned int)obs.size();
280  UINT t,i,j,minState = 0;
281  MatrixDouble alpha(T,numStates);
282  double minWeight = 0;
283  double weight = 0;
284 
285  // Base
286  t = 0;
287  for(i=0; i<numStates; i++){
288  alpha[t][i] = (-1.0 * log(pi[i])) - log(b[i][ obs[t] ]);
289  }
290 
291  // Induction
292  for (t=1; t<T; t++){
293  for (j=0; j<numStates; j++){
294  minState = 0;
295  minWeight = alpha[t-1][j] - log(a[0][j]);
296 
297  for (i=1; i<numStates; i++){
298  weight = alpha[t-1][i] - log(a[i][j]);
299 
300  if (weight < minWeight)
301  {
302  minState = i;
303  minWeight = weight;
304  }
305  }
306 
307  alpha[t][j] = minWeight - log(b[j][ obs[t] ]);
308  }
309  }
310 
311  // Find minimum value for time T-1
312  minState = 1;
313  minWeight = alpha[T - 1][0];
314 
315  for(i=1; i<numStates; i++)
316  {
317  if (alpha[T-1][i] < minWeight)
318  {
319  minState = i;
320  minWeight = alpha[T-1][i];
321  }
322  }
323 
324  // Returns the sequence probability
325  return exp(-minWeight);
326 }
327 
/*bool forwardBackward(HMMTrainingObject &hmm,const vector<UINT> &obs)
- Runs one pass of the scaled forward-backward algorithm, filling hmm.alpha,
  hmm.beta, the scaling coefficients hmm.c, and the log-likelihood term hmm.pk.
- The hmm training object must be resized BEFORE calling this function!
- Returns false if the resulting log-likelihood is infinite (i.e. an observation
  was impossible under the current model).
*/
bool DiscreteHiddenMarkovModel::forwardBackward(HMMTrainingObject &hmm,const vector<UINT> &obs){

    const int N = (int)numStates;
    const int T = (int)obs.size();
    int t,i,j = 0;

    //Forward pass
    //Step 1: Init at t=0, alpha[0][i] = pi_i * b_i(obs_0)
    t = 0;
    hmm.c[t] = 0.0;
    for(i=0; i<N; i++){
        hmm.alpha[t][i] = pi[i]*b[i][ obs[t] ];
        hmm.c[t] += hmm.alpha[t][i];
    }

    //Set the inital scaling coeff (reciprocal of the state sum)
    hmm.c[t] = 1.0/hmm.c[t];

    //Scale alpha so it sums to 1 across states
    for(i=0; i<N; i++) hmm.alpha[t][i] *= hmm.c[t];

    //Step 2: Induction - propagate alpha forward in time
    for(t=1; t<T; t++){
        hmm.c[t] = 0.0;
        for(j=0; j<N; j++){
            hmm.alpha[t][j] = 0.0;
            for(i=0; i<N; i++){
                hmm.alpha[t][j] += hmm.alpha[t-1][i] * a[i][j];
            }
            hmm.alpha[t][j] *= b[j][obs[t]];
            hmm.c[t] += hmm.alpha[t][j];
        }

        //Set the scaling coeff
        hmm.c[t] = 1.0/hmm.c[t];

        //Scale Alpha
        for(j=0; j<N; j++) hmm.alpha[t][j] *= hmm.c[t];
    }

    //Termination: under the scaling scheme, sum_t log(c[t]) = -log P(O|model)
    hmm.pk = 0.0;
    for(t=0; t<T; t++) hmm.pk += log( hmm.c[t] );
    //hmm.pk = - hmm.pk; //We don't really need to minus here

    //An infinite pk means some c[t] was 0 (impossible observation) - abort
    if( grt_isinf(hmm.pk) ){
        return false;
    }

    //Backward pass
    //Step 1: Init at time t=T (T-1 as everything is zero based), beta = 1
    t = T-1;
    for(i=0; i<N; i++) hmm.beta[t][i] = 1.0;

    //Scale beta, using the same coeff as A
    for(i=0; i<N; i++) hmm.beta[t][i] *= hmm.c[t];

    //Step 2: Induction, from T-1 until 1 (T-2 until 0 as everything is zero based)
    for(t=T-2; t>=0; t--){
        for(i=0; i<N; i++){
            //Calculate the backward step for t, using the scaled beta
            hmm.beta[t][i]=0.0;
            for(j=0; j<N; j++)
                hmm.beta[t][i] += a[i][j] * b[j][ obs[t+1] ] * hmm.beta[t+1][j];

            //Scale B using the same coeff as A
            hmm.beta[t][i] *= hmm.c[t];
        }
    }

    return true;
}
403 
404 /*bool batchTrain(Vector<UINT> &obs)
405 - This method
406 */
407 bool DiscreteHiddenMarkovModel::train(const vector< vector<UINT> > &trainingData){
408 
409  //Clear any previous models
410  trained = false;
411  observationSequence.clear();
412  estimatedStates.clear();
413  trainingIterationLog.clear();
414 
415  UINT n,currentIter, bestIndex = 0;
416  double newLoglikelihood, bestLogValue = 0;
417 
418  if( numRandomTrainingIterations > 1 ){
419 
420  //A buffer to keep track each AB matrix
421  vector< MatrixDouble > aTracker( numRandomTrainingIterations );
422  vector< MatrixDouble > bTracker( numRandomTrainingIterations );
423  vector< double > loglikelihoodTracker( numRandomTrainingIterations );
424 
425  UINT maxNumTestIter = maxNumEpochs > 10 ? 10 : maxNumEpochs;
426 
427  //Try and find the best starting point
428  for(n=0; n<numRandomTrainingIterations; n++){
429  //Reset the model to a new random starting values
430  randomizeMatrices(numStates,numSymbols);
431 
432  if( !train_(trainingData,maxNumTestIter,currentIter,newLoglikelihood) ){
433  return false;
434  }
435  aTracker[n] = a;
436  bTracker[n] = b;
437  loglikelihoodTracker[n] = newLoglikelihood;
438  }
439 
440  //Get the best result and set it as the a and b starting values
441  bestIndex = 0;
442  bestLogValue = loglikelihoodTracker[0];
443  for(n=1; n<numRandomTrainingIterations; n++){
444  if(bestLogValue < loglikelihoodTracker[n]){
445  bestLogValue = loglikelihoodTracker[n];
446  bestIndex = n;
447  }
448  }
449 
450  //Set a and b
451  a = aTracker[bestIndex];
452  b = bTracker[bestIndex];
453 
454  }else{
455  randomizeMatrices(numStates,numSymbols);
456  }
457 
458  //Perform the actual training
459  if( !train_(trainingData,maxNumEpochs,currentIter,newLoglikelihood) ){
460  return false;
461  }
462 
463  //Calculate the observationSequence buffer length
464  const UINT numObs = (unsigned int)trainingData.size();
465  UINT k = 0;
466  UINT averageObsLength = 0;
467  for(k=0; k<numObs; k++){
468  const UINT T = (unsigned int)trainingData[k].size();
469  averageObsLength += T;
470  }
471 
472  averageObsLength = (UINT)floor( averageObsLength/double(numObs) );
473  observationSequence.resize( averageObsLength );
474  estimatedStates.resize( averageObsLength );
475 
476  //Finally, flag that the model was trained
477  trained = true;
478 
479  return true;
480 }
481 
/*bool train_(const vector< vector<UINT> > &obs,const UINT maxIter, UINT &currentIter,double &newLoglikelihood)
- Runs Baum-Welch re-estimation over all observation sequences until either
  maxIter iterations have run or the average log likelihood improves by less
  than minChange.
- On return, currentIter holds the number of iterations run and newLoglikelihood
  the final average log likelihood.
- Returns false if the forward-backward pass fails or a re-estimation
  denominator collapses to zero.
*/
bool DiscreteHiddenMarkovModel::train_(const vector< vector<UINT> > &obs,const UINT maxIter, UINT &currentIter,double &newLoglikelihood){

    const UINT numObs = (unsigned int)obs.size();
    UINT i,j,k,t = 0;
    double num,denom,oldLoglikelihood = 0;
    bool keepTraining = true;
    trainingIterationLog.clear();

    //Create the array to hold the forward-backward data for each training instance
    vector< HMMTrainingObject > hmms( numObs );

    //Create epsilon and gamma to hold the re-estimation variables
    vector< vector< MatrixDouble > > epsilon( numObs );
    vector< MatrixDouble > gamma( numObs );

    //Resize the hmms, epsilon and gamma matrices so they are ready to be filled
    //NOTE(review): sequences are assumed non-empty here; obs[k].size()-1 below
    //would wrap around for a zero-length sequence - confirm callers guarantee this
    for(k=0; k<numObs; k++){
        const UINT T = (UINT)obs[k].size();
        gamma[k].resize(T,numStates);
        epsilon[k].resize(T);
        for(t=0; t<T; t++) epsilon[k][t].resize(numStates,numStates);

        //Resize alpha, beta and the scaling coefficients
        hmms[k].alpha.resize(T,numStates);
        hmms[k].beta.resize(T,numStates);
        hmms[k].c.resize(T);
    }

    //For each training seq, run one pass of the forward backward
    //algorithm then reestimate a and b using the Baum-Welch
    oldLoglikelihood = 0;
    newLoglikelihood = 0;
    currentIter = 0;

    do{
        newLoglikelihood = 0.0;

        //Run the forwardbackward algorithm for each training example
        for(k=0; k<numObs; k++){
            if( !forwardBackward(hmms[k],obs[k]) ){
                return false;
            }
            newLoglikelihood += hmms[k].pk;
        }

        //Set the new log likelihood as the average of the observations
        newLoglikelihood /= numObs;

        trainingIterationLog.push_back( newLoglikelihood );

        //Stopping conditions: iteration budget exhausted, or improvement below minChange
        if( ++currentIter >= maxIter ){ keepTraining = false; trainingLog << "Max Iter Reached! Stopping Training" << endl; }
        if( fabs(newLoglikelihood-oldLoglikelihood) < minChange && currentIter > 1 ){ keepTraining = false; trainingLog << "Min Improvement Reached! Stopping Training" << endl; }
        //if( newLoglikelihood < oldLoglikelihood ){ cout<<"Warning: Inverted Training!\n";}

        trainingLog << "Iter: " << currentIter << " logLikelihood: " << newLoglikelihood << " change: " << oldLoglikelihood - newLoglikelihood << endl;

        print();
        //PAUSE;

        oldLoglikelihood = newLoglikelihood;

        //Only update A, B, and Pi if needed
        if( keepTraining ){

            //Re-estimate A (the state transition matrix)
            for(i=0; i<numStates; i++){

                //Compute the denominator of A (which is independent of j):
                //the expected number of transitions out of state i
                denom = 0;
                for(k=0; k<numObs; k++){
                    for(t=0; t<obs[k].size()-1; t++){
                        denom += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                    }
                }

                //Compute the numerator (expected i->j transitions) and update a[i][j]
                if( denom > 0 ){
                    for(j=0; j<numStates; j++){
                        num = 0;
                        for(k=0; k<numObs; k++){
                            for(t=0; t<obs[k].size()-1; t++){
                                num += hmms[k].alpha[t][i] * a[i][j] * b[j][ obs[k][t+1] ] * hmms[k].beta[t+1][j];
                            }
                        }

                        //Update a[i][j]
                        a[i][j] = num/denom;
                    }
                }else{
                    errorLog << "Denom is zero for A!" << endl;
                    return false;
                }
            }

            //Re-estimate B (the emission matrix)
            bool renormB = false;
            for(i=0; i<numStates; i++){
                for(j=0; j<numSymbols; j++){
                    num=0.0;
                    denom = 0.0;
                    for(k=0; k<numObs; k++){
                        const UINT T = (unsigned int)obs[k].size();
                        for(t=0; t<T; t++){
                            if( obs[k][t] == j ){
                                num += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                            }
                            denom += hmms[k].alpha[t][i] * hmms[k].beta[t][i] / hmms[k].c[t];
                        }
                    }

                    if( denom == 0 ){
                        errorLog << "Denominator is zero for B!" << endl;
                        return false;
                    }
                    //Update b[i][j]
                    //If there are no observations at all for a state then the probabilities will be zero which is bad
                    //So instead we flag that B needs to be renormalized later
                    if( num > 0 ) b[i][j] = denom > 0 ? num/denom : 1.0e-5;
                    else{ b[i][j] = 0; renormB = true; }
                }
            }

            //Renormalize B if any entries were zeroed above
            if( renormB ){
                double sum;
                for (UINT i=0; i<numStates; i++) {
                    sum = 0.;
                    for (UINT k=0; k<numSymbols; k++){
                        b[i][k] += 1.0/numSymbols; //Add a small value to B to make sure the value will not be zero
                        sum += b[i][k];
                    }
                    for (UINT k=0; k<numSymbols; k++) b[i][k] /= sum;
                }
            }

            //Re-estimate Pi - only if the model type is HMM_ERGODIC, otherwise Pi[0] == 1 and everything else is 0
            if (modelType==HMM_ERGODIC ){
                for(k=0; k<numObs; k++){
                    const UINT T = (unsigned int)obs[k].size();
                    //Compute epsilon (the joint probability of being in state i at t and state j at t+1)
                    for(t=0; t<T-1; t++){
                        denom = 0.0;
                        for(i=0; i<numStates; i++){
                            for(j=0; j<numStates; j++){
                                epsilon[k][t][i][j] = hmms[k].alpha[t][i] * a[i][j] * b[j][ obs[k][t+1] ] * hmms[k].beta[t+1][j];
                                denom += epsilon[k][t][i][j];
                            }
                        }
                        //Normalize Epsilon
                        for(i=0; i<numStates; i++){
                            for(j=0; j<numStates; j++){
                                if(denom!=0) epsilon[k][t][i][j] /= denom;
                                else{ epsilon[k][t][i][j] = 0; }
                            }
                        }
                    }

                    //Compute gamma (the probability of being in state i at time t)
                    for(t=0; t<T-1; t++){
                        for(i=0; i<numStates; i++){
                            gamma[k][t][i]= 0.0;
                            for(j=0; j<numStates; j++)
                                gamma[k][t][i] += epsilon[k][t][i][j];
                        }
                    }
                }

                //Pi becomes the average gamma at t=0 across all observations
                double sum = 0;
                for(i=0; i<numStates; i++){
                    sum=0.0;
                    for(k=0; k<numObs; k++){
                        sum += gamma[k][0][i];
                    }
                    pi[i] = sum / numObs;
                }
            }
        }

    }while(keepTraining);

    return true;

}
664 
665 bool DiscreteHiddenMarkovModel::reset(){
666 
667  for(UINT i=0; i<observationSequence.getSize(); i++){
668  observationSequence.push_back( 0 );
669  }
670 
671  return true;
672 }
673 
674 bool DiscreteHiddenMarkovModel::saveModelToFile(fstream &file) const{
675 
676  if(!file.is_open())
677  {
678  errorLog << "saveModelToFile( fstream &file ) - File is not open!" << endl;
679  return false;
680  }
681 
682  //Write the header info
683  file << "DISCRETE_HMM_MODEL_FILE_V1.0\n";
684 
685  //Write the base settings to the file
686  if( !MLBase::saveBaseSettingsToFile(file) ){
687  errorLog <<"saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << endl;
688  return false;
689  }
690 
691  file << "NumStates: " << numStates << endl;
692  file << "NumSymbols: " << numSymbols << endl;
693  file << "ModelType: " << modelType << endl;
694  file << "Delta: " << delta << endl;
695  file << "Threshold: " << cThreshold << endl;
696  file << "NumRandomTrainingIterations: " << numRandomTrainingIterations << endl;
697 
698  file << "A:\n";
699  for(UINT i=0; i<numStates; i++){
700  for(UINT j=0; j<numStates; j++){
701  file << a[i][j];
702  if( j+1 < numStates ) file << "\t";
703  }file << endl;
704  }
705 
706  file << "B:\n";
707  for(UINT i=0; i<numStates; i++){
708  for(UINT j=0; j<numSymbols; j++){
709  file << b[i][j];
710  if( j+1 < numSymbols ) file << "\t";
711  }file << endl;
712  }
713 
714  file<<"Pi:\n";
715  for(UINT i=0; i<numStates; i++){
716  file << pi[i];
717  if( i+1 < numStates ) file << "\t";
718  }
719  file << endl;
720 
721  return true;
722 }
723 
//Loads a model previously written by saveModelToFile. The file is parsed
//strictly in the order it was written (header, base settings, dimensions,
//then the A, B and Pi matrices); any missing or out-of-order header causes
//the load to fail with an error logged.
bool DiscreteHiddenMarkovModel::loadModelFromFile(fstream &file){

    //Wipe any existing model before loading
    clear();

    if(!file.is_open())
    {
        errorLog << "loadModelFromFile( fstream &file ) - File is not open!" << endl;
        return false;
    }

    std::string word;

    file >> word;

    //Find the file type header
    if(word != "DISCRETE_HMM_MODEL_FILE_V1.0"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find Model File Header!" << endl;
        return false;
    }

    //Load the base settings from the file
    if( !MLBase::loadBaseSettingsFromFile(file) ){
        errorLog << "loadModelFromFile(string filename) - Failed to load base settings from file!" << endl;
        return false;
    }

    //Load each "Key: value" setting in the order saveModelToFile wrote them
    file >> word;
    if(word != "NumStates:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the NumStates header." << endl;
        return false;
    }
    file >> numStates;

    file >> word;
    if(word != "NumSymbols:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the NumSymbols header." << endl;
        return false;
    }
    file >> numSymbols;

    file >> word;
    if(word != "ModelType:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the modelType for the header." << endl;
        return false;
    }
    file >> modelType;

    file >> word;
    if(word != "Delta:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the Delta for the header." << endl;
        return false;
    }
    file >> delta;

    file >> word;
    if(word != "Threshold:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the Threshold for the header." << endl;
        return false;
    }
    file >> cThreshold;

    file >> word;
    if(word != "NumRandomTrainingIterations:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the numRandomTrainingIterations header." << endl;
        return false;
    }
    file >> numRandomTrainingIterations;

    //Size the matrices to match the loaded dimensions before reading them
    a.resize(numStates,numStates);
    b.resize(numStates,numSymbols);
    pi.resize(numStates);

    //Load the A, B and Pi matrices
    file >> word;
    if(word != "A:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the A matrix header." << endl;
        return false;
    }

    //Load A (the state transition matrix)
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numStates; j++){
            file >> a[i][j];
        }
    }

    file >> word;
    if(word != "B:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the B matrix header." << endl;
        return false;
    }

    //Load B (the emission matrix)
    for(UINT i=0; i<numStates; i++){
        for(UINT j=0; j<numSymbols; j++){
            file >> b[i][j];
        }
    }

    file >> word;
    if(word != "Pi:"){
        errorLog << "loadModelFromFile( fstream &file ) - Could not find the Pi matrix header." << endl;
        return false;
    }

    //Load Pi (the initial state distribution)
    for(UINT i=0; i<numStates; i++){
        file >> pi[i];
    }

    return true;
}
836 
837 bool DiscreteHiddenMarkovModel::print() const{
838 
839  trainingLog << "A: " << endl;
840  for(UINT i=0; i<a.getNumRows(); i++){
841  for(UINT j=0; j<a.getNumCols(); j++){
842  trainingLog << a[i][j] << "\t";
843  }
844  trainingLog << endl;
845  }
846 
847  trainingLog << "B: " << endl;
848  for(UINT i=0; i<b.getNumRows(); i++){
849  for(UINT j=0; j<b.getNumCols(); j++){
850  trainingLog << b[i][j] << "\t";
851  }
852  trainingLog << endl;
853  }
854 
855  trainingLog << "Pi: ";
856  for(UINT i=0; i<pi.size(); i++){
857  trainingLog << pi[i] << "\t";
858  }
859  trainingLog<<endl;
860 
861  //Check the weights all sum to 1
862  if( true ){
863  double sum=0.0;
864  for(UINT i=0; i<a.getNumRows(); i++){
865  sum=0.0;
866  for(UINT j=0; j<a.getNumCols(); j++) sum += a[i][j];
867  if( sum <= 0.99 || sum >= 1.01 ) warningLog << "WARNING: A Row " << i <<" Sum: "<< sum << endl;
868  }
869 
870  for(UINT i=0; i<b.getNumRows(); i++){
871  sum=0.0;
872  for(UINT j=0; j<b.getNumCols(); j++) sum += b[i][j];
873  if( sum <= 0.99 || sum >= 1.01 ) warningLog << "WARNING: B Row " << i << " Sum: " << sum << endl;
874  }
875  }
876 
877  return true;
878 }
879 
//Returns a copy of the average log likelihood recorded at each iteration of the last training run
VectorDouble DiscreteHiddenMarkovModel::getTrainingIterationLog() const{
    return trainingIterationLog;
}
883 
884 }
Definition: DiscreteHiddenMarkovModel.cpp:25
This class implements a discrete Hidden Markov Model.