26 RegisterClassifierModule< RandomForests > RandomForests::registerModule(
"RandomForests");
28 RandomForests::RandomForests(
const DecisionTreeNode &decisionTreeNode,
const UINT forestSize,
const UINT numRandomSplits,
const UINT minNumSamplesPerNode,
const UINT maxDepth,
const UINT trainingMode,
const bool removeFeaturesAtEachSpilt,
const bool useScaling)
30 this->decisionTreeNode = decisionTreeNode.
deepCopy();
31 this->forestSize = forestSize;
32 this->numRandomSplits = numRandomSplits;
33 this->minNumSamplesPerNode = minNumSamplesPerNode;
34 this->maxDepth = maxDepth;
35 this->trainingMode = trainingMode;
36 this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
37 this->useScaling = useScaling;
38 classType =
"RandomForests";
39 classifierType = classType;
40 classifierMode = STANDARD_CLASSIFIER_MODE;
41 useNullRejection =
false;
42 supportsNullRejection =
false;
43 debugLog.setProceedingText(
"[DEBUG RandomForests]");
44 errorLog.setProceedingText(
"[ERROR RandomForests]");
45 trainingLog.setProceedingText(
"[TRAINING RandomForests]");
46 warningLog.setProceedingText(
"[WARNING RandomForests]");
50 this->decisionTreeNode = NULL;
51 classType =
"RandomForests";
52 classifierType = classType;
53 classifierMode = STANDARD_CLASSIFIER_MODE;
54 debugLog.setProceedingText(
"[DEBUG RandomForests]");
55 errorLog.setProceedingText(
"[ERROR RandomForests]");
56 trainingLog.setProceedingText(
"[TRAINING RandomForests]");
57 warningLog.setProceedingText(
"[WARNING RandomForests]");
65 if( decisionTreeNode != NULL ){
66 delete decisionTreeNode;
67 decisionTreeNode = NULL;
80 if( this->decisionTreeNode != NULL ){
81 delete decisionTreeNode;
82 decisionTreeNode = NULL;
88 for(UINT i=0; i<rhs.forest.size(); i++){
89 this->forest.push_back( rhs.forest[i]->deepCopy() );
93 this->forestSize = rhs.forestSize;
94 this->numRandomSplits = rhs.numRandomSplits;
95 this->minNumSamplesPerNode = rhs.minNumSamplesPerNode;
96 this->maxDepth = rhs.maxDepth;
97 this->removeFeaturesAtEachSpilt = rhs.removeFeaturesAtEachSpilt;
98 this->trainingMode = rhs.trainingMode;
100 }
else errorLog <<
"deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << endl;
107 if( classifier == NULL )
return false;
119 if( this->decisionTreeNode != NULL ){
120 delete decisionTreeNode;
121 decisionTreeNode = NULL;
127 for(UINT i=0; i<ptr->forest.size(); i++){
128 this->forest.push_back( ptr->forest[i]->deepCopy() );
132 this->forestSize = ptr->forestSize;
133 this->numRandomSplits = ptr->numRandomSplits;
134 this->minNumSamplesPerNode = ptr->minNumSamplesPerNode;
135 this->maxDepth = ptr->maxDepth;
136 this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt;
137 this->trainingMode = ptr->trainingMode;
142 errorLog <<
"deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << endl;
157 errorLog <<
"train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
161 numInputDimensions = N;
169 trainingData.
scale(0, 1);
176 for(UINT i=0; i<forestSize; i++){
192 if( !tree.
train( data ) ){
193 errorLog <<
"train_(ClassificationData &labelledTrainingData) - Failed to train tree at forest index: " << i << endl;
207 predictedClassLabel = 0;
211 errorLog <<
"predict_(VectorDouble &inputVector) - Model Not Trained!" << endl;
215 if( inputVector.size() != numInputDimensions ){
216 errorLog <<
"predict_(VectorDouble &inputVector) - The size of the input vector (" << inputVector.size() <<
") does not match the num features in the model (" << numInputDimensions << endl;
221 for(UINT n=0; n<numInputDimensions; n++){
222 inputVector[n] =
scale(inputVector[n], ranges[n].minValue, ranges[n].maxValue, 0, 1);
226 if( classLikelihoods.size() != numClasses ) classLikelihoods.resize(numClasses,0);
227 if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);
229 std::fill(classDistances.begin(),classDistances.end(),0);
233 for(UINT i=0; i<forestSize; i++){
234 if( !forest[i]->
predict(inputVector, y) ){
235 errorLog <<
"predict_(VectorDouble &inputVector) - Tree " << i <<
" failed prediction!" << endl;
239 for(UINT j=0; j<numClasses; j++){
240 classDistances[j] += y[j];
247 for(UINT k=0; k<numClasses; k++){
248 classLikelihoods[k] = classDistances[k] / double(forestSize);
250 if( classLikelihoods[k] > maxLikelihood ){
251 maxLikelihood = classLikelihoods[k];
252 bestDistance = classDistances[k];
257 predictedClassLabel = classLabels[ bestIndex ];
268 for(UINT i=0; i<forest.size(); i++){
269 if( forest[i] != NULL ){
282 cout <<
"RandomForest\n";
283 cout <<
"ForestSize: " << forestSize << endl;
284 cout <<
"NumSplittingSteps: " << numRandomSplits << endl;
285 cout <<
"MinNumSamplesPerNode: " << minNumSamplesPerNode << endl;
286 cout <<
"MaxDepth: " << maxDepth << endl;
287 cout <<
"RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << endl;
288 cout <<
"TrainingMode: " << trainingMode << endl;
289 cout <<
"ForestBuilt: " << (trained ? 1 : 0) << endl;
293 for(UINT i=0; i<forestSize; i++){
294 cout <<
"Tree: " << i+1 << endl;
306 errorLog <<
"saveModelToFile(fstream &file) - The file is not open!" << endl;
311 file <<
"GRT_RANDOM_FOREST_MODEL_FILE_V1.0\n";
315 errorLog <<
"saveModelToFile(fstream &file) - Failed to save classifier base settings to file!" << endl;
319 if( decisionTreeNode != NULL ){
320 file <<
"DecisionTreeNodeType: " << decisionTreeNode->
getNodeType() << endl;
322 Classifier::errorLog <<
"saveModelToFile(fstream &file) - Failed to save decisionTreeNode settings to file!" << endl;
326 file <<
"DecisionTreeNodeType: " <<
"NULL" << endl;
329 file <<
"ForestSize: " << forestSize << endl;
330 file <<
"NumSplittingSteps: " << numRandomSplits << endl;
331 file <<
"MinNumSamplesPerNode: " << minNumSamplesPerNode << endl;
332 file <<
"MaxDepth: " << maxDepth << endl;
333 file <<
"RemoveFeaturesAtEachSpilt: " << removeFeaturesAtEachSpilt << endl;
334 file <<
"TrainingMode: " << trainingMode << endl;
335 file <<
"ForestBuilt: " << (trained ? 1 : 0) << endl;
339 for(UINT i=0; i<forestSize; i++){
340 file <<
"Tree: " << i+1 << endl;
341 file <<
"TreeNodeType: " << forest[i]->getNodeType() << endl;
342 if( !forest[i]->saveToFile( file ) ){
343 errorLog <<
"saveModelToFile(fstream &file) - Failed to save tree " << i <<
" to file!" << endl;
358 errorLog <<
"loadModelFromFile(string filename) - Could not open file to load model" << endl;
363 std::string treeNodeType;
368 if(word !=
"GRT_RANDOM_FOREST_MODEL_FILE_V1.0"){
369 errorLog <<
"loadModelFromFile(string filename) - Could not find Model File Header" << endl;
375 errorLog <<
"loadModelFromFile(string filename) - Failed to load base settings from file!" << endl;
380 if(word !=
"DecisionTreeNodeType:"){
381 Classifier::errorLog <<
"loadModelFromFile(string filename) - Could not find the DecisionTreeNodeType!" << endl;
384 file >> treeNodeType;
386 if( treeNodeType !=
"NULL" ){
390 if( decisionTreeNode == NULL ){
391 Classifier::errorLog <<
"loadModelFromFile(string filename) - Could not create new DecisionTreeNode from type: " << treeNodeType << endl;
396 Classifier::errorLog <<
"loadModelFromFile(fstream &file) - Failed to load decisionTreeNode settings from file!" << endl;
400 Classifier::errorLog <<
"loadModelFromFile(fstream &file) - Failed to load decisionTreeNode! DecisionTreeNodeType is NULL!" << endl;
405 if(word !=
"ForestSize:"){
406 errorLog <<
"loadModelFromFile(string filename) - Could not find the ForestSize!" << endl;
412 if(word !=
"NumSplittingSteps:"){
413 errorLog <<
"loadModelFromFile(string filename) - Could not find the NumSplittingSteps!" << endl;
416 file >> numRandomSplits;
419 if(word !=
"MinNumSamplesPerNode:"){
420 errorLog <<
"loadModelFromFile(string filename) - Could not find the MinNumSamplesPerNode!" << endl;
423 file >> minNumSamplesPerNode;
426 if(word !=
"MaxDepth:"){
427 errorLog <<
"loadModelFromFile(string filename) - Could not find the MaxDepth!" << endl;
433 if(word !=
"RemoveFeaturesAtEachSpilt:"){
434 errorLog <<
"loadModelFromFile(string filename) - Could not find the RemoveFeaturesAtEachSpilt!" << endl;
437 file >> removeFeaturesAtEachSpilt;
440 if(word !=
"TrainingMode:"){
441 errorLog <<
"loadModelFromFile(string filename) - Could not find the TrainingMode!" << endl;
444 file >> trainingMode;
447 if(word !=
"ForestBuilt:"){
448 errorLog <<
"loadModelFromFile(string filename) - Could not find the ForestBuilt!" << endl;
456 if(word !=
"Forest:"){
457 errorLog <<
"loadModelFromFile(string filename) - Could not find the Forest!" << endl;
463 for(UINT i=0; i<forestSize; i++){
467 errorLog <<
"loadModelFromFile(string filename) - Could not find the Tree Header!" << endl;
468 cout <<
"WORD: " << word << endl;
469 cout <<
"Tree i: " << i << endl;
474 if( treeIndex != i+1 ){
475 errorLog <<
"loadModelFromFile(string filename) - Incorrect tree index: " << treeIndex << endl;
480 if(word !=
"TreeNodeType:"){
481 errorLog <<
"loadModelFromFile(string filename) - Could not find the TreeNodeType!" << endl;
482 cout <<
"WORD: " << word << endl;
483 cout <<
"i: " << i << endl;
486 file >> treeNodeType;
492 errorLog <<
"loadModelFromFile(fstream &file) - Failed to create new Tree!" << endl;
497 tree->setParent( NULL );
499 errorLog <<
"loadModelFromFile(fstream &file) - Failed to load tree from file!" << endl;
504 forest.push_back( tree );
516 return numRandomSplits;
520 return minNumSamplesPerNode;
532 return removeFeaturesAtEachSpilt;
535 const vector< DecisionTreeNode* > RandomForests::getForest()
const {
541 if( decisionTreeNode == NULL ){
545 return decisionTreeNode->
deepCopy();
549 if( forestSize > 0 ){
550 this->forestSize = forestSize;
558 if( numRandomSplits > 0 ){
559 this->numRandomSplits = numRandomSplits;
566 if( minNumSamplesPerNode > 0 ){
567 this->minNumSamplesPerNode = minNumSamplesPerNode;
575 this->maxDepth = maxDepth;
582 this->removeFeaturesAtEachSpilt = removeFeaturesAtEachSpilt;
588 if( trainingMode == DecisionTree::BEST_ITERATIVE_SPILT || trainingMode == DecisionTree::BEST_RANDOM_SPLIT ){
589 this->trainingMode = trainingMode;
593 warningLog <<
"setTrainingMode(const UINT mode) - Unknown training mode!" << endl;
599 if( decisionTreeNode != NULL ){
600 delete decisionTreeNode;
601 decisionTreeNode = NULL;
603 this->decisionTreeNode = node.
deepCopy();
virtual bool deepCopyFrom(const Classifier *classifier)
virtual ~RandomForests(void)
virtual bool print() const
bool getRemoveFeaturesAtEachSpilt() const
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
This class implements a Random Decision Forest classifier.
vector< UINT > getClassLabels() const
bool copyBaseVariables(const Classifier *classifier)
bool setTrainingMode(const UINT trainingMode)
bool setNumRandomSplits(const UINT numSplittingSteps)
virtual bool loadFromFile(fstream &file)
UINT getNumDimensions() const
bool setDecisionTreeNode(const DecisionTreeNode &node)
bool loadBaseSettingsFromFile(fstream &file)
UINT getNumSamples() const
RandomForests & operator=(const RandomForests &rhs)
virtual bool saveToFile(fstream &file) const
virtual bool train(ClassificationData trainingData)
virtual bool loadModelFromFile(fstream &file)
UINT getNumClasses() const
string getNodeType() const
bool enableScaling(bool useScaling)
virtual bool saveModelToFile(fstream &file) const
bool saveBaseSettingsToFile(fstream &file) const
bool setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt)
double scale(const double &x, const double &minSource, const double &maxSource, const double &minTarget, const double &maxTarget, const bool constrain=false)
DecisionTreeNode * deepCopy() const
bool scale(const double minTarget, const double maxTarget)
bool setNumSplittingSteps(const UINT numSplittingSteps)
DecisionTreeNode * deepCopyTree() const
virtual bool predict(VectorDouble inputVector)
bool setDecisionTreeNode(const DecisionTreeNode &node)
virtual bool predict_(VectorDouble &inputVector)
UINT getMinNumSamplesPerNode() const
bool setForestSize(const UINT forestSize)
UINT getTrainingMode() const
virtual bool train_(ClassificationData &trainingData)
UINT getNumRandomSplits() const
vector< MinMax > getRanges() const
RandomForests(const DecisionTreeNode &decisionTreeNode=DecisionTreeClusterNode(), const UINT forestSize=10, const UINT numRandomSplits=100, const UINT minNumSamplesPerNode=5, const UINT maxDepth=10, const UINT trainingMode=DecisionTree::BEST_RANDOM_SPLIT, const bool removeFeaturesAtEachSpilt=true, const bool useScaling=false)
UINT getForestSize() const
string getClassifierType() const
bool enableNullRejection(bool useNullRejection)
bool setRemoveFeaturesAtEachSpilt(const bool removeFeaturesAtEachSpilt)
static Node * createInstanceFromString(string const &nodeType)
bool setMaxDepth(const UINT maxDepth)
ClassificationData getBootstrappedDataset(UINT numSamples=0) const
DecisionTreeNode * deepCopyDecisionTreeNode() const
bool setMaxDepth(const UINT maxDepth)
bool setMinNumSamplesPerNode(const UINT minNumSamplesPerNode)
bool setTrainingMode(const UINT trainingMode)