10 nodeType =
"DecisionTreeClusterNode";
23 if( x[ featureIndex ] >= threshold )
return true;
// Fragment of the routine that writes a textual description of this node to
// `stream` (presumably getModel(ostream&) -- the function header is not part of
// this excerpt; confirm against the full file). The leading numbers are line
// numbers from the original listing, and several original lines are missing.
//
// Build the indent prefix: one tab character per level of `depth`.
54 for(UINT i=0; i<depth; i++) tab +=
"\t";
// Emit the node summary on a single line: depth, nodeSize, featureIndex,
// threshold and isLeafNode. Only the first and the class-probability line are
// prefixed with `tab`; the line is terminated after isLeafNode.
56 stream << tab <<
"depth: " << depth;
57 stream <<
" nodeSize: " << nodeSize;
58 stream <<
" featureIndex: " << featureIndex;
59 stream <<
" threshold " << threshold;
60 stream <<
" isLeafNode: " << isLeafNode << endl;
// Emit every entry of classProbabilities, each followed by a tab.
62 stream << tab <<
"ClassProbabilities: ";
63 for(UINT i=0; i<classProbabilities.size(); i++){
64 stream << classProbabilities[i] <<
"\t";
// A child section label is written only when that child pointer is non-NULL.
// NOTE(review): the statements that follow each label (presumably the
// child's own output) are not visible in this excerpt.
68 if( leftChild != NULL ){
69 stream << tab <<
"LeftChild: " << endl;
73 if( rightChild != NULL ){
74 stream << tab <<
"RightChild: " << endl;
// Fragment of the deep-copy routine (presumably deepCopyNode() -- the header
// and the allocation of `node` are not part of this excerpt).
//
// Copy this node's fields onto the new node one by one, including the
// classProbabilities container.
91 node->isLeafNode = isLeafNode;
92 node->nodeID = nodeID;
93 node->predictedNodeID = predictedNodeID;
94 node->nodeSize = nodeSize;
95 node->featureIndex = featureIndex;
96 node->threshold = threshold;
97 node->classProbabilities = classProbabilities;
// Point each child held by the copy back at the copy as its parent.
// NOTE(review): how node->leftChild / node->rightChild are created, and any
// NULL checks guarding these calls, are not visible here -- confirm.
102 node->leftChild->setParent( node );
108 node->rightChild->setParent( node );
// Fragment that stores four values into the same-named members of this node;
// `this->` distinguishes the member on the left from the identically named
// value on the right. Presumably the body of
// set(nodeSize, featureIndex, threshold, classProbabilities) -- the header and
// the return statement are not part of this excerpt.
127 this->nodeSize = nodeSize;
128 this->featureIndex = featureIndex;
129 this->threshold = threshold;
130 this->classProbabilities = classProbabilities;
// Entry point for the "best iterative split" search. In the visible code this
// is a pure forwarder: every argument is passed unchanged to
// computeBestSpilt(), and that call's result is returned.
//
// featureIndex and minError are non-const references, so whatever
// computeBestSpilt() writes to them is visible to the caller.
// (The closing brace of this function is not part of this excerpt.)
134 bool DecisionTreeClusterNode::computeBestSpiltBestIterativeSpilt(
const UINT &numSplittingSteps,
const ClassificationData &trainingData,
const vector< UINT > &features,
const vector< UINT > &classLabels, UINT &featureIndex,
double &minError ){
136 return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
// Entry point for the "best random split" search. In the visible code this is
// a pure forwarder with the same body as the iterative variant: every argument
// is passed unchanged to computeBestSpilt(), and that call's result is
// returned.
//
// featureIndex and minError are non-const references, so whatever
// computeBestSpilt() writes to them is visible to the caller.
// (The closing brace of this function is not part of this excerpt.)
139 bool DecisionTreeClusterNode::computeBestSpiltBestRandomSpilt(
const UINT &numSplittingSteps,
const ClassificationData &trainingData,
const vector< UINT > &features,
const vector< UINT > &classLabels, UINT &featureIndex,
double &minError ){
141 return computeBestSpilt( numSplittingSteps, trainingData, features, classLabels, featureIndex, minError);
// Searches a random subset of the candidate features for the split with the
// lowest weighted Gini error. For each candidate feature the feature's values
// are handed to a KMeans model, a threshold is built from the resulting
// cluster values, the samples are partitioned by `value >= threshold`, and the
// partition is scored.
//
// NOTE: this is an excerpt. The leading numbers are line numbers from the
// original listing and several original lines are missing, including the
// declarations of M, data, kmeans, clusters, threshold, error, weightL and
// weightR, and all closing braces.
//
// Parameters:
//   numSplittingSteps - upper bound on how many features are tried; clamped
//                       to the number of candidate features.
//   trainingData      - samples; read as trainingData[i][featureIndex] and
//                       trainingData[i].getClassLabel().
//   features          - candidate feature indices to choose from.
//   classLabels       - class labels; its size K bounds the per-class loops.
//   featureIndex      - in/out: used as the working index during the search
//                       and set to the best feature found before set() is
//                       called.
//   minError          - in/out: reset to the largest double on entry and
//                       compared against each candidate's error.
//
// Returns false immediately when `features` is empty. The remaining return
// statements are not visible in this excerpt.
144 bool DecisionTreeClusterNode::computeBestSpilt(
const UINT &numSplittingSteps,
const ClassificationData &trainingData,
const vector< UINT > &features,
const vector< UINT > &classLabels, UINT &featureIndex,
double &minError ){
147 const UINT N = (UINT)features.size();
148 const UINT K = (UINT)classLabels.size();
// Nothing to search when there are no candidate features.
150 if( N == 0 )
return false;
// Start from the worst possible error so the first scored candidate wins.
152 minError = numeric_limits<double>::max();
154 UINT bestFeatureIndex = 0;
155 double bestThreshold = 0;
157 double giniIndexL = 0;
158 double giniIndexR = 0;
// groupIndex[i] records which side (0 or 1) sample i falls on;
// groupCounter holds the number of samples on each side.
161 vector< UINT > groupIndex(M);
162 VectorDouble groupCounter(2,0);
// NOTE(review): `ranges` is not referenced in any visible line -- check
// whether it is used in the missing lines or is dead.
163 vector< MinMax > ranges = trainingData.
getRanges();
// Try at most N features: draw numRandomFeatures values from
// getRandomSubset(0, N, ...) and use them as positions into `features`.
168 UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
169 vector< UINT > randomFeatures = random.
getRandomSubset( 0, N, numRandomFeatures );
172 for(UINT n=0; n<numRandomFeatures; n++){
174 featureIndex = features[ randomFeatures[n] ];
// Copy this feature's value for every sample into column 0 of `data`.
177 for(UINT i=0; i<M; i++){
178 data[i][0] = trainingData[i][featureIndex];
// Configure and train the KMeans model on the single-column data.
// (Other kmeans setup calls are among the missing lines.)
184 kmeans.setComputeTheta(
true );
189 if( !kmeans.
train( data ) ){
190 errorLog <<
"computeBestSpilt() - Failed to train KMeans model for feature: " << featureIndex << endl;
// Accumulate the cluster values into the threshold.
// NOTE(review): the enclosing loop and any normalisation of the sum are not
// visible here -- presumably the threshold is the mean of the cluster centres.
198 threshold += clusters[i][0];
// Reset the per-split tallies, then assign every sample to side 1 when its
// feature value is >= threshold (otherwise side 0) and count it, both in
// total and per class.
203 groupCounter[0] = groupCounter[1] = 0;
204 classProbabilities.setAllValues(0);
205 for(UINT i=0; i<M; i++){
206 groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
207 groupCounter[ groupIndex[i] ]++;
208 classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
// Turn the per-class counts into per-side probabilities; an empty side gets
// probability 0 rather than dividing by zero.
212 for(UINT k=0; k<K; k++){
213 classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
214 classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
// Gini index of each side: sum over classes of p * (1 - p).
218 giniIndexL = giniIndexR = 0;
219 for(UINT k=0; k<K; k++){
220 giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
221 giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
// Weight each side's Gini index by its share of the M samples.
// NOTE(review): no guard against M == 0 is visible in this excerpt -- confirm
// the caller guarantees a non-empty training set.
223 weightL = groupCounter[0]/M;
224 weightR = groupCounter[1]/M;
225 error = (giniIndexL*weightL) + (giniIndexR*weightR);
// Keep the threshold and feature of the lowest-error split seen so far.
// (The matching update of minError is among the missing lines.)
228 if( error < minError ){
230 bestThreshold = threshold;
231 bestFeatureIndex = featureIndex;
// Report the winning feature through the out-parameter and store the split
// on this node, together with the class probabilities of the training data.
236 featureIndex = bestFeatureIndex;
239 set(M,featureIndex,bestThreshold,trainingData.getClassProbabilities(classLabels));
// Fragment of saveParametersToFile(fstream &file) (name taken from the log
// messages below; the header, return statements and braces are not part of
// this excerpt).
//
// Guard: report an error when the stream is not open.
246 if( !file.is_open() )
248 errorLog <<
"saveParametersToFile(fstream &file) - File is not open!" << endl;
// Error report for a failed save of the DecisionTreeNode parameters.
// NOTE(review): the call whose failure triggers this message is not visible
// here.
254 errorLog <<
"saveParametersToFile(fstream &file) - Failed to save DecisionTreeNode parameters to file!" << endl;
// Write this node's own fields, one "Key: value" pair per line.
259 file <<
"FeatureIndex: " << featureIndex << endl;
260 file <<
"Threshold: " << threshold << endl;
// Fragment of loadParametersFromFile(fstream &file) (name taken from the log
// messages below; the header, the conditions guarding the first two messages,
// the reads into `word`, return statements and braces are not part of this
// excerpt).
//
// Error report for a stream that is not open.
269 errorLog <<
"loadParametersFromFile(fstream &file) - File is not open!" << endl;
// Error report for a failed load of the DecisionTreeNode parameters.
275 errorLog <<
"loadParametersFromFile(fstream &file) - Failed to load DecisionTreeNode parameters from file!" << endl;
// The token held in `word` must be the "FeatureIndex:" header; only then is
// the value read into featureIndex.
282 if( word !=
"FeatureIndex:" ){
283 errorLog <<
"loadParametersFromFile(fstream &file) - Failed to find FeatureIndex header!" << endl;
286 file >> featureIndex;
// Likewise the next header must be "Threshold:".
// NOTE(review): the read of the threshold value itself is not visible here.
289 if( word !=
"Threshold:" ){
290 errorLog <<
"loadParametersFromFile(fstream &file) - Failed to find Threshold header!" << endl;
virtual Node * deepCopyNode() const
virtual bool print() const
virtual Node * deepCopyNode() const
UINT getFeatureIndex() const
bool setMaxNumEpochs(const UINT maxNumEpochs)
virtual ~DecisionTreeClusterNode()
virtual bool saveParametersToFile(fstream &file) const
std::vector< unsigned int > getRandomSubset(const unsigned int startRange, const unsigned int endRange, const unsigned int subsetSize)
DecisionTreeClusterNode * deepCopy() const
UINT getNumSamples() const
virtual bool train(ClassificationData trainingData)
virtual bool getModel(ostream &stream) const
bool setMinChange(const double minChange)
double getThreshold() const
bool setNumClusters(const UINT numClusters)
This file implements a DecisionTreeClusterNode, which is a specific type of node used for a DecisionT...
DecisionTreeClusterNode()
virtual bool getModel(ostream &stream) const
bool set(const UINT nodeSize, const UINT featureIndex, const double threshold, const VectorDouble &classProbabilities)
virtual bool loadParametersFromFile(fstream &file)
virtual bool saveParametersToFile(fstream &file) const
unsigned int getNumRows() const
virtual bool loadParametersFromFile(fstream &file)
vector< MinMax > getRanges() const
virtual bool predict(const VectorDouble &x)
bool setMinNumEpochs(const UINT minNumEpochs)