28 RegisterRegressifierModule< LogisticRegression > LogisticRegression::registerModule(
"LogisticRegression");
// Constructor. Records whether input/target scaling should be applied and
// stamps the class/regressifier type strings plus the four log streams with
// a "LogisticRegression" prefix.
// NOTE(review): this dump is garbled — the stray numeric tokens are original
// file line numbers, and lines 31/33-35 (presumably other trainer defaults,
// e.g. learning rate / min change / max epochs) are missing here; verify
// against the upstream file before editing.
30 LogisticRegression::LogisticRegression(
const bool useScaling)
// Remember the caller's scaling preference; used by train_/predict_.
32 this->useScaling = useScaling;
36 classType =
"LogisticRegression";
// The regressifier type mirrors the class type string.
37 regressifierType = classType;
// Prefix each log channel so messages identify this module.
38 debugLog.setProceedingText(
"[DEBUG LogisticRegression]");
39 errorLog.setProceedingText(
"[ERROR LogisticRegression]");
40 trainingLog.setProceedingText(
"[TRAINING LogisticRegression]");
41 warningLog.setProceedingText(
"[WARNING LogisticRegression]");
// Destructor. Only the signature is visible in this fragment; the body
// (original lines 45-46) is not present in the dump — presumably empty,
// since no raw resources are owned in the visible code. TODO confirm.
44 LogisticRegression::~LogisticRegression(
void)
// Deep-copies the state of another regressifier into this instance.
59 bool LogisticRegression::deepCopyFrom(
const Regressifier *regressifier){
// Guard: nothing to copy from a null pointer.
61 if( regressifier == NULL )
return false;
// Copy the shared Regressifier base settings. NOTE(review): original lines
// 62-69 are missing from this fragment — presumably they type-checked the
// pointer and copied the model weights (w0, w); confirm upstream.
70 return copyBaseVariables( regressifier );
// Body fragment of LogisticRegression::train_ — the function header (original
// line 73) and many interior lines are missing from this dump. The visible
// flow: validate the data, optionally scale it, then run stochastic gradient
// descent on the weights until convergence or the epoch cap.
// Discard results from any previous training run.
81 trainingResults.clear();
// Reject a training set with no samples.
84 errorLog <<
"train_(RegressionData trainingData) - Training data has zero samples!" << endl;
// This model regresses to exactly one target dimension.
89 errorLog <<
"train_(RegressionData trainingData) - The number of target dimensions is not 1!" << endl;
// Record the model dimensions (N inputs, 1 output).
93 numInputDimensions = N;
94 numOutputDimensions = 1;
95 inputVectorRanges.clear();
96 targetVectorRanges.clear();
// When scaling is enabled, map inputs and targets into [0,1].
107 trainingData.
scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
// Per-weight initialization loop (body not visible in this fragment —
// presumably sets each w[j] to a small random value; TODO confirm).
114 for(UINT j=0; j<N; j++){
119 double lastSquaredError = 0;
122 bool keepTraining =
true;
// Sample visiting order, shuffled once before the epochs begin.
124 vector< UINT > randomTrainingOrder(M);
126 trainingResults.reserve(M);
131 for(UINT i=0; i<M; i++){
132 randomTrainingOrder[i] = i;
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17 — std::shuffle with an explicit engine is the modern replacement.
134 std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
// Main SGD loop: one pass over all M samples per epoch.
137 while( keepTraining ){
140 totalSquaredTrainingError = 0;
141 for(UINT m=0; m<M; m++){
// Pick the next sample in the shuffled order.
144 UINT i = randomTrainingOrder[m];
147 VectorDouble x = trainingData[i].getInputVector();
148 VectorDouble y = trainingData[i].getTargetVector();
// Accumulate the weighted sum h (loop body not visible), then take the
// prediction error against the sigmoid of h.
150 for(UINT j=0; j<N; j++){
153 error = y[0] - sigmoid( h );
154 totalSquaredTrainingError += SQR(error);
// Gradient step: nudge each weight and the bias toward reducing the error.
157 for(UINT j=0; j<N; j++){
158 w[j] += learningRate * error * x[j];
160 w0 += learningRate * error;
// Convergence test: change in SSE between consecutive epochs.
164 delta = fabs( totalSquaredTrainingError-lastSquaredError );
165 lastSquaredError = totalSquaredTrainingError;
168 if( delta <= minChange ){
169 keepTraining =
false;
// Hard stop at the configured epoch cap.
172 if( ++iter >= maxNumEpochs ){
173 keepTraining =
false;
// Abort if the error diverged to inf/NaN (often a scaling problem).
176 if( grt_isinf( totalSquaredTrainingError ) || grt_isnan( totalSquaredTrainingError ) ){
177 errorLog <<
"train_(RegressionData &trainingData) - Training failed! Total squared error is NAN. If scaling is not enabled then you should try to scale your data and see if this solves the issue." << endl;
// Record the per-epoch result and notify any observers.
182 rootMeanSquaredTrainingError = sqrt( totalSquaredTrainingError /
double(M) );
183 result.
setRegressionResult(iter,totalSquaredTrainingError,rootMeanSquaredTrainingError,
this);
184 trainingResults.push_back( result );
187 trainingResultsObserverManager.notifyObservers( result );
189 trainingLog <<
"Epoch: " << iter <<
" SSE: " << totalSquaredTrainingError <<
" Delta: " << delta << endl;
// Allocate the single-dimension output buffer used by predict_.
193 regressionData.resize(1,0);
// Runs the trained model on one input vector: optional input scaling,
// weighted sum + sigmoid, optional rescaling of the output to target range.
// Interior lines are missing from this fragment.
198 bool LogisticRegression::predict_(VectorDouble &inputVector){
// Refuse to predict before a model has been trained.
201 errorLog <<
"predict_(VectorDouble &inputVector) - Model Not Trained!" << endl;
205 if( !trained )
return false;
// Dimension check against the trained model.
207 if( inputVector.size() != numInputDimensions ){
208 errorLog <<
"predict_(VectorDouble &inputVector) - The size of the input vector (" << int(inputVector.size()) <<
// NOTE(review): the message above never closes its "(" after the dimension
// count — a ")" insertion is missing here; cosmetic, but worth fixing upstream.
") does not match the num features in the model (" << numInputDimensions << endl;
// Map each input into [0,1] using the ranges captured at training time.
213 for(UINT n=0; n<numInputDimensions; n++){
214 inputVector[n] = scale(inputVector[n], inputVectorRanges[n].minValue, inputVectorRanges[n].maxValue, 0, 1);
// Linear combination: bias + dot(w, x), then squashed through the sigmoid.
218 regressionData[0] = w0;
219 for(UINT j=0; j<numInputDimensions; j++){
220 regressionData[0] += inputVector[j] * w[j];
222 regressionData[0] = sigmoid( regressionData[0] );
// Map the [0,1] output back to the original target range when scaling is on.
225 for(UINT n=0; n<numOutputDimensions; n++){
226 regressionData[n] = scale(regressionData[n], 0, 1, targetVectorRanges[n].minValue, targetVectorRanges[n].maxValue);
// Serializes the model (V2.0 format): header line, base Regressifier
// settings, then the weights. Interior lines are missing from this fragment.
233 bool LogisticRegression::saveModelToFile(fstream &file)
const{
// NOTE(review): copy-paste error — this is the SAVE routine but the message
// below says "loadModelFromFile"; should read "saveModelToFile". Cannot be
// fixed in a comments-only pass.
237 errorLog <<
"loadModelFromFile(fstream &file) - The file is not open!" << endl;
// Write the V2.0 file-format header.
242 file<<
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V2.0\n";
// Persist the settings shared by all regressifiers.
245 if( !Regressifier::saveBaseSettingsToFile(file) ){
246 errorLog <<
"saveModelToFile(fstream &file) - Failed to save Regressifier base settings to file!" << endl;
// Weight output loop (body not visible — presumably writes each w[j]).
253 for(UINT j=0; j<numInputDimensions; j++){
// Deserializes a model, dispatching legacy V1.0 files to the legacy loader
// and parsing V2.0 files here. Interior lines are missing from this fragment.
262 bool LogisticRegression::loadModelFromFile(fstream &file){
// Reset model state before loading.
265 numInputDimensions = 0;
271 errorLog <<
"loadModelFromFile(string filename) - Could not open file to load model" << endl;
// Old file format: hand off to the dedicated legacy parser.
281 if( word ==
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V1.0" ){
282 return loadLegacyModelFromFile( file );
// Anything else must carry the V2.0 header.
285 if( word !=
"GRT_LOGISTIC_REGRESSION_MODEL_FILE_V2.0" ){
286 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find Model File Header" << endl;
291 if( !Regressifier::loadBaseSettingsFromFile(file) ){
292 errorLog <<
// NOTE(review): message wording is wrong — this path LOADS settings FROM
// file; "Failed to save ... to file" is a copy-paste slip. Flagged only,
// since a comments-only pass must not alter runtime strings.
"loadModelFromFile( fstream &file ) - Failed to save Regressifier base settings to file!" << endl;
// Allocate the weight vector to the loaded input dimensionality.
299 w.resize(numInputDimensions);
303 if(word !=
"Weights:"){
304 errorLog <<
"loadModelFromFile( fstream &file ) - Could not find the Weights!" << endl;
// Weight input loop (body not visible — presumably reads each w[j]).
309 for(UINT j=0; j<numInputDimensions; j++){
318 UINT LogisticRegression::getMaxNumIterations()
const{
319 return getMaxNumEpochs();
322 bool LogisticRegression::setMaxNumIterations(
const UINT maxNumIterations){
323 return setMaxNumEpochs( maxNumIterations );
326 double LogisticRegression::sigmoid(
const double x)
const{
327 return 1.0 / (1 + exp(-x));
// Parses the legacy V1.0 model format: dimensions, scaling flag, the
// input/output ranges, and finally the weights. Interior lines are missing
// from this fragment.
330 bool LogisticRegression::loadLegacyModelFromFile( fstream &file ){
// Each section is introduced by a keyword token; bail out if it is absent.
335 if(word !=
"NumFeatures:"){
336 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find NumFeatures!" << endl;
339 file >> numInputDimensions;
342 if(word !=
"NumOutputDimensions:"){
343 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find NumOutputDimensions!" << endl;
346 file >> numOutputDimensions;
349 if(word !=
"UseScaling:"){
350 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find UseScaling!" << endl;
// Size the range vectors before reading them back.
358 inputVectorRanges.resize(numInputDimensions);
359 targetVectorRanges.resize(numOutputDimensions);
363 if(word !=
"InputVectorRanges:"){
365 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Failed to find InputVectorRanges!" << endl;
// Read min/max for every input dimension.
368 for(UINT j=0; j<inputVectorRanges.size(); j++){
369 file >> inputVectorRanges[j].minValue;
370 file >> inputVectorRanges[j].maxValue;
374 if(word !=
"OutputVectorRanges:"){
376 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Failed to find OutputVectorRanges!" << endl;
// Read min/max for every target dimension.
379 for(UINT j=0; j<targetVectorRanges.size(); j++){
380 file >> targetVectorRanges[j].minValue;
381 file >> targetVectorRanges[j].maxValue;
// Allocate and read the weight vector.
386 w.resize(numInputDimensions);
390 if(word !=
"Weights:"){
391 errorLog <<
"loadLegacyModelFromFile( fstream &file ) - Could not find the Weights!" << endl;
// Weight input loop (body not visible — presumably reads each w[j]).
396 for(UINT j=0; j<numInputDimensions; j++){
// Allocate the single-dimension output buffer used by predict_.
402 regressionData.resize(1,0);
bool setRegressionResult(unsigned int trainingIteration, double totalSquaredTrainingError, double rootMeanSquaredTrainingError, MLBase *trainer)
This class implements the Logistic Regression algorithm. Logistic Regression is a simple but effective regression algorithm that maps an N-dimensional input vector to a single continuous target value via a sigmoid of a weighted sum.
vector< MinMax > getInputRanges() const
UINT getNumSamples() const
string getRegressifierType() const
vector< MinMax > getTargetRanges() const
double getRandomNumberUniform(double minRange=0.0, double maxRange=1.0)
UINT getNumTargetDimensions() const
UINT getNumInputDimensions() const
bool scale(const double minTarget, const double maxTarget)