26 RegisterFeatureExtractionModule< KMeansFeatures > KMeansFeatures::registerModule(
"KMeansFeatures");
30 classType =
"KMeansFeatures";
31 featureExtractionType = classType;
33 debugLog.setProceedingText(
"[DEBUG KMeansFeatures]");
34 errorLog.setProceedingText(
"[ERROR KMeansFeatures]");
35 warningLog.setProceedingText(
"[WARNING KMeansFeatures]");
37 this->numClustersPerLayer = numClustersPerLayer;
39 this->useScaling = useScaling;
41 if( numClustersPerLayer.size() > 0 ){
42 init( numClustersPerLayer );
48 classType =
"KMeansFeatures";
49 featureExtractionType = classType;
51 debugLog.setProceedingText(
"[DEBUG KMeansFeatures]");
52 errorLog.setProceedingText(
"[ERROR KMeansFeatures]");
53 warningLog.setProceedingText(
"[WARNING KMeansFeatures]");
66 this->numClustersPerLayer = rhs.numClustersPerLayer;
76 if( featureExtraction == NULL )
return false;
87 errorLog <<
"clone(FeatureExtraction *featureExtraction) - FeatureExtraction Types Do Not Match!" << endl;
94 VectorDouble data( numInputDimensions );
98 for(UINT j=0; j<numInputDimensions; j++){
99 data[j] =
scale(inputVector[j],ranges[j].minValue,ranges[j].maxValue,0,1);
102 for(UINT j=0; j<numInputDimensions; j++){
103 data[j] = inputVector[j];
107 const UINT numLayers = getNumLayers();
108 for(UINT layer=0; layer<numLayers; layer++){
109 if( !projectDataThroughLayer(data, featureVector, layer) ){
110 errorLog <<
"computeFeatures(const VectorDouble &inputVector) - Failed to project data through layer: " << layer << endl;
115 if( layer+1 < numLayers ){
116 data = featureVector;
130 file.open(filename.c_str(), std::ios::out);
144 file.open(filename.c_str(), std::ios::in);
158 if( !file.is_open() ){
159 errorLog <<
"saveModelToFile(fstream &file) - The file is not open!" << endl;
164 file <<
"KMEANS_FEATURES_FILE_V1.0" << endl;
168 errorLog <<
"saveFeatureExtractionSettingsToFile(fstream &file) - Failed to save base feature extraction settings to file!" << endl;
172 file <<
"NumLayers: " << getNumLayers() << endl;
173 file <<
"NumClustersPerLayer: ";
174 for(UINT i=0; i<numClustersPerLayer.size(); i++){
175 file <<
" " << numClustersPerLayer[i];
179 file <<
"Alpha: " << alpha << endl;
183 for(UINT i=0; i<ranges.size(); i++){
184 file << ranges[i].minValue <<
" " << ranges[i].maxValue <<
" ";
188 file <<
"Clusters: " << endl;
189 for(UINT k=0; k<clusters.size(); k++){
190 file <<
"NumRows: " << clusters[k].getNumRows() << endl;
191 file <<
"NumCols: " << clusters[k].getNumCols() << endl;
192 for(UINT i=0; i<clusters[k].getNumRows(); i++){
193 for(UINT j=0; j<clusters[k].getNumCols(); j++){
194 file << clusters[k][i][j];
195 if( j+1 < clusters[k].getNumCols() )
210 if( !file.is_open() ){
211 errorLog <<
"loadModelFromFile(fstream &file) - The file is not open!" << endl;
222 if( word !=
"KMEANS_FEATURES_FILE_V1.0" ){
223 errorLog <<
"loadModelFromFile(fstream &file) - Invalid file format!" << endl;
229 errorLog <<
"loadFeatureExtractionSettingsFromFile(fstream &file) - Failed to load base feature extraction settings from file!" << endl;
235 if( word !=
"NumLayers:" ){
236 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumLayers header!" << endl;
240 numClustersPerLayer.resize( numLayers );
244 if( word !=
"NumClustersPerLayer:" ){
245 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumClustersPerLayer header!" << endl;
248 for(UINT i=0; i<numClustersPerLayer.size(); i++){
249 file >> numClustersPerLayer[i];
254 if( word !=
"Alpha:" ){
255 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Alpha header!" << endl;
265 if( word !=
"Ranges:" ){
266 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Ranges header!" << endl;
269 ranges.resize(numInputDimensions);
270 for(UINT i=0; i<ranges.size(); i++){
271 file >> ranges[i].minValue;
272 file >> ranges[i].maxValue;
277 if( word !=
"Clusters:" ){
278 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read Clusters header!" << endl;
281 clusters.resize( numLayers );
283 for(UINT k=0; k<clusters.size(); k++){
287 if( word !=
"NumRows:" ){
288 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumRows header!" << endl;
295 if( word !=
"NumCols:" ){
296 errorLog <<
"loadModelFromFile(fstream &file) - Failed to read NumCols header!" << endl;
301 clusters[k].resize(numRows, numCols);
302 for(UINT i=0; i<clusters[k].getNumRows(); i++){
303 for(UINT j=0; j<clusters[k].getNumCols(); j++){
304 file >> clusters[k][i][j];
317 if( numClustersPerLayer.size() == 0 )
return false;
319 this->numClustersPerLayer = numClustersPerLayer;
320 numInputDimensions = 0;
321 numOutputDimensions = 0;
353 errorLog <<
"train_(MatrixDouble &trainingData) - The quantizer has not been initialized!" << endl;
358 featureDataReady =
false;
363 numInputDimensions = N;
364 numOutputDimensions = numClustersPerLayer[ numClustersPerLayer.size()-1 ];
369 for(UINT i=0; i<M; i++){
370 for(UINT j=0; j<N; j++){
371 trainingData[i][j] =
scale(trainingData[i][j],ranges[j].minValue,ranges[j].maxValue,0,1.0);
377 const UINT K = (UINT)numClustersPerLayer.size();
378 for(UINT k=0; k<K; k++){
381 kmeans.setComputeTheta(
true );
386 trainingLog <<
"Layer " << k+1 <<
"/" << K <<
" NumClusters: " << numClustersPerLayer[k] << endl;
387 if( !kmeans.
train_( trainingData ) ){
388 errorLog <<
"train_(MatrixDouble &trainingData) - Failed to train kmeans model at layer: " << k << endl;
393 clusters.push_back( kmeans.getClusters() );
398 VectorDouble input( trainingData.
getNumCols() );
401 for(UINT i=0; i<M; i++){
404 for(UINT j=0; j<input.size(); j++){
405 input[j] = trainingData[i][j];
409 if( !projectDataThroughLayer( input, output, k ) ){
410 errorLog <<
"train_(MatrixDouble &trainingData) - Failed to project sample through layer: " << k << endl;
415 for(UINT j=0; j<output.size(); j++){
416 data[i][j] = output[j];
429 featureVector.resize( numOutputDimensions, 0 );
434 bool KMeansFeatures::projectDataThroughLayer(
const VectorDouble &input, VectorDouble &output,
const UINT layer ){
436 if( layer >= clusters.size() ){
437 errorLog <<
"projectDataThroughLayer(...) - Layer out of bounds! It should be less than: " << clusters.size() << endl;
441 const UINT M = clusters[ layer ].getNumRows();
442 const UINT N = clusters[ layer ].getNumCols();
444 if( input.size() != N ){
445 errorLog <<
"projectDataThroughLayer(...) - The size of the input vector (" << input.size() <<
") does not match the size: " << N << endl;
450 if( output.size() != M ){
460 output[i] += SQR( input[j] - clusters[layer][i][j] );
467 output[i] = sqrt( output[i] );
474 UINT KMeansFeatures::getNumLayers()
const{
475 return (UINT)numClustersPerLayer.size();
478 UINT KMeansFeatures::getLayerSize(
const UINT layerIndex)
const{
479 if( layerIndex >= numClustersPerLayer.size() ){
480 warningLog <<
"LayerIndex is out of bounds. It must be less than the number of layers: " << numClustersPerLayer.size() << endl;
483 return numClustersPerLayer[layerIndex];
486 vector< MatrixDouble > KMeansFeatures::getClusters()
const{
virtual bool saveModelToFile(string filename) const
KMeansFeatures(const vector< UINT > numClustersPerLayer=vector< UINT >(1, 100), const double alpha=0.2, const bool useScaling=true)
bool setMaxNumEpochs(const UINT maxNumEpochs)
unsigned int getNumCols() const
virtual bool computeFeatures(const VectorDouble &inputVector)
bool setMinChange(const double minChange)
bool setNumClusters(const UINT numClusters)
double scale(const double &x, const double &minSource, const double &maxSource, const double &minTarget, const double &maxTarget, const bool constrain=false)
virtual ~KMeansFeatures()
MatrixDouble getDataAsMatrixDouble() const
MatrixDouble getDataAsMatrixDouble() const
MatrixDouble getDataAsMatrixDouble() const
virtual bool train_(MatrixDouble &data)
unsigned int getNumRows() const
std::vector< MinMax > getRanges() const
virtual bool deepCopyFrom(const FeatureExtraction *featureExtraction)
bool setMinNumEpochs(const UINT minNumEpochs)
MatrixDouble getDataAsMatrixDouble() const
virtual bool loadModelFromFile(string filename)
virtual bool train_(ClassificationData &trainingData)
KMeansFeatures & operator=(const KMeansFeatures &rhs)