26 debugLog.setProceedingText(
"[DEBUG TSCD]");
27 errorLog.setProceedingText(
"[ERROR TSCD]");
28 warningLog.setProceedingText(
"[WARNING TSCD]");
37 if( numDimensions > 0 ){
44 debugLog.setProceedingText(
"[DEBUG TSCD]");
45 errorLog.setProceedingText(
"[ERROR TSCD]");
46 warningLog.setProceedingText(
"[WARNING TSCD]");
80 if( numDimensions > 0 ){
93 errorLog <<
"setNumDimensions(UINT numDimensions) - The number of dimensions of the dataset must be greater than zero!" << endl;
100 if( datasetName.find(
" ") == string::npos ){
105 errorLog <<
"setDatasetName(string datasetName) - The dataset name cannot contain any spaces!" << endl;
134 errorLog <<
"addSample(UINT classLabel, MatrixDouble trainingSample) - The dimensionality of the training sample (" << trainingSample.
getNumCols() <<
") does not match that of the dataset (" <<
numDimensions <<
")" << endl;
140 errorLog <<
"addSample(UINT classLabel, MatrixDouble sample) - the class label can not be 0!" << endl;
145 data.push_back( newSample );
152 bool labelFound =
false;
169 UINT numExamplesRemoved = 0;
170 UINT numExamplesToRemove = 0;
182 if( numExamplesToRemove > 0 ){
184 while( numExamplesRemoved < numExamplesToRemove ){
185 if(
data[i].getClassLabel() == classLabel ){
187 numExamplesRemoved++;
188 }
else if( ++i ==
data.size() )
break;
194 return numExamplesRemoved;
224 bool oldClassLabelFound =
false;
225 bool newClassLabelAllReadyExists =
false;
226 UINT indexOfOldClassLabel = 0;
227 UINT indexOfNewClassLabel = 0;
232 indexOfOldClassLabel = i;
233 oldClassLabelFound =
true;
236 indexOfNewClassLabel = i;
237 newClassLabelAllReadyExists =
true;
242 if( !oldClassLabelFound ){
248 if(
data[i].getClassLabel() == oldClassLabel ){
249 data[i].setTrainingSample(newClassLabel,
data[i].getData());
254 if( newClassLabelAllReadyExists ){
288 return scale(ranges,minTarget,maxTarget);
296 for(UINT x=0; x<
data[i].getLength(); x++){
298 data[i][x][j] =
Util::scale(
data[i][x][j],ranges[j].minValue,ranges[j].maxValue,minTarget,maxTarget);
331 file.open(fileName.c_str(), std::ios::out);
333 if( !file.is_open() ){
334 errorLog <<
"saveDatasetToFile(string fileName) - Failed to open file!" << endl;
338 file <<
"GRT_LABELLED_TIME_SERIES_CLASSIFICATION_DATA_FILE_V1.0\n";
340 file <<
"InfoText: " <<
infoText << endl;
344 file <<
"ClassIDsAndCounters: "<<endl;
358 file <<
"LabelledTimeSeriesTrainingData:\n";
361 file <<
"************TIME_SERIES************\n";
362 file <<
"ClassID: "<<
data[x].getClassLabel() <<endl;
363 file <<
"TimeSeriesLength: "<<
data[x].getLength()<<endl;
364 file <<
"TimeSeriesData: \n";
365 for(UINT i=0; i<
data[x].getLength(); i++){
367 file <<
data[x][i][j];
368 if( j<numDimensions-1 ) file <<
"\t";
380 file.open(filename.c_str(), std::ios::in);
384 if( !file.is_open() ){
385 errorLog <<
"loadDatasetFromFile(string filename) - FILE NOT OPEN!" << endl;
393 if(word !=
"GRT_LABELLED_TIME_SERIES_CLASSIFICATION_DATA_FILE_V1.0"){
396 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find file header!" << endl;
402 if(word !=
"DatasetName:"){
403 errorLog <<
"loadDatasetFromFile(string filename) - failed to find DatasetName!" << endl;
410 if(word !=
"InfoText:"){
411 errorLog <<
"loadDatasetFromFile(string filename) - failed to find InfoText!" << endl;
419 while( word !=
"NumDimensions:" ){
425 if(word !=
"NumDimensions:"){
428 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find NumDimensions!" << endl;
435 if(word !=
"TotalNumTrainingExamples:"){
438 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find TotalNumTrainingExamples!" << endl;
445 if(word !=
"NumberOfClasses:"){
448 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find NumberOfClasses!" << endl;
458 if(word !=
"ClassIDsAndCounters:"){
461 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find ClassIDsAndCounters!" << endl;
472 if(word !=
"UseExternalRanges:"){
475 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find UseExternalRanges!" << endl;
481 if( useExternalRanges ){
491 if(word !=
"LabelledTimeSeriesTrainingData:"){
494 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find LabelledTimeSeriesTrainingData!" << endl;
504 UINT timeSeriesLength = 0;
507 if( word !=
"************TIME_SERIES************" ){
510 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find TimeSeries Header!" << endl;
515 if( word !=
"ClassID:" ){
518 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find ClassID!" << endl;
524 if( word !=
"TimeSeriesLength:" ){
527 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find TimeSeriesLength!" << endl;
530 file >> timeSeriesLength;
533 if( word !=
"TimeSeriesData:" ){
536 errorLog <<
"loadDatasetFromFile(string filename) - Failed to find TimeSeriesData!" << endl;
541 MatrixDouble trainingExample(timeSeriesLength,numDimensions);
542 for(UINT i=0; i<timeSeriesLength; i++){
544 file >> trainingExample[i][j];
548 data[x].setTrainingSample(classLabel,trainingExample);
558 file.open(filename.c_str(), std::ios::out );
560 if( !file.is_open() ){
566 for(UINT i=0; i<
data[x].getLength(); i++){
568 file <<
data[x].getClassLabel() <<
",";
570 file <<
data[x][i][j];
571 if( j+1 < numDimensions ){
596 if( !parser.parseCSVFile(filename,
true) ){
597 errorLog <<
"loadDatasetFromCSVFile(const string &filename) - Failed to parse CSV file!" << endl;
601 if( !parser.getConsistentColumnSize() ){
602 errorLog <<
"loadDatasetFromCSVFile(const string &filename) - The CSV file does not have a consistent number of columns!" << endl;
606 if( parser.getColumnSize() <= 2 ){
607 errorLog <<
"loadDatasetFromCSVFile(const string &filename) - The CSV file does not have enough columns! It should contain at least three columns!" << endl;
615 data.reserve( parser.getRowSize() );
617 UINT sampleCounter = 0;
618 UINT lastSampleCounter = 0;
624 for(UINT i=0; i<parser.getRowSize(); i++){
629 if( sampleCounter != lastSampleCounter && i != 0 ){
631 if( !
addSample(classLabel, timeseries) ){
632 warningLog <<
"loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i <<
" to the dataset!" << endl;
636 lastSampleCounter = sampleCounter;
668 stats +=
"DatasetInfo:\t" +
infoText +
"\n";
672 stats +=
"ClassStats:\n";
677 stats +=
"\tClassName:\t" +
classTracker[k].className +
"\n";
682 stats +=
"Dataset Ranges:\n";
683 for(UINT j=0; j<ranges.size(); j++){
687 stats +=
"Timeseries Lengths:\n";
688 UINT M = (UINT)
data.size();
689 for(UINT j=0; j<M; j++){
715 UINT randomIndex = 0;
717 if( useStratifiedSampling ){
728 UINT numSamples = (UINT)classData[k].size();
729 for(UINT x=0; x<numSamples; x++){
734 SWAP( classData[k][ x ] ,classData[k][ randomIndex ] );
740 UINT numTrainingExamples = (UINT) floor(
double(classData[k].size()) / 100.0 *
double(trainingSizePercentage) );
743 for(UINT i=0; i<numTrainingExamples; i++){
744 trainingSet.
addSample(
data[ classData[k][i] ].getClassLabel(),
data[ classData[k][i] ].getData() );
746 for(UINT i=numTrainingExamples; i<classData[k].size(); i++){
747 testSet.
addSample(
data[ classData[k][i] ].getClassLabel(),
data[ classData[k][i] ].getData() );
756 const UINT numTrainingExamples = (UINT) floor(
double(
totalNumSamples) / 100.0 * double(trainingSizePercentage) );
765 SWAP( indexs[ x ] , indexs[ randomIndex ] );
769 for(UINT i=0; i<numTrainingExamples; i++){
770 trainingSet.
addSample(
data[ indexs[i] ].getClassLabel(),
data[ indexs[i] ].getData() );
773 testSet.
addSample(
data[ indexs[i] ].getClassLabel(),
data[ indexs[i] ].getData() );
787 errorLog <<
"merge(TimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.
getNumDimensions() <<
") does not match the number of dimensions of this dataset (" <<
numDimensions <<
")" << endl;
797 addSample(labelledData[i].getClassLabel(), labelledData[i].getData());
802 for(UINT i=0; i<classTracker.size(); i++){
816 errorLog <<
"spiltDataIntoKFolds(UINT K) - K can not be zero!" << endl;
822 errorLog <<
"spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the total number of samples in the dataset!" << endl;
827 if( useStratifiedSampling ){
830 errorLog <<
"spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << endl;
848 UINT randomIndex = 0;
850 if( useStratifiedSampling ){
861 UINT numSamples = (UINT)classData[c].size();
862 for(UINT x=0; x<numSamples; x++){
867 SWAP( classData[c][ x ] , classData[c][ randomIndex ] );
872 vector< UINT >::iterator iter;
874 iter = classData[ c ].begin();
876 while( iter != classData[c].end() ){
892 SWAP( indexs[ x ] , indexs[ randomIndex ] );
902 if( ++counter == numSamplesPerFold && foldIndex < K-1 ){
919 errorLog <<
"getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
923 if( foldIndex >=
kFoldValue )
return trainingData;
930 if( k != foldIndex ){
934 trainingData.
addSample(
data[ index ].getClassLabel(),
data[ index ].getData() );
947 if( foldIndex >=
kFoldValue )
return testData;
965 if(
data[x].getClassLabel() == classLabel ){
977 return unlabelledData;
983 for(UINT x=0; x<
data[i].getLength(); x++){
984 unlabelledData.
addSample(
data[i].getData().getRowVector( x ) );
988 return unlabelledData;
992 UINT minClassLabel = 99999;
1000 return minClassLabel;
1005 UINT maxClassLabel = 0;
1013 return maxClassLabel;
1022 warningLog <<
"getClassLabelIndexValue(UINT classLabel) - Failed to find class label: " << classLabel <<
" in class tracker!" << endl;
1033 return "CLASS_LABEL_NOT_FOUND";
1044 ranges[j].minValue =
data[0][0][0];
1045 ranges[j].maxValue =
data[0][0][0];
1047 for(UINT i=0; i<
data[x].getLength(); i++){
1048 if(
data[x][i][j] < ranges[j].minValue ){ ranges[j].minValue =
data[x][i][j]; }
1049 else if(
data[x][i][j] > ranges[j].maxValue ){ ranges[j].maxValue =
data[x][i][j]; }
1063 M +=
data[x].getLength();
1071 for(UINT i=0; i<
data[x].getLength(); i++){
1073 matrixData[index][j] =
data[x][i][j];
static std::string toString(const int &i)
bool enableExternalRangeScaling(const bool useExternalRanges)
UINT getNumSamples() const
bool save(const string &filename) const
vector< ClassTracker > classTracker
A vector of ClassTracker, which keeps track of the number of samples of each class.
UINT getMinimumClassLabel() const
UINT getNumDimensions() const
bool push_back(const std::vector< T > &sample)
static double scale(const double &x, const double &minSource, const double &maxSource, const double &minTarget, const double &maxTarget, const bool constrain=false)
vector< TimeSeriesClassificationSample > getClassificationData() const
unsigned int getNumCols() const
bool addSample(const VectorDouble &sample)
virtual ~TimeSeriesClassificationData()
WarningLog warningLog
Default warning log.
UINT eraseAllSamplesWithClassLabel(const UINT classLabel)
TimeSeriesClassificationData partition(const UINT partitionPercentage, const bool useStratifiedSampling=false)
bool setNumDimensions(const UINT numDimensions)
std::string getStatsAsString() const
bool addSample(const UINT classLabel, const MatrixDouble &trainingSample)
string infoText
Free-form information text describing the dataset.
string datasetName
The name of the dataset.
bool loadDatasetFromFile(const string filename)
UINT getClassLabelIndexValue(const UINT classLabel) const
static double stringToDouble(const std::string &s)
UINT totalNumSamples
The total number of samples in the dataset.
UINT getMaximumClassLabel() const
TimeSeriesClassificationData(UINT numDimensions=0, string datasetName="NOT_SET", string infoText="")
vector< TimeSeriesClassificationSample > data
The labelled time series classification data.
bool loadDatasetFromCSVFile(const string &filename)
bool relabelAllSamplesWithClassLabel(const UINT oldClassLabel, const UINT newClassLabel)
The TimeSeriesClassificationData is the main data structure for recording, labeling, managing, saving, and loading training data for supervised temporal learning problems. Unlike ClassificationData, in which each sample consists of a single N-dimensional datum, a TimeSeriesClassificationData sample consists of an N-dimensional time series of length M. The length of each time series (i.e. M) can differ from sample to sample within the dataset.
bool saveDatasetToFile(const string filename) const
bool saveDatasetToCSVFile(const string &filename) const
UINT kFoldValue
The number of folds the dataset has been split into for cross validation.
bool crossValidationSetup
A flag to show if the dataset is ready for cross validation.
bool setClassNameForCorrespondingClassLabel(const string className, const UINT classLabel)
ErrorLog errorLog
Default error log.
UINT getNumClasses() const
bool allowNullGestureClass
A flag that controls whether a user may add new samples with a class label matching the default null gesture class label.
bool scale(const double minTarget, const double maxTarget)
string getClassNameForCorrespondingClassLabel(const UINT classLabel) const
vector< ClassTracker > getClassTracker() const
int getRandomNumberInt(int minRange, int maxRange)
TimeSeriesClassificationData & operator=(const TimeSeriesClassificationData &rhs)
DebugLog debugLog
Default debugging log.
bool merge(const TimeSeriesClassificationData &labelledData)
TimeSeriesClassificationData getClassData(const UINT classLabel) const
bool useExternalRanges
A flag to show if the dataset should be scaled using the externalRanges values.
static int stringToInt(const std::string &s)
static bool stringEndsWith(const std::string &str, const std::string &ending)
UINT numDimensions
The number of dimensions in the dataset.
bool setDatasetName(const string datasetName)
bool setExternalRanges(const vector< MinMax > &externalRanges, const bool useExternalRanges=false)
vector< MinMax > externalRanges
A vector containing a set of externalRanges set by the user.
UnlabelledData reformatAsUnlabelledData() const
TimeSeriesClassificationData getTrainingFoldData(const UINT foldIndex) const
bool load(const string &filename)
vector< vector< UINT > > crossValidationIndexs
A vector to hold the indices of the dataset for the cross validation.
bool spiltDataIntoKFolds(const UINT K, const bool useStratifiedSampling=false)
vector< MinMax > getRanges() const
bool setNumDimensions(const UINT numDimensions)
MatrixDouble getDataAsMatrixDouble() const
bool setInfoText(const string infoText)
bool setAllowNullGestureClass(const bool allowNullGestureClass)
TimeSeriesClassificationData getTestFoldData(const UINT foldIndex) const