Since RapidMiner only accepts files in certain formats (Excel spreadsheet – .xls, Comma-Separated Values – .csv, etc.) and my data was stored as a flat file, it was necessary to preprocess my data.
This included replacing the spaces that separate the values with commas and adding column labels to the data. As should be expected, there were several problems associated with this task.
Next: Results I
Back to: Tools
// Program name: convert.cpp // Shanley Philip // CSCI 568: Data Mining #include <stdio.h> #include <stdlib.h> #include <iostream> #include <fstream> #include <string.h> using namespace std; void parsefile(string filename) { ifstream input(filename.c_str()); ofstream output; string outfile; outfile = filename + ".csv"; cout << "Writing to: " << outfile << "..." << endl; //===================================== // Change spaces with , //===================================== output.open(outfile.c_str()); string dir = filename.substr(5,3); if (dir == "psd") { string c3 = "C3-0,, C3-1,, C3-2,, C3-3,, C3-4,, C3-5,, C3-6,, C3-7,, C3-8,, C3-9,, C3-10,, C3-11"; string cz = "Cz-0,, Cz-1,, Cz-2,, Cz-3,, Cz-4,, Cz-5,, Cz-6,, Cz-7,, Cz-8,, Cz-9,, Cz-10,, Cz-11"; string c4 = "C4-0,, C4-1,, C4-2,, C4-3,, C4-4,, C4-5,, C4-6,, C4-7,, C4-8,, C4-9,, C4-10,, C4-11"; string cp1 = "CP1-0,, CP1-1,, CP1-2,, CP1-3,, CP1-4,, CP1-5,, CP1-6,, CP1-7,, CP1-8,, CP1-9,, CP1-10,, CP1-11"; string cp2 = "CP2-0,, CP2-1,, CP2-2,, CP2-3,, CP2-4,, CP2-5,, CP2-6,, CP2-7,, CP2-8,, CP2-9,, CP2-10,, CP2-11"; string p3 = "P3-0,, P3-1,, P3-2,, P3-3,, P3-4,, P3-5,, P3-6,, P3-7,, P3-8,, P3-9,, P3-10,, P3-11"; string pz = "PZ-0,, PZ-1,, PZ-2,, PZ-3,, PZ-4,, PZ-5,, PZ-6,, PZ-7,, PZ-8,, PZ-9,, PZ-10,, PZ-11"; string p4 = "P4-0,, P4-1,, P4-2,, P4-3,, P4-4,, P4-5,, P4-6,, P4-7,, P4-8,, P4-9,, P4-10,, P4-11"; output << c3 << ",, " << cz << ",, " << c4 << ",, " << cp1 << ",, " << cp2 << ",, " << p3 << ",, " << pz << ",, " << p4; string filetype = filename.substr(9, 5); if (filetype == "train") output << ",, class_label\n"; else output << "\n"; cout << "Added headers for the psd file..." 
<< endl << endl; } else { output << "Fp1, AF3, F7, F3, FC1, FC5, T7, C3, CP1, CP5, P7, P3, Pz, PO3, O1, Oz, O2, PO4, P4, P8, CP6, CP2, C4, T8, FC6, FC2, F4, F8, AF4, Fp2, Fz, Cz"; string filetype = filename.substr(15, 5); if (filetype == "train") output << ",class_label\n"; else output << "\n"; cout << "Added headers for the raw EEG file..." << endl << endl; } input.seekg(0,ios::end); int length = input.tellg(); input.seekg(0,ios::beg); char *file; file = new char[length]; input.read(file, length); input.close(); for (int i=0; i<length-1; i++) { if ((char) file[i] == ' ' && (char) file[i+1] == ' ') output << ','; else if ((char) file[i] == ' ' && (char) file[i+1] == '-') output << ','; else if ((char) file[i] == '\n') output << '\n'; else output << (char) file[i]; } output << file[length-1]; delete []file; output.close(); } int main(int argc, char* argv[]) { string *filelist = NULL; int filenum(0); if (argc >= 2) { filelist = new string[argc-1]; for (int i=1; i\n Each file must be separated by a single space.\n"; return 1; } if (filenum == 0) { cout << "No files to parse." << endl; return 1; } else { for (int j=0; j