00001 #ifndef MATRIX_FILE_ANALYZER_CPP
00002 #define MATRIX_FILE_ANALYZER_CPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "MatrixFileAnalyzer.h"
00038 #include "EST.h"
00039
00040 #include <cstdio>
00041 #include <sstream>
00042
00043
00044 char* MatrixFileAnalyzer::dataFileName = NULL;
00045
00046
00047 arg_parser::arg_record MatrixFileAnalyzer::argsList[] = {
00048 {"--dataFile", "Data file containing matrix of distance metrics",
00049 &MatrixFileAnalyzer::dataFileName, arg_parser::STRING},
00050 {NULL, NULL, NULL, arg_parser::BOOLEAN}
00051 };
00052
00053 MatrixFileAnalyzer::MatrixFileAnalyzer(const int refESTidx,
00054 const std::string& outputFileName)
00055 : ESTAnalyzer("MatrixFileAnalyzer", refESTidx, outputFileName) {
00056 distanceValues = NULL;
00057 estCount = 0;
00058 }
00059
00060 MatrixFileAnalyzer::~MatrixFileAnalyzer() {
00061 if (distanceValues != NULL) {
00062
00063 for(int row = 0; (row < estCount); row++) {
00064 delete[] distanceValues[row];
00065 }
00066 delete[] distanceValues;
00067 }
00068 distanceValues = NULL;
00069 estCount = 0;
00070 }
00071
00072 void
00073 MatrixFileAnalyzer::showArguments(std::ostream& os) {
00074 ESTAnalyzer::showArguments(os);
00075
00076 arg_parser ap(MatrixFileAnalyzer::argsList);
00077 os << ap;
00078 }
00079
00080 bool
00081 MatrixFileAnalyzer::parseArguments(int& argc, char **argv) {
00082 arg_parser ap(MatrixFileAnalyzer::argsList);
00083 ap.check_args(argc, argv, false);
00084
00085 if (dataFileName == NULL) {
00086 std::cerr << analyzerName
00087 << ": Matrix data file not specified "
00088 << "(use --dataFile option)\n";
00089 return false;
00090 }
00091
00092 return ESTAnalyzer::parseArguments(argc, argv);
00093 }
00094
00095 int
00096 MatrixFileAnalyzer::initialize() {
00097 FILE *input = fopen(dataFileName, "rt");
00098 if ((input == NULL) || ferror(input)) {
00099
00100 std::cerr << "Error opening matrix data file "
00101 << inputFileName << " for reading." << std::endl;
00102 return 1;
00103 }
00104 int currCol = 0;
00105 int currRow = 0;
00106 while (!feof(input)) {
00107
00108 std::string line = readLine(input);
00109 if ((line.length() < 1) || (line[0] == '#')) {
00110
00111 continue;
00112 }
00113
00114 if (distanceValues == NULL) {
00115 if (!parseESTCount(line.c_str())) {
00116
00117 fclose(input);
00118 return 2;
00119 }
00120 } else {
00121
00122 if (currRow >= estCount) {
00123
00124 std::cerr << "Excess data in matrix file.\n";
00125 fclose(input);
00126 return 3;
00127 }
00128 currCol += parseMetrics(line.c_str(), distanceValues[currRow],
00129 currCol, estCount - currCol);
00130 if (currCol == estCount) {
00131
00132 currCol = 0;
00133 currRow++;
00134 }
00135 }
00136 }
00137
00138 fclose(input);
00139
00140 if (currRow != estCount) {
00141 std::cerr << "Insufficient data in matrix file for "
00142 << estCount << " ESTs.\n";
00143 return 4;
00144 }
00145
00146 return 0;
00147 }
00148
00149 bool
00150 MatrixFileAnalyzer::parseESTCount(const char *line) {
00151 char *endPtr = NULL;
00152 estCount = strtol(line, &endPtr, 10);
00153 if ((endPtr == NULL) || (*endPtr != '\0') ||
00154 (estCount < 0) || (estCount > 100000)) {
00155 std::cerr << "Invalid EST count value encountered\n(line = '"
00156 << line << "')\n";
00157 estCount = 0;
00158 return false;
00159 }
00160
00161
00162 distanceValues = new float*[estCount];
00163 for(int rows = 0; (rows < estCount); rows++) {
00164 distanceValues[rows] = new float[estCount];
00165 memset(distanceValues[rows], 0, sizeof(float) * estCount);
00166
00167
00168 char info[32];
00169 sprintf(info, "Dummy EST #%d", rows);
00170 EST::create(rows, info, "No sequence data available");
00171 }
00172
00173
00174 return true;
00175 }
00176
00177 int
00178 MatrixFileAnalyzer::setReferenceEST(const int estIdx) {
00179 if ((estIdx >= 0) && (estIdx < estCount)) {
00180 refESTidx = estIdx;
00181 return 0;
00182 }
00183
00184 return 1;
00185 }
00186
00187 float
00188 MatrixFileAnalyzer::getMetric(const int otherEST) {
00189 if ((otherEST >= 0) && (otherEST < estCount)) {
00190 return distanceValues[refESTidx][otherEST];
00191 }
00192
00193 return -1;
00194 }
00195
00196 int
00197 MatrixFileAnalyzer::parseMetrics(const char* line, float *values,
00198 const int startPos, const int maxValues) {
00199 int index = 0;
00200
00201 std::string tempLine = line;
00202 std::istringstream inStream(line);
00203 while (!inStream.eof()) {
00204 float metric;
00205 inStream >> metric;
00206
00207 values[startPos + index] = metric;
00208 index++;
00209 if (index >= maxValues) {
00210
00211 return index;
00212 }
00213 }
00214
00215 return index;
00216 }
00217
00218 int
00219 MatrixFileAnalyzer::analyze() {
00220 return -1;
00221 }
00222
00223 std::string
00224 MatrixFileAnalyzer::readLine(FILE *fp) {
00225 std::string line;
00226 if (feof(fp) || ferror(fp)) {
00227
00228 return line;
00229 }
00230 char buffer[1024];
00231 while (fgets(buffer, 1024, fp) != NULL) {
00232 line += buffer;
00233 const size_t len = strlen(buffer);
00234 if (buffer[len - 1] == '\n') {
00235
00236 break;
00237 }
00238 }
00239
00240 return line.substr(0, line.length() - 1);
00241 }
00242
00243 #endif