00001 #ifndef FW_ANALYZER_CPP
00002 #define FW_ANALYZER_CPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "FWAnalyzer.h"
00038 #include "ResultLog.h"
00039 #include "EST.h"
00040 #include "TVHeuristic.h"
00041
00042 #include <algorithm>
00043 #include <time.h>
00044
00045
00046 int FWAnalyzer::argumentFrameSize = 100;
00047 int FWAnalyzer::wordSize = 6;
00048
00049
00050 arg_parser::arg_record FWAnalyzer::commonArgsList[] = {
00051 {"--frame", "Frame size (in base pairs, default=100)",
00052 &FWAnalyzer::argumentFrameSize, arg_parser::INTEGER},
00053 {"--word", "Word size (in base pairs, default=6)",
00054 &FWAnalyzer::wordSize, arg_parser::INTEGER},
00055 {NULL, NULL, NULL, arg_parser::BOOLEAN}
00056 };
00057
00058 FWAnalyzer::FWAnalyzer(const std::string& analyzerName, const int refESTidx,
00059 const std::string& outputFile)
00060 : ESTAnalyzer(analyzerName, refESTidx, outputFile) {
00061 frameSize = 100;
00062
00063 }
00064
00065 FWAnalyzer::~FWAnalyzer() {
00066
00067 EST::deleteAllESTs();
00068 }
00069
00070 void
00071 FWAnalyzer::showArguments(std::ostream& os) {
00072 ESTAnalyzer::showArguments(os);
00073
00074 arg_parser ap(FWAnalyzer::commonArgsList);
00075 os << "Options for " << analyzerName << " are:\n";
00076 os << ap;
00077 }
00078
00079 bool
00080 FWAnalyzer::parseArguments(int& argc, char **argv) {
00081 arg_parser ap(FWAnalyzer::commonArgsList);
00082 ap.check_args(argc, argv, false);
00083
00084 frameSize = argumentFrameSize;
00085
00086 if (!ESTAnalyzer::parseArguments(argc, argv)) {
00087
00088 return false;
00089 }
00090 if (frameSize < 0) {
00091
00092 std::cerr << analyzerName
00093 << ": Frame size must be greater than zero "
00094 << "(use --frame option)\n";
00095 return false;
00096 }
00097 if ((wordSize < 0) || (wordSize > frameSize)) {
00098
00099 std::cerr << analyzerName
00100 << ": Word size (greater than frame size) not specified? "
00101 << "(use --word option)\n";
00102 return false;
00103 }
00104
00105 return true;
00106 }
00107
00108 std::string
00109 FWAnalyzer::getFrame(const EST* est, bool start) {
00110 std::string result(est->getSequence());
00111 if (start) {
00112 return result.substr(0, frameSize);
00113 }
00114
00115 return result.substr(result.size() - frameSize);
00116 }
00117
00118 void
00119 FWAnalyzer::dumpEST(ResultLog& log, const EST* est, const bool isReference) {
00120 const std::string frame = getFrame(est, !isReference);
00121
00122 if (isReference) {
00123 const char* start=(htmlLog ? "<font face=\"courier\" color=red>" : "");
00124 const char* end =(htmlLog ? "</font>" : "");
00125 log.report(est->getInfo(), "%s%s%s", "n/a", start, frame.c_str(), end);
00126 } else {
00127 const char* start = (htmlLog ? "<font face=\"courier\">" : "");
00128 const char* end = (htmlLog ? "</font>" : "");
00129 log.report(est->getInfo(), "%s%s%s", "%f",
00130 start, frame.c_str(), end, est->getSimilarity());
00131 }
00132 }
00133
00134 void
00135 FWAnalyzer::dumpESTList(const std::vector<EST*>& estList,
00136 const EST* refEST,
00137 ResultLog& log) {
00138
00139 const char* Titles[] = {"Name", "EST Frame", "Metric", NULL};
00140 log.startTable(Titles);
00141
00142 for(int id = 0; (id < (int) estList.size()); id++) {
00143 if (id % 20 == 0) {
00144
00145 dumpEST(log, refEST, true);
00146 }
00147 if (estList[id] == refEST) {
00148
00149 continue;
00150 }
00151
00152 dumpEST(log, estList[id], false);
00153 }
00154 }
00155
00156 int
00157 FWAnalyzer::initialize() {
00158 if ((estFileName != NULL) && (!loadFASTAFile(estFileName))) {
00159
00160 return 1;
00161 }
00162
00163
00164 if ((chain != NULL) && (chain->initialize())) {
00165
00166 return 2;
00167 }
00168
00169 return 0;
00170 }
00171
00172 int
00173 FWAnalyzer::setReferenceEST(const int estIdx) {
00174
00175 referenceFrame = "";
00176
00177 refESTidx = estIdx;
00178
00179 std::vector<EST*>& estList = EST::getESTList();
00180 if (refESTidx >= (int) estList.size()) {
00181
00182 std::cerr << "Reference EST index is greater than number of ESTs.\n"
00183 << "Cannot continue further processing.\n";
00184 return 2;
00185 }
00186
00187
00188 referenceFrame = getFrame(estList[refESTidx], false);
00189
00190 return 0;
00191 }
00192
00193 float
00194 FWAnalyzer::getMetric(const int estIdx) {
00195 if (estIdx == refESTidx) {
00196
00197
00198 return 0;
00199 }
00200
00201 const EST* est = EST::getESTList()[estIdx];
00202 return analyzeFrame(referenceFrame, getFrame(est), wordSize);
00203 }
00204
00205 int
00206 FWAnalyzer::analyze() {
00207 int result = initialize();
00208 if (result != 0) {
00209
00210 return result;
00211 }
00212
00213
00214 if ((result = setReferenceEST(refESTidx)) != 0) {
00215
00216 return result;
00217 }
00218
00219
00220 std::vector<EST*>& estList = EST::getESTList();
00221
00222
00223 double total = 0;
00224 for(int id = 0; (id < (int) estList.size()); id++) {
00225
00226 const float similarity = ESTAnalyzer::analyze(id);
00227 estList[id]->setSimilarity(similarity);
00228 total += similarity;
00229 }
00230
00231
00232 const double mean = total / estList.size();
00233 double deviations = 0;
00234 for(int id = 0; (id < (int) estList.size()); id++) {
00235 if (id != refESTidx) {
00236 const double diff = estList[id]->getSimilarity() - mean;
00237 deviations += (diff * diff);
00238 }
00239 }
00240 const double variance = deviations / estList.size();
00241
00242
00243 const EST* refEST = estList[refESTidx];
00244
00245 std::sort(estList.begin(), estList.end(), EST::LessEST());
00246
00247
00248 ResultLog log(outputFileName, htmlLog);
00249
00250 dumpHeader(log, mean, variance);
00251
00252 dumpESTList(estList, refEST, log);
00253
00254
00255 return 0;
00256 }
00257
00258 void
00259 FWAnalyzer::dumpHeader(ResultLog& log, const double mean,
00260 const double variance) {
00261 const char *HTMLTags[] = {"<b>", "</b>", "<i>", "</i>", "<u>", "</u>"};
00262 const char *TextTags[] = {"* ", " *", "", "", "_", "_"};
00263 const char **Tags = (htmlLog ? HTMLTags : TextTags);
00264 const char *Title = (htmlLog ? "E S T A N A L Y S I S R E P O R T" : "E S T A N A L Y S I S R E P O R T");
00265
00266 char now_str[128];
00267 getTime(now_str);
00268
00269
00270
00271
00272 log.reportLine("");
00273 log.reportLine("%s %s %s", Tags[0], Title, Tags[1]);
00274 log.reportLine("Analysis conducted on %s", now_str);
00275 log.reportLine("%sEST Analyzer used: %s%s", Tags[0], analyzerName.c_str(),
00276 Tags[1]);
00277 log.reportLine("EST data read from file: %s%s%s", Tags[2], estFileName,
00278 Tags[3]);
00279 log.reportLine("");
00280
00281
00282
00283 log.report("Frame Size: %d", " ", "Word Size: %d", frameSize, wordSize);
00284 log.report("Reference EST index: %d", " ",
00285 "Number of ESTs: %d", refESTidx, EST::getESTList().size());
00286 log.report("Similarity Metric mean: %lf", " ",
00287 "Similarity Metric variance: %lf", mean, variance);
00288 log.endTable();
00289 log.reportLine("");
00290 }
00291
00292 float
00293 FWAnalyzer::analyzeFrame(const std::string&,
00294 const std::string&,
00295 const int) {
00296 return 0;
00297 }
00298
00299 #endif