00001 #ifndef EST_ANALYZER_CPP
00002 #define EST_ANALYZER_CPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "ESTAnalyzer.h"
00038 #include "EST.h"
00039 #include "MPIHelper.h"
00040
00041
00042 #define NO_ERROR 0
00043
00044
00045 bool ESTAnalyzer::readAhead = false;
00046 char* ESTAnalyzer::estFileName = NULL;
00047 bool ESTAnalyzer::htmlLog = false;
00048 bool ESTAnalyzer::noMaskBases = false;
00049
00050
00051 arg_parser::arg_record ESTAnalyzer::commonArgsList[] = {
00052 {"--readAhead", "Use a read head thread to load next EST data (NYI)",
00053 &ESTAnalyzer::readAhead, arg_parser::BOOLEAN},
00054 {"--estFile", "Name of EST file (in FASTA format) to be processed",
00055 &ESTAnalyzer::estFileName, arg_parser::STRING},
00056 {"--html", "Generate analysis report in HTML format",
00057 &ESTAnalyzer::htmlLog, arg_parser::BOOLEAN},
00058 {"--no-mask-bases", "Don't mask out all lower case neucleotides in reads",
00059 &ESTAnalyzer::noMaskBases, arg_parser::BOOLEAN},
00060 {NULL, NULL, NULL, arg_parser::BOOLEAN}
00061 };
00062
00063 ESTAnalyzer::ESTAnalyzer(const std::string& name, const int estIdx,
00064 const std::string& outputFile)
00065 : refESTidx(estIdx), chain(NULL), outputFileName(outputFile),
00066 analyzerName(name) {
00067
00068 }
00069
00070 ESTAnalyzer::~ESTAnalyzer() {
00071
00072 }
00073
00074 int
00075 ESTAnalyzer::setHeuristicChain(HeuristicChain* hChain) {
00076 chain = hChain;
00077 return 0;
00078 }
00079
00080 float
00081 ESTAnalyzer::analyze(const int otherEST, const bool useHeuristics,
00082 const bool useHeavyWeight) {
00083
00084 if (useHeuristics && chain != NULL && !chain->shouldAnalyze(otherEST)) {
00085
00086 return getInvalidMetric();
00087 }
00088 if (useHeavyWeight) {
00089 return getMetric(otherEST);
00090 }
00091
00092 return getValidMetric();
00093 }
00094
00095 void
00096 ESTAnalyzer::showArguments(std::ostream& os) {
00097
00098 arg_parser ap(ESTAnalyzer::commonArgsList);
00099 os << "Common options for all EST analyzers are:\n";
00100 os << ap;
00101 }
00102
00103 bool
00104 ESTAnalyzer::parseArguments(int& argc, char **argv) {
00105 arg_parser ap(ESTAnalyzer::commonArgsList);
00106
00107
00108 ap.check_args(argc, argv, false);
00109
00110
00111 if (estFileName == NULL) {
00112
00113 std::cerr << analyzerName
00114 << ": EST file not specified (use --estFile option)\n";
00115 return false;
00116 }
00117
00118 return true;
00119 }
00120
00121 bool
00122 ESTAnalyzer::loadFASTAFile(const char *fileName, const bool unpopulate) {
00123 static const std::string IgnoreFileName = "<none>";
00124 if (IgnoreFileName == fileName) {
00125
00126
00127 return true;
00128 }
00129 FILE *fastaFile = NULL;
00130 #ifndef _WINDOWS
00131 fastaFile = fopen(fileName, "rt");
00132 #else
00133 fopen_s(&fastaFile, fileName, "rt");
00134 #endif
00135 if ((fastaFile == NULL) || (ferror(fastaFile))) {
00136 std::cerr << analyzerName << "(Rank: ";
00137 std::cerr << MPI_GET_RANK()
00138 << "): Error opening FASTA file "
00139 << fileName << " for reading." << std::endl;
00140 return false;
00141 }
00142
00143 int lineNum = 1;
00144
00145 int filteredCount = 0;
00146
00147 while (!feof(fastaFile)) {
00148 EST *est = EST::create(fastaFile, lineNum, !noMaskBases);
00149 if ((est == NULL) && (!feof(fastaFile) || ferror(fastaFile))) {
00150
00151 fclose(fastaFile);
00152 std::cerr << analyzerName << ": Error loading EST from "
00153 << fileName << " at line: " << lineNum << std::endl;
00154 return false;
00155 }
00156 if (est->getID() == -1) {
00157
00158
00159 filteredCount++;
00160 }
00161 if (unpopulate) {
00162
00163
00164
00165 est->unpopulate();
00166 }
00167 }
00168
00169 if (filteredCount > 0) {
00170 std::cerr << analyzerName << ": " << filteredCount << " sequences "
00171 << "with length less than 50 nt were filtered out of "
00172 << "the data set." << std::endl;
00173 }
00174 fclose(fastaFile);
00175 return true;
00176 }
00177
00178 ESTAnalyzer&
00179 ESTAnalyzer::operator=(const ESTAnalyzer&) {
00180 return *this;
00181 }
00182
00183 #endif