00001 #ifndef LC_FILTER_CPP
00002 #define LC_FILTER_CPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "LCFilter.h"
00038 #include "ClusterMaker.h"
00039 #include "ESTAnalyzer.h"
00040 #include "EST.h"
00041
00042 #include <sstream>
00043
00044
00045 char LCFilter::DefaultPatternList[16] = "A,C";
00046 char* LCFilter::patternList = LCFilter::DefaultPatternList;
00047 int LCFilter::threshold = -1;
00048
00049
00050 arg_parser::arg_record LCFilter::argsList[] = {
00051 {"--lcPatterns", "List of (, separated) patterns to generate dummy ESTs",
00052 &LCFilter::patternList, arg_parser::STRING},
00053 {"--lcThreshold", "Threshold value to detect low complexity sequences",
00054 &LCFilter::threshold, arg_parser::INTEGER},
00055 {NULL, NULL, NULL, arg_parser::BOOLEAN}
00056 };
00057
00058 LCFilter::LCFilter(ClusterMaker *clusterMaker) :
00059 Filter("lcFilter", clusterMaker) {
00060
00061 }
00062
00063 void
00064 LCFilter::showArguments(std::ostream& os) {
00065
00066 arg_parser ap(LCFilter::argsList);
00067 os << ap;
00068 }
00069
00070 bool
00071 LCFilter::parseArguments(int& argc, char **argv) {
00072
00073 arg_parser ap(LCFilter::argsList);
00074 ap.check_args(argc, argv, false);
00075 if (patternList == NULL) {
00076 std::cerr << filterName << ": Pattern list must be specified "
00077 << "(use --lcPatterns option)\n";
00078 return false;
00079 }
00080
00081 return true;
00082 }
00083
00084 int
00085 LCFilter::initialize() {
00086 ASSERT ( clusterMaker != NULL );
00087 ASSERT ( clusterMaker->getAnalyzer() != NULL );
00088 ASSERT ( patternList != NULL );
00089 ASSERT ( DefaultPatternList != NULL );
00090
00091
00092 if (threshold == -1) {
00093
00094 threshold = (int) clusterMaker->getAnalyzer()->getInvalidMetric();
00095 }
00096
00097 const int dummyLen = clusterMaker->getAnalyzer()->getPreferredDummyESTLength();
00098
00099 std::string patStr(patternList);
00100
00101
00102 while (!patStr.empty()) {
00103
00104 const std::string::size_type commaPos = patStr.find(',');
00105 const std::string pattern = patStr.substr(0, commaPos);
00106
00107 addDummyEntry("DummyEST For Pattern " + pattern, pattern, dummyLen);
00108
00109 if (commaPos == std::string::npos) {
00110
00111 patStr.clear();
00112 } else {
00113
00114 patStr = patStr.substr(commaPos + 1);
00115 }
00116 }
00117
00118 return 0;
00119 }
00120
00121 void
00122 LCFilter::addDummyEntry(const std::string& fastaID, const std::string& seq,
00123 const int length) {
00124
00125
00126 std::string fullSeq = seq;
00127 const int repeats = length / seq.length();
00128 for(int i = 0; (i < repeats); i++) {
00129 fullSeq += seq;
00130 }
00131
00132 int estIdx = EST::getESTCount();
00133 EST *est = EST::create(estIdx, fastaID.c_str(), fullSeq.c_str());
00134
00135 est->setProcessed(true);
00136
00137 std::ostringstream clsName;
00138 clsName << "Low Complexity ESTs (filtered by LCFilter Pattern "
00139 << seq << "...)";
00140 int clusterID = clusterMaker->addDummyCluster(clsName.str());
00141
00142 dummyESTList.push_back(DummyESTInfo(estIdx, clusterID));
00143 }
00144
00145 void
00146 LCFilter::finalize() {
00147
00148
00149
00150
00151 EST::deleteLastESTs(dummyESTList.size());
00152
00153 dummyESTList.clear();
00154 }
00155
00156 int
00157 LCFilter::runFilter(const int estIdx) {
00158
00159 ASSERT ( clusterMaker != NULL );
00160 ESTAnalyzer *analyzer = clusterMaker->getAnalyzer();
00161 ASSERT ( analyzer != NULL );
00162
00163 analyzer->setReferenceEST(estIdx);
00164
00165 for(size_t i = 0; (i < dummyESTList.size()); i++) {
00166
00167 float metric = analyzer->analyze(dummyESTList[i].first);
00168
00169 if (analyzer->compareMetrics(metric, (float) threshold)) {
00170
00171
00172
00173
00174 return dummyESTList[i].second;
00175 }
00176 }
00177
00178 return -1;
00179 }
00180
00181 #endif