00001 #ifndef D2_CPP
00002 #define D2_CPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "D2.h"
00038 #include "ESTCodec.h"
00039 #include <algorithm>
00040
00041
00042 int D2::frameShift = 1;
00043
00044
00045 int D2::BitMask = 0;
00046
00047
00048
00049 int D2::bitShift = 0;
00050
00051
00052
00053 int D2::threshold = 0;
00054
00055
00056 arg_parser::arg_record D2::argsList[] = {
00057 {"--frameShift", "Frame Shift (default=1)",
00058 &D2::frameShift, arg_parser::INTEGER},
00059 {"--d2Threshold", "Threshold score to break out of D2 (default=0)",
00060 &D2::threshold, arg_parser::INTEGER},
00061 {NULL, NULL, NULL, arg_parser::BOOLEAN}
00062 };
00063
00064 D2::D2(const int refESTidx, const std::string& outputFileName)
00065 : FWAnalyzer("D2", refESTidx, outputFileName) {
00066 delta = NULL;
00067 alignmentMetric = 0;
00068 }
00069
00070 D2::~D2() {
00071 if (delta != NULL) {
00072 delete [] delta;
00073 }
00074 }
00075
00076 void
00077 D2::showArguments(std::ostream& os) {
00078 FWAnalyzer::showArguments(os);
00079
00080 arg_parser ap(D2::argsList);
00081 os << ap;
00082 }
00083
00084 bool
00085 D2::parseArguments(int& argc, char **argv) {
00086 arg_parser ap(D2::argsList);
00087 ap.check_args(argc, argv, false);
00088
00089 if (frameShift < 1) {
00090 std::cerr << analyzerName
00091 << ": Frame shift must be >= 1"
00092 << "(use --frameShift option)\n";
00093 return false;
00094 }
00095
00096 return FWAnalyzer::parseArguments(argc, argv);
00097 }
00098
00099 int
00100 D2::initialize() {
00101
00102 int result = FWAnalyzer::initialize();
00103 if (result != 0) {
00104
00105 return result;
00106 }
00107
00108 const int MapSize = (1 << (wordSize * 2));
00109 delta = new int[MapSize];
00110
00111
00112
00113 BitMask = (1 << (wordSize * 2)) - 1;
00114
00115 bitShift = 2 * (wordSize - 1);
00116
00117 numWordsInWindow = frameSize - wordSize + 1;
00118
00119
00120 return 0;
00121 }
00122
00123 int
00124 D2::setReferenceEST(const int estIdx) {
00125
00126 if (chain != NULL) {
00127 chain->setReferenceEST(estIdx);
00128 }
00129 if ((estIdx >= 0) && (estIdx < EST::getESTCount())) {
00130 refESTidx = estIdx;
00131
00132 const EST *estS1 = EST::getEST(refESTidx);
00133 const char* s1 = estS1->getSequence();
00134
00135 ESTCodec::NormalEncoder<bitShift, BitMask> encoder;
00136 buildWordTable(s1WordTable, s1, encoder);
00137 return 0;
00138 }
00139
00140 return 1;
00141 }
00142
00143 float
00144 D2::getMetric(const int otherEST) {
00145 VALIDATE({
00146 if (otherEST == refESTidx) {
00147 return 0;
00148 }
00149 if ((otherEST < 0) || (otherEST >= EST::getESTCount())) {
00150
00151 return -1;
00152 }
00153 });
00154
00155
00156 return (float) runD2(otherEST);
00157 }
00158
00159 float
00160 D2::runD2(const int otherEST) {
00161
00162 const EST *estS2 = EST::getEST(otherEST);
00163 const char* sq2 = estS2->getSequence();
00164
00165
00166
00167 int bestMatchIsRC = 0;
00168 if (chain != NULL) {
00169 HeuristicChain::getHeuristicChain()->getHint("D2_DoRC", bestMatchIsRC);
00170 }
00171 if (bestMatchIsRC == 0) {
00172 ESTCodec::NormalEncoder<bitShift, BitMask> encoder;
00173 buildWordTable(s2WordTable, sq2, encoder);
00174 } else {
00175 ESTCodec::RevCompEncoder<bitShift, BitMask> encoder;
00176 buildWordTable(s2WordTable, sq2, encoder);
00177 }
00178
00179
00180
00181
00182 int sq1Start = 0, sq1End = s1WordTable.size() + wordSize - 1;
00183 int sq2Start = 0, sq2End = s2WordTable.size() + wordSize - 1;
00184
00185
00186 memset(delta, 0, sizeof(int) * (1 << (wordSize * 2)));
00187 int score = 0;
00188
00189
00190 const int FirstWinSize = std::min(frameSize,
00191 std::max(sq1End, sq2End)) - wordSize + 1;
00192 for(int i = 0; (i < FirstWinSize); i++) {
00193
00194 if (sq1Start + i < sq1End) {
00195 const int word1 = s1WordTable[sq1Start + i];
00196 score += (delta[word1] << 1) + 1;
00197 delta[word1]++;
00198 }
00199
00200 if (sq2Start + i < sq2End) {
00201 const int word2 = s2WordTable[sq2Start + i];
00202 score -= (delta[word2] << 1) - 1;
00203 delta[word2]--;
00204 }
00205 }
00206
00207
00208
00209 const int LastWordInSq1 = (sq1End - wordSize + 1) - numWordsInWindow;
00210 const int LastWordInSq2 = (sq2End - wordSize + 1) - numWordsInWindow;
00211 const int FirstWordInSq2 = sq2Start + numWordsInWindow - 1;
00212
00213 int minScore = score;
00214 for(int s1Win = sq1Start; (s1Win < LastWordInSq1 || s1Win == sq1Start);
00215 s1Win += 2) {
00216
00217
00218 for(int s2Win = sq2Start; (s2Win < LastWordInSq2); s2Win++) {
00219
00220
00221
00222 updateWindow(s2WordTable[s2Win + numWordsInWindow],
00223 s2WordTable[s2Win], score, minScore);
00224 }
00225
00226 if (s1Win >= LastWordInSq1) {
00227 break;
00228 }
00229
00230
00231
00232 updateWindow(s1WordTable[s1Win], s1WordTable[s1Win + numWordsInWindow],
00233 score, minScore);
00234
00235 if (minScore <= threshold) {
00236 break;
00237 }
00238
00239
00240
00241 for(int s2Win = sq2End - wordSize; (s2Win > FirstWordInSq2);
00242 s2Win--) {
00243
00244
00245
00246 updateWindow(s2WordTable[s2Win - numWordsInWindow],
00247 s2WordTable[s2Win], score, minScore);
00248 }
00249
00250 if ((s1Win+1) >= LastWordInSq1) {
00251 break;
00252 }
00253
00254
00255
00256
00257 updateWindow(s1WordTable[s1Win + 1],
00258 s1WordTable[s1Win + 1 + numWordsInWindow],
00259 score, minScore);
00260
00261 if (minScore <= threshold) {
00262 break;
00263 }
00264 }
00265
00266
00267
00268 return (float) minScore;
00269 }
00270
00271 bool
00272 D2::getAlignmentData(int &alignmentData) {
00273
00274
00275 alignmentData = alignmentMetric;
00276
00277 return true;
00278 }
00279
00280 #endif