00001 #ifndef TWOPASS_D2_CPP
00002 #define TWOPASS_D2_CPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "TwoPassD2.h"
00038 #include "EST.h"
00039 #include "ESTCodec.h"
00040 #include "ParameterSetManager.h"
00041 #include <algorithm>
00042
00043
00044 int TwoPassD2::BitMask = 0;
00045
00046
00047 int TwoPassD2::NHash = 0;
00048
00049
00050
00051 int TwoPassD2::bitShift = 0;
00052
00053
00054
00055 int TwoPassD2::minThreshold = 0;
00056
00057
00058
00059 bool TwoPassD2::noNormalize = false;
00060
00061
00062 arg_parser::arg_record TwoPassD2::argsList[] = {
00063 {"--d2Threshold", "Threshold score to break out of D2 (default=0)",
00064 &TwoPassD2::minThreshold, arg_parser::INTEGER},
00065 {"--noNormalize", "Signals that threshold scores should not be normalized",
00066 &TwoPassD2::noNormalize, arg_parser::BOOLEAN},
00067 {NULL, NULL, NULL, arg_parser::BOOLEAN}
00068 };
00069
00070 TwoPassD2::TwoPassD2(const int refESTidx, const std::string& outputFileName)
00071 : FWAnalyzer("twopassD2", refESTidx, outputFileName) {
00072 s1WordTable = NULL;
00073 s2WordTable = NULL;
00074 delta = NULL;
00075 alignmentMetric = 0;
00076
00077
00078
00079
00080 frameShift = 50;
00081 maxThreshold = 130;
00082 threshold = 40;
00083 }
00084
00085 TwoPassD2::~TwoPassD2() {
00086 if (s1WordTable != NULL) {
00087 delete [] s1WordTable;
00088 }
00089 if (s2WordTable != NULL) {
00090 delete [] s2WordTable;
00091 }
00092 if (delta != NULL) {
00093 delete [] delta;
00094 }
00095 }
00096
00097 void
00098 TwoPassD2::showArguments(std::ostream& os) {
00099 FWAnalyzer::showArguments(os);
00100
00101 arg_parser ap(TwoPassD2::argsList);
00102 os << ap;
00103 }
00104
00105 bool
00106 TwoPassD2::parseArguments(int& argc, char **argv) {
00107 arg_parser ap(TwoPassD2::argsList);
00108 ap.check_args(argc, argv, false);
00109
00110 if (frameShift < 1) {
00111 std::cerr << analyzerName
00112 << ": Frame shift must be >= 1"
00113 << "(use --frameShift option)\n";
00114 return false;
00115 }
00116
00117 return FWAnalyzer::parseArguments(argc, argv);
00118 }
00119
00120 int
00121 TwoPassD2::initialize() {
00122
00123 int result = FWAnalyzer::initialize();
00124 if (result != 0) {
00125
00126 return result;
00127 }
00128
00129 const int MapSize = (1 << (wordSize * 2));
00130 delta = new int[MapSize];
00131
00132
00133
00134 BitMask = (1 << (wordSize * 2)) - 1;
00135 NHash = MapSize;
00136
00137 bitShift = 2 * (wordSize - 1);
00138
00139 const int wordTableSize = EST::getMaxESTLen() +
00140 ParameterSetManager::getParameterSetManager()->getMaxFrameSize();
00141 s1WordTable = new int[wordTableSize];
00142 s2WordTable = new int[wordTableSize];
00143
00144
00145 numWordsInWindow = frameSize - wordSize + 1;
00146
00147
00148 return 0;
00149 }
00150
00151 int
00152 TwoPassD2::setReferenceEST(const int estIdx) {
00153
00154 if (chain != NULL) {
00155 chain->setReferenceEST(estIdx);
00156 }
00157 if ((estIdx >= 0) && (estIdx < EST::getESTCount())) {
00158 refESTidx = estIdx;
00159
00160 const EST *estS1 = EST::getEST(refESTidx);
00161 const char* s1 = estS1->getSequence();
00162 sq1Len = (int)strlen(s1);
00163
00164 ESTCodec::NormalEncoder<bitShift, BitMask> encoder;
00165 buildWordTable(s1WordTable, s1, encoder);
00166 return 0;
00167 }
00168
00169 return 1;
00170 }
00171
00172 float
00173 TwoPassD2::getMetric(const int otherEST) {
00174 VALIDATE({
00175 if (otherEST == refESTidx) {
00176 return 0;
00177 }
00178 if ((otherEST < 0) || (otherEST >= EST::getESTCount())) {
00179
00180 return -1;
00181 }
00182 });
00183
00184
00185 const EST *estS2 = EST::getEST(otherEST);
00186 const char* s2 = estS2->getSequence();
00187 sq2Len = (int)strlen(s2);
00188
00189
00190 updateParameters();
00191
00192
00193
00194 int bestMatchIsRC = 0;
00195 if (chain != NULL) {
00196 HeuristicChain::getHeuristicChain()->getHint("D2_DoRC", bestMatchIsRC);
00197 }
00198 if (bestMatchIsRC) {
00199 ESTCodec::RevCompEncoder<bitShift, BitMask> encoder;
00200 buildWordTable(s2WordTable, s2, encoder);
00201 } else {
00202 ESTCodec::NormalEncoder<bitShift, BitMask> encoder;
00203 buildWordTable(s2WordTable, s2, encoder);
00204 }
00205
00206 int s1Index = 0, s2Index = 0, boundDist = 0;
00207 float normalize = (float) (noNormalize ? 1 : (1.0 / float(threshold)));
00208 if (frameShift > 1) {
00209
00210 float distance = (float) runD2Asymmetric(&s1Index, &s2Index);
00211 if (distance > maxThreshold) {
00212 return distance * normalize;
00213 }
00214
00215 boundDist = frameSize/2;
00216 } else {
00217
00218
00219 boundDist = std::max(sq1Len, sq2Len);
00220 }
00221
00222
00223 return normalize * (float) runD2Bounded(s1Index-boundDist,
00224 s1Index+boundDist+frameSize,
00225 s2Index-boundDist,
00226 s2Index+boundDist+frameSize);
00227 }
00228
00229 void
00230 TwoPassD2::updateParameters() {
00231 ParameterSet* parameterSet = ParameterSetManager::getParameterSetManager()
00232 ->getParameterSet(sq1Len, sq2Len);
00233 ASSERT(parameterSet != NULL);
00234
00235 frameSize = parameterSet->frameSize;
00236 frameShift = parameterSet->frameShift;
00237 threshold = parameterSet->threshold;
00238 maxThreshold = parameterSet->maxThreshold;
00239
00240 numWordsInWindow = frameSize - wordSize + 1;
00241 }
00242
00243 float
00244 TwoPassD2::runD2Asymmetric(int* s1MinScoreIdx, int* s2MinScoreIdx) {
00245
00246
00247
00248 int sq1Start = 0, sq1End = sq1Len;
00249 int sq2Start = 0, sq2End = sq2Len;
00250
00251
00252 memset(delta, 0, sizeof(int) * (1 << (wordSize * 2)));
00253 int score = 0;
00254
00255 for(int i = 0; (i < numWordsInWindow); i++) {
00256
00257 const int w1 = s1WordTable[sq1Start + i];
00258 if (w1 != NHash) {
00259 score += (delta[w1] << 1) + 1;
00260 delta[w1]++;
00261 }
00262
00263 const int w2 = s2WordTable[sq2Start + i];
00264 if (w2 != NHash) {
00265 score -= (delta[w2] << 1) - 1;
00266 delta[w2]--;
00267 }
00268 }
00269
00270
00271
00272 const int LastWindowInSq1 = (sq1End - wordSize + 1) - numWordsInWindow;
00273 const int LastWindowInSq2 = (sq2End - wordSize + 1) - numWordsInWindow;
00274 const int FirstWindowInSq2 = sq2Start + numWordsInWindow - 1;
00275
00276 int minScore = score;
00277 int s1Win, s2Win, i;
00278 for(s1Win = sq1Start; (s1Win < LastWindowInSq1 || s1Win == sq1Start);
00279 s1Win += frameShift*2) {
00280
00281
00282 for(s2Win = sq2Start; (s2Win < LastWindowInSq2); s2Win++) {
00283
00284
00285
00286 updateWindowAsym(s2WordTable[s2Win + numWordsInWindow],
00287 s2WordTable[s2Win], score, minScore,
00288 s1Win, s2Win+1, s1MinScoreIdx, s2MinScoreIdx);
00289 }
00290
00291 if ((s1Win) >= LastWindowInSq1) {
00292 break;
00293 }
00294
00295
00296
00297
00298 for (i = 0; (i < frameShift && s1Win+i < LastWindowInSq1); i++) {
00299 updateWindowAsym(s1WordTable[s1Win + i],
00300 s1WordTable[s1Win + i + numWordsInWindow],
00301 score, minScore,
00302 s1Win+i+1, LastWindowInSq2,
00303 s1MinScoreIdx, s2MinScoreIdx);
00304 }
00305
00306 if (minScore <= minThreshold) {
00307 break;
00308 }
00309
00310
00311
00312 for(s2Win = sq2End - wordSize; (s2Win > FirstWindowInSq2); s2Win--) {
00313
00314
00315
00316 updateWindowAsym(s2WordTable[s2Win - numWordsInWindow],
00317 s2WordTable[s2Win], score, minScore,
00318 s1Win+i, s2Win-numWordsInWindow,
00319 s1MinScoreIdx, s2MinScoreIdx);
00320 }
00321
00322
00323
00324
00325
00326 for (i = frameShift; (i < frameShift*2 && s1Win+i < LastWindowInSq1);
00327 i++) {
00328 updateWindowAsym(s1WordTable[s1Win + i],
00329 s1WordTable[s1Win + i + numWordsInWindow],
00330 score, minScore,
00331 s1Win+i+1, 0,
00332 s1MinScoreIdx, s2MinScoreIdx);
00333 }
00334
00335 if (minScore <= minThreshold) {
00336 break;
00337 }
00338 }
00339
00340 return (float) minScore;
00341 }
00342
00343 float
00344 TwoPassD2::runD2Bounded(int sq1Start, int sq1End, int sq2Start, int sq2End) {
00345
00346 if (sq1Start < 0) sq1Start = 0;
00347 if (sq2Start < 0) sq2Start = 0;
00348 if (sq1End > sq1Len) sq1End = sq1Len;
00349 if (sq2End > sq2Len) sq2End = sq2Len;
00350
00351
00352 memset(delta, 0, sizeof(int) * (1 << (wordSize * 2)));
00353 int score = 0;
00354
00355 for(int i = 0; (i < numWordsInWindow); i++) {
00356
00357 const int w1 = s1WordTable[sq1Start + i];
00358 if (w1 != NHash) {
00359 score += (delta[w1] << 1) + 1;
00360 delta[w1]++;
00361 }
00362
00363 const int w2 = s2WordTable[sq2Start + i];
00364 if (w2 != NHash) {
00365 score -= (delta[w2] << 1) - 1;
00366 delta[w2]--;
00367 }
00368 }
00369
00370
00371 const int LastWindowInSq1 = (sq1End - wordSize + 1) - numWordsInWindow;
00372 const int LastWindowInSq2 = (sq2End - wordSize + 1) - numWordsInWindow;
00373 const int FirstWindowInSq2 = sq2Start + numWordsInWindow - 1;
00374
00375
00376 int minScore = score;
00377 alignmentMetric = sq1Start - sq2Start;
00378 int s1Win, s2Win;
00379 for(s1Win = sq1Start; (s1Win < LastWindowInSq1 || s1Win == sq1Start);
00380 s1Win += 2) {
00381
00382
00383 for(s2Win = sq2Start; (s2Win < LastWindowInSq2); s2Win++) {
00384
00385
00386
00387 updateWindow(s2WordTable[s2Win + numWordsInWindow],
00388 s2WordTable[s2Win], score, minScore,
00389 s1Win-s2Win-1);
00390 }
00391
00392 if (s1Win >= LastWindowInSq1) {
00393 break;
00394 }
00395
00396
00397
00398 updateWindow(s1WordTable[s1Win],
00399 s1WordTable[s1Win + numWordsInWindow],
00400 score, minScore, s1Win+1-s2Win);
00401
00402 if (minScore <= minThreshold) {
00403 break;
00404 }
00405
00406
00407
00408 for(s2Win = sq2End - wordSize; (s2Win > FirstWindowInSq2); s2Win--) {
00409
00410
00411
00412 updateWindow(s2WordTable[s2Win - numWordsInWindow],
00413 s2WordTable[s2Win], score, minScore,
00414 s1Win+1-s2Win+numWordsInWindow);
00415 }
00416
00417 if ((s1Win+1) >= LastWindowInSq1) {
00418 break;
00419 }
00420
00421
00422
00423
00424 updateWindow(s1WordTable[s1Win + 1],
00425 s1WordTable[s1Win + 1 + numWordsInWindow],
00426 score, minScore, s1Win+2-sq2Start);
00427
00428 if (minScore <= minThreshold) {
00429 break;
00430 }
00431 }
00432
00433 return (float) minScore;
00434 }
00435
00436 bool
00437 TwoPassD2::getAlignmentData(int &alignmentData) {
00438
00439
00440 alignmentData = alignmentMetric;
00441
00442 return true;
00443 }
00444
00445 #endif