00001 #ifndef D2_ZIM_CPP
00002 #define D2_ZIM_CPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "D2Zim.h"
00038 #include "EST.h"
00039 #include "ESTCodec.h"
00040 #include <algorithm>
00041
00042
00043 int D2Zim::frameShift = 1;
00044
00045 int D2Zim::BitMask = 0;
00046
00047
00048 size_t D2Zim::wordTableSize = 0;
00049
00050
00051 arg_parser::arg_record D2Zim::argsList[] = {
00052 {"--frameShift", "Frame Shift (default=1)",
00053 &D2Zim::frameShift, arg_parser::INTEGER},
00054 {NULL, NULL, NULL, arg_parser::BOOLEAN}
00055 };
00056
00057 D2Zim::D2Zim(const int refESTidx, const std::string& outputFileName)
00058 : FWAnalyzer("D2", refESTidx, outputFileName) {
00059 fdHashMap = NULL;
00060 s1WordTable = NULL;
00061 s2WordTable = NULL;
00062 }
00063
00064 D2Zim::~D2Zim() {
00065 if (fdHashMap != NULL) {
00066 delete [] fdHashMap;
00067 }
00068 if (s1WordTable != NULL) {
00069 delete [] s1WordTable;
00070 }
00071 if (s2WordTable != NULL) {
00072 delete [] s2WordTable;
00073 }
00074 }
00075
00076 void
00077 D2Zim::showArguments(std::ostream& os) {
00078 FWAnalyzer::showArguments(os);
00079
00080 arg_parser ap(D2Zim::argsList);
00081 os << ap;
00082 }
00083
00084 bool
00085 D2Zim::parseArguments(int& argc, char **argv) {
00086 arg_parser ap(D2Zim::argsList);
00087 ap.check_args(argc, argv, false);
00088
00089 if (frameShift < 1) {
00090 std::cerr << analyzerName
00091 << ": Frame shift must be >= 1"
00092 << "(use --frameShift option)\n";
00093 return false;
00094 }
00095
00096 return FWAnalyzer::parseArguments(argc, argv);
00097 }
00098
00099 int
00100 D2Zim::initialize() {
00101 int result = FWAnalyzer::initialize();
00102 if (result != 0) {
00103
00104 return result;
00105 }
00106
00107 const int MapSize = 1 << (wordSize * 2);
00108
00109
00110
00111
00112 BitMask = (1 << (wordSize * 2)) - 1;
00113
00114 NumWordsWin = frameSize-wordSize;
00115
00116
00117
00118 fdHashMap = new int[MapSize];
00119
00120
00121 wordTableSize = EST::getMaxESTLen() + frameSize;
00122 s1WordTable = new int[wordTableSize];
00123 s2WordTable = new int[wordTableSize];
00124
00125
00126 return 0;
00127 }
00128
00129 int
00130 D2Zim::setReferenceEST(const int estIdx) {
00131
00132 if (chain != NULL) {
00133 chain->setReferenceEST(estIdx);
00134 }
00135 if ((estIdx >= 0) && (estIdx < EST::getESTCount())) {
00136 refESTidx = estIdx;
00137
00138 EST *estS1 = EST::getEST(refESTidx);
00139 const char* s1 = estS1->getSequence();
00140 buildWordTable(s1WordTable, s1);
00141 return 0;
00142 }
00143
00144 return 1;
00145 }
00146
00147 void
00148 D2Zim::buildWordTable(int* wordTable, const char* s) {
00149
00150 memset(wordTable, 0, sizeof(int) * wordTableSize);
00151
00152 ASSERT ( s != NULL );
00153 const int length = strlen(s) - wordSize;
00154 ESTCodec::NormalEncoder<wordSize, BitMask> encoder;
00155 register int hash = 0;
00156 int ignoreMask = 0;
00157 for(int i = 0; (i < wordSize); i++) {
00158 hash = encoder(hash, s[i], ignoreMask);
00159 }
00160
00161 wordTable[0] = hash;
00162
00163 for (int i = 1; (i <= length); i++) {
00164 hash = encoder(hash, s[i], ignoreMask);
00165 if (!ignoreMask) {
00166
00167
00168 wordTable[i] = hash;
00169 }
00170 }
00171 }
00172
00173 void
00174 D2Zim::buildFdHashMaps(int* sed) {
00175
00176 const int MapSize = 1 << (wordSize * 2);
00177 memset(fdHashMap, 0, sizeof(int) * MapSize);
00178
00179 for (int i = 0; (i+wordSize <= frameSize); i++) {
00180 *sed += 2*(fdHashMap[s1WordTable[i]]++) + 1;
00181 *sed += -2*(fdHashMap[s2WordTable[i]]--) + 1;
00182 }
00183 }
00184
00185
00186
00187
00188 #define CHECK_SED_AND_BREAK \
00189 if (sed < minSed) { \
00190 alignmentMetric = s1FramePos; \
00191 if ((minSed = sed) == 0) { \
00192 break; \
00193 } \
00194 }
00195
00196 float
00197 D2Zim::getMetric(const int otherEST) {
00198 if (otherEST == refESTidx) {
00199 return 0;
00200 }
00201 if ((otherEST < 0) || (otherEST >= EST::getESTCount())) {
00202
00203 return -1;
00204 }
00205
00206
00207 int sed = 0;
00208 int minSed = frameSize * 4;
00209 int s1FramePos = 0;
00210
00211 EST *estS1 = EST::getEST(refESTidx);
00212 EST *estS2 = EST::getEST(otherEST);
00213 const char* sq1 = estS1->getSequence();
00214 ASSERT ( sq1 != NULL );
00215 const char* sq2 = estS2->getSequence();
00216 ASSERT ( sq2 != NULL );
00217
00218
00219 buildWordTable(s2WordTable, sq2);
00220
00221
00222 buildFdHashMaps(&sed);
00223
00224 minSed = sed;
00225
00226 const int numFramesS1 = strlen(sq1) - frameSize;
00227 const int numFramesS2 = strlen(sq2) - frameSize;
00228
00229
00230 while (s1FramePos < numFramesS1) {
00231 if (s1FramePos != 0) {
00232 for(int i = 0; (i < frameShift && s1FramePos++ < numFramesS1); i++){
00233 refShiftUpdateFd(&sed, s1FramePos);
00234 CHECK_SED_AND_BREAK;
00235 }
00236 }
00237 for(int s2FramePos = 1; s2FramePos <= numFramesS2; s2FramePos++) {
00238 rightShiftUpdateFd(&sed, s2FramePos);
00239 CHECK_SED_AND_BREAK;
00240 }
00241 if (s1FramePos != numFramesS1) {
00242 for(int i = 0; (i < frameShift && s1FramePos++ < numFramesS1); i++){
00243 refShiftUpdateFd(&sed, s1FramePos);
00244 CHECK_SED_AND_BREAK;
00245 }
00246 }
00247 for(int s2FramePos = numFramesS2-1; s2FramePos >= 0; s2FramePos--) {
00248 leftShiftUpdateFd(&sed, s2FramePos);
00249 CHECK_SED_AND_BREAK;
00250 }
00251 }
00252
00253
00254 return (float) minSed;
00255 }
00256
00257 bool
00258 D2Zim::getAlignmentData(int &alignmentData) {
00259
00260
00261 alignmentData = alignmentMetric;
00262
00263 return true;
00264 }
00265
00266 #endif