00001 #ifndef D2_ZIM_CPP
00002 #define D2_ZIM_CPP
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 #include "D2Zim.h"
00038 #include "EST.h"
00039 #include "ESTCodec.h"
00040 #include <algorithm>
00041 
00042 
00043 int D2Zim::frameShift = 1;
00044 
00045 int D2Zim::BitMask = 0;
00046 
00047 
00048 size_t D2Zim::wordTableSize = 0;
00049 
00050 
00051 arg_parser::arg_record D2Zim::argsList[] = {
00052     {"--frameShift", "Frame Shift (default=1)",
00053      &D2Zim::frameShift, arg_parser::INTEGER},
00054     {NULL, NULL, NULL, arg_parser::BOOLEAN}
00055 };
00056 
00057 D2Zim::D2Zim(const int refESTidx, const std::string& outputFileName)
00058     : FWAnalyzer("D2", refESTidx, outputFileName) {
00059     fdHashMap   = NULL;
00060     s1WordTable = NULL;
00061     s2WordTable = NULL;
00062 }
00063 
00064 D2Zim::~D2Zim() {
00065     if (fdHashMap != NULL) {
00066         delete [] fdHashMap;
00067     }
00068     if (s1WordTable != NULL) {
00069         delete [] s1WordTable;
00070     }
00071     if (s2WordTable != NULL) {
00072         delete [] s2WordTable;
00073     }
00074 }
00075 
00076 void
00077 D2Zim::showArguments(std::ostream& os) {
00078     FWAnalyzer::showArguments(os);
00079     
00080     arg_parser ap(D2Zim::argsList);
00081     os << ap;
00082 }
00083 
00084 bool
00085 D2Zim::parseArguments(int& argc, char **argv) {
00086     arg_parser ap(D2Zim::argsList);
00087     ap.check_args(argc, argv, false);
00088     
00089     if (frameShift < 1) {
00090         std::cerr << analyzerName
00091                   << ": Frame shift must be >= 1"
00092                   << "(use --frameShift option)\n";
00093         return false;
00094     }
00095     
00096     return FWAnalyzer::parseArguments(argc, argv);
00097 }
00098 
00099 int
00100 D2Zim::initialize() {
00101     int result = FWAnalyzer::initialize();
00102     if (result != 0) {
00103         
00104         return result;
00105     }
00106     
00107     const int MapSize = 1 << (wordSize * 2);
00108 
00109     
00110     
00111     
00112     BitMask = (1 << (wordSize * 2)) - 1;
00113 
00114     NumWordsWin = frameSize-wordSize;
00115     
00116     
00117 
00118     fdHashMap = new int[MapSize];
00119 
00120     
00121     wordTableSize = EST::getMaxESTLen() + frameSize;
00122     s1WordTable   = new int[wordTableSize];
00123     s2WordTable   = new int[wordTableSize];
00124     
00125     
00126     return 0;
00127 }
00128 
00129 int
00130 D2Zim::setReferenceEST(const int estIdx) {
00131     
00132     if (chain != NULL) {
00133         chain->setReferenceEST(estIdx);
00134     }
00135     if ((estIdx >= 0) && (estIdx < EST::getESTCount())) {
00136         refESTidx = estIdx;
00137         
00138         EST *estS1 = EST::getEST(refESTidx);
00139         const char* s1 = estS1->getSequence();
00140         buildWordTable(s1WordTable, s1);
00141         return 0; 
00142     }
00143     
00144     return 1;
00145 }
00146 
00147 void
00148 D2Zim::buildWordTable(int* wordTable, const char* s) {
00149     
00150     memset(wordTable, 0, sizeof(int) * wordTableSize);
00151     
00152     ASSERT ( s != NULL );
00153     const int length = strlen(s) - wordSize;
00154     ESTCodec::NormalEncoder<wordSize, BitMask> encoder;
00155     register int hash = 0;
00156     int ignoreMask    = 0;
00157     for(int i = 0; (i < wordSize); i++) {
00158         hash = encoder(hash, s[i], ignoreMask);
00159     }
00160     
00161     wordTable[0] = hash;
00162     
00163     for (int i = 1; (i <= length); i++) {
00164         hash = encoder(hash, s[i], ignoreMask);
00165         if (!ignoreMask) {
00166             
00167             
00168             wordTable[i] = hash;
00169         }
00170     }
00171 }
00172 
00173 void
00174 D2Zim::buildFdHashMaps(int* sed) {
00175     
00176     const int MapSize = 1 << (wordSize * 2);
00177     memset(fdHashMap, 0, sizeof(int) * MapSize);
00178 
00179     for (int i = 0; (i+wordSize <= frameSize); i++) {
00180         *sed +=  2*(fdHashMap[s1WordTable[i]]++) + 1;
00181         *sed += -2*(fdHashMap[s2WordTable[i]]--) + 1;
00182     }
00183 }
00184 
00185 
00186 
00187 
00188 #define CHECK_SED_AND_BREAK           \
00189     if (sed < minSed) {               \
00190         alignmentMetric = s1FramePos; \
00191         if ((minSed = sed) == 0) {    \
00192             break;                    \
00193         }                             \
00194 }
00195 
00196 float
00197 D2Zim::getMetric(const int otherEST) {
00198     if (otherEST == refESTidx) {
00199         return 0; 
00200     }
00201     if ((otherEST < 0) || (otherEST >= EST::getESTCount())) {
00202         
00203         return -1;
00204     }
00205     
00206     
00207     int sed = 0;
00208     int minSed = frameSize * 4; 
00209     int s1FramePos = 0;
00210     
00211     EST *estS1 = EST::getEST(refESTidx);
00212     EST *estS2 = EST::getEST(otherEST);
00213     const char* sq1 = estS1->getSequence();
00214     ASSERT ( sq1 != NULL );
00215     const char* sq2 = estS2->getSequence();
00216     ASSERT ( sq2 != NULL );
00217 
00218     
00219     buildWordTable(s2WordTable, sq2);
00220     
00221     
00222     buildFdHashMaps(&sed);
00223     
00224     minSed = sed;
00225 
00226     const int numFramesS1 = strlen(sq1) - frameSize;
00227     const int numFramesS2 = strlen(sq2) - frameSize;
00228     
00229     
00230     while (s1FramePos < numFramesS1) {
00231         if (s1FramePos != 0) {
00232             for(int i = 0; (i < frameShift && s1FramePos++ < numFramesS1); i++){
00233                 refShiftUpdateFd(&sed, s1FramePos);
00234                 CHECK_SED_AND_BREAK;
00235             }
00236         }
00237         for(int s2FramePos = 1; s2FramePos <= numFramesS2; s2FramePos++) {
00238             rightShiftUpdateFd(&sed, s2FramePos);
00239             CHECK_SED_AND_BREAK;
00240         }
00241         if (s1FramePos != numFramesS1) {
00242             for(int i = 0; (i < frameShift && s1FramePos++ < numFramesS1); i++){
00243                 refShiftUpdateFd(&sed, s1FramePos);
00244                 CHECK_SED_AND_BREAK;
00245             }
00246         }
00247         for(int s2FramePos = numFramesS2-1; s2FramePos >= 0; s2FramePos--) {
00248             leftShiftUpdateFd(&sed, s2FramePos);
00249             CHECK_SED_AND_BREAK;
00250         }
00251     }
00252     
00253     
00254     return (float) minSed;
00255 }
00256 
00257 bool
00258 D2Zim::getAlignmentData(int &alignmentData) {
00259     
00260     
00261     alignmentData = alignmentMetric;
00262     
00263     return true;
00264 }
00265 
00266 #endif