00001 #ifndef EST_CODEC_CPP 00002 #define EST_CODEC_CPP 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include "ESTCodec.h" 00038 #include "Utilities.h" 00039 00040 // The statically allocated array to translate characters to their 00041 // normal encoding. 00042 char ESTCodec::charToInt[255]; 00043 00044 // The statically allocated array to translate characters to their 00045 // complementary encoding. 00046 char ESTCodec::charToIntComp[255]; 00047 00048 // The globally unique instance of ESTCodec. 00049 ESTCodec ESTCodec::estCodec; 00050 00051 ESTCodec::ESTCodec() { 00052 // Initialize the array CharToInt for mapping A, T, C, and G to 0, 00053 // 1, 2, and 3 respectively. Initialize values for specific base 00054 // pairs. Leave rest intentionally uninitialized. So that way if 00055 // invalid entires are accessed, valgrind will hopefully report an 00056 // "uninitialized memory access" error. 00057 charToInt[(int) 'A'] = charToInt[(int) 'a'] = 0; 00058 charToInt[(int) 'G'] = charToInt[(int) 'g'] = 2; 00059 charToInt[(int) 'C'] = charToInt[(int) 'c'] = 1; 00060 charToInt[(int) 'T'] = charToInt[(int) 't'] = 3; 00061 // Now initialize the complementary array. 00062 charToIntComp[(int) 'A'] = charToIntComp[(int) 'a'] = 3; 00063 charToIntComp[(int) 'G'] = charToIntComp[(int) 'g'] = 1; 00064 charToIntComp[(int) 'C'] = charToIntComp[(int) 'c'] = 2; 00065 charToIntComp[(int) 'T'] = charToIntComp[(int) 't'] = 0; 00066 // Initialize pointers 00067 revCompTable = NULL; 00068 } 00069 00070 ESTCodec::~ESTCodec() { 00071 // Free up all the reverse complement table that was constructed. 00072 HashMap<int, int*>::iterator curr = revCompTables.begin(); 00073 while (curr != revCompTables.end()) { 00074 int *rcTable = curr->second; 00075 delete [] rcTable; 00076 // Onto the next entry 00077 curr++; 00078 } 00079 // Clear out all the entires in the hash map 00080 revCompTables.clear(); 00081 } 00082 00083 void 00084 ESTCodec::setRevCompTable(const int wordSize) { 00085 if ((revCompTable = revCompTables[wordSize]) == NULL) { 00086 // A table does not exist. So create one 00087 revCompTable = addRevCompTable(wordSize); 00088 } 00089 ASSERT ( revCompTable != NULL ); 00090 } 00091 00092 int* 00093 ESTCodec::addRevCompTable(const int wordSize) { 00094 // Create a reverse complement table with 4^wordSize entries. 00095 const int EntryCount = 1 << (wordSize * 2); // 4^wordSize 00096 // Create the translation table. 00097 int *rcTable = new int[EntryCount]; 00098 // Populate the rcTable now. 00099 for(int entry = 0; (entry < EntryCount); entry++) { 00100 // Obtain complement 00101 int word = ~entry; 00102 // Reverse 2-bits at a time to obtain 00103 int rcValue = 0; 00104 for(int bp = 0; (bp < wordSize); bp++) { 00105 // Shift two words over and OR-in the 2 bits. 00106 rcValue = (rcValue << 2) | (word & 3); 00107 // Get rid of the two bits we used 00108 word >>= 2; 00109 } 00110 // Update the reverse-complement entry 00111 rcTable[entry] = rcValue; 00112 } 00113 // Add newly created reverse-complement entry to the hash map for 00114 // future lookups and use. 00115 revCompTables[wordSize] = rcTable; 00116 // Return newly created table back to caller as per API contract 00117 return rcTable; 00118 } 00119 00120 #endif