00001 #ifndef MST_H 00002 #define MST_H 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include "MSTNode.h" 00038 #include <vector> 00039 00040 /** \def NodeList 00041 00042 \brief Typedef for std::vector<MSTNode>. 00043 00044 This typedef is a convenience definition to handle a vector of 00045 MSTNode objects associated with a given MST. 00046 */ 00047 typedef std::vector<MSTNode> NodeList; 00048 00049 /** \brief Class to represent a Minimum Spanning Tree (MST). 00050 00051 This class is used to represent a MST that is built by the 00052 MSTClusterMaker. It essentially maintains a vector of MSTNode 00053 objects. The order of nodes in the MST reflects the order in 00054 which nodes were added to create the tree. Refer to the 00055 documentation on the MSTNode class for details on the data 00056 encapsulated by it. 00057 */ 00058 class MST { 00059 // Insertion operator to make dumping MST for debugging easier. 00060 friend std::ostream& operator<<(std::ostream&, const MST&); 00061 public: 00062 /** The constructor. 00063 00064 The default constructor for the MST. The constructor performs 00065 no special operations and is present merely to adhere to 00066 coding conventions. 00067 00068 \param[in] maxNodes The number of nodes for which space must 00069 be initially reserved. Reserving space reduces the number of 00070 times the MST has to reallocate memory as nodes are added to 00071 it. 00072 00073 \param[in] haveAlignmentMetric If this value is \c true, then 00074 that indicates that this MST also includes additional 00075 alignment information with each node in the MST. 00076 */ 00077 MST(const int maxNodes, const bool haveAlignmentMetric); 00078 00079 /** The destructor. 00080 00081 The destructor performs no special tasks as the base class 00082 (std::vector) performs all the necessary cleanups. 00083 */ 00084 ~MST() {} 00085 00086 /** Determine if nodes in this MST have alignment metric values. 00087 00088 This method can be used to determine if the nodes in this MST 00089 have alignment information saved for each node. 00090 00091 \return This method returns \c true if the nodes in this MST 00092 have alignment metric. 00093 */ 00094 inline bool hasAlignmentMetric() const { return haveAlignmentMetric; } 00095 00096 /** Convenience method to add a MSTNode to the EST. 00097 00098 This method may be used to append a node to the EST. The 00099 first node to be added must be root node (with parentIndex set 00100 to -1). This method performs no special checks to verify the 00101 integrity of the data. 00102 00103 \param[in] parentIdx The index of the parent EST for this 00104 Node. If a parent does not exist, then this value should be 00105 -1. 00106 00107 \param[in] estIdx The index of the EST. This value must be 00108 the index of the corresponding EST in the list of ESTs 00109 returned by EST::getESTList() method. 00110 00111 \param[in] similarity The similarity metric between this EST 00112 and its parent EST. 00113 00114 \param[in] alignmentInfo Additional alignment information 00115 between the parentIdx and the estIdx to be stored in this 00116 node. 00117 00118 \param[in] directionInfo Additional direction information 00119 between the parentIdx and the estIdx to be stored in this 00120 node. 00121 */ 00122 void addNode(const int parentIdx, const int estIdx, 00123 const float similarity, const int alignmentInfo, 00124 const int directionInfo); 00125 00126 /** Obtain a mutable list of nodes in this MST. 00127 00128 This method can be used to obtain a mutable list of nodes in 00129 contained by this MST. 00130 00131 \return A reference to the list of nodes encapsulated by this 00132 class. 00133 */ 00134 NodeList& getNodes() { return nodeList; } 00135 00136 /** Helper method to write MST data to a given file. 00137 00138 This method is a helper method that is used to write MST data 00139 to a given output file. This method is typically called from 00140 the makeClusters() method after MST construction has been 00141 completed. 00142 00143 \param[in] fileName The file to which the MST data is to be 00144 written. 00145 00146 \param[in] srcFile The source file from where the ESTs were 00147 read. This file name is simply used for cross reference in the 00148 data generated for this MST. 00149 00150 \param[in] threshold The threshold value to be used in clustering 00151 of the MST data. Important for downstream analysis such as that 00152 done by assembly tools. 00153 */ 00154 void serialize(const char *fileName, const char* srcFile, 00155 const float threshold) const; 00156 00157 /** Method to create and load data for MST from a text file. 00158 00159 This method creates an MST and populates it with node data 00160 from a given text file. The text file must have been created 00161 through an earlier call to the serialize() method. If the MST 00162 was successfully populated then this method returns a pointer 00163 to the newly created MST. On errors this method returns NULL. 00164 00165 \param[in] fileName The text file from which the data for the 00166 MST must be read. 00167 00168 \return On success this method returns a valid pointer to the 00169 newly created (and populated) MST. On errors it returns NULL. 00170 */ 00171 static MST* deSerialize(const char *fileName); 00172 00173 /** Obtain the total distance of this MST. 00174 00175 This method can be used to determine the total distance of the 00176 MST. This method iterates over all the nodes present in the 00177 MST and adds the distance/similarity metric together. It then 00178 returns the total. 00179 00180 \return The total MST distance for this MST. If the MST has 00181 one (or zero) nodes then this method returns 0 (zero). 00182 */ 00183 float getMSTDistance() const; 00184 00185 private: 00186 /** The list of nodes in this MST. 00187 00188 This vector is used to track the set of nodes added to this 00189 minimum spanning tree. The maximum nodes in the tree is 00190 typically set in the constructor. Nodes are added via the 00191 addNode() method in this class. 00192 */ 00193 NodeList nodeList; 00194 00195 /** Instance variable to indicate if alignment metric in nodes is 00196 valid. 00197 00198 This instance variable is initialized when this MST is 00199 created. If this instance variable is \c true then each Node 00200 in this MST has a valid alignment information. 00201 */ 00202 bool haveAlignmentMetric; 00203 }; 00204 00205 /** \fn std::ostream& operator<<(std::ostream&, const MST&) 00206 00207 Insertion operator to stream MST information to a given output 00208 stream. This method provides a convenient mechanism to dump the 00209 complete MST information for debugging purposes. The nodes in the 00210 MST are displayed in the order in which they were inserted into 00211 the MST. 00212 */ 00213 extern std::ostream& operator<<(std::ostream&, const MST&); 00214 00215 #endif