00001 #ifndef MST_CLUSTER_H 00002 #define MST_CLUSTER_H 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include "MST.h" 00038 #include "ESTAnalyzer.h" 00039 00040 class MSTCluster; 00041 00042 /** \def ClusterList 00043 00044 \brief Typedef for std::vector<MSTCluster>. 00045 00046 This typedef is a convenience definition to handle a vector of 00047 Cluster objects associated with a given MSTCluster. 00048 */ 00049 typedef std::vector<MSTCluster*> ClusterList; 00050 00051 /** Class to encapsulate cluster information and aid in building 00052 clusters using Minimum Spanning Tree (MST) data. 00053 00054 This class was introduced to encapsulate and manage the data 00055 associated with various clusters built using MST information. 00056 */ 00057 class MSTCluster { 00058 // Insertion operator to make dumping MST for debugging easier. 00059 friend std::ostream& operator<<(std::ostream&, const MSTCluster&); 00060 public: 00061 MSTCluster(MSTCluster* owner = NULL, const std::string& name = ""); 00062 ~MSTCluster(); 00063 00064 /** Method to make clusters. 00065 00066 This method takes a supplied list of nodes from an MST, 00067 an EST analyzer, and a threshold score, and uses them to 00068 break the MST into clusters based on the threshold. 00069 00070 \param nodeList The list of nodes in the MST 00071 \param analyzer The ESTAnalyzer used for score computation 00072 \param threshold The threshold value for separating clusters 00073 */ 00074 void makeClusters(NodeList& nodeList, const ESTAnalyzer* analyzer, 00075 const float threshold); 00076 00077 /** Method to automatically calculate a threshold. 00078 00079 Currently this method is called only when the threshold supplied 00080 to MSTCluster::makeClusters() is equal to -1. This threshold 00081 corresponds to the CLU analyzer and tells the calculateThreshold 00082 method to calculate the threshold based on the mean and variance 00083 of scoring metrics among the list of MST nodes. 00084 00085 \param nodeList The list of nodes in the MST 00086 00087 \return the calculated threshold as a floating-point number 00088 */ 00089 float calculateThreshold(NodeList& nodeList); 00090 00091 /** Add a MST Node to this cluster. 00092 00093 \param[in] node The node ot be added to the cluster. 00094 */ 00095 void add(const MSTNode& node); 00096 00097 /** Add a child cluster to this cluster. 00098 00099 This method must be used to add a child cluster this 00100 cluster. Note that the parent cluster must be appropriately 00101 set in the child to refer to this MSTCluster. 00102 00103 \note This cluster node takes ownership of the child node. 00104 00105 \param[in] child The child node to be added to the this 00106 cluster. 00107 */ 00108 void add(MSTCluster *child); 00109 00110 /** Method to directly add a given EST to a dummy clusterID. 00111 00112 This method is typically used to add a EST that has been 00113 filtered out by a filter to a specific cluster. The cluster 00114 maker typically creates a dummy MSTNode with the necessary 00115 information to be added. 00116 00117 \param clusterID The ID/index of the cluster to which the 00118 MST node is to be added. 00119 00120 \param[in] node The MSTNode entry to be added to this cluster. 00121 */ 00122 void add(const int clusterID, const MSTNode& node); 00123 00124 /** Print the cluster tree. 00125 00126 This method prints a tree display of the clusters. 00127 */ 00128 void printClusterTree(std::ostream& os = std::cout, 00129 const std::string& prefix = "") const; 00130 00131 /** Print the cluster tree in a different format. 00132 00133 As above, but prints the cluster tree in a format recognizable 00134 by the PEACE GUI and suitable for GUI processing. 00135 */ 00136 void guiPrintClusterTree(std::ostream& os = std::cout, 00137 const char *srcFile = NULL) const; 00138 00139 /** makeMergedClusters 00140 00141 Currently not used in the code. 00142 */ 00143 void makeMergedClusters(const int size, int* parent, bool* root); 00144 00145 /** Gets the ID of this cluster. 00146 00147 \return the cluster ID, an integer 00148 */ 00149 inline int getClusterID() const { return clusterID; } 00150 00151 00152 protected: 00153 /** A helper method for the guiPrintClusterTree method. 00154 00155 Works by printing the data for the current cluster 00156 and then calling guiPrintTree on the sub-clusters (if any). 00157 */ 00158 void guiPrintTree(std::ostream& os) const; 00159 00160 private: 00161 /** List of sub-clusters for this cluster. 00162 00163 This instance variable is used to maintain the list of 00164 sub-clusters for this cluster. Entries are added to this list 00165 whenever sub-clusters are created in the 00166 MSTCluster::makeClusters() method. 00167 */ 00168 ClusterList clusterList; 00169 00170 const MSTCluster* parent; 00171 NodeList members; 00172 const int clusterID; 00173 00174 /** A name set to identify filtered clusters. 00175 00176 The name is set when dummy clusters are created to add ESTs 00177 that were filtered out based on a specific condition. The 00178 named clusters are typically created by filters. By default 00179 clusters don't have a name. These indicate regular clusters. 00180 00181 \see Filter. 00182 */ 00183 const std::string name; 00184 00185 /** Instance variable to track the next available cluster ID. 00186 00187 This instance variable is used to generate unique cluster ID 00188 values for each newly created cluster. It is intialized to 00189 zero. Each time a cluster is instantiated, the constructor 00190 uses this value to set the clusterID and then increments this 00191 value. 00192 */ 00193 static int clusterIDSequence; 00194 00195 /** Global list to maintain reference to all the MSTClusters created. 00196 00197 This list is used to maintain a pointer to all the MST 00198 clusters ever created. This list is used to look up clusters 00199 given the index of the cluster. 00200 00201 \see MSTCluster::getCluster() method 00202 */ 00203 static ClusterList globalClusterList; 00204 00205 }; 00206 00207 /** \fn std::ostream& operator<<(std::ostream&, const MSTCluster&) 00208 00209 Insertion operator to stream MSTCluster information to a given 00210 output stream. This method provides a convenient mechanism to 00211 dump the complete MSTCluster information for debugging purposes. 00212 The clusters in the MSTCluster are displayed in the order in which 00213 they were created. 00214 */ 00215 extern std::ostream& operator<<(std::ostream&, const MSTCluster&); 00216 00217 #endif