00001 #ifndef MST_CLUSTER_CPP
00002 #define MST_CLUSTER_CPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "MSTCluster.h"
00038 #include "Utilities.h"
00039 #include "HashMap.h"
00040 #include "arg_parser.h"
00041 #include <cmath>
00042
00043
00044 int MSTCluster::clusterIDSequence = 0;
00045
00046
00047 typedef HashMap<int, MSTCluster*> ClusterMap;
00048
00049
00050 ClusterList MSTCluster::globalClusterList;
00051
00052 MSTCluster::MSTCluster(MSTCluster* owner, const std::string& clsName) :
00053 parent(owner), clusterID(clusterIDSequence++), name(clsName) {
00054
00055 ASSERT ( clusterID == (int) globalClusterList.size() );
00056 globalClusterList.push_back(this);
00057 }
00058
00059 MSTCluster::~MSTCluster() {
00060
00061 for(size_t i = 0; (i < clusterList.size()); i++) {
00062 delete clusterList[i];
00063 }
00064
00065 clusterList.clear();
00066
00067 members.clear();
00068
00069 globalClusterList[clusterID] = NULL;
00070 }
00071
00072 void
00073 MSTCluster::add(const MSTNode& node) {
00074 members.push_back(node);
00075 }
00076
00077 void
00078 MSTCluster::add(const int clusterID, const MSTNode& node) {
00079
00080 ASSERT ((clusterID >= 0) && (clusterID < (int) globalClusterList.size()));
00081 ASSERT (globalClusterList[clusterID] != NULL);
00082 ASSERT (globalClusterList[clusterID]->clusterID == clusterID);
00083
00084 globalClusterList[clusterID]->members.push_back(node);
00085 }
00086
00087 void
00088 MSTCluster::add(MSTCluster *child) {
00089 ASSERT ( child->parent == this );
00090
00091 clusterList.push_back(child);
00092 }
00093
00094 void
00095 MSTCluster::makeClusters(NodeList& nodeList, const ESTAnalyzer* analyzer,
00096 float threshold) {
00097
00098 if (threshold < 0) {
00099 threshold = calculateThreshold(nodeList);
00100 }
00101
00102 ClusterMap clusterMap;
00103
00104
00105 for(NodeList::const_iterator node = nodeList.begin();
00106 (node != nodeList.end()); node++) {
00107 if (analyzer->compareMetrics(threshold, (*node).getMetric())) {
00108
00109 continue;
00110 }
00111
00112
00113
00114 MSTCluster *subCluster = clusterMap[(*node).parentIdx];
00115 if (subCluster == NULL) {
00116
00117 subCluster = new MSTCluster(this);
00118
00119 clusterMap[(*node).parentIdx] = subCluster;
00120
00121 add(subCluster);
00122
00123
00124
00125
00126 NodeList::const_iterator parent = node;
00127 while (parent != nodeList.begin()) {
00128 if (parent->estIdx == node->parentIdx) {
00129 subCluster->add(*parent);
00130 break;
00131 }
00132
00133 parent--;
00134 }
00135 }
00136 ASSERT ( subCluster != NULL );
00137 subCluster->add(*node);
00138
00139
00140 clusterMap[(*node).estIdx] = subCluster;
00141 }
00142
00143
00144 if (parent != NULL) {
00145
00146
00147 return;
00148 }
00149
00150
00151 for(NodeList::iterator node = nodeList.begin();
00152 (node != nodeList.end()); node++) {
00153 if (clusterMap[(*node).estIdx] == NULL) {
00154
00155
00156 MSTCluster *subCluster = new MSTCluster(this);
00157
00158 subCluster->add(*node);
00159
00160 clusterList.push_back(subCluster);
00161 }
00162 }
00163
00164 nodeList.clear();
00165 }
00166
00167 float
00168 MSTCluster::calculateThreshold(NodeList& nodeList) {
00169 double totalSim = 0;
00170 double totalSimSqr = 0;
00171
00172
00173 for (NodeList::const_iterator curr = nodeList.begin();
00174 (curr != nodeList.end()); curr++) {
00175 const float sim = (*curr).getMetric();
00176 totalSim += sim;
00177 totalSimSqr += (sim*sim);
00178 }
00179 const double mean = totalSim / (nodeList.size()-1);
00180 const double stDev = sqrt((totalSimSqr / (nodeList.size()-1))
00181 - (mean*mean));
00182
00183 return (float) (mean + stDev);
00184 }
00185
00186 void
00187 MSTCluster::makeMergedClusters(const int size, int* parent, bool* root) {
00188 MSTCluster *subCluster;
00189 ClusterMap clusterMap;
00190 for (int estIdx = 0; estIdx < size; estIdx++) {
00191 int rootIdx = estIdx;
00192
00193
00194 while (!root[rootIdx]) {
00195 rootIdx = parent[rootIdx];
00196 }
00197 ASSERT(root[rootIdx]);
00198
00199 if (clusterMap[rootIdx] == NULL) {
00200
00201 subCluster = new MSTCluster(this);
00202 clusterList.push_back(subCluster);
00203 clusterMap[rootIdx] = subCluster;
00204
00205 subCluster->add(MSTNode(-1, rootIdx, 0, 0));
00206 }
00207 if (rootIdx != estIdx) {
00208
00209 subCluster = clusterMap[rootIdx];
00210 subCluster->add(MSTNode(-1, estIdx, 0, 0));
00211 }
00212 }
00213 }
00214
00215 void
00216 MSTCluster::printClusterTree(std::ostream& os,
00217 const std::string& prefix) const {
00218
00219 os << "Cluster #" << clusterID;
00220 if (name != "") {
00221 os << " [" << name << "]";
00222 }
00223 os << " (#sub-clusters="
00224 << clusterList.size() << ", #members=" << members.size() << ")\n";
00225 os << prefix << " |\n";
00226
00227 for(size_t i = 0; (i < members.size()); i++) {
00228 os << prefix << " +---" << members[i].getESTInfo()
00229 << " (" << members[i].estIdx << ")\n";
00230 }
00231
00232 const int IterSize = clusterList.size() - 1;
00233 for(int i = 0; (i < IterSize); i++) {
00234 os << prefix << " +---";
00235 std::string newPrefix = prefix;
00236 newPrefix += " | ";
00237 clusterList[i]->printClusterTree(os, newPrefix);
00238 }
00239 if (IterSize >= 0) {
00240 os << prefix << " +---";
00241 std::string newPrefix = prefix;
00242 newPrefix += " ";
00243 clusterList[IterSize]->printClusterTree(os, newPrefix);
00244 }
00245 }
00246
00247 void
00248 MSTCluster::guiPrintClusterTree(std::ostream& os, const char *srcFile) const {
00249
00250 ASSERT ( parent == NULL );
00251
00252 char now[128], srcTimeStr[128] = "<none>";
00253 if (srcFile != NULL) {
00254
00255 getTimeStamp(srcFile, srcTimeStr);
00256 }
00257
00258 os << "# Cluster Data\n"
00259 << "# Generated on: " << getTime(now)
00260 << "# Generated from source file: "
00261 << ((srcFile != NULL) ? srcFile : "<none>") << "\n"
00262 << "# Source file timestamp: " << srcTimeStr
00263 << "# Data format: <E,estIdx,clstrId> | <C,clstrID,ParntClstrID>\n"
00264 << "# Command Line: " << arg_parser::get_global_args() << "\n";
00265
00266 guiPrintTree(os);
00267 }
00268
00269 void
00270 MSTCluster::guiPrintTree(std::ostream& os) const {
00271
00272 os << "C," << clusterID << ","
00273 << ((parent != NULL) ? parent->clusterID : -1)
00274 << "," << name << std::endl;
00275
00276 for(size_t i = 0; (i < members.size()); i++) {
00277 os << "E," << members[i].getESTIdx()
00278 << "," << clusterID << "\n";
00279 }
00280
00281 for(size_t i = 0; (i < clusterList.size()); i++) {
00282 clusterList[i]->guiPrintTree(os);
00283 }
00284 }
00285
00286 std::ostream&
00287 operator<<(std::ostream& os, const MSTCluster& cluster) {
00288
00289 os << "Cluster #" << cluster.clusterID << " ["
00290 << cluster.name << "]\n";
00291 for(size_t i = 0; (i < cluster.members.size()); i++) {
00292 os << cluster.members[i].getESTInfo() << "\n";
00293 }
00294
00295 for(size_t i = 0; (i < cluster.clusterList.size()); i++) {
00296 os << *cluster.clusterList[i] << std::endl;
00297 }
00298 return os;
00299 }
00300
00301 #endif