00001 #ifndef MST_HEAP_CACHE_H 00002 #define MST_HEAP_CACHE_H 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include "MSTCache.h" 00038 #include <queue> 00039 00040 /** A cache to maintain similarity/distance metrics to build a Minimum 00041 Spanning Tree (MST) using a heap. 00042 00043 This class dervies off the \c MSTCache base class provides the 00044 functionality to manage a cache of metrics using a single large 00045 heap. The operations on a heap are fast. However, the current 00046 heap does not allow to rapidly prune unwanted entries right 00047 away. Consequently, pruning of entries is done on a 00048 "whenever-chance-arises" basis. This causes \c MSTHeapCache to 00049 have a much higher memory foot print when compared to 00050 MSTMultiListCache. 00051 */ 00052 class MSTHeapCache : public MSTCache { 00053 public: 00054 /** The constructor. 00055 00056 This MST cache requires information about the number of ESTs 00057 in the system in order to effectively track the ESTs already 00058 added to the MST. These values must be passed in as the 00059 various parameters to this class. The constructor essentially 00060 passes off the parameters to the base class that saves the 00061 information (for future reference) in suitable instance 00062 variables. 00063 00064 \note This object is typically instantiated in the 00065 MSTClusterMaker::makeClusters() method. 00066 00067 \param[in] totalESTCount The total number of ESTs to be 00068 processed (that is, to be added to the MST). 00069 00070 \param[in] startOwnESTidx The starting index of the EST that 00071 is owned by the process that is using this cache. This value 00072 is used to internally normalize est index values to 0 for the 00073 first EST owned by this process. 00074 00075 \param[in] numOwnESTs The number of ESTs owned by the process 00076 that is using this cache. This information is used to reserve 00077 necessary space to hold SMLists for each EST owned by the 00078 manager/worker process. 00079 00080 \param[in] analyzer The analyzer to be used for EST 00081 comparisons, comparing metrics, and ordering entries in this 00082 cache depending on the type of EST analyzer (whether it is 00083 similarity based or distance metric based). 00084 00085 \param[in] repopulateCaches If this parameter is \c true then 00086 the MSTCache will request lists to be repopulated when 00087 needed. Repopulating lists guarantees that ultimately a MST 00088 will be developed. If repopulation is suppressed then the 00089 resulting spanning tree may not be a MST. 00090 00091 \param[in] maxCachePerEST This parameter \b suggests the 00092 maximum number of cached entries that must be maintained for a 00093 given EST. This parameter may not be really used by all 00094 dervied classes (this is only a suggestion). 00095 */ 00096 MSTHeapCache(const int totalESTCount, const int startOwnESTidx, 00097 const int numOwnESTs, const ESTAnalyzer *analyzer, 00098 const bool repopulateCaches, const int maxCachePerEST); 00099 00100 /** The destructor. 00101 00102 The STL data structures used by this class manage all the 00103 memory operations for this class. Consequently, the 00104 destructor does not have any special tasks to perform. It 00105 here present merely to adhere to coding conventions. 00106 */ 00107 virtual ~MSTHeapCache() {} 00108 00109 /** Suggest to clear caches that contain entries for a newly added 00110 EST. 00111 00112 This method can be used to suggest to the cache to prune 00113 entries corresponding to the \c estIdxAdded from the various 00114 data structures maintained by this cache. This method 00115 repeatedly removes entries at the top of the heap if the 00116 entries correspond to estIdx. 00117 00118 \param[in] estIdxAdded The index of the EST that has just been 00119 added to the MST. 00120 00121 \param[out] repopulateList The index values of ESTs whose 00122 cache needs to be repopulated. Currently, this method does 00123 not populate any entries into this list. 00124 00125 \param[in] prefixListSize If this parameter is set to \c true, 00126 then the number of entries added to the repopulateList is 00127 inserted at the beginning of the vector. This eases 00128 transmission of lists via MPI to the master process. 00129 */ 00130 virtual void pruneCaches(const int estIdxAdded, 00131 std::vector<int>& repopulateList, 00132 const bool prefixListSize = true); 00133 00134 /** Add/merges a batch of new cache entries with the current 00135 entries in the cache for a given EST. 00136 00137 This method merges a given batch of new cache entries with 00138 existing cache entries in the cache. Caches are not trimmed 00139 to fit into the suggested maxCachePerEST size. 00140 00141 \param[in] estIdx The zero-based index of the reference EST 00142 against which the batch of cache entries in \c list were 00143 analyzed and generated. 00144 00145 \param[in] list A SMList containing the batch of new cache 00146 entries to be merged with any existing entries in the cache 00147 for the given EST (whose zero-based index is \c estIdx). 00148 */ 00149 virtual void mergeList(const int UNREFERENCED_PARAMETER(estIdx), 00150 const SMList& list); 00151 00152 /** Obtains the top-most similar entry from the MSTCache. 00153 00154 This method simply uses the top-most entry in the heap and 00155 populates the parameters with the appropriate value. Note 00156 that the parameters are initialized to -1, -1, -1.0f, and -1 00157 respectively. 00158 00159 \param[out] srcESTidx The source EST index from where the 00160 similarity metric is being measured. The srcESTidx is already 00161 present in the MST. 00162 00163 \param[out] destESTidx The destination EST index that is the 00164 best choice to be added to the MST (based on the local 00165 information). 00166 00167 \param[out] metric The similarity/distance metric between the 00168 srcESTidx and the destESTidx. 00169 00170 \param[out] alignmentData The alignment data associated with 00171 the srcESTidx and the destESTidx. 00172 00173 \param[out] directionData The direction data associated with 00174 the srcESTidx and the destESTidx. 00175 */ 00176 virtual void getBestEntry(int& srcESTidx, int& destESTidx, 00177 float& metric, int& alignmentData, 00178 int& directionData) const; 00179 00180 /** Display cache usage statistics. 00181 00182 This method currently displays the following information: 00183 00184 <ul> 00185 00186 <li>Total number of entries cached by this cache.</li> 00187 00188 <li>Current number of entries left in the cache.</li> 00189 00190 </ul> 00191 00192 \param[out] os The output stream to which statistics must be 00193 written. 00194 00195 \param[in] MyRank The MPI rank of the process to which this 00196 cache is assocaited. This information is used merely for 00197 displaying more informative statistics and does not influence 00198 the actual values displayed by this method. 00199 */ 00200 virtual void displayStats(std::ostream &os, const int MyRank) const; 00201 00202 protected: 00203 /** A simple wrapper method to handle popping the cache (removing 00204 the top element from the cache) and incrementing the number 00205 of pruned entries. 00206 */ 00207 virtual void popCache(); 00208 00209 private: 00210 /** Number of entries that were pruned from the cache. 00211 00212 This instance variable is used to track the number of entries 00213 that were removed from the cache by the pruneCache() method. 00214 */ 00215 int prunedEntries; 00216 00217 /** The actual heap of cache entries for various nodes. 00218 00219 This cache contains all the cached entries for each EST owned 00220 and managed by this MSTCache. Entries are added by the \c 00221 mergeList method and removed by the \c pruneCache method. 00222 */ 00223 std::priority_queue<CachedESTInfo, std::vector<CachedESTInfo>, 00224 GreaterCachedESTInfo> cache; 00225 00226 /** A dummy operator= 00227 00228 The operator=() is supressed for this class as it has constant 00229 members whose value is set when the object is created. These 00230 values cannot be changed during the lifetime of this object. 00231 00232 \param[in] src The source object from where data is to be 00233 copied. Currently this value is ignored. 00234 00235 \return Reference to this. 00236 */ 00237 MSTHeapCache& operator=(const MSTHeapCache& src); 00238 }; 00239 00240 #endif