00001 #ifndef MST_MULTI_LIST_CACHE_H 00002 #define MST_MULTI_LIST_CACHE_H 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include "MSTCache.h" 00038 00039 /** \typedef std::pair<int, SMList> MSTCacheEntry 00040 00041 \brief Shortcut typedef for std::pair<int, SMList> 00042 00043 This typedef is a shortcut for a cache entry that contains the 00044 following pair of information: 00045 00046 <ol> 00047 00048 <li>The first entry is an \c int that indicates the EST index with 00049 which this cache entry is associated. </li> 00050 00051 <li>The second entry is a \c SMList that contains the closet (most 00052 similar) ESTs for the EST indicated by the first entry.</li> 00053 00054 </ol> 00055 */ 00056 typedef std::pair<int, SMList> MSTCacheEntry; 00057 00058 /** A cache that uses multiple lists to maintain similarity/distance 00059 metrics to build a Minimum Spanning Tree (MST). 00060 00061 This class dervies off the \c MSTCache base class provides the 00062 functionality to manage a cache of similarity metrics using a 00063 collection of Lists. This cache is organized as follows: The 00064 cache is essentially a collection of SMList objects, one SMList 00065 for each EST (logically) owned by this cache. The lists are 00066 maintained in a sorted fashion with the best metrics at the top of 00067 the list to facilitate rapid construction of a Minimum Spanning 00068 Tree (MST). 00069 */ 00070 class MSTMultiListCache : public MSTCache { 00071 public: 00072 /** The constructor. 00073 00074 This MST cache requires information about the number of ESTs 00075 in the system in order to effectively track the ESTs already 00076 added to the MST. These values must be passed in as the 00077 various parameters to this class. The constructor essentially 00078 passes off the parameters to the base class that saves the 00079 information (for future reference) in suitable instance 00080 variables. 00081 00082 \note This object is typically instantiated in the 00083 MSTClusterMaker::makeClusters() method. 00084 00085 \param[in] totalESTCount The total number of ESTs to be 00086 processed (that is, to be added to the MST). 00087 00088 \param[in] startOwnESTidx The starting index of the EST that 00089 is owned by the process that is using this cache. This value 00090 is used to internally normalize est index values to 0 for the 00091 first EST owned by this process. 00092 00093 \param[in] numOwnESTs The number of ESTs owned by the process 00094 that is using this cache. This information is used to reserve 00095 necessary space to hold SMLists for each EST owned by the 00096 manager/worker process. 00097 00098 \param[in] analyzer The analyzer to be used for EST 00099 comparisons, comparing metrics, and ordering entries in this 00100 cache depending on the type of EST analyzer (whether it is 00101 similarity based or distance metric based). 00102 00103 \param[in] repopulateCaches If this parameter is \c true then 00104 the MSTCache will request lists to be repopulated when 00105 needed. Repopulating lists guarantees that ultimately a MST 00106 will be developed. If repopulation is suppressed then the 00107 resulting spanning tree may not be a MST. 00108 00109 \param[in] maxCachePerEST This parameter \b suggests the 00110 maximum number of cached entries that must be maintained for a 00111 given EST. This parameter may not be really used by all 00112 dervied classes (this is only a suggestion). 00113 */ 00114 MSTMultiListCache(const int totalESTCount, const int startOwnESTidx, 00115 const int numOwnESTs, const ESTAnalyzer *analyzer, 00116 const bool repopulateCaches, const int maxCachePerEST); 00117 00118 /** The destructor. 00119 00120 The instance variables in this class automatically manage all 00121 the memory associated this class. Consequently, the 00122 destructor does not have any special tasks to perform. It 00123 here present merely to adhere to coding conventions. 00124 */ 00125 virtual ~MSTMultiListCache() {} 00126 00127 /** Clear caches that contain entries for a newly added estIdx. 00128 00129 This method overrides the virtual method in the base class and 00130 prunes entries corresponding to the \c estIdxAdded from the 00131 various lists maintained by this cache. The pruning operation 00132 is removes unused entries and facilitates repopulation of 00133 caches (if requested). 00134 00135 \param[in] estIdxAdded The index of the EST that has just been 00136 added to the MST. 00137 00138 \param[out] repopulateList The index values of ESTs whose 00139 cache needs to be repopulated. The list of EST indexes added 00140 to this list are only the ESTs that are owned by this process. 00141 If none of the caches need to be recomputed then this list is 00142 empty (all existing entries are removed). 00143 00144 \param[in] prefixListSize If this parameter is set to \c true, 00145 then the number of entries added to the repopulateList is 00146 inserted at the beginning of the vector. This eases 00147 transmission of lists via MPI to the master process. 00148 */ 00149 virtual void pruneCaches(const int estIdxAdded, 00150 std::vector<int>& repopulateList, 00151 const bool prefixListSize = true); 00152 00153 /** Merges a given SMList with current entries in the cache for a 00154 given EST. 00155 00156 This method overrides the virtual method in the base class. As 00157 per the API requirement, it merges the given list with 00158 existing cache entries for the given EST such that only \c 00159 maxCachePerEST entries are retained in the cache. The 00160 retained entries are chosen so that they have the best 00161 metrics. Care is taken to ensure that the cache continues to 00162 remain sorted with the best metrics at the top of the cache. 00163 00164 \param[in] estIdx The index of the EST with which the given 00165 list must be merged. 00166 00167 \param[in] list The SMList to be merged with any existing 00168 entries in the cache for the given estIdx. 00169 */ 00170 virtual void mergeList(const int estIdx, const SMList& list); 00171 00172 /** Obtains the top-most similar entry from the MSTCache. 00173 00174 This method overrides the default implementation in the base 00175 class. It searches the analysis metrics stored in this cache 00176 for various ESTs to locate the best entry with the best 00177 metric. It populates the parameters with the appropriate 00178 value. Note that the parameters are initialized to -1, -1, 00179 and -1.0f respectively. 00180 00181 \param[out] srcESTidx The source EST index from where the 00182 similarity metric is being measured. The srcESTidx is already 00183 present in the MST. 00184 00185 \param[out] destESTidx The destination EST index that is the 00186 best choice to be added to the MST (based on the local 00187 information). 00188 00189 \param[out] metric The similarity/distance metric between the 00190 srcESTidx and the destESTidx. 00191 00192 \param[out] alignmentData The alignment data associated with 00193 the srcESTidx and the destESTidx. 00194 00195 \param[out] directionData The direction data associated with 00196 the srcESTidx and the destESTidx. 00197 */ 00198 void getBestEntry(int& srcESTidx, int& destESTidx, 00199 float& metric, int& alignmentData, 00200 int& directionData) const; 00201 00202 00203 /** Display cache usage statistics. 00204 00205 This method can be used to print the current cache usage 00206 statistics. This method prints the following information: 00207 00208 <ul> 00209 00210 <li>Number of ESTs cached by this cache.</li> 00211 00212 <li>Total number of SMEntries currently in the cache.</li> 00213 00214 <li>Average length of each list and the standard deviation.</li> 00215 00216 <li>The number of times the cache had to be repopulated.</li> 00217 00218 </ul> 00219 00220 \param[out] os The output stream to which statistics must be 00221 written. 00222 00223 \param[in] MyRank The MPI rank of the process to which this 00224 cache is assocaited. This information is used merely for 00225 displaying more informative statistics and does not influence 00226 the actual values displayed by this method. 00227 */ 00228 void displayStats(std::ostream &os, const int MyRank) const; 00229 00230 /** Sort a Similarity Metric List (SMList) based on similarity 00231 metric and then prunes it. 00232 00233 This method provides a convenient method for sorting a 00234 similarity metric list based on the similarity metric. This 00235 method simply uses the STL sort method with a suitable Functor 00236 for performing this task. Sorting is performed such that the 00237 highest similarity metric is first in the vector after 00238 sorting. Once sorting has been completed, this method ensures 00239 that the list is no longer than the specified cacheSize. 00240 00241 \param[in,out] list The SMList to be sorted based on the 00242 (similarity or distance) metric associated with each 00243 CachedESTInfo entry in the list. 00244 */ 00245 void preprocess(SMList& list); 00246 00247 protected: 00248 /** Helper method to prune a given SMList. 00249 00250 This is a helper method that is called from the pruneCaches() 00251 method to remove all entries corresponding to the given 00252 estIdx. 00253 00254 \param[in,out] list The SMList whose entries needs to be 00255 pruned. 00256 00257 \param[in] estIdx The index of the EST whose entries must be 00258 removed from this list. 00259 00260 \return This method returns \c true if the list becomes empty 00261 once all the entries have been removed. 00262 */ 00263 bool pruneCache(SMList& list, const int estIdx); 00264 00265 private: 00266 /** The actual vector of cache entries for various nodes. 00267 00268 This cache contains the list of cache entries for each node 00269 owned and managed by this MSTCache. The list is initialized 00270 to have a set of empty lists in the constructor, when the 00271 cache is instantiated for use. 00272 */ 00273 std::vector<MSTCacheEntry> cache; 00274 00275 /** Number of times the cache needed to be repopulated. 00276 00277 This instance variable is used to track the number of times 00278 the cache had to be repopulated as one of the entries ran out 00279 of data. This variable is initialized to 0 (zero), 00280 incremented in the pruneCaches() method, and displayed in the 00281 displayStats() method. 00282 */ 00283 int cacheRepopulation; 00284 00285 /** Number of entries that were pruned from the cache. 00286 00287 This instance variable is used to track the number of entries 00288 that were pruned from the cache by the pruneCache() method. 00289 This value indicates the number of entries in the cache that 00290 were unused. Moreover, this value also a measure of the 00291 amount of unnecessary adjacency information calculation 00292 operations that were performed for this cache by all the 00293 processes put together. 00294 */ 00295 int prunedEntries; 00296 00297 /** Number of times comparisons of ESTs were performed. 00298 00299 Tracks the number of times EST comparisons were performed. 00300 This instance variable is initialized to 0 (zero). The set of 00301 values sorted via teh sortAndPrune() method are used to track 00302 the number of comparisons performed and this instance variable 00303 is suitably adjusted. 00304 */ 00305 long analysisCount; 00306 }; 00307 00308 #endif