00001 #ifndef HEURISTIC_CHAIN_H 00002 #define HEURISTIC_CHAIN_H 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include <vector> 00038 #include "Heuristic.h" 00039 #include "HashMap.h" 00040 00041 /** Class that manages a list of heuristics. 00042 00043 <p>This class represents a list of heuristics that are used to try 00044 and minimize the number of times EST analysis is performed on a 00045 given pair of ESTs. Note that EST analysis (using d2 or clu) is a 00046 time consuming task and it must be performed sparingly to reduce 00047 runtimes (particularly when large sets of ESTs need to be 00048 clustered). The heuristics try to elimintate performing EST 00049 analysis on pairs that are most likely unrelated (and will not be 00050 in the same cluster).</p> 00051 00052 <p>Each heuristic in the chain implements a specific heuristic and 00053 returns a \c true or \c false value. If the result is \c false, 00054 then detailed EST analysis is not performed. However, if the 00055 result is \c true, then the next heuristic in the chain is used to 00056 further ensure that the comparison is really needed. It is the 00057 responsiblity of the caller to ensure that the chain of hueristics 00058 is correctly set so that the cheaper (possibly less aggressive) 00059 heuristic is run first followed by more computationally-expensive 00060 (and more aggressively filtering) heuristics are run.</p> 00061 */ 00062 class HeuristicChain { 00063 public: 00064 /** Create the heuristics in the chain. 00065 00066 This method must be used to establish the chain of heuristics. 00067 This method is typically invoked from the \c main method. 00068 This method parses the names of the heuristic specified in the 00069 parameter \c heuristicStr and instantiates suitable heuristics 00070 via the HeuristicFactory. 00071 00072 \param[in] heuristicStr A string containing the list of 00073 heuristics to be created in this chain. This string is 00074 typically specified by the user as a command line parameter. 00075 If this parameter is \c NULL, then this method performs no 00076 specific action. 00077 00078 \param[in] refESTidx The index of the EST that is to be used 00079 to serve as the starting EST for clustering. In a 00080 Minimum-Spanning-Tree (MST) clustering scheme, this EST will 00081 be the root of the MST. 00082 00083 \param[in] outputFile The file to which the results of 00084 clustering are to be written. 00085 */ 00086 static HeuristicChain* 00087 setupChain(const char* heuristicStr, const int refESTidx, 00088 const std::string& outputFile); 00089 00090 /** Get a pointer to the instance of the heuristic chain. 00091 00092 Since this class is a singleton, the constructor is private 00093 and the only way to obtain an instance of the class is through 00094 this method. The heuristic chain is available only after the 00095 \c setupChain method (that is invoked from main right after 00096 command line arguments are validated) has successfully 00097 completed its operation. Until such time this method simply 00098 returns \c NULL. 00099 00100 \return The process-wide unique pointer to the heuristic chain. 00101 */ 00102 static inline HeuristicChain* getHeuristicChain() { 00103 return ptrInstance; 00104 } 00105 00106 /** Display valid command line arguments for heuristics in the 00107 chain. 00108 00109 This method simply calls the showArguments method on each 00110 heuristic in the chain. 00111 00112 \param[out] os The output stream to which the valid command 00113 line arguments must be written. 00114 */ 00115 virtual void showArguments(std::ostream& os); 00116 00117 /** Permits heuristics in the chain to process command line 00118 arguments. 00119 00120 This method iterates over the heuristics that have been added 00121 to this chain and invokes parseArguments() method on each one 00122 of them. This permits each heuristic in the chain to receive 00123 and process command line parameters targeted for the 00124 heuristics. 00125 00126 \param[in,out] argc The number of command line arguments 00127 currently present in argv (the parameter list). 00128 00129 \param[in,out] argv The list of command line arguments to be 00130 consumed by various heuristics, if they find parameters 00131 intended for their use. 00132 00133 \return This method returns \c true if all the heuristics in 00134 the chain successfully processed command line arguments. If an 00135 incorrect command line argument is received by any one of the 00136 heuristics then this method returns \c false to flag an error. 00137 */ 00138 virtual bool parseArguments(int& argc, char **argv); 00139 00140 /** Initializes all the heuristics in the chain. 00141 00142 This method iterates over all the heuristics that have been 00143 added ot this chain and calls initialize() on each one of 00144 them. If any one of the heuristics are unable to initialize 00145 correctly, then this method immediately returns an non-zero 00146 error code. 00147 00148 \return This method returns zero on success. On errors this 00149 method returns a non-zero value. 00150 */ 00151 virtual int initialize(); 00152 00153 /** Set the reference EST against which heuristics are to be run. 00154 00155 This method is invokes the corresponding method on all the 00156 heuristics that have been added to this chain. Setting the 00157 reference EST permits heuristics to build initial tables and 00158 other data structures for analyzing another EST (called via 00159 the shouldAnalyze) against the reference EST to determine if 00160 the heavy weight EST analyzer (d2 or CLU) must be run. 00161 00162 \param[in] estIdx The index of the reference EST. 00163 */ 00164 virtual int setReferenceEST(const int estIdx); 00165 00166 /** Add the given heuristic to the heuristic chain. 00167 00168 This method permits the heuristic chain to takes ownership of 00169 a given heuristic object by added it to its internal chain. 00170 00171 \note The heuristic chain takes ownership of the object 00172 therefore that the heuristic pointer passed to this method 00173 must not be deleted by the caller. 00174 00175 \param[in] heuristic The instance of class Heuristic that 00176 should be added to the heuristic chain. 00177 00178 \return This method returns \c true if the heuristic was 00179 successfully added. On errors this method returns \c false. 00180 */ 00181 virtual bool addHeuristic(Heuristic* heuristic); 00182 00183 /** Determine whether the analyzer should perform core 00184 (computationally intensive) analysis, according to this 00185 heuristic chain. 00186 00187 This method can be used to compare a given EST with the 00188 reference EST (set via the call to the setReferenceEST()) 00189 method. 00190 00191 \param[in] otherEST The index (zero based) of the EST with 00192 which the reference EST is to be compared. 00193 00194 \return This method returns \c true if all of the heuristics 00195 say the EST pair should be analyzed, and \c false if any 00196 heuristic does not. (Conceivably a subclass could extend this 00197 class and use some sort of "consensus" among heuristics.) 00198 */ 00199 inline bool shouldAnalyze(const int otherEST) { 00200 for (size_t i = 0; (i < chain.size()); i++) { 00201 if (!chain[i]->shouldAnalyze(otherEST)) { 00202 // Immediately stop when a heuristic says no 00203 return false; 00204 } 00205 } 00206 // If all heuristics say yes, return true 00207 return true; 00208 } 00209 00210 /** Method to display statistics regarding operation of all the 00211 heuristics in this chain 00212 00213 This method can be used to obtain a dump of the statistics 00214 gathered regarding the operation of all the heuristics in this 00215 chain. The typical statistic generated by heuristics 00216 includes: 00217 00218 <ul> 00219 00220 <li>The number of times the heuristic was called. More 00221 specifically this value indicates the number of times the \c 00222 shouldAnalyze() method was invoked.</li> 00223 00224 <li>The number of successful matches reported by this 00225 heuristic. This number indirectly indicates the number of 00226 times other heuristics or the actual heavy weight algorithm 00227 was invoked.</li> 00228 00229 </ul> 00230 00231 \param[out] os The output stream to which the statistics 00232 regarding the heuristics is to be dumped. 00233 00234 \param[in] rank The rank of the process for which the 00235 statistics is being displayed. 00236 */ 00237 void printStats(std::ostream& os, const int rank) const; 00238 00239 /** Set a hint to be used by other algorithms. 00240 00241 This method can be used by various heuristics to set hints for 00242 use by other algorithms after their operation. The hints are 00243 typically set by heuristics at the end of their \c 00244 runHeuristics method. The hints are then used by other 00245 heuristics or by the heavy weight EST analysis algorithms 00246 (such as D2). 00247 00248 \param[in] hintKey The string identifier associated with the 00249 hint. Using string identifiers makes the use of hints a bit 00250 more programmer friendly and makes the code more readable. 00251 00252 \param[in] hintValue The actual value to be associated with 00253 the hintKey. 00254 */ 00255 inline void setHint(const std::string& hintKey, const int hintValue) { 00256 hints[hintKey] = hintValue; 00257 } 00258 00259 /** Obtain the value associated with a given hint. 00260 00261 This method must be used to obtain the hint value associated 00262 with a given hint key. If the hint value is available, then 00263 it is stored in the \c hintValue parameter. 00264 00265 \param[in] hintKey The string identifier associated with the 00266 hint whose value is to be retrieved. 00267 00268 \param[out] hintValue The parameter into which the hint value 00269 (if available) must be stored. 00270 00271 \return This method returns \c true if the requested hint was 00272 found and \c hintValue was updated. Otherwise this method 00273 returns \c false. 00274 */ 00275 inline bool getHint(const std::string& hintKey, int& hintValue) const { 00276 StringIntMap::const_iterator entry = hints.find(hintKey); 00277 if (entry != hints.end()) { 00278 hintValue = entry->second; 00279 return true; 00280 } 00281 // Hint not found. 00282 return false; 00283 } 00284 00285 /** Method to obtain pointer to a given heuristic object. 00286 00287 This method can be used to obtain a pointer to a specific 00288 heuristic class present in this chain. If the heruistic does 00289 not exist then this method returns NULL. 00290 00291 \note The caller must \b not delete the returned pointer. 00292 00293 \param[in] name The name associated with a given heuristic. 00294 00295 \return If the heursitic was found then this method returns a 00296 valid (non-NULL) pointer to the heuristic object. If the 00297 heuristic was not found, then this method returns NULL. 00298 */ 00299 Heuristic* getHeuristic(const std::string& name) const; 00300 00301 /** The destructor. 00302 00303 The destructor frees up all the heuristics added to this 00304 heuristic chain. 00305 */ 00306 virtual ~HeuristicChain(); 00307 00308 protected: 00309 /** A hash map to store hints from heuristics. 00310 00311 This hash map is used to rapidly store and retrieve hints that 00312 are generated by various heuristics as they operate. These 00313 hints may be used by other heuristics in the chain or by heavy 00314 weight EST analyzers to further optimize their operations. 00315 Entries are added via the \c setHint method. Entries may be 00316 accessed via the \c getHint method. 00317 00318 \note Currently hints are restricted to be integers. Possibly 00319 at a later date this could be changed to store non-integer 00320 values as well. 00321 */ 00322 StringIntMap hints; 00323 00324 private: 00325 /** The constructor. 00326 This is made private because the heuristic chain is a singleton, 00327 and should only be instantiated from the getHeuristicChain() 00328 static method. 00329 */ 00330 HeuristicChain(); 00331 00332 /** The vector containing a list of heuristics in the chain. 00333 00334 This vector contains the list of hueristics assocaited with 00335 this chain. Heuristics are added to the list via the 00336 addHeuristic() method. The heuristics are used by the 00337 shouldAnalyze() method. 00338 */ 00339 std::vector<Heuristic*> chain; 00340 00341 /** The pointer to the singleton instance of this class. 00342 00343 Again, this is made private so that only methods of this class 00344 can access it. The getHeuristicChain() method in this class 00345 must be used to obtain an instance of this class. 00346 */ 00347 static HeuristicChain* ptrInstance; 00348 }; 00349 00350 #endif