00001 #ifndef FILTER_CHAIN_H 00002 #define FILTER_CHAIN_H 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include <vector> 00038 #include "Filter.h" 00039 #include "HashMap.h" 00040 00041 /** Class that manages a list of filters. 00042 00043 <p>This class represents a list of filters that are used to try 00044 and elimintate entries that could potentially deteriorate the 00045 quality of clustering generated by PEACE. The filters are run 00046 prior to commencement of the core clustering operation. For 00047 convenient post-processing analysis the filtered ESTs are gathered 00048 in suitable "dummy" clusters to be ignored.</p> 00049 00050 <p>Each filter object in the chain implements a specific type of 00051 filteration operation and returns an integer indicating the 00052 cluster to which an EST is to be assigned. If the cluster ID is 00053 -1, then that indicates that the EST must be subjected to regular 00054 clustering operations.</p> 00055 00056 <p>Note that filter processing proceeds in a given order. The 00057 first filter in the chain is invoked. If that filter returns a 00058 positive cluster ID then rest of the filters are not invoked. On 00059 the other hand, if a filter returns -1, then the next filter in 00060 the chain is invoked. The process continues until all the filters 00061 have been exhausted.</p> 00062 */ 00063 class FilterChain { 00064 public: 00065 /** Create the filters in the chain. 00066 00067 This method must be used to establish the chain of filters. 00068 This method is typically invoked from the \c main method. 00069 This method parses the names of the filter specified in the 00070 parameter \c filterStr and instantiates suitable filters 00071 via the FilterFactory. 00072 00073 \param[in] filterStr A string containing the list of 00074 filters to be created in this chain. This string is 00075 typically specified by the user as a command line parameter. 00076 If this parameter is \c NULL, then this method performs no 00077 specific action. 00078 00079 \param[in] clusterMaker The top-level cluster maker that has 00080 been specified by the user. The cluster maker encapsulates the 00081 EST analyzer object specified by the user. The filters can use 00082 the clusterMaker to perform any special processing they may 00083 need. 00084 */ 00085 static FilterChain* 00086 setupChain(const char* filterStr, ClusterMaker *clusterMaker); 00087 00088 /** Main helper method that applies all filters in a distributed 00089 manner. 00090 00091 This method is a convinence method that has been introduced 00092 here to facilitate the process of applying all filters in a 00093 distributed manner. This method operates as follows: 00094 00095 <ol> 00096 00097 <li>First it initializes all the filters. If initialization 00098 fails then this method immediately exits with an non-zero 00099 error code.</li> 00100 00101 <li>Next it computes the sub-set of ESTs that this filter is 00102 expected to operate on and applies the filters to all the ESTs 00103 within the range it owns.</li> 00104 00105 <li>It participates in interative broadcast in which it 00106 receives filter data from other processes (if any) and 00107 broadcasts its data to others. This process ensures that all 00108 processes have a consistent view of the entries that have been 00109 filtered out on other processes.</li> 00110 00111 <li>Then it finalizes all the filters.</li> 00112 00113 <li>If all the operations were successfully completed, then 00114 this method returns 0 (zero).</li> 00115 00116 </ol> 00117 00118 \param[in] clusterMaker The cluster maker to be used for 00119 merging the filter data received from other processes. 00120 00121 \return This method returns 0 (zero) on success. On errors it 00122 returns a non-zero error code. 00123 */ 00124 static int applyFilters(ClusterMaker *clusterMaker); 00125 00126 /** Get a pointer to the instance of the filter chain. 00127 00128 Since this class is a singleton, the constructor is private 00129 and the only way to obtain an instance of the class is through 00130 this method. The filter chain is available only after the 00131 \c setupChain method (that is invoked from main right after 00132 command line arguments are validated) has successfully 00133 completed its operation. Until such time this method simply 00134 returns \c NULL. 00135 00136 \return The process-wide unique pointer to the filter chain. 00137 */ 00138 static inline FilterChain* getFilterChain() { 00139 return ptrInstance; 00140 } 00141 00142 /** Display valid command line arguments for filters in the 00143 chain. 00144 00145 This method simply calls the showArguments method on each 00146 filter in the chain. 00147 00148 \param[out] os The output stream to which the valid command 00149 line arguments must be written. 00150 */ 00151 virtual void showArguments(std::ostream& os); 00152 00153 /** Permits filters in the chain to process command line 00154 arguments. 00155 00156 This method iterates over the filters that have been added to 00157 this chain and invokes parseArguments() method on each one of 00158 them. This permits each filter in the chain to receive and 00159 process command line parameters targeted for the filters. 00160 00161 \param[in,out] argc The number of command line arguments 00162 currently present in argv (the parameter list). This parameter 00163 is updated when parameters are consumed. 00164 00165 \param[in,out] argv The list of command line arguments to be 00166 consumed by various filters, if they find parameters intended 00167 for their use. This parameter is updated when command line 00168 arguments are consumed by one of the filters. 00169 00170 \return This method returns \c true if all the filters in the 00171 chain successfully processed command line arguments. If an 00172 incorrect command line argument is received by any one of the 00173 filters then this method returns \c false to flag an error. 00174 */ 00175 virtual bool parseArguments(int& argc, char **argv); 00176 00177 /** Initializes all the filters in the chain. 00178 00179 This method iterates over all the filters that have been 00180 added ot this chain and calls initialize() on each one of 00181 them. If any one of the filters are unable to initialize 00182 correctly, then this method immediately returns an non-zero 00183 error code. 00184 00185 \return This method returns zero on success. On errors this 00186 method returns a non-zero value. 00187 */ 00188 virtual int initialize(); 00189 00190 /** Finalizes all the filters in the chain. 00191 00192 This method iterates over all the filters that have been added 00193 ot this chain and calls Filter::finalize() method on each one 00194 of them. The finalize operation permits the filters to wrap up 00195 their operation and perform any cleanups. 00196 */ 00197 virtual void finalize(); 00198 00199 /** Add the given filter to the filter chain. 00200 00201 This method permits the filter chain to takes ownership of a 00202 given filter object by added it to its internal chain. 00203 00204 \note The filter chain takes ownership of the object therefore 00205 that the filter pointer passed to this method must not be 00206 deleted by the caller. 00207 00208 \param[in] filter The instance of class Filter that should be 00209 added to the filter chain. 00210 00211 \return This method returns \c true if the filter was 00212 successfully added. On errors this method returns \c false. 00213 */ 00214 virtual bool addFilter(Filter* filter); 00215 00216 /** Determine whether a given EST passes the filter criterion the 00217 analyzer should perform core (computationally intensive) 00218 analysis, according to this filter chain. 00219 00220 This method can be used to compare a given EST with the 00221 reference EST (set via the call to the setReferenceEST()) 00222 method. 00223 00224 \param[in] estIdx The index (zero based) of the EST with which 00225 the reference EST is to be compared. 00226 00227 \return This method returns the logical cluster to which the 00228 given EST must be added since one of the filters flagged it as 00229 being an EST to be ignored from the main clustering. If the 00230 EST is to be processed as part of the regular clustering 00231 process then this method returns -1. 00232 */ 00233 inline int applyFilters(const int estIdx) { 00234 int clusterID = -1; 00235 for (size_t i = 0; (i < chain.size()); i++) { 00236 if ((clusterID = chain[i]->applyFilter(estIdx)) != -1) { 00237 // Immediately stop when EST fails a filter clause 00238 break; 00239 } 00240 } 00241 // Return the logical/dummy cluster to which ESTs must be 00242 // added. 00243 return clusterID; 00244 } 00245 00246 /** Method to display statistics regarding operation of all the 00247 filters in this chain 00248 00249 This method can be used to obtain a dump of the statistics 00250 gathered regarding the operation of all the filters in this 00251 chain. The typical statistic generated by filters 00252 includes: 00253 00254 <ul> 00255 00256 <li>The number of times the filter was called. More 00257 specifically this value indicates the number of times the \c 00258 applyFilter() method was invoked on a given filter.</li> 00259 00260 <li>The number of ESTs that were permitted to pass through the 00261 filter.</li> 00262 00263 </ul> 00264 00265 \param[out] os The output stream to which the statistics 00266 regarding the filters is to be dumped. 00267 00268 \param[in] rank The rank of the process for which the 00269 statistics is being displayed. This value is used to make the 00270 outputs a bit more informative. 00271 */ 00272 void printStats(std::ostream& os, const int rank) const; 00273 00274 /** Method to obtain pointer to a given filter object. 00275 00276 This method can be used to obtain a pointer to a specific 00277 filter class present in this chain. If the filter does not 00278 exist then this method returns NULL. 00279 00280 \note The caller must \b not delete the returned pointer. 00281 00282 \param[in] name The name associated with a given filter. 00283 00284 \return If the filter was found then this method returns a 00285 valid (non-NULL) pointer to the filter object. If the filter 00286 was not found, then this method returns NULL. 00287 */ 00288 Filter* getFilter(const std::string& name) const; 00289 00290 /** The destructor. 00291 00292 The destructor frees up all the filters added to this 00293 filter chain. 00294 */ 00295 virtual ~FilterChain(); 00296 00297 protected: 00298 /** The vector containing a list of filters in the chain. 00299 00300 This vector contains the list of hueristics assocaited with 00301 this chain. Filters are added to the list via the 00302 addFilter() method. The filters are used by the 00303 shouldAnalyze() method. 00304 */ 00305 std::vector<Filter*> chain; 00306 00307 private: 00308 /** The constructor. 00309 00310 This is made private because the filter chain is a singleton, 00311 and should only be instantiated from the 00312 FilterChain::getFilterChain() static method. 00313 */ 00314 FilterChain(); 00315 00316 /** The pointer to the singleton instance of this class. 00317 00318 Again, this is made private so that only methods of this class 00319 can access it. The getFilterChain() method in this class 00320 must be used to obtain an instance of this class. 00321 */ 00322 static FilterChain* ptrInstance; 00323 00324 /** Helper method to compute the start and ending indexes of the 00325 EST that this process owns. 00326 00327 This method was introduced to keep the math and logic clutter 00328 involved in computing the list of owned ESTs out of the 00329 methods that use the information. This method returns the 00330 range, such that: \c startIndex <= \em ownedESTidx < \c 00331 endIndex. 00332 00333 \note This method must be invoked only after MPI::Intialize() 00334 has beeen called and the ESTs to be processed have be loaded 00335 (so that EST::getESTList() returns a valid list of ESTs). 00336 00337 \param[out] startIndex The starting (zero-based) index value 00338 of the contiguous range of ESTs that this process owns. 00339 00340 \param[out] endIndex The ending (zero-based) index value of 00341 the contiguous range ESTs that this process owns. The value 00342 returned in this parameter is \b not included in the range of 00343 values. 00344 00345 \note Currently, this method has exact implementation as in 00346 MSTClusterMaker::getOwnedESTidx. This method has been 00347 copy-pasted so that filters can operate on their own different 00348 sub-set of ESTs if they choose. Maybe the method can be 00349 combined together. 00350 */ 00351 static void getOwnedESTidx(int& startIndex, int& endIndex); 00352 00353 /** Helper method to perform all-to-all broadcast operation. 00354 00355 This method is invoked from the applyFilters() (static method) 00356 to broadcast the results from filtering out data to all other 00357 processes. In addition, this method also receives broadcasts 00358 from other processes and applies their filtered results to the 00359 local copy. This process ensures that all the processes in the 00360 system have a consistent snapshot of the filtered out ESTs. 00361 00362 \note Possibly this method (which has a loop) can be replaced 00363 by a single MPI all-to-all broadcast call but at expense of 00364 increased memory footprint. 00365 00366 \param[in] clusterMaker The cluster maker to be used for 00367 merging the filter data received from other processes. 00368 */ 00369 static void allToAllBroadcast(ClusterMaker *clusterMaker); 00370 00371 }; 00372 00373 #endif