00001 #ifndef CLUSTER_MAKER_H 00002 #define CLUSTER_MAKER_H 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include "arg_parser.h" 00038 00039 // Forward declaration to make compiler happy 00040 class ESTAnalyzer; 00041 00042 /** The base class of all cluster makers. 00043 00044 This class must be the base class of all cluster makers in the 00045 system. This class provides some default functionality that can be 00046 readily used by each cluster maker. 00047 */ 00048 class ClusterMaker { 00049 public: 00050 /** Display valid command line arguments for this cluster maker. 00051 00052 This method must be used to display all valid command line 00053 options that are supported by this cluster maker. Note that 00054 derived classes may override this method to display additional 00055 command line options that are applicable to it. This method 00056 is typically used in the main() method when displaying usage 00057 information. 00058 00059 \note Derived cluster maker classes <b>must</b> override this 00060 method to display help for their custom command line 00061 arguments. When this method is overridden don't forget to 00062 call the corresponding base class implementation to display 00063 common options. 00064 00065 \param[out] os The output stream to which the valid command 00066 line arguments must be written. 00067 */ 00068 virtual void showArguments(std::ostream& os); 00069 00070 /** Process command line arguments. 00071 00072 This method is used to process command line arguments specific 00073 to this cluster maker. This method is typically used from the 00074 main method just after the cluster maker has been 00075 instantiated. This method consumes all valid command line 00076 arguments. If the command line arguments were valid and 00077 successfully processed, then this method returns \c true. 00078 00079 \note Derived cluster maker classes <b>must</b> override this 00080 method to process any command line arguments that are custom 00081 to their operation. When this method is overridden don't 00082 forget to call the corresponding base class implementation to 00083 display common options. 00084 00085 \param[in,out] argc The number of command line arguments to be 00086 processed. 00087 00088 \param[in,out] argv The array of command line arguments. 00089 00090 \return This method returns \c true if the command line 00091 arguments were successfully processed. Otherwise this method 00092 returns \c false. This method returns true if all arguments 00093 are consumed successfully. 00094 */ 00095 virtual bool parseArguments(int& argc, char **argv); 00096 00097 /** Method to begin clustering. 00098 00099 This method must be used to create clusters based on a given 00100 EST analysis method. This method is a pure-virtual method. 00101 Therefore all cluster maker classes must override this method 00102 to perform all the necessary operations. 00103 00104 \note This method must be invoked only after the initialize() 00105 method is invoked. 00106 */ 00107 virtual int makeClusters() = 0; 00108 00109 /** Obtain the EST analyzer set for this cluster maker. 00110 00111 This method must be used to obtain the EST analyzer set for 00112 this cluster maker. 00113 00114 \return The EST analyzer set for this cluster maker. If a 00115 valid EST analyzer has not been set then this method returns 00116 NULL. 00117 */ 00118 inline ESTAnalyzer *getAnalyzer() const { return analyzer; } 00119 00120 /** Add a dummy cluster to the cluster maker. 00121 00122 This method can be used to add a dummy cluster to the cluster 00123 maker. The dummy clusters are added as direct descendants of 00124 the root cluster with the given name. 00125 00126 \note This method is currently used by the Filter hierarchy to 00127 add ESTs that are logically filtered out and must not be part 00128 of the core clustering process. 00129 00130 \param[in] name A human readable name to be set for this 00131 cluster. 00132 00133 \return If a cluster was successfully added, then this method 00134 returns a unique integer that identifies the newly added 00135 cluster. This value must be used to add entries to this 00136 cluster via the ClusterMaker::addEST method. 00137 */ 00138 virtual int addDummyCluster(const std::string name) = 0; 00139 00140 /** Add a EST directly to a given cluster. 00141 00142 This method can be used to add an EST directly to a 00143 cluster. This bypasses any traditional mechanism and directly 00144 adds the EST to the specified cluster. 00145 00146 \note The EST is added with an invalid metric value. ESTs 00147 added to a cluster are not included in the standard clustering 00148 process. Adding an EST that has already been added to the 00149 same/another cluster results in undefined behaviors. 00150 00151 \param[in] clusterID The unique ID of the cluster to which the 00152 EST is to be added. This value must have been obtained from an 00153 earlier (successful) call to the ClusterMaker::addDummyCluster 00154 method. 00155 00156 \param[in] estIdx The EST to be added to the given 00157 cluster. Once the EST has been added to this cluster it will 00158 not be included in the clustering process performed by this 00159 cluster maker. 00160 */ 00161 virtual void addEST(const int clusterID, const int estIdx) = 0; 00162 00163 /** A method to handle initialization tasks for the ClusterMaker. 00164 00165 This method is called after the cluster maker has been created 00166 but before the ESTs have been loaded into the ESTAnalyzer. 00167 00168 \note This method must load all the ESTs to be processed via 00169 the ESTAnalyzer API methods. 00170 00171 \return This method returns zero on success. On errors, this 00172 method returns a non-zero value. 00173 */ 00174 virtual int initialize() = 0; 00175 00176 /** The destructor. 00177 00178 The destructor frees memory allocated for holding any data in 00179 this base class. 00180 */ 00181 virtual ~ClusterMaker(); 00182 00183 protected: 00184 /** The default constructor. 00185 00186 The constructor has been made protected to ensure that this 00187 class is never directly instantiated. Instead one of the 00188 derived cluster maker classes must be instantiated via the 00189 ClusterMakerFactor API methods. 00190 00191 \param[in] name The human readable name for this cluster 00192 maker. This name is used when generating errors, warnings, 00193 and other output messages for this object. 00194 00195 \param[in,out] analyzer The EST analyzer to be used by this 00196 ClusterMaker for generating similarity metrics between two 00197 given ESTs. 00198 00199 \param[in] refESTidx The reference EST's index in a given 00200 multi-FASTA file. Index values start with 0 (zero). The 00201 refESTidx is supplied as a global argument that is processed 00202 in the main() method. This value is simply copied to the 00203 refESTidx member in this class. 00204 00205 \param[in] outputFileName The file name to which output must 00206 be written. If a valid output file is not specified, then 00207 results are written to standard output. The outputFileName is 00208 simply copied to the outputFileName member object. 00209 */ 00210 ClusterMaker(const std::string& name, ESTAnalyzer *analyzer, 00211 const int refESTidx, const std::string& outputFileName); 00212 00213 /** The name of this cluster maker. 00214 00215 This instance variable contains the human recognizable name 00216 for this cluster maker. This value is set when this object is 00217 instantiated (in the constructor) and is never changed during 00218 the life time of this object. This information is used when 00219 generating errors, warnings, and other output messages. 00220 */ 00221 const std::string name; 00222 00223 /** The index of the reference EST in a given file. 00224 00225 This member object is used to hold the index of a reference 00226 EST in a given file. The index values begin from 0 (zero). 00227 This member is initialized in the constructor and is never 00228 changed during the lifetime of this object. This reference 00229 est index is typically the root node of any clustering 00230 operations that are performed. 00231 */ 00232 const int refESTidx; 00233 00234 /** The file to which results must be written. 00235 00236 This member object is used to hold the file name to which all 00237 the clustering results are to be written. This member is 00238 initialized to NULL. However, the value is changed by the 00239 parseArguments method depending on the actual value specified 00240 by the user. 00241 */ 00242 const std::string outputFileName; 00243 00244 /** The analyzer to be used for generating EST similarity metrics. 00245 00246 This pointer is used to hold a pointer to the EST analyzer 00247 that must be used for generating similarity metrics between 00248 two given pairs of ESTs. This pointer is initialized when the 00249 object is instantiated and is never changed during the 00250 lifetime of this object. 00251 */ 00252 ESTAnalyzer* const analyzer; 00253 00254 private: 00255 /** The set of common arguments for all cluster makers. 00256 00257 This instance variable contains a static list of arguments 00258 that are common all the cluster makers. The common argument 00259 list is statically defined and shared by all cluster maker 00260 instances. 00261 00262 \note This makes cluster maker class hierarchy not MT-safe. 00263 */ 00264 static arg_parser::arg_record commonArgsList[]; 00265 00266 /** A dummy operator= 00267 00268 The operator=() is supressed for this class as it has constant 00269 members whose value is set when the object is created. These 00270 values cannot be changed during the lifetime of this 00271 object. 00272 00273 \param[in] src The source object from where data is to be copied. 00274 Currently this value is ignored. 00275 00276 \return Reference to this. 00277 */ 00278 ClusterMaker& operator=(const ClusterMaker& src); 00279 }; 00280 00281 #endif