00001 #ifndef FMWSCA_H 00002 #define FMWSCA_H 00003 00004 //-------------------------------------------------------------------- 00005 // 00006 // This file is part of PEACE. 00007 // 00008 // PEACE is free software: you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by 00010 // the Free Software Foundation, either version 3 of the License, or 00011 // (at your option) any later version. 00012 // 00013 // PEACE is distributed in the hope that it will be useful, but 00014 // WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with PEACE. If not, see <http://www.gnu.org/licenses/>. 00020 // 00021 // Miami University makes no representations or warranties about the 00022 // suitability of the software, either express or implied, including 00023 // but not limited to the implied warranties of merchantability, 00024 // fitness for a particular purpose, or non-infringement. Miami 00025 // University shall not be liable for any damages suffered by licensee 00026 // as a result of using, result of using, modifying or distributing 00027 // this software or its derivatives. 00028 // 00029 // By using or copying this Software, Licensee agrees to abide by the 00030 // intellectual property laws, and all other applicable laws of the 00031 // U.S., and the terms of GNU General Public License (version 3). 00032 // 00033 // Authors: Dhananjai M. Rao raodm@muohio.edu 00034 // 00035 //--------------------------------------------------------------------- 00036 00037 #include "FWAnalyzer.h" 00038 00039 /** FMWSCA: Framed, Multi-Word String Compare Analyzer. 00040 00041 <p>This analyzer provides a conventional EST analyzer that compares 00042 EST base pair data using a given number of base pairs. The total 00043 number of base pairs to be compared is called a Frame. A frame is 00044 broken into a sequence of words. The frame size and word size (in 00045 terms of number of base pairs) is specified as command line 00046 arguments. This analyzer compares the tail (3' end) of a given 00047 EST sequence with the beginning (5' end) of all other ESTs in a 00048 given file. The file must be in FASTA format.</p> 00049 00050 <p>This class has been implemented by extending the FWAnalyzer 00051 base class. The base class hierarchy provides most of the 00052 standard functionality involved in reading FASTA files and 00053 generating formatted output.</p> 00054 00055 \note An instance of this class is typically created via the 00056 ESTAnalyzerFactory class. 00057 */ 00058 class FMWSCA : public FWAnalyzer { 00059 friend class ESTAnalyzerFactory; 00060 public: 00061 /** The destructor. 00062 00063 The destructor frees up all any dynamic memory allocated by 00064 this object for its operations. 00065 */ 00066 ~FMWSCA(); 00067 00068 /** Display valid command line arguments for this analyzer. 00069 00070 This method must be used to display all valid command line 00071 options that are supported by this analyzer (and its base 00072 classes). 00073 00074 \note This method calls the base class's showArguments first. 00075 00076 \param[out] os The output stream to which the valid command 00077 line arguments must be written. 00078 */ 00079 virtual void showArguments(std::ostream& os); 00080 00081 /** Process command line arguments. 00082 00083 This method is used to process command line arguments specific 00084 to this EST analyzer. This method is typically used from the 00085 main method just after the EST analyzer has been instantiated. 00086 This method consumes all valid command line arguments. If the 00087 command line arguments were valid and successfully processed, 00088 then this method returns \c true. 00089 00090 \note This method consumes its custom command line arguments 00091 first and then call's the base class's parseArguments() method. 00092 00093 \param[in,out] argc The number of command line arguments to be 00094 processed. 00095 00096 \param[in,out] argv The array of command line arguments. 00097 00098 \return This method returns \c true if the command line 00099 arguments were successfully processed. Otherwise this method 00100 returns \c false. 00101 */ 00102 virtual bool parseArguments(int& argc, char **argv); 00103 00104 /** Method to obtain human-readable name for this EST analyzer 00105 00106 This method provides a human-readable string identifying the 00107 EST analyzer. This string is typically used for 00108 display/debugging purposes (particularly via the PEACE 00109 Interactive Console). 00110 00111 \return This method returns the string "FMWSCA" identifiying 00112 this analyzer. 00113 */ 00114 virtual std::string getName() const { return "FMWSCA"; } 00115 00116 protected: 00117 /** Flag to perform case sensitive comparisons. 00118 00119 This flag indicates if the FMWSCA must perform case sensitive 00120 string comparisons. By default this analyzer performs case 00121 insensitive comparisons. The default can be changed by the 00122 user by sepcifying a suitable command line argument. This 00123 member is initialized to false. However, its value may be 00124 changed by the parseArguments() method. 00125 */ 00126 static bool caseSensitive; 00127 00128 /** Method to compare two frames and compute similarity. 00129 00130 For each word in the refFrame, this method searches for number 00131 of occurrences of the word in otherFrame. It then divides the 00132 count by frameSize^2. If two frames are identical it results 00133 in 100. If thw two frames are completely different the result 00134 would be a 0 (zero). 00135 00136 \note This method overrides the default implementation in the 00137 FWAnalyzer base class. 00138 00139 \param[in] refFrame The reference frame for comparison 00140 purposes. Note that the reference frame is always a constant 00141 in a given set of caparisons. Consequently, certain analyzers 00142 can pre-compute and reuse metrics to make analysis fast. 00143 00144 \param[in] otherFrame The other frame for comparison. This 00145 frame is always guaranteed to be from a different EST than the 00146 refFrame. 00147 00148 \param[in] wordSize The size of a word within the given frame. 00149 This value is always greater than 0 (zero) and less than frame 00150 size. 00151 00152 \return This method is return a similarity metric in the range 00153 0 to 100 between the given frame and the refFrame. 00154 */ 00155 using FWAnalyzer::getMetric; 00156 virtual float getMetric(const std::string& refFrame, 00157 const std::string& otherFrame, 00158 const int wordSize); 00159 00160 private: 00161 /** The default constructor. 00162 00163 The default constructor for this class. The constructor is 00164 made private so that this class cannot be directly 00165 instantiated. However, since the ESTAnalyzerFactory is a 00166 friend of this class, an object can be instantiated via teh 00167 ESTAnalyzerFactory::create() method. 00168 00169 00170 \param[in] refESTidx The reference EST index value to be used 00171 when performing EST analysis. This parameter should be >= 0. 00172 This value is simply passed onto the base class. 00173 00174 \param[in] outputFile The name of the output file to which the 00175 EST analysis data is to be written. This parameter is ignored 00176 if this analyzer is used for clustering. If this parameter is 00177 the empty string then output is written to standard output. 00178 This value is simply passed onto the base class. 00179 */ 00180 FMWSCA(const int refESTidx, const std::string& outputFile); 00181 00182 /** The set of c arguments for this EST analyzer. 00183 00184 This instance variable contains a static list of arguments 00185 that are specific to the FMWSCA EST analyzers. 00186 */ 00187 static arg_parser::arg_record argsList[]; 00188 }; 00189 00190 00191 #endif