FreeLing  3.0
weakrule.h
Go to the documentation of this file.
00001 
00002 //
00003 //    Omlet - Open Machine Learning Enhanced Toolkit
00004 //
00005 //    Copyright (C) 2006   TALP Research Center
00006 //                         Universitat Politecnica de Catalunya
00007 //
00008 //    This file is part of the Omlet library
00009 //
00010 //    The Omlet library is free software; you can redistribute it 
00011 //    and/or modify it under the terms of the GNU General Public
00012 //    License as published by the Free Software Foundation; either
00013 //    version 3 of the License, or (at your option) any later version.
00014 //
00015 //    This library is distributed in the hope that it will be useful,
00016 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 //    General Public License for more details.
00019 //
00020 //    You should have received a copy of the GNU General Public
00021 //    License along with this library; if not, write to the Free Software
00022 //    Foundation, Inc., 51 Franklin St, 5th Floor, Boston, MA 02110-1301 USA
00023 //
00024 //    contact: Lluis Padro (padro@lsi.upc.es)
00025 //             TALP Research Center
00026 //             despatx Omega.S112 - Campus Nord UPC
00027 //             08034 Barcelona.  SPAIN
00028 //
00030 
00031 //
00032 // Author: Xavier Carreras
00033 //
00034 
00035 #ifndef _WEAKRULE
00036 #define _WEAKRULE
00037 
00038 #include <iostream>
00039 #include <string>
00040 #include <vector>
00041 #include <map>
00042 #include <set>
00043 #include "freeling/tree.h"
00044 #include "freeling/omlet/dataset.h"
00045 
00046 
00052 
/// Base parameter bundle passed to weak-rule constructors (see the
/// WR_constructor signature, which receives a wr_params pointer).
/// Plain data holder: both fields are public and this header enforces
/// no invariants on them.
class wr_params {
 public:
  /// Number of labels. NOTE(review): presumably the number of classes a
  /// rule produces predictions for -- confirm against the implementation.
  int nlabels;
  /// NOTE(review): meaning not visible in this header; presumably a small
  /// smoothing constant used while learning -- confirm against the
  /// implementation.
  double epsilon;

  /// Constructor, defined out of line. Presumably `nl` initialises
  /// `nlabels` and `e` initialises `epsilon` -- confirm in the source file.
  wr_params (int nl, double e);
};
00061 
00066 
00067 class weak_rule {
00068 
00069  public:
00071   virtual ~weak_rule() {};
00072  
00075   virtual void classify(const example &i,double pred[]) = 0;
00076 
00078   virtual void read_from_stream(std::wistream *is) = 0;
00079   virtual void write_to_stream(std::wostream *os) = 0;
00080     
00082   virtual void learn(const dataset &ds, double &Z) = 0;
00083 
00087   virtual double Zcalculus(const dataset &ds) const;
00088 };
00089 
00090 
00091 
00092 
00098 
00099 class wr_factory {
00100 
00101  public:
00102   typedef weak_rule* (*WR_constructor)(wr_params*);
00103   static void initialize();
00104   static bool register_weak_rule_type(const std::wstring &type, WR_constructor builder);
00105   static bool unregister_weak_rule_type(const std::wstring &type);
00106   static weak_rule* create_weak_rule(const std::wstring &type, wr_params *p);
00107   static weak_rule* create_weak_rule(const std::wstring &type, int nlabels);
00108 
00109  private:
00110   static std::map<std::wstring, WR_constructor> wr_types;
00111 
00112 };
00113 
00114 
00119 
00120 class mlDTree_params : public wr_params {
00121  public:
00123   int    max_depth;
00124 
00126   mlDTree_params (int nl, double e, int mxd);
00127 };
00128 
00132 
/// Payload stored in one node of a decision tree.
/// A node is either internal (it carries a feature) or a leaf (it carries
/// predictions); see the per-field comments for how the two are told apart.
class dt_node {
  // Redundant while every member is public; it matters only if the
  // commented-out `protected:` below is ever restored.
  friend class mlDTree;
  //protected:
 public:
  int     feature;              // 0 when leaf
  std::vector<double>  predictions;  // empty when not leaf (when leaf, array of predictions, one for each class)

 public:
  /// Empty (default) constructor.
  dt_node();
  /// Build a node from a feature identifier `f`.
  /// NOTE(review): presumably creates an internal (non-leaf) node -- confirm.
  dt_node(int f);
  /// Build a node from an array `pr` of `nl` values.
  /// NOTE(review): presumably creates a leaf whose predictions are copied
  /// from `pr[0..nl-1]` -- confirm in the implementation.
  dt_node(int nl, double pr[]);
  /// Copy constructor.
  dt_node(const dt_node &n);
};
00150 
00155 
00156 class mlDTree : public weak_rule {
00157 
00158  private:
00159   // learning parameters for the specific type of weak rule
00160   mlDTree_params params;
00161 
00162   // decision tree itself
00163   tree<dt_node> rule;
00164   // learning auxiliary list.
00165   std::set<int> used_features; 
00166 
00168   void classify (const example &i, double pred[], tree<dt_node>::iterator t);
00169 
00171   void write_to_stream(tree<dt_node>::iterator t, std::wostream *os);
00172   tree<dt_node> read_dt(std::wistream *is);
00173   
00175   tree<dt_node> learn (const dataset &ds, double &Z, int depth);
00176 
00177   bool stopping_criterion(const dataset &ds, int depth);
00179   int best_feature(const dataset &ds, double *W);
00181   void Cprediction(int v, double *W, double result[]);
00185   double Zcalculus(double *W, int ndim);
00186 
00188   mlDTree(const mlDTree &wr0);
00189 
00190  public:
00191 
00192   // Constructor
00193   mlDTree(mlDTree_params *p);
00194 
00198   void classify(const example &i, double pred[]);
00199 
00201   void write_to_stream(std::wostream *os);
00202   void read_from_stream(std::wistream *is);
00203 
00205   void learn(const dataset &ds, double &Z);
00206 };
00207 
00208 
00209 #endif 
00210