|
FreeLing
3.0
|
//
//    FreeLing - Open Source Language Analyzers
//
//    Copyright (C) 2004   TALP Research Center
//                         Universitat Politecnica de Catalunya
//
//    This library is free software; you can redistribute it and/or
//    modify it under the terms of the GNU General Public
//    License as published by the Free Software Foundation; either
//    version 3 of the License, or (at your option) any later version.
//
//    This library is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//    General Public License for more details.
//
//    You should have received a copy of the GNU General Public
//    License along with this library; if not, write to the Free Software
//    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
//    contact: Lluis Padro (padro@lsi.upc.es)
//             TALP Research Center
//             despatx C6.212 - Campus Nord UPC
//             08034 Barcelona.
SPAIN 00026 // 00028 00029 #ifndef _FEX_RULE 00030 #define _FEX_RULE 00031 00032 #include <map> 00033 #include <set> 00034 #include <boost/regex/icu.hpp> 00035 #include "freeling/morfo/language.h" 00036 00037 #define OP_NONE 0 00038 #define OP_AND 1 00039 #define OP_OR 2 00040 00045 00046 class fex_condition { 00047 private: 00049 std::wstring function; 00051 std::wstring focus; 00053 std::wstring split; 00055 std::wstring literal; 00057 std::set<std::wstring> *fileset; 00059 boost::u32regex match_re; 00061 boost::wsmatch re_result; 00063 bool negated; 00065 bool cond_true; 00066 00068 static boost::u32regex split_re; 00069 00071 static std::map<std::wstring, std::set<std::wstring> > set_files; 00072 00074 std::list<std::wstring> get_target(const word &) const; 00075 00076 public: 00077 // constructor 00078 fex_condition(); 00080 fex_condition(const std::wstring&,const std::wstring&,const std::wstring&); 00082 fex_condition(const fex_condition &); 00084 fex_condition& operator=(const fex_condition&); 00085 00087 bool check(const word&) ; 00089 bool is_true() const; 00091 std::wstring get_match(int) const; 00093 void trace(int) const; 00094 }; 00095 00100 00101 class feature_function { 00102 public: 00103 virtual void extract (const sentence &, int, std::list<std::wstring> &) const =0; 00105 virtual ~feature_function() {}; 00106 }; 00107 00108 00112 00113 class fex_rule { 00114 private: 00116 std::wstring pattern; 00118 int left,right; 00120 std::list<fex_condition> conds; 00122 int operation; 00124 std::map<int,std::list<std::wstring> > features; 00125 00127 static boost::u32regex rulepat; 00128 static boost::u32regex rulepat_anch; 00129 static boost::u32regex subexpr; 00130 static boost::u32regex featfun; 00131 00134 void pattern_instance(const sentence &, int, std::list<std::wstring> &) const; 00135 void get_replacements(const std::wstring &, const word &, std::list<std::wstring> &) const; 00136 00137 public: 00139 fex_rule (const std::wstring &, const 
std::wstring &, int, 00140 const std::list<fex_condition> &); 00142 fex_rule(const fex_rule &); 00144 fex_rule& operator=(const fex_rule&); 00145 00147 void clear_features(); 00150 void precompute(const sentence&, int) ; 00153 void extract(const sentence&, int, int, std::list<std::wstring> &) const; 00155 int get_left() const; 00157 int get_right() const; 00158 00160 static bool check_conds(std::list<fex_condition> &, int, const word &); 00161 00163 void trace(int) const; 00164 00166 static std::map<std::wstring,const feature_function *> feat_functs; 00167 00169 static std::map<std::wstring,const feature_function *> nerc_features; 00170 }; 00171 00172 00177 00178 class fex_rulepack { 00179 public: 00181 std::list<fex_condition> conds; 00183 int operation; 00185 std::list<fex_rule> rules; 00186 00188 fex_rulepack(); 00190 fex_rulepack(const fex_rulepack &); 00192 fex_rulepack& operator=(const fex_rulepack&); 00193 00195 void trace(int) const; 00196 }; 00197 00198 00199 #endif 00200
1.7.6.1