|
FreeLing
3.0
|
00001 00002 // 00003 // FreeLing - Open Source Language Analyzers 00004 // 00005 // Copyright (C) 2004 TALP Research Center 00006 // Universitat Politecnica de Catalunya 00007 // 00008 // This library is free software; you can redistribute it and/or 00009 // modify it under the terms of the GNU General Public 00010 // License as published by the Free Software Foundation; either 00011 // version 3 of the License, or (at your option) any later version. 00012 // 00013 // This library is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public 00019 // License along with this library; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 // 00022 // contact: Lluis Padro (padro@lsi.upc.es) 00023 // TALP Research Center 00024 // despatx C6.212 - Campus Nord UPC 00025 // 08034 Barcelona. SPAIN 00026 // 00028 00029 00030 #ifndef _NUMBERS_MOD 00031 #define _NUMBERS_MOD 00032 00033 #include <map> 00034 #include <boost/regex/icu.hpp> 00035 00036 #include "freeling/morfo/language.h" 00037 #include "freeling/morfo/automat.h" 00038 00039 //#define RE_NUM L"^([0-9]+\\"+MACO_Thousand+L")*[0-9]+(\\"+MACO_Decimal+L")?[0-9]*$" 00040 #define RE_NUM L"^(\\d{1,3}(\\"+MACO_Thousand+L"\\d{3})*|\\d+)(\\"+MACO_Decimal+L"\\d+)?$" 00041 #define RE_CODE L"^.*[0-9].*$" 00042 00043 // Auxiliary, kind of code (normal CODE, e.g. "X-23-12A"; ORDinal number, e.g. "4th") 00044 #define CODE 1 00045 #define ORD 2 00046 00047 00052 00053 class numbers_module : public automat { 00054 00055 protected: 00056 // configuration options 00057 std::wstring MACO_Decimal, MACO_Thousand; 00058 00060 std::map<std::wstring,float> value; 00062 std::map<std::wstring,int> tok; 00064 std::map<int,long double> power; 00065 00067 long double bilion,milion,units; 00068 int block; 00069 int iscode; 00070 boost::u32regex RE_number; 00071 boost::u32regex RE_code; 00072 00073 // reset accumulators 00074 virtual void ResetActions(); 00075 00076 public: 00078 numbers_module(const std::wstring &, const std::wstring &); 00079 }; 00080 00085 00086 class numbers_default : public numbers_module { 00087 00088 private: 00089 int ComputeToken(int,sentence::iterator&,sentence &); 00090 void StateActions(int, int, int, sentence::const_iterator); 00091 void SetMultiwordAnalysis(sentence::iterator, int); 00092 00093 public: 00095 numbers_default(const std::wstring &, const std::wstring &); 00096 }; 00097 00098 00103 00104 class numbers_es : public numbers_module { 00105 00106 private: 00107 int ComputeToken(int,sentence::iterator&, sentence &); 00108 void StateActions(int, int, int, sentence::const_iterator); 00109 void SetMultiwordAnalysis(sentence::iterator, int); 00110 00111 public: 00113 numbers_es(const std::wstring &, const std::wstring &); 00114 }; 00115 00116 00121 00122 class numbers_ca : public numbers_module { 00123 00124 private: 00125 int ComputeToken(int,sentence::iterator&,sentence &); 00126 void StateActions(int, int, int, sentence::const_iterator); 00127 void SetMultiwordAnalysis(sentence::iterator, int); 00128 00129 public: 00131 numbers_ca(const std::wstring &, const std::wstring &); 00132 }; 00133 00138 00139 class numbers_gl : public numbers_module { 00140 00141 private: 00142 int ComputeToken(int,sentence::iterator&,sentence &); 00143 void StateActions(int, int, int, sentence::const_iterator); 00144 void SetMultiwordAnalysis(sentence::iterator, int); 00145 00146 public: 00148 numbers_gl(const std::wstring &, const std::wstring &); 00149 }; 00150 00155 00156 class numbers_pt : public numbers_module { 00157 00158 private: 00159 int ComputeToken(int,sentence::iterator&,sentence &); 00160 void StateActions(int, int, int, sentence::const_iterator); 00161 void SetMultiwordAnalysis(sentence::iterator, int); 00162 00163 public: 00165 numbers_pt(const std::wstring &, const std::wstring &); 00166 }; 00167 00168 00173 00174 class numbers_it : public numbers_module { 00175 00176 private: 00177 int ComputeToken(int,sentence::iterator&, sentence &); 00178 void ResetActions(); 00179 void StateActions(int, int, int, sentence::const_iterator); 00180 void SetMultiwordAnalysis(sentence::iterator, int); 00181 00182 // !! unify process with other languages !! 00183 long double hundreds; //this is additional. 00184 long double thousands; //this is additional. 00185 long double floatUnits; // "e tre quarto". This variable will count how 00186 // many "halfs", "quartrs" we have 00187 00188 public: 00190 numbers_it(const std::wstring &, const std::wstring &); 00191 }; 00192 00193 00198 00199 class numbers_en : public numbers_module { 00200 00201 private: 00202 int ComputeToken(int,sentence::iterator&, sentence &); 00203 void StateActions(int, int, int, sentence::const_iterator); 00204 void SetMultiwordAnalysis(sentence::iterator, int); 00205 00206 public: 00208 numbers_en(const std::wstring &, const std::wstring &); 00209 }; 00210 00215 00216 class numbers_ru : public numbers_module 00217 { 00218 private: 00219 int ComputeToken(int,sentence::iterator&, sentence &); 00220 void StateActions(int, int, int, sentence::const_iterator); 00221 void SetMultiwordAnalysis(sentence::iterator, int); 00222 00223 public: 00224 numbers_ru(const std::wstring &, const std::wstring &); 00225 }; 00226 00227 00228 #endif 00229
1.7.6.1