FreeLing  3.0
numbers_modules.h
Go to the documentation of this file.
00001 
00002 //
00003 //    FreeLing - Open Source Language Analyzers
00004 //
00005 //    Copyright (C) 2004   TALP Research Center
00006 //                         Universitat Politecnica de Catalunya
00007 //
00008 //    This library is free software; you can redistribute it and/or
00009 //    modify it under the terms of the GNU General Public
00010 //    License as published by the Free Software Foundation; either
00011 //    version 3 of the License, or (at your option) any later version.
00012 //
00013 //    This library is distributed in the hope that it will be useful,
00014 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 //    General Public License for more details.
00017 //
00018 //    You should have received a copy of the GNU General Public
00019 //    License along with this library; if not, write to the Free Software
00020 //    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00021 //
00022 //    contact: Lluis Padro (padro@lsi.upc.es)
00023 //             TALP Research Center
00024 //             despatx C6.212 - Campus Nord UPC
00025 //             08034 Barcelona.  SPAIN
00026 //
00028 
00029 
00030 #ifndef _NUMBERS_MOD
00031 #define _NUMBERS_MOD
00032 
00033 #include <map>
00034 #include <boost/regex/icu.hpp>
00035 
00036 #include "freeling/morfo/language.h"
00037 #include "freeling/morfo/automat.h"
00038 // Regex sources. RE_NUM is not a single literal: it concatenates wide literals with MACO_Thousand and MACO_Decimal, so it only expands where those names are in scope.
00039 // Earlier, looser pattern kept for reference: #define RE_NUM L"^([0-9]+\\"+MACO_Thousand+L")*[0-9]+(\\"+MACO_Decimal+L")?[0-9]*$"
00040 #define RE_NUM L"^(\\d{1,3}(\\"+MACO_Thousand+L"\\d{3})*|\\d+)(\\"+MACO_Decimal+L"\\d+)?$"  // 1-3 digits plus (separator + 3 digits) groups, or a plain digit run; optional (decimal separator + digits). NOTE(review): the backslash escapes only the first character of each separator -- presumably single-character separators are assumed; confirm.
00041 #define RE_CODE L"^.*[0-9].*$"  // any text containing at least one digit 0-9
00042 
00043 // Auxiliary, kind of code (normal CODE, e.g. "X-23-12A";  ORDinal number, e.g. "4th"). Presumably the values kept in numbers_module::iscode -- TODO confirm.
00044 #define CODE 1
00045 #define ORD 2
00046 
00047 // numbers_module: common base, derived from automat, for the numbers_* recognizer classes.
00052 // Its protected members (separator settings, lookup tables, accumulators, regexes) are accessible to every subclass.
00053 class numbers_module : public automat {
00054 
00055  protected:
00056   // configuration options
00057   std::wstring MACO_Decimal, MACO_Thousand;  // separator strings; these are the names the RE_NUM macro splices into its pattern
00058 
00060   std::map<std::wstring,float> value;  // string -> float table; presumably number word -> numeric value (filled outside this header, TODO confirm)
00062   std::map<std::wstring,int> tok;  // string -> int table; presumably number word -> automaton token code (TODO confirm)
00064   std::map<int,long double> power;  // int -> long double table; presumably token code -> magnitude multiplier (TODO confirm)
00065 
00067   long double bilion,milion,units;  // accumulators (see ResetActions below); presumably partial values of the number being built -- identifiers are spelled bilion/milion in the code
00068   int block;  // NOTE(review): meaning not visible in this header -- document from the implementation
00069   int iscode;  // presumably holds CODE or ORD (macros defined above this class) -- TODO confirm
00070   boost::u32regex RE_number;  // presumably compiled from RE_NUM -- TODO confirm
00071   boost::u32regex RE_code;  // presumably compiled from RE_CODE -- TODO confirm
00072 
00073   // reset accumulators
00074   virtual void ResetActions();
00075  
00076  public:
00078   numbers_module(const std::wstring &, const std::wstring &);  // takes two strings; presumably the decimal and thousands separators (order not visible here -- TODO confirm)
00079 };
00080 // numbers_default: recognizer derived from numbers_module; judging by its name, the fallback when no language-specific variant applies (TODO confirm).
00085 // Declares private ComputeToken / StateActions / SetMultiwordAnalysis; presumably these implement hooks declared in automat -- verify against freeling/morfo/automat.h.
00086 class numbers_default : public numbers_module {
00087 
00088  private: 
00089   int ComputeToken(int,sentence::iterator&,sentence &);
00090   void StateActions(int, int, int, sentence::const_iterator);
00091   void SetMultiwordAnalysis(sentence::iterator, int);
00092 
00093  public:
00095   numbers_default(const std::wstring &, const std::wstring &);  // same two-string signature as the numbers_module constructor
00096 };
00097 
00098 // numbers_es: recognizer derived from numbers_module; judging by the "es" suffix, the Spanish variant (TODO confirm).
00103 // Declares private ComputeToken / StateActions / SetMultiwordAnalysis; presumably these implement hooks declared in automat -- verify against freeling/morfo/automat.h.
00104 class numbers_es : public numbers_module {
00105 
00106  private:
00107   int ComputeToken(int,sentence::iterator&, sentence &);
00108   void StateActions(int, int, int, sentence::const_iterator);
00109   void SetMultiwordAnalysis(sentence::iterator, int);
00110 
00111  public:
00113   numbers_es(const std::wstring &, const std::wstring &);  // same two-string signature as the numbers_module constructor
00114 };
00115 
00116 // numbers_ca: recognizer derived from numbers_module; judging by the "ca" suffix, the Catalan variant (TODO confirm).
00121 // Declares private ComputeToken / StateActions / SetMultiwordAnalysis; presumably these implement hooks declared in automat -- verify against freeling/morfo/automat.h.
00122 class numbers_ca : public numbers_module {
00123 
00124  private:
00125   int ComputeToken(int,sentence::iterator&,sentence &);
00126   void StateActions(int, int, int, sentence::const_iterator);
00127   void SetMultiwordAnalysis(sentence::iterator, int);
00128 
00129  public:
00131   numbers_ca(const std::wstring &, const std::wstring &);  // same two-string signature as the numbers_module constructor
00132 };
00133 // numbers_gl: recognizer derived from numbers_module; judging by the "gl" suffix, the Galician variant (TODO confirm).
00138 // Declares private ComputeToken / StateActions / SetMultiwordAnalysis; presumably these implement hooks declared in automat -- verify against freeling/morfo/automat.h.
00139 class numbers_gl : public numbers_module {
00140 
00141  private:
00142   int ComputeToken(int,sentence::iterator&,sentence &);
00143   void StateActions(int, int, int, sentence::const_iterator);
00144   void SetMultiwordAnalysis(sentence::iterator, int);
00145 
00146  public:
00148   numbers_gl(const std::wstring &, const std::wstring &);  // same two-string signature as the numbers_module constructor
00149 };
00150 // numbers_pt: recognizer derived from numbers_module; judging by the "pt" suffix, the Portuguese variant (TODO confirm).
00155 // Declares private ComputeToken / StateActions / SetMultiwordAnalysis; presumably these implement hooks declared in automat -- verify against freeling/morfo/automat.h.
00156 class numbers_pt : public numbers_module {
00157 
00158  private:
00159   int ComputeToken(int,sentence::iterator&,sentence &);
00160   void StateActions(int, int, int, sentence::const_iterator);
00161   void SetMultiwordAnalysis(sentence::iterator, int);
00162 
00163  public:
00165   numbers_pt(const std::wstring &, const std::wstring &);  // same two-string signature as the numbers_module constructor
00166 };
00167 
00168 // numbers_it: recognizer derived from numbers_module; judging by the "it" suffix, the Italian variant (TODO confirm).
00173 // Besides ComputeToken / StateActions / SetMultiwordAnalysis it redeclares ResetActions() (virtual in numbers_module) and adds accumulators of its own.
00174 class numbers_it : public numbers_module {
00175 
00176  private:
00177   int  ComputeToken(int,sentence::iterator&, sentence &);
00178   void ResetActions();  // overrides numbers_module::ResetActions(); presumably also clears the extra accumulators below -- TODO confirm
00179   void StateActions(int, int, int, sentence::const_iterator);
00180   void SetMultiwordAnalysis(sentence::iterator, int);
00181 
00182   // !! unify process with other languages !! 
00183   long double hundreds; // additional accumulator, not present in the base class.
00184   long double thousands;        // additional accumulator, not present in the base class.
00185   long double floatUnits;   // "e tre quarto". This variable counts how
00186   // many fractional parts ("halves", "quarters") we have
00187 
00188  public:
00190   numbers_it(const std::wstring &, const std::wstring &);  // same two-string signature as the numbers_module constructor
00191 };
00192 
00193 // numbers_en: recognizer derived from numbers_module; judging by the "en" suffix, the English variant (TODO confirm).
00198 // Declares private ComputeToken / StateActions / SetMultiwordAnalysis; presumably these implement hooks declared in automat -- verify against freeling/morfo/automat.h.
00199 class numbers_en : public numbers_module {
00200 
00201  private: 
00202   int ComputeToken(int,sentence::iterator&, sentence &);
00203   void StateActions(int, int, int, sentence::const_iterator);
00204   void SetMultiwordAnalysis(sentence::iterator, int);
00205 
00206  public:
00208   numbers_en(const std::wstring &, const std::wstring &);  // same two-string signature as the numbers_module constructor
00209 };
00210 // numbers_ru: recognizer derived from numbers_module; judging by the "ru" suffix, the Russian variant (TODO confirm).
00215 // Declares private ComputeToken / StateActions / SetMultiwordAnalysis; presumably these implement hooks declared in automat -- verify against freeling/morfo/automat.h.
00216 class numbers_ru : public numbers_module 
00217 {
00218  private: 
00219   int ComputeToken(int,sentence::iterator&, sentence &);
00220   void StateActions(int, int, int, sentence::const_iterator);
00221   void SetMultiwordAnalysis(sentence::iterator, int);
00222 
00223  public:
00224   numbers_ru(const std::wstring &, const std::wstring &);  // same two-string signature as the numbers_module constructor
00225 };
00226 
00227 
00228 #endif
00229