FreeLing  3.0
util.h
Go to the documentation of this file.
00001 
00002 //
00003 //    Fries - Feature Retriever for Intensional Encoding of Sentences
00004 //
00005 //    Copyright (C) 2006   TALP Research Center
00006 //                         Universitat Politecnica de Catalunya
00007 //
00008 //    This file is part of the Fries library
00009 //
00010 //    The Fries library is free software; you can redistribute it 
00011 //    and/or modify it under the terms of the GNU General Public
00012 //    License as published by the Free Software Foundation; either
00013 //    version 3 of the License, or (at your option) any later version.
00014 //
00015 //    This library is distributed in the hope that it will be useful,
00016 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 //    General Public License for more details.
00019 //
00020 //    You should have received a copy of the GNU General Public
00021 //    License along with this library; if not, write to the Free Software
00022 //    Foundation, Inc., 51 Franklin St, 5th Floor, Boston, MA 02110-1301 USA
00023 //
00024 //    contact: Lluis Padro (padro@lsi.upc.es)
00025 //             TALP Research Center
00026 //             despatx Omega.S112 - Campus Nord UPC
00027 //             08034 Barcelona.  SPAIN
00028 //
00030 
00031 #ifndef _UTIL
00032 #define _UTIL
00033 
00034 #include <list>
00035 #include <string>
00036 #include <vector>
00037 #include <set>
00038 #include <boost/regex/icu.hpp>
00039 
00040 #include "freeling/windll.h"
00041 
00042 // Capitalization patterns
      // Integer codes naming a capitalization pattern. In this header,
      // util::capitalization() returns an int and util::capitalize() takes an
      // int argument; presumably both use these codes -- confirm against the
      // implementation file.
      // Judging by the names only (TODO confirm):
      //   UPPER_NONE - no upper-case letters,
      //   UPPER_1ST  - only the first letter is upper-case,
      //   UPPER_ALL  - every letter is upper-case.
00043 #define UPPER_NONE 0
00044 #define UPPER_1ST 1
00045 #define UPPER_ALL 2
00046  
00052 
00053 class WINDLL util {
        // Grouping of wide-string utility functions and shared regular
        // expressions. Every member declared in this listing is static and
        // public, so nothing here requires a util instance. Only declarations
        // are visible in this header; the definitions live elsewhere.
        // WINDLL comes from "freeling/windll.h" (presumably a DLL
        // export/import decoration -- confirm in that header).
00054  public:
        /// Shared Unicode regular expressions (boost::u32regex). They are
        /// declared here and defined elsewhere. Presumably each one backs the
        /// predicate of the matching name below, or is used directly by
        /// callers -- the actual patterns are not visible in this header.
00056   static boost::u32regex RE_has_lowercase;
00057   static boost::u32regex RE_has_alphanum;
00058   static boost::u32regex RE_is_capitalized;
00059   static boost::u32regex RE_all_digits;
00060   static boost::u32regex RE_all_caps;
00061   static boost::u32regex RE_initial_dot;
00062   static boost::u32regex RE_all_caps_dot;
00063   static boost::u32regex RE_capitalized_dot;
00064   static boost::u32regex RE_has_digits;
00065   static boost::u32regex RE_lowercase_dot;
00066 
        /// Locale initialization. Takes a wide-string argument that defaults
        /// to L"default" when omitted; presumably a locale name -- confirm.
00068   static void init_locale(const std::wstring &s=L"default");
        // NOTE(review): std::wifstream / std::wofstream are named below, but
        // the includes visible in this header contain neither <fstream> nor
        // <iosfwd>; presumably they arrive transitively -- confirm.
        /// Input-stream overload: receives the stream by non-const reference
        /// plus a wide string (presumably the file name -- confirm).
00070   static void open_utf8_file(std::wifstream &, const std::wstring &);
        /// Output-stream overload of the function above, with the same
        /// argument layout.
00072   static void open_utf8_file(std::wofstream &, const std::wstring &);
        /// Returns a new wide string computed from the argument, which is
        /// taken by const reference (presumably its lower-case form).
00074   static std::wstring lowercase(const std::wstring &);
        /// Returns a new wide string computed from the argument, which is
        /// taken by const reference (presumably its upper-case form).
00076   static std::wstring uppercase(const std::wstring &);
        /// Boolean predicate on a wide string (presumably: contains at least
        /// one lower-case letter -- confirm).
00078   static bool has_lowercase(const std::wstring &);
        /// Boolean predicate on a wide string (presumably: contains at least
        /// one alphanumeric character -- confirm).
00080   static bool has_alphanum(const std::wstring &);
        /// Boolean predicate on a wide string (presumably: starts with an
        /// upper-case letter -- confirm exact rule).
00082   static bool is_capitalized(const std::wstring &);
        /// Boolean predicate on a wide string (presumably: consists only of
        /// digits -- confirm).
00084   static bool all_digits(const std::wstring &);
        /// Boolean predicate on a wide string (presumably: consists only of
        /// upper-case letters -- confirm).
00086   static bool all_caps(const std::wstring &);
        /// Builds a wide string from two wide strings (presumably turns a
        /// path into an absolute one relative to a base; which argument is
        /// which is not visible here -- confirm).
00088   static std::wstring absolute(const std::wstring &, const std::wstring &);
        /// Builds a wide string from two wide strings (presumably one is the
        /// text and the other the set of characters to drop; argument order
        /// is not visible here -- confirm).
00090   static std::wstring eliminateChars(const std::wstring &, const std::wstring &);
        /// Produces a vector of wide strings from two wide strings
        /// (presumably the text and the separator -- confirm order).
00092   static std::vector<std::wstring> split(const std::wstring &, const std::wstring &);
        /// Works in place: the first argument is a non-const reference and
        /// nothing is returned. The two const arguments are presumably the
        /// text to look for and its replacement -- confirm order.
00094   static void find_and_replace(std::wstring &, const std::wstring &, const std::wstring &);
        /// Numeric <-> wide-string conversions for int, double and long
        /// double. How malformed input is handled, and how floating-point
        /// values are formatted, is not visible in this header.
00096   static int wstring2int(const std::wstring &);
00097   static std::wstring int2wstring(const int);
00098   static double wstring2double(const std::wstring &);
00099   static std::wstring double2wstring(const double);
00100   static long double wstring2longdouble(const std::wstring &);
00101   static std::wstring longdouble2wstring(const long double);
        /// Container -> wide-string conversions. Each takes a container and a
        /// wide string (presumably the separator placed between elements --
        /// confirm).
00102   static std::wstring vector2wstring(const std::vector<std::wstring> &, const std::wstring &);
00103   static std::wstring list2wstring(const std::list<std::wstring> &, const std::wstring &);
        /// Same idea for a list of (wstring, double) pairs, with two extra
        /// wide strings (presumably one separator inside a pair and one
        /// between pairs; which is which is not visible here -- confirm).
00104   static std::wstring pairlist2wstring(const std::list<std::pair<std::wstring, double> > &, const std::wstring &, const std::wstring &);
        /// Wide-string -> container conversions. Each takes two wide strings
        /// (presumably the text and the separator -- confirm order).
00105   static std::list<std::wstring> wstring2list(const std::wstring &, const std::wstring &);
00106   static std::vector<std::wstring> wstring2vector(const std::wstring &, const std::wstring &);
00107   static std::set<std::wstring> wstring2set(const std::wstring &, const std::wstring &);
        /// Set -> wide-string conversion; same argument layout as
        /// vector2wstring / list2wstring.
00108   static std::wstring set2wstring(const std::set<std::wstring> &, const std::wstring &);
        /// Conversions between narrow (std::string) and wide (std::wstring)
        /// strings. The narrow-side encoding is not visible in this header.
00109   static std::string wstring2string(const std::wstring& s);
00110   static std::wstring string2wstring(const std::string& s);
        /// Returns an int for the given wide string (presumably one of the
        /// UPPER_* codes defined above the class -- confirm).
00111   static int capitalization(const std::wstring &);
        /// Builds a wide string from a wide string, an int (presumably an
        /// UPPER_* code) and a bool flag whose meaning is not visible in
        /// this header -- check the implementation before relying on it.
00112   static std::wstring capitalize(const std::wstring &, int, bool);
00113 };
00114 
00115 #endif