FreeLing  3.0
hmm_tagger.h
Go to the documentation of this file.
00001 
00002 //
00003 //    FreeLing - Open Source Language Analyzers
00004 //
00005 //    Copyright (C) 2004   TALP Research Center
00006 //                         Universitat Politecnica de Catalunya
00007 //
00008 //    This library is free software; you can redistribute it and/or
00009 //    modify it under the terms of the GNU General Public
00010 //    License as published by the Free Software Foundation; either
00011 //    version 3 of the License, or (at your option) any later version.
00012 //
00013 //    This library is distributed in the hope that it will be useful,
00014 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 //    General Public License for more details.
00017 //
00018 //    You should have received a copy of the GNU General Public
00019 //    License along with this library; if not, write to the Free Software
00020 //    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00021 //
00022 //    contact: Lluis Padro (padro@lsi.upc.es)
00023 //             TALP Research Center
00024 //             despatx C6.212 - Campus Nord UPC
00025 //             08034 Barcelona.  SPAIN
00026 //
00028 
00029 #ifndef _TAGGER
00030 #define _TAGGER
00031 
00032 #include <map>
00033 #include <list>
00034 #include <set>
00035 
00036 #include "freeling/windll.h"
00037 #include "freeling/morfo/language.h"
00038 #include "freeling/morfo/tagger.h"
00039 
00050 
/// Trellis-like store used by the tagger declared further down in this header.
///
/// Only declarations are visible here, so the comments below state what the
/// signatures show and mark everything else as an assumption to be confirmed
/// against the implementation file.
///
/// NOTE(review): `trl` is a raw pointer and the class declares a destructor
/// but neither a copy constructor nor a copy assignment operator. If the
/// destructor releases `trl`, copying a trellis would release it twice --
/// confirm in the implementation and forbid copying if that is the case.
class trellis {
 private:

  /// One entry of the multisets stored in `trl`.
  class element {
   public:
    /// State label carried by this entry
    /// (presumably the predecessor state on the path -- TODO confirm).
    std::wstring state;
    /// Rank index carried by this entry
    /// (presumably which k-best path of `state` is meant -- TODO confirm).
    int kbest;
    /// Score of this entry
    /// (presumably a log-probability, cf. ZERO_logprob -- TODO confirm).
    double prob;

    /// Build an entry from a label, an index and a score
    /// (argument order presumably mirrors the member order -- TODO confirm).
    element(const std::wstring &, int, double);
    /// Destructor.
    ~element();
    /// Ordering relation; this is what std::multiset<element> sorts by.
    bool operator<(const element &) const;
    /// Equality comparison between two entries.
    bool operator==(const element &) const;
  };

  /// Pointer to map(s) from a state label to the multiset of its entries.
  /// Presumably an array holding one map per time step, indexed by the leading
  /// `int` argument of the member functions below -- TODO confirm.
  std::map <std::wstring, std::multiset<element> > *trl;
  /// Number of best paths handled
  /// (presumably initialised from the constructor's `kb` -- TODO confirm).
  unsigned int kbest;

 public:
  /// Constructor.
  /// First argument: presumably the number of time steps -- TODO confirm.
  /// @param kb  number of best paths to keep; defaults to 1.
  trellis(int, unsigned int kb=1);
  /// Destructor.
  ~trellis();

  /// Add an entry to the trellis.
  /// Unnamed arguments, in order: an `int` (presumably the time step), two
  /// labels (presumably current and previous state), and -- after `kb` -- a
  /// `double` score. TODO confirm the roles against the implementation.
  /// @param kb  k-best index stored with the entry (presumably).
  void insert(int, const std::wstring &, const std::wstring &, int kb, double);
  /// Return the score stored for the given (int, label) cell.
  /// @param k  which of the stored entries to read; defaults to 0
  ///           (presumably 0 = best -- TODO confirm).
  double delta(int, const std::wstring &, unsigned int k=0);
  /// Return a (label, index) pair for the given (int, label) cell;
  /// presumably the back-pointer of the k-th entry -- TODO confirm.
  /// @param k  which of the stored entries to read; defaults to 0.
  std::pair<std::wstring,int> phi(int, const std::wstring &, unsigned int k=0);
  /// Return a count for the given (int, label) cell
  /// (presumably how many entries are stored there -- TODO confirm).
  int nbest(int, const std::wstring &);

  /// Constant shared by all trellis objects; presumably the log-probability
  /// used to represent probability zero -- TODO confirm.
  /// NOTE(review): declared `float` although every score above is `double`.
  static float ZERO_logprob;
};
00099 
00100 
00108 
/// A set of state labels (wide strings).
///
/// The class adds no members of its own to std::set<std::wstring>; it only
/// gives that set type a dedicated name.
///
/// NOTE(review): std::set has no virtual destructor, so an emission_states
/// object must not be destroyed through a pointer to std::set<std::wstring>.
class emission_states: public std::set<std::wstring> {};
00110 
00111 
00118 
00119 class WINDLL hmm_tagger: public POS_tagger {
00120  private:
00121   // Configuration options
00122   std::wstring Language;
00123 
00125   std::map <std::wstring, double> PTag;
00126   std::map <std::wstring, double> PBg;
00127   std::map <std::wstring, double> PTrg;
00128   std::map <std::wstring, double> PInitial;
00129   std::map <std::wstring, double> PWord;
00130 
00132   std::multimap <std::wstring, std::wstring> Forbidden;
00133 
00135   std::map<std::wstring,double> pA_cache;
00136   std::map<std::wstring,double> pB_cache;
00137 
00139   unsigned int kbest;
00140 
00142   double c[3];
00143 
00144   bool is_forbidden(const std::wstring &, sentence::const_iterator) const;
00145   double ProbA_log(const std::wstring &, const std::wstring &, sentence::const_iterator);
00146   double ProbB_log(const std::wstring &, const word &);
00147   double ProbPi_log(const std::wstring &) const;
00148 
00150   std::list<emission_states> FindStates(const sentence &) const;
00151 
00152  public:
00154   hmm_tagger(const std::wstring &, const std::wstring &, bool, unsigned int, unsigned int kb=1);
00155 
00157   void annotate(sentence &);
00160   double SequenceProb_log(const sentence &, int k=0);
00161 
00162 };
00163 
00164 #endif
00165 
00166