|
FreeLing
3.0
|
//
// FreeLing - Open Source Language Analyzers
//
// Copyright (C) 2004 TALP Research Center
// Universitat Politecnica de Catalunya
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// contact: Lluis Padro (padro@lsi.upc.es)
// TALP Research Center
// despatx C6.212 - Campus Nord UPC
// 08034 Barcelona. SPAIN
//

// NOTE(review): identifiers starting with an underscore followed by an
// uppercase letter are reserved for the implementation. The guard name is
// kept unchanged here so nothing that might test it elsewhere breaks.
#ifndef _GRAMMAR
#define _GRAMMAR

#include <string>
#include <list>
#include <map>
#include <set>

///
/// A single grammar rule: a head symbol, the list of symbols on its
/// right-hand side, and a governor value.
/// Only declarations are visible in this header; the member functions
/// are defined elsewhere.
///
class rule {
  protected:
    /// Head of the rule.
    std::wstring head;
    /// Right-hand side of the rule, as an ordered list of symbols.
    std::list<std::wstring> right;
    /// Governor of the rule. Presumably an index into `right` (the grammar
    /// class describes its default governor as "first element in rule")
    /// -- TODO confirm against the implementation.
    int gov;

  public:
    /// Constructors and assignment. The three-argument form takes a wide
    /// string, a list of wide strings and an int; by type these match the
    /// head, right and gov members, in that order (not verifiable from this
    /// header alone).
    rule(const std::wstring &, const std::list<std::wstring> &, const int);
    rule(const rule & r);
    rule();
    rule & operator=(const rule&);

    /// Set the rule governor.
    void set_governor(const int);
    /// Get the rule governor.
    /// NOTE(review): returns unsigned int although the stored member and the
    /// setter argument are (signed) int; confirm negative values never occur.
    unsigned int get_governor(void) const;
    /// Get the head of the rule (returned by value).
    std::wstring get_head() const;
    /// Get the right-hand side of the rule (returned by value).
    std::list<std::wstring> get_right() const;
};

///
/// A grammar: a multimap from wide-string keys to rules, plus several
/// auxiliary symbol tables.
/// Which symbol of a rule is used as the multimap key is decided by the
/// implementation, which is not visible in this header.
///
class grammar : public std::multimap<std::wstring,rule> {

  private:
    /// Set of non-terminal symbols.
    std::set<std::wstring> nonterminal;
    /// Rules kept apart from the main multimap; presumably those involving
    /// wildcards (see get_rules_right_wildcard) -- TODO confirm.
    std::multimap<std::wstring,rule> wild;
    /// Pairs of strings queried through in_filemap(); presumably related to
    /// the FILENAME token -- TODO confirm.
    std::multimap<std::wstring,std::wstring> filemap;
    /// Integer value associated to a symbol; presumably the priority
    /// reported by get_priority() -- TODO confirm.
    std::map<std::wstring,int> prior;
    /// Symbols marked as hidden.
    std::set<std::wstring> hidden;
    /// Symbols marked as flat.
    std::set<std::wstring> flat;
    /// Symbols marked as no-top.
    std::set<std::wstring> notop;
    /// Symbols marked as only-top.
    std::set<std::wstring> onlytop;
    /// Start symbol of the grammar.
    std::wstring start;
    /// Create and store a new rule. Argument meaning beyond the types
    /// (string, list of strings, bool flag, governor value) is defined by
    /// the implementation.
    void new_rule(const std::wstring &, const std::list<std::wstring> &, bool, const int rgov);

  public:

    // no-governor mark
    static unsigned int NOGOV;
    // default governor (first element in rule)
    static unsigned int DEFGOV;

    /// Constructor. Takes a single wide string; presumably the name of the
    /// grammar file to load -- TODO confirm.
    grammar(const std::wstring &);

    // obtain the specificity of a terminal symbol
    int get_specificity(const std::wstring &) const;
    // obtain the priority of a non-terminal symbol
    int get_priority(const std::wstring &) const;
    /// Get the start symbol of the grammar.
    std::wstring get_start_symbol() const;
    /// Whether the given symbol is hidden.
    bool is_hidden(const std::wstring &) const;
    /// Whether the given symbol is flat.
    bool is_flat(const std::wstring &) const;
    /// Whether the given symbol is no-top.
    bool is_notop(const std::wstring &) const;
    /// Whether the given symbol is only-top.
    bool is_onlytop(const std::wstring &) const;
    /// Whether the given symbol is a terminal.
    bool is_terminal(const std::wstring &) const;
    /// Get a list of rules selected by the given symbol (returned by value).
    std::list<rule> get_rules_right(const std::wstring &) const;
    /// Same as get_rules_right, for wildcard rules (returned by value).
    std::list<rule> get_rules_right_wildcard(const std::wstring &) const;
    /// Whether the given pair of strings is present in the file map.
    bool in_filemap(const std::wstring &, const std::wstring &) const;
};

//------ TOKENS used by the lexer to parse a grammar file
// NOTE(review): these are unscoped preprocessor macros with very generic
// names (ERROR, START, DOT, ...), so they can collide with identifiers in
// any file that includes this header. They are left as macros to keep the
// existing interface unchanged.
#define CATEGORY 1
#define FORM     2
#define LEMMA    3
#define COMMENT  4
#define ERROR    5
#define ARROW    6
#define BAR      7
#define COMMA    8
#define DOT      9
#define FLAT     10
#define HIDDEN   11
#define NOTOP    12
#define ONLYTOP  13
#define PRIOR    14
#define START    15
#define FILENAME 16
#define HEAD     17

#endif
1.7.6.1