|
FreeLing
3.0
|
//
//    FreeLing - Open Source Language Analyzers
//
//    Copyright (C) 2004   TALP Research Center
//                         Universitat Politecnica de Catalunya
//
//    This library is free software; you can redistribute it and/or
//    modify it under the terms of the GNU General Public
//    License as published by the Free Software Foundation; either
//    version 3 of the License, or (at your option) any later version.
//
//    This library is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//    General Public License for more details.
//
//    You should have received a copy of the GNU General Public
//    License along with this library; if not, write to the Free Software
//    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
//    contact: Lluis Padro (padro@lsi.upc.es)
//             TALP Research Center
//             despatx C6.212 - Campus Nord UPC
//             08034 Barcelona.
SPAIN 00027 // 00029 00030 #ifndef _PROBABILITIES 00031 #define _PROBABILITIES 00032 00033 #include <map> 00034 00035 #include "freeling/windll.h" 00036 #include "freeling/morfo/language.h" 00037 #include "freeling/morfo/processor.h" 00038 00039 const std::wstring RE_FZ=L"^[FZ]"; 00040 00045 00046 class WINDLL probabilities : public processor { 00047 private: 00049 boost::u32regex RE_PunctNum; 00050 00052 double ProbabilityThreshold; 00053 std::wstring Language; 00054 00057 double BiassSuffixes; 00058 00060 double LidstoneLambda; 00061 00063 bool activate_guesser; 00064 00066 std::map<std::wstring,double> single_tags; 00068 std::map<std::wstring,std::map<std::wstring,double> > class_tags; 00070 std::map<std::wstring,std::map<std::wstring,double> > lexical_tags; 00072 std::map<std::wstring,double> unk_tags; 00074 std::map<std::wstring,std::map<std::wstring,double> > unk_suffs; 00076 double theeta; 00078 std::wstring::size_type long_suff; 00079 00081 void smoothing(word &); 00083 double compute_probability(const std::wstring &, double, const std::wstring &); 00085 double guesser(word &, double); 00086 00087 public: 00089 probabilities(const std::wstring &, const std::wstring &, double); 00090 00092 void annotate_word(word &); 00093 00095 void set_activate_guesser(bool); 00096 00098 void analyze(sentence &); 00100 void analyze(std::list<sentence> &); 00102 sentence analyze(const sentence &); 00104 std::list<sentence> analyze(const std::list<sentence> &); 00105 00106 }; 00107 00108 #endif
1.7.6.1