/* -*- Mode: C++ -*- */ // Copyright 2010 University of Helsinki // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //! @mainpage API to HFST ospell WFST spell-checking //! //! The hfst-ospell API has several layers for different end-users. A suggested //! starting point for new user is the @c ZHfstOspeller object, which reads an //! automaton set from zipped hfst file with metadata and provides high level //! access to it with generic spell-checking, correction and analysis functions. //! Second level of access is the Speller object, which can be used to //! construct spell-checker with two automata and traverse it and query //! low-level properties. The Speller is constructed with two Transducer objects //! which are the low-level access point to the automata with all the gory //! details of transition tables and symbol translations, headers and such. #ifndef HFST_OSPELL_ZHFSTOSPELLER_H_ #define HFST_OSPELL_ZHFSTOSPELLER_H_ #include "hfstol-stdafx.h" #if HAVE_CONFIG_H # include #endif #include #include #include "ospell.h" #include "hfst-ol.h" #include "ZHfstOspellerXmlMetadata.h" namespace hfst_ospell { //! @brief ZHfstOspeller class holds one speller contained in one //! zhfst file. //! Ospeller can perform all basic writer tool functionality that //! is supporte by the automata in the zhfst archive. class ZHfstOspeller { public: //! @brief create speller with default values for undefined //! language. OSPELL_API ZHfstOspeller(); //! @brief destroy all automata used by the speller. OSPELL_API ~ZHfstOspeller(); //! @brief assign a speller-suggestor circumventing the ZHFST format OSPELL_API void inject_speller(Speller * s); //! @brief set upper limit to priority queue when performing // suggestions or analyses. OSPELL_API void set_queue_limit(unsigned long limit); //! @brief set upper limit for weights OSPELL_API void set_weight_limit(Weight limit); //! @brief set search beam OSPELL_API void set_beam(Weight beam); //! @brief set time cutoff for correcting OSPELL_API void set_time_cutoff(float time_cutoff); //! @brief construct speller from named file containing valid //! zhfst archive. OSPELL_API void read_zhfst(const std::string& filename); //! @brief check if the given word is spelled correctly OSPELL_API bool spell(const std::string& wordform); //! @brief construct an ordered set of corrections for misspelled //! word form. OSPELL_API CorrectionQueue suggest(const std::string& wordform); //! @brief analyse word form morphologically //! @param wordform the string to analyse //! @param ask_sugger whether to use the spelling correction model // instead of the detection model AnalysisQueue analyse(const std::string& wordform, bool ask_sugger = false); //! @brief analyse word form morphologically, unconcatenated output //! strings (making it easier to find Multichar_symbols of //! the FST) //! @param wordform the string to analyse //! @param ask_sugger whether to use the spelling correction model // instead of the detection model AnalysisSymbolsQueue analyseSymbols(const std::string& wordform, bool ask_sugger = false); //! @brief construct an ordered set of corrections with analyses AnalysisCorrectionQueue suggest_analyses(const std::string& wordform); //! @brief hyphenate word form HyphenationQueue hyphenate(const std::string& wordform); //! @brief get access to metadata read from XML. const ZHfstOspellerXmlMetadata& get_metadata() const; //! @brief create string representation of the speller for //! programmer to debug std::string metadata_dump() const; private: //! @brief file or path where the speller came from std::string filename_; //! @brief upper bound for suggestions generated and given unsigned long suggestions_maximum_; //! @brief upper bound for suggestion weight generated and given Weight maximum_weight_; //! @brief upper bound for search beam around best candidate Weight beam_; //! @brief upper bound for search time in seconds float time_cutoff_; //! @brief whether automatons loaded yet can be used to check //! spelling bool can_spell_; //! @brief whether automatons loaded yet can be used to correct //! word forms bool can_correct_; //! @brief whether automatons loaded yet can be used to analyse //! word forms bool can_analyse_; //! @brief whether automatons loaded yet can be used to hyphenate //! word forms bool can_hyphenate_; //! @brief dictionaries loaded std::map acceptors_; //! @brief error models loaded std::map errmodels_; //! @brief pointer to current speller Speller* current_speller_; //! @brief pointer to current correction model Speller* current_sugger_; //! @brief pointer to current morphological analyser Speller* current_analyser_; //! @brief pointer to current hyphenator Transducer* current_hyphenator_; //! @brief the metadata of loaded speller ZHfstOspellerXmlMetadata metadata_; }; //! @brief Top-level exception for zhfst handling. //! Contains a human-readable error message that can be displayed to //! end-user as additional info when either solving exception or exiting. class ZHfstException : public std::runtime_error { public: ZHfstException() : std::runtime_error("unknown") {} //! @brief construct error with human readable message. //! //! the message will be displayed when recovering or dying from //! exception explicit ZHfstException(const std::string& message) : std::runtime_error(message) {} }; //! @brief Generic error in metadata parsing. // //! Gets raised if metadata is erroneous or missing. class ZHfstMetaDataParsingError : public ZHfstException { public: explicit ZHfstMetaDataParsingError(const std::string& message) : ZHfstException(message) {} }; //! @brief Exception for XML parser errors. // //! Gets raised if underlying XML parser finds an error in XML data. //! Errors include non-valid XML, missing or erroneous attributes or //! elements, etc. class ZHfstXmlParsingError : public ZHfstException { public: explicit ZHfstXmlParsingError(const std::string& message) : ZHfstException(message) {} }; //! @brief Generic error while reading zip file. //! //! Happens when libarchive is unable to proceed reading zip file or //! zip file is missing required files. class ZHfstZipReadingError : public ZHfstException { public: explicit ZHfstZipReadingError(const std::string& message) : ZHfstException(message) {} }; //! @brief Error when writing to temporary location. // //! This exception gets thrown, when e.g., zip extraction is unable to //! find or open temporary file for writing. class ZHfstTemporaryWritingError : public ZHfstException { public: explicit ZHfstTemporaryWritingError(const std::string& message) : ZHfstException(message) {} }; } // namespace hfst_ospell #endif // HFST_OSPELL_OSPELLER_SET_H_ // vim: set ft=cpp.doxygen: