Concordia
concordia.hpp
1 #ifndef CONCORDIA_HDR
2 #define CONCORDIA_HDR
3 
4 #include <string>
5 #include <vector>
6 #include <boost/shared_ptr.hpp>
7 #include <boost/filesystem.hpp>
8 
9 #include "concordia/common/config.hpp"
10 #include "concordia/example.hpp"
11 #include "concordia/matched_pattern_fragment.hpp"
12 #include "concordia/concordia_config.hpp"
13 #include "concordia/concordia_index.hpp"
14 #include "concordia/index_searcher.hpp"
15 #include "concordia/concordia_search_result.hpp"
16 #include "concordia/tokenized_sentence.hpp"
17 #include "concordia/anubis_search_result.hpp"
18 #include <divsufsort.h>
19 
20 
38 class Concordia {
39 public:
42  Concordia();
43 
49  explicit Concordia(const std::string & indexPath,
50  const std::string & configFilePath)
51  throw(ConcordiaException);
54  virtual ~Concordia();
55 
59  std::string & getVersion();
60 
69  TokenizedSentence tokenize(const std::string & sentence,
70  bool byWhitespace = false,
71  bool generateCodes = true)
72  throw(ConcordiaException);
73 
81  std::vector<TokenizedSentence> tokenizeAll(
82  const std::vector<std::string> & sentences,
83  bool byWhitespace = false,
84  bool generateCodes = true)
85  throw(ConcordiaException);
86 
93  TokenizedSentence addExample(const Example & example)
94  throw(ConcordiaException);
95 
101  void addTokenizedExample(
102  const TokenizedSentence & tokenizedSentence,
103  const SUFFIX_MARKER_TYPE id)
104  throw(ConcordiaException);
105 
112  const std::vector<TokenizedSentence> & tokenizedSentences,
113  const std::vector<SUFFIX_MARKER_TYPE> & ids)
114  throw(ConcordiaException);
115 
122  std::vector<TokenizedSentence> addAllExamples(
123  const std::vector<Example> & examples)
124  throw(ConcordiaException);
125 
133  MatchedPatternFragment simpleSearch(const std::string & pattern,
134  bool byWhitespace = false)
135  throw(ConcordiaException);
136 
147  MatchedPatternFragment lexiconSearch(const std::string & pattern,
148  bool byWhitespace = false)
149  throw(ConcordiaException);
150 
151  SUFFIX_MARKER_TYPE countOccurences(const std::string & pattern)
152  throw(ConcordiaException);
153 
162  std::vector<AnubisSearchResult> anubisSearch(const std::string & pattern)
163  throw(ConcordiaException);
164 
172  boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
173  const std::string & pattern,
174  bool byWhitespace = false)
175  throw(ConcordiaException);
176 
183 
188  void refreshSAfromRAM() throw(ConcordiaException);
189 
193  void clearIndex() throw(ConcordiaException);
194 
195 private:
196  std::string _getWordMapFilePath();
197 
198  std::string _getHashedIndexFilePath();
199 
200  std::string _getMarkersFilePath();
201 
202  void _initializeIndex() throw(ConcordiaException);
203 
204  static std::string _libraryVersion;
205 
206  std::string _indexPath;
207 
208  boost::shared_ptr<ConcordiaConfig> _config;
209 
210  boost::shared_ptr<ConcordiaIndex> _index;
211 
212  boost::shared_ptr<IndexSearcher> _searcher;
213 
214  boost::shared_ptr<HashGenerator> _hashGenerator;
215 
216  boost::shared_ptr<std::vector<sauchar_t> > _T;
217 
218  boost::shared_ptr<std::vector<saidx_t> > _SA;
219 
220  boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > _markers;
221 };
222 
223 #endif
virtual ~Concordia()
Definition: concordia.cpp:34
ConcordiaException
Definition: concordia_exception.hpp:11
void addTokenizedExample(const TokenizedSentence &tokenizedSentence, const SUFFIX_MARKER_TYPE id)
Definition: concordia.cpp:100
MatchedPatternFragment lexiconSearch(const std::string &pattern, bool byWhitespace=false)
Definition: concordia.cpp:227
TokenizedSentence addExample(const Example &example)
Definition: concordia.cpp:92
Concordia
Definition: concordia.hpp:38
std::vector< AnubisSearchResult > anubisSearch(const std::string &pattern)
Definition: concordia.cpp:244
TokenizedSentence
Definition: tokenized_sentence.hpp:26
TokenizedSentence tokenize(const std::string &sentence, bool byWhitespace=false, bool generateCodes=true)
Definition: concordia.cpp:52
std::string & getVersion()
Definition: concordia.cpp:37
std::vector< TokenizedSentence > tokenizeAll(const std::vector< std::string > &sentences, bool byWhitespace=false, bool generateCodes=true)
Definition: concordia.cpp:66
MatchedPatternFragment
Definition: matched_pattern_fragment.hpp:21
void refreshSAfromRAM()
Definition: concordia.cpp:176
void clearIndex()
Definition: concordia.cpp:270
void loadRAMIndexFromDisk()
Definition: concordia.cpp:125
void addAllTokenizedExamples(const std::vector< TokenizedSentence > &tokenizedSentences, const std::vector< SUFFIX_MARKER_TYPE > &ids)
Definition: concordia.cpp:108
boost::shared_ptr< ConcordiaSearchResult > concordiaSearch(const std::string &pattern, bool byWhitespace=false)
Definition: concordia.cpp:256
Concordia()
Definition: concordia.cpp:18
Example
Definition: example.hpp:14
std::vector< TokenizedSentence > addAllExamples(const std::vector< Example > &examples)
Definition: concordia.cpp:119
MatchedPatternFragment simpleSearch(const std::string &pattern, bool byWhitespace=false)
Definition: concordia.cpp:213