6 #include <boost/shared_ptr.hpp>
7 #include <boost/filesystem.hpp>
9 #include "concordia/common/config.hpp"
10 #include "concordia/example.hpp"
11 #include "concordia/matched_pattern_fragment.hpp"
12 #include "concordia/concordia_config.hpp"
13 #include "concordia/concordia_index.hpp"
14 #include "concordia/index_searcher.hpp"
15 #include "concordia/concordia_search_result.hpp"
16 #include "concordia/tokenized_sentence.hpp"
17 #include "concordia/anubis_search_result.hpp"
18 #include <divsufsort.h>
49 explicit Concordia(
const std::string & indexPath,
50 const std::string & configFilePath)
70 bool byWhitespace =
false,
71 bool generateCodes =
true)
82 const std::vector<std::string> & sentences,
83 bool byWhitespace =
false,
84 bool generateCodes =
true)
103 const SUFFIX_MARKER_TYPE
id)
112 const std::vector<TokenizedSentence> & tokenizedSentences,
113 const std::vector<SUFFIX_MARKER_TYPE> & ids)
123 const std::vector<Example> & examples)
134 bool byWhitespace =
false)
148 bool byWhitespace =
false)
151 SUFFIX_MARKER_TYPE countOccurences(
const std::string & pattern)
162 std::vector<AnubisSearchResult>
anubisSearch(
const std::string & pattern)
173 const std::string & pattern,
174 bool byWhitespace =
false)
// Underscore-prefixed helper declarations: each takes no arguments and
// returns a std::string by value.
// NOTE(review): judging only by their names, these presumably build the
// locations of the word map, hashed index and markers files (possibly derived
// from _indexPath) -- the definitions are not in this excerpt; confirm in
// concordia.cpp.
196 std::string _getWordMapFilePath();
198 std::string _getHashedIndexFilePath();
200 std::string _getMarkersFilePath();
// Data members. The enclosing class header is outside this excerpt, so the
// notes below describe only what the declared types show; everything else is
// marked as an assumption to verify.

// Static string, i.e. one copy shared by every instance.
// NOTE(review): presumably the value handed out by getVersion() -- confirm.
204 static std::string _libraryVersion;
// NOTE(review): presumably stores the indexPath constructor argument -- confirm.
206 std::string _indexPath;
// Collaborator objects held through boost::shared_ptr.
208 boost::shared_ptr<ConcordiaConfig> _config;
210 boost::shared_ptr<ConcordiaIndex> _index;
212 boost::shared_ptr<IndexSearcher> _searcher;
// NOTE(review): no HashGenerator header is among the includes visible in this
// excerpt; it presumably arrives transitively -- confirm.
214 boost::shared_ptr<HashGenerator> _hashGenerator;
// Shared vectors whose element types (sauchar_t, saidx_t) are the character
// and index types of <divsufsort.h>.
// NOTE(review): the names mirror the T / SA parameters of the divsufsort API,
// so these are presumably the indexed text and its suffix array -- confirm.
216 boost::shared_ptr<std::vector<sauchar_t> > _T;
218 boost::shared_ptr<std::vector<saidx_t> > _SA;
// Shared vector of SUFFIX_MARKER_TYPE values.
// NOTE(review): presumably per-position markers kept alongside _T -- confirm
// against ConcordiaIndex / IndexSearcher usage.
220 boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > _markers;
virtual ~Concordia()
Definition: concordia.cpp:34
Definition: concordia_exception.hpp:11
void addTokenizedExample(const TokenizedSentence &tokenizedSentence, const SUFFIX_MARKER_TYPE id)
Definition: concordia.cpp:100
MatchedPatternFragment lexiconSearch(const std::string &pattern, bool byWhitespace=false)
Definition: concordia.cpp:227
TokenizedSentence addExample(const Example &example)
Definition: concordia.cpp:92
Definition: concordia.hpp:38
std::vector< AnubisSearchResult > anubisSearch(const std::string &pattern)
Definition: concordia.cpp:244
Definition: tokenized_sentence.hpp:26
TokenizedSentence tokenize(const std::string &sentence, bool byWhitespace=false, bool generateCodes=true)
Definition: concordia.cpp:52
std::string & getVersion()
Definition: concordia.cpp:37
std::vector< TokenizedSentence > tokenizeAll(const std::vector< std::string > &sentences, bool byWhitespace=false, bool generateCodes=true)
Definition: concordia.cpp:66
Definition: matched_pattern_fragment.hpp:21
void refreshSAfromRAM()
Definition: concordia.cpp:176
void clearIndex()
Definition: concordia.cpp:270
void loadRAMIndexFromDisk()
Definition: concordia.cpp:125
void addAllTokenizedExamples(const std::vector< TokenizedSentence > &tokenizedSentences, const std::vector< SUFFIX_MARKER_TYPE > &ids)
Definition: concordia.cpp:108
boost::shared_ptr< ConcordiaSearchResult > concordiaSearch(const std::string &pattern, bool byWhitespace=false)
Definition: concordia.cpp:256
Concordia()
Definition: concordia.cpp:18
Definition: example.hpp:14
std::vector< TokenizedSentence > addAllExamples(const std::vector< Example > &examples)
Definition: concordia.cpp:119
MatchedPatternFragment simpleSearch(const std::string &pattern, bool byWhitespace=false)
Definition: concordia.cpp:213