Concordia
concordia_index.hpp
1 #ifndef CONCORDIA_INDEX_HDR
2 #define CONCORDIA_INDEX_HDR
3 
4 #include <boost/shared_ptr.hpp>
5 #include <fstream>
6 #include <iostream>
7 #include <sstream>
8 #include <vector>
9 
10 #include "concordia/common/config.hpp"
11 #include "concordia/example.hpp"
12 #include "concordia/hash_generator.hpp"
13 #include "concordia/concordia_exception.hpp"
14 #include "concordia/tokenized_sentence.hpp"
15 #include <divsufsort.h>
16 
27 class ConcordiaIndex {
28 public:
/// Constructs an index object from two file paths.
/// @param hashedIndexFilePath path of the hashed index file (per the name)
/// @param markersFilePath path of the markers file (per the name)
/// @throws ConcordiaException as declared by the exception specification
/// NOTE(review): whether the files are opened here or the paths are only
/// stored is decided in concordia_index.cpp -- confirm there.
/// NOTE(review): throw(...) is a dynamic exception specification, which is
/// deprecated since C++11 and removed in C++17; the definition must carry
/// the same specification, so change both together if the standard is raised.
34  explicit ConcordiaIndex(const std::string & hashedIndexFilePath,
35  const std::string & markersFilePath)
36  throw(ConcordiaException);
37 
/// Destructor; declared virtual so objects of derived classes can be
/// destroyed through a ConcordiaIndex pointer.
40  virtual ~ConcordiaIndex();
41 
56  TokenizedSentence addExample(
57  boost::shared_ptr<HashGenerator> hashGenerator,
58  boost::shared_ptr<std::vector<sauchar_t> > T,
59  boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
60  const Example & example);
61 
75  void addTokenizedExample(
76  boost::shared_ptr<HashGenerator> hashGenerator,
77  boost::shared_ptr<std::vector<sauchar_t> > T,
78  boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
79  const TokenizedSentence & tokenizedSentence,
80  const SUFFIX_MARKER_TYPE id);
81 
95  void addAllTokenizedExamples(
96  boost::shared_ptr<HashGenerator> hashGenerator,
97  boost::shared_ptr<std::vector<sauchar_t> > T,
98  boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
99  const std::vector<TokenizedSentence> & tokenizedSentences,
100  const std::vector<SUFFIX_MARKER_TYPE> & ids);
101 
/// Batch counterpart of addExample: takes a vector of Example objects and
/// returns a vector of TokenizedSentence.
/// @param hashGenerator shared hash generator
/// @param T shared vector of sauchar_t (the same type generateSuffixArray
///        takes as input)
/// @param markers shared vector of SUFFIX_MARKER_TYPE values
/// @param examples the examples to process
/// @return vector of TokenizedSentence
/// NOTE(review): presumably one result per input example, with T and markers
/// extended in place -- confirm against concordia_index.cpp.
116  std::vector<TokenizedSentence> addAllExamples(
117  boost::shared_ptr<HashGenerator> hashGenerator,
118  boost::shared_ptr<std::vector<sauchar_t> > T,
119  boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
120  const std::vector<Example> & examples);
121 
/// Produces a suffix array for the given data.
/// @param T shared vector of sauchar_t to build the suffix array from
/// @return shared pointer to a vector of saidx_t
/// NOTE(review): sauchar_t/saidx_t come from <divsufsort.h>, so this is
/// presumably a wrapper around libdivsufsort -- confirm in the .cpp.
126  boost::shared_ptr<std::vector<saidx_t> > generateSuffixArray(
127  boost::shared_ptr<std::vector<sauchar_t> > T);
128 
129 private:
/// Private helper that handles one already-tokenized sentence; unlike the
/// public methods it also receives the two output streams explicitly.
/// @param hashedIndexFile output stream for the hashed index file
/// @param markersFile output stream for the markers file
/// @param hashGenerator shared hash generator
/// @param T shared vector of sauchar_t
/// @param markers shared vector of SUFFIX_MARKER_TYPE values
/// @param tokenizedSentence the sentence to add
/// @param id marker value associated with the sentence
/// NOTE(review): presumably the caller opens the streams once and reuses
/// them across many calls -- confirm against concordia_index.cpp.
130  void _addSingleTokenizedExample(
131  std::ofstream & hashedIndexFile,
132  std::ofstream & markersFile,
133  boost::shared_ptr<HashGenerator> hashGenerator,
134  boost::shared_ptr<std::vector<sauchar_t> > T,
135  boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
136  const TokenizedSentence & tokenizedSentence,
137  const SUFFIX_MARKER_TYPE id);
138 
/// Private helper that handles one Example and returns a TokenizedSentence;
/// it receives the two output streams explicitly.
/// @param hashedIndexFile output stream for the hashed index file
/// @param markersFile output stream for the markers file
/// @param hashGenerator shared hash generator
/// @param T shared vector of sauchar_t
/// @param markers shared vector of SUFFIX_MARKER_TYPE values
/// @param example the example to add
/// @return a TokenizedSentence (presumably the tokenized form of the
///         example -- confirm in concordia_index.cpp)
139  TokenizedSentence _addSingleExample(
140  std::ofstream & hashedIndexFile,
141  std::ofstream & markersFile,
142  boost::shared_ptr<HashGenerator> hashGenerator,
143  boost::shared_ptr<std::vector<sauchar_t> > T,
144  boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
145  const Example & example);
146 
/// Path of the hashed index file (presumably the value passed to the
/// constructor's hashedIndexFilePath parameter -- confirm in the .cpp).
147  std::string _hashedIndexFilePath;
148 
/// Path of the markers file (presumably the value passed to the
/// constructor's markersFilePath parameter -- confirm in the .cpp).
149  std::string _markersFilePath;
150 };
151 
152 #endif
ConcordiaIndex(const std::string &hashedIndexFilePath, const std::string &markersFilePath)
Definition: concordia_index.cpp:12
Definition: concordia_exception.hpp:11
std::vector< TokenizedSentence > addAllExamples(boost::shared_ptr< HashGenerator > hashGenerator, boost::shared_ptr< std::vector< sauchar_t > > T, boost::shared_ptr< std::vector< SUFFIX_MARKER_TYPE > > markers, const std::vector< Example > &examples)
Definition: concordia_index.cpp:39
TokenizedSentence addExample(boost::shared_ptr< HashGenerator > hashGenerator, boost::shared_ptr< std::vector< sauchar_t > > T, boost::shared_ptr< std::vector< SUFFIX_MARKER_TYPE > > markers, const Example &example)
Definition: concordia_index.cpp:66
Definition: tokenized_sentence.hpp:26
boost::shared_ptr< std::vector< saidx_t > > generateSuffixArray(boost::shared_ptr< std::vector< sauchar_t > > T)
Definition: concordia_index.cpp:22
virtual ~ConcordiaIndex()
Definition: concordia_index.cpp:19
Definition: example.hpp:14
Definition: concordia_index.hpp:27
void addTokenizedExample(boost::shared_ptr< HashGenerator > hashGenerator, boost::shared_ptr< std::vector< sauchar_t > > T, boost::shared_ptr< std::vector< SUFFIX_MARKER_TYPE > > markers, const TokenizedSentence &tokenizedSentence, const SUFFIX_MARKER_TYPE id)
Definition: concordia_index.cpp:87
void addAllTokenizedExamples(boost::shared_ptr< HashGenerator > hashGenerator, boost::shared_ptr< std::vector< sauchar_t > > T, boost::shared_ptr< std::vector< SUFFIX_MARKER_TYPE > > markers, const std::vector< TokenizedSentence > &tokenizedSentences, const std::vector< SUFFIX_MARKER_TYPE > &ids)
Definition: concordia_index.cpp:105