00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef OM_HGUARD_INMEMORY_DATABASE_H
00025 #define OM_HGUARD_INMEMORY_DATABASE_H
00026
00027 #include "leafpostlist.h"
00028 #include "termlist.h"
00029 #include "database.h"
00030 #include <map>
00031 #include <vector>
00032 #include <algorithm>
00033 #include <xapian/document.h>
00034 #include "inmemory_positionlist.h"
00035 #include <omassert.h>
00036
00037 using namespace std;
00038
00039
00040
00041 class InMemoryPosting {
00042 public:
00043 Xapian::docid did;
00044 bool valid;
00045 vector<Xapian::termpos> positions;
00046 Xapian::termcount wdf;
00047
00048
00049 void merge(const InMemoryPosting & post) {
00050 Assert(did == post.did);
00051
00052 positions.insert(positions.end(),
00053 post.positions.begin(),
00054 post.positions.end());
00055
00056 sort(positions.begin(), positions.end());
00057 }
00058 };
00059
00060 class InMemoryTermEntry {
00061 public:
00062 string tname;
00063 vector<Xapian::termpos> positions;
00064 Xapian::termcount wdf;
00065
00066
00067 void merge(const InMemoryTermEntry & post) {
00068 Assert(tname == post.tname);
00069
00070 positions.insert(positions.end(),
00071 post.positions.begin(),
00072 post.positions.end());
00073
00074 sort(positions.begin(), positions.end());
00075 }
00076 };
00077
00078
00079 class InMemoryPostingLessThan {
00080 public:
00081 int operator() (const InMemoryPosting &p1, const InMemoryPosting &p2)
00082 {
00083 return p1.did < p2.did;
00084 }
00085 };
00086
00087
00088 class InMemoryTermEntryLessThan {
00089 public:
00090 int operator() (const InMemoryTermEntry&p1, const InMemoryTermEntry&p2)
00091 {
00092 return p1.tname < p2.tname;
00093 }
00094 };
00095
00096
00097 class InMemoryTerm {
00098 public:
00099
00100 vector<InMemoryPosting> docs;
00101
00102 Xapian::termcount term_freq;
00103 Xapian::termcount collection_freq;
00104
00105 InMemoryTerm() : term_freq(0), collection_freq(0) {}
00106
00107 void add_posting(const InMemoryPosting & post);
00108 };
00109
00111 class InMemoryDoc {
00112 public:
00113 bool is_valid;
00114
00115 vector<InMemoryTermEntry> terms;
00116
00117
00118
00119 InMemoryDoc() : is_valid(false) {}
00120
00121
00122 InMemoryDoc(bool is_valid_) : is_valid(is_valid_) {}
00123
00124 void add_posting(const InMemoryTermEntry & post);
00125 };
00126
00127 class InMemoryDatabase;
00128
00131 class InMemoryPostList : public LeafPostList {
00132 friend class InMemoryDatabase;
00133 private:
00134 vector<InMemoryPosting>::const_iterator pos;
00135 vector<InMemoryPosting>::const_iterator end;
00136 Xapian::doccount termfreq;
00137 bool started;
00138
00142 InMemoryPositionList mypositions;
00143
00144 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00145
00146 InMemoryPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db,
00147 const InMemoryTerm & imterm);
00148 public:
00149 Xapian::doccount get_termfreq() const;
00150
00151 Xapian::docid get_docid() const;
00152 Xapian::doclength get_doclength() const;
00153 Xapian::termcount get_wdf() const;
00154 PositionList * read_position_list();
00155 PositionList * open_position_list() const;
00156
00157 PostList *next(Xapian::weight w_min);
00158
00159 PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
00160
00161
00162 bool at_end() const;
00163
00164 string get_description() const;
00165 };
00166
00169 class InMemoryAllDocsPostList : public LeafPostList {
00170 friend class InMemoryDatabase;
00171 private:
00172 Xapian::docid did;
00173
00174 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00175
00176 InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db);
00177 public:
00178 Xapian::doccount get_termfreq() const;
00179
00180 Xapian::docid get_docid() const;
00181 Xapian::doclength get_doclength() const;
00182 Xapian::termcount get_wdf() const;
00183 PositionList * read_position_list();
00184 PositionList * open_position_list() const;
00185
00186 PostList *next(Xapian::weight w_min);
00187
00188 PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
00189
00190
00191 bool at_end() const;
00192
00193 string get_description() const;
00194 };
00195
00196
00197 class InMemoryTermList : public TermList {
00198 friend class InMemoryDatabase;
00199 private:
00200 vector<InMemoryTermEntry>::const_iterator pos;
00201 vector<InMemoryTermEntry>::const_iterator end;
00202 Xapian::termcount terms;
00203 bool started;
00204
00205 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00206 Xapian::docid did;
00207 Xapian::termcount document_length;
00208
00209 InMemoryTermList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db,
00210 Xapian::docid did,
00211 const InMemoryDoc & doc,
00212 Xapian::termcount len);
00213 public:
00214 Xapian::termcount get_approx_size() const;
00215
00217 void accumulate_stats(Xapian::Internal::ExpandStats & stats) const;
00218
00219 string get_termname() const;
00220 Xapian::termcount get_wdf() const;
00221 Xapian::doccount get_termfreq() const;
00222 TermList * next();
00223 bool at_end() const;
00224 Xapian::termcount positionlist_count() const;
00225 Xapian::PositionIterator positionlist_begin() const;
00226 };
00227
00232 class InMemoryDatabase : public Xapian::Database::Internal {
00233 friend class InMemoryAllDocsPostList;
00234
00235 map<string, InMemoryTerm> postlists;
00236 vector<InMemoryDoc> termlists;
00237 vector<std::string> doclists;
00238 vector<std::map<Xapian::valueno, string> > valuelists;
00239
00240 vector<Xapian::termcount> doclengths;
00241
00242 std::map<string, string> metadata;
00243
00244 Xapian::doccount totdocs;
00245
00246 Xapian::doclength totlen;
00247
00248 bool positions_present;
00249
00250
00251 InMemoryDatabase& operator=(const InMemoryDatabase &);
00252 InMemoryDatabase(const InMemoryDatabase &);
00253
00254 void make_term(const string & tname);
00255
00256 bool doc_exists(Xapian::docid did) const;
00257 Xapian::docid make_doc(const string & docdata);
00258
00259
00260 void finish_add_doc(Xapian::docid did, const Xapian::Document &document);
00261 void add_values(Xapian::docid did, const map<Xapian::valueno, string> &values_);
00262
00263 void make_posting(InMemoryDoc * doc,
00264 const string & tname,
00265 Xapian::docid did,
00266 Xapian::termpos position,
00267 Xapian::termcount wdf,
00268 bool use_position = true);
00269
00271
00273 void flush();
00274 void cancel();
00275
00276 Xapian::docid add_document(const Xapian::Document & document);
00277
00278
00279
00280
00281
00282 #if (!defined __GNUC__ && !defined _MSC_VER) || __GNUC__ > 2
00283 using Xapian::Database::Internal::delete_document;
00284 using Xapian::Database::Internal::replace_document;
00285 #endif
00286 void delete_document(Xapian::docid did);
00287 void replace_document(Xapian::docid did, const Xapian::Document & document);
00289
00290 public:
00295 InMemoryDatabase();
00296
00297 ~InMemoryDatabase();
00298
00299 Xapian::doccount get_doccount() const;
00300
00301 Xapian::docid get_lastdocid() const;
00302
00303 Xapian::doclength get_avlength() const;
00304 Xapian::doclength get_doclength(Xapian::docid did) const;
00305
00306 Xapian::doccount get_termfreq(const string & tname) const;
00307 Xapian::termcount get_collection_freq(const string & tname) const;
00308 bool term_exists(const string & tname) const;
00309 bool has_positions() const;
00310
00311 LeafPostList * open_post_list(const string & tname) const;
00312 TermList * open_term_list(Xapian::docid did) const;
00313 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
00314
00315 std::string get_metadata(const std::string & key) const;
00316 TermList * open_metadata_keylist(const std::string &prefix) const;
00317 void set_metadata(const std::string & key, const std::string & value);
00318
00319 Xapian::termcount positionlist_count(Xapian::docid did,
00320 const string & tname) const;
00321 PositionList * open_position_list(Xapian::docid did,
00322 const string & tname) const;
00323 TermList * open_allterms(const string & prefix) const;
00324 };
00325
00326 #endif