00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef OM_HGUARD_INMEMORY_DATABASE_H
00025 #define OM_HGUARD_INMEMORY_DATABASE_H
00026
00027 #include "leafpostlist.h"
00028 #include "termlist.h"
00029 #include "database.h"
00030 #include <map>
00031 #include <vector>
00032 #include <algorithm>
00033 #include <xapian/document.h>
00034 #include "inmemory_positionlist.h"
00035 #include "internaltypes.h"
00036 #include "omassert.h"
00037 #include "noreturn.h"
00038
00039 using namespace std;
00040
00041 struct ValueStats;
00042
00043
00044
00045 class InMemoryPosting {
00046 public:
00047 Xapian::docid did;
00048 bool valid;
00049 vector<Xapian::termpos> positions;
00050 Xapian::termcount wdf;
00051
00052
00053 void merge(const InMemoryPosting & post) {
00054 Assert(did == post.did);
00055
00056 positions.insert(positions.end(),
00057 post.positions.begin(),
00058 post.positions.end());
00059
00060 sort(positions.begin(), positions.end());
00061 }
00062 };
00063
00064 class InMemoryTermEntry {
00065 public:
00066 string tname;
00067 vector<Xapian::termpos> positions;
00068 Xapian::termcount wdf;
00069
00070
00071 void merge(const InMemoryTermEntry & post) {
00072 Assert(tname == post.tname);
00073
00074 positions.insert(positions.end(),
00075 post.positions.begin(),
00076 post.positions.end());
00077
00078 sort(positions.begin(), positions.end());
00079 }
00080 };
00081
00082
00083 class InMemoryPostingLessThan {
00084 public:
00085 int operator() (const InMemoryPosting &p1, const InMemoryPosting &p2)
00086 {
00087 return p1.did < p2.did;
00088 }
00089 };
00090
00091
00092 class InMemoryTermEntryLessThan {
00093 public:
00094 int operator() (const InMemoryTermEntry&p1, const InMemoryTermEntry&p2)
00095 {
00096 return p1.tname < p2.tname;
00097 }
00098 };
00099
00100
00101 class InMemoryTerm {
00102 public:
00103
00104 vector<InMemoryPosting> docs;
00105
00106 Xapian::termcount term_freq;
00107 Xapian::termcount collection_freq;
00108
00109 InMemoryTerm() : term_freq(0), collection_freq(0) {}
00110
00111 void add_posting(const InMemoryPosting & post);
00112 };
00113
00115 class InMemoryDoc {
00116 public:
00117 bool is_valid;
00118
00119 vector<InMemoryTermEntry> terms;
00120
00121
00122
00123 InMemoryDoc() : is_valid(false) {}
00124
00125
00126 InMemoryDoc(bool is_valid_) : is_valid(is_valid_) {}
00127
00128 void add_posting(const InMemoryTermEntry & post);
00129 };
00130
00131 class InMemoryDatabase;
00132
00135 class InMemoryPostList : public LeafPostList {
00136 friend class InMemoryDatabase;
00137 private:
00138 vector<InMemoryPosting>::const_iterator pos;
00139 vector<InMemoryPosting>::const_iterator end;
00140 Xapian::doccount termfreq;
00141 bool started;
00142
00146 InMemoryPositionList mypositions;
00147
00148 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00149
00150 InMemoryPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db,
00151 const InMemoryTerm & imterm, const std::string & term_);
00152 public:
00153 Xapian::doccount get_termfreq() const;
00154
00155 Xapian::docid get_docid() const;
00156 Xapian::termcount get_doclength() const;
00157 Xapian::termcount get_wdf() const;
00158 PositionList * read_position_list();
00159 PositionList * open_position_list() const;
00160
00161 PostList *next(Xapian::weight w_min);
00162
00163 PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
00164
00165
00166 bool at_end() const;
00167
00168 string get_description() const;
00169 };
00170
00173 class InMemoryAllDocsPostList : public LeafPostList {
00174 friend class InMemoryDatabase;
00175 private:
00176 Xapian::docid did;
00177
00178 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00179
00180 InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db);
00181 public:
00182 Xapian::doccount get_termfreq() const;
00183
00184 Xapian::docid get_docid() const;
00185 Xapian::termcount get_doclength() const;
00186 Xapian::termcount get_wdf() const;
00187 PositionList * read_position_list();
00188 PositionList * open_position_list() const;
00189
00190 PostList *next(Xapian::weight w_min);
00191
00192 PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
00193
00194
00195 bool at_end() const;
00196
00197 string get_description() const;
00198 };
00199
00200
00201 class InMemoryTermList : public TermList {
00202 friend class InMemoryDatabase;
00203 private:
00204 vector<InMemoryTermEntry>::const_iterator pos;
00205 vector<InMemoryTermEntry>::const_iterator end;
00206 Xapian::termcount terms;
00207 bool started;
00208
00209 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
00210 Xapian::docid did;
00211 Xapian::termcount document_length;
00212
00213 InMemoryTermList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db,
00214 Xapian::docid did,
00215 const InMemoryDoc & doc,
00216 Xapian::termcount len);
00217 public:
00218 Xapian::termcount get_approx_size() const;
00219
00221 void accumulate_stats(Xapian::Internal::ExpandStats & stats) const;
00222
00223 string get_termname() const;
00224 Xapian::termcount get_wdf() const;
00225 Xapian::doccount get_termfreq() const;
00226 TermList * next();
00227 TermList * skip_to(const std::string & term);
00228 bool at_end() const;
00229 Xapian::termcount positionlist_count() const;
00230 Xapian::PositionIterator positionlist_begin() const;
00231 };
00232
00233 class InMemoryDocument;
00234
00239 class InMemoryDatabase : public Xapian::Database::Internal {
00240 friend class InMemoryAllDocsPostList;
00241 friend class InMemoryDocument;
00242
00243 map<string, InMemoryTerm> postlists;
00244 vector<InMemoryDoc> termlists;
00245 vector<std::string> doclists;
00246 vector<std::map<Xapian::valueno, string> > valuelists;
00247 std::map<Xapian::valueno, ValueStats> valuestats;
00248
00249 vector<Xapian::termcount> doclengths;
00250
00251 std::map<string, string> metadata;
00252
00253 Xapian::doccount totdocs;
00254
00255 totlen_t totlen;
00256
00257 bool positions_present;
00258
00259
00260 bool closed;
00261
00262
00263 InMemoryDatabase& operator=(const InMemoryDatabase &);
00264 InMemoryDatabase(const InMemoryDatabase &);
00265
00266 void make_term(const string & tname);
00267
00268 bool doc_exists(Xapian::docid did) const;
00269 Xapian::docid make_doc(const string & docdata);
00270
00271
00272 void finish_add_doc(Xapian::docid did, const Xapian::Document &document);
00273 void add_values(Xapian::docid did, const map<Xapian::valueno, string> &values_);
00274
00275 void make_posting(InMemoryDoc * doc,
00276 const string & tname,
00277 Xapian::docid did,
00278 Xapian::termpos position,
00279 Xapian::termcount wdf,
00280 bool use_position = true);
00281
00283
00285 void commit();
00286 void cancel();
00287
00288 Xapian::docid add_document(const Xapian::Document & document);
00289
00290
00291
00292
00293
00294 #ifndef _MSC_VER
00295 using Xapian::Database::Internal::delete_document;
00296 using Xapian::Database::Internal::replace_document;
00297 #endif
00298 void delete_document(Xapian::docid did);
00299 void replace_document(Xapian::docid did, const Xapian::Document & document);
00301
00302 public:
00307 InMemoryDatabase();
00308
00309 ~InMemoryDatabase();
00310
00311 void reopen();
00312 void close();
00313 bool is_closed() const { return closed; }
00314
00315 Xapian::doccount get_doccount() const;
00316
00317 Xapian::docid get_lastdocid() const;
00318
00319 totlen_t get_total_length() const;
00320 Xapian::doclength get_avlength() const;
00321 Xapian::termcount get_doclength(Xapian::docid did) const;
00322
00323 Xapian::doccount get_termfreq(const string & tname) const;
00324 Xapian::termcount get_collection_freq(const string & tname) const;
00325 Xapian::doccount get_value_freq(Xapian::valueno slot) const;
00326 std::string get_value_lower_bound(Xapian::valueno slot) const;
00327 std::string get_value_upper_bound(Xapian::valueno slot) const;
00328 bool term_exists(const string & tname) const;
00329 bool has_positions() const;
00330
00331 LeafPostList * open_post_list(const string & tname) const;
00332 TermList * open_term_list(Xapian::docid did) const;
00333 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
00334
00335 std::string get_metadata(const std::string & key) const;
00336 TermList * open_metadata_keylist(const std::string &prefix) const;
00337 void set_metadata(const std::string & key, const std::string & value);
00338
00339 Xapian::termcount positionlist_count(Xapian::docid did,
00340 const string & tname) const;
00341 PositionList * open_position_list(Xapian::docid did,
00342 const string & tname) const;
00343 TermList * open_allterms(const string & prefix) const;
00344
00345 XAPIAN_NORETURN(static void throw_database_closed());
00346 };
00347
00348 #endif