xapian-core  1.4.19
inmemory_database.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2014,2017 Olly Betts
7  * Copyright 2006,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "inmemory_database.h"
28 
29 #include "debuglog.h"
30 
31 #include "expand/expandweight.h"
32 #include "inmemory_document.h"
33 #include "inmemory_alltermslist.h"
34 #include "str.h"
35 #include "backends/valuestats.h"
36 
37 #include <algorithm>
38 #include <string>
39 #include <vector>
40 #include <map>
41 
42 #include <xapian/error.h>
43 #include <xapian/valueiterator.h>
44 
45 using std::make_pair;
47 
48 inline void
50 {
51  // Add document to right place in list
52  vector<InMemoryPosting>::iterator p;
53  p = lower_bound(docs.begin(), docs.end(),
54  post, InMemoryPostingLessThan());
55  if (p == docs.end() || InMemoryPostingLessThan()(post, *p)) {
56  docs.insert(p, post);
57  } else if (!p->valid) {
58  *p = post;
59  } else {
60  (*p).merge(post);
61  }
62 }
63 
64 inline void
66 {
67  // Add document to right place in list
68  vector<InMemoryTermEntry>::iterator p;
69  p = lower_bound(terms.begin(), terms.end(),
71  if (p == terms.end() || InMemoryTermEntryLessThan()(post, *p)) {
72  terms.insert(p, post);
73  } else {
74  (*p).merge(post);
75  }
76 }
77 
79 // Postlist //
81 
83  const InMemoryTerm & imterm,
84  const std::string & term_)
85  : LeafPostList(term_),
86  pos(imterm.docs.begin()),
87  end(imterm.docs.end()),
88  termfreq(imterm.term_freq),
89  started(false),
90  db(db_),
91  wdf_upper_bound(0)
92 {
93  while (pos != end && !pos->valid) ++pos;
94  if (pos != end) {
95  auto first_wdf = (*pos).wdf;
96  wdf_upper_bound = max(first_wdf, imterm.collection_freq - first_wdf);
97  }
98 }
99 
102 {
103  return termfreq;
104 }
105 
108 {
110  Assert(started);
111  Assert(!at_end());
112  return (*pos).did;
113 }
114 
115 PostList *
116 InMemoryPostList::next(double /*w_min*/)
117 {
119  if (started) {
120  Assert(!at_end());
121  ++pos;
122  while (pos != end && !pos->valid) ++pos;
123  } else {
124  started = true;
125  }
126  return NULL;
127 }
128 
129 PostList *
131 {
133  // FIXME - see if we can make this more efficient, perhaps using a better
134  // data structure. Note, though, that a binary search of
135  // the remaining list may NOT be a good idea: search time is then
136  // O(log {length of list}), as opposed to O(distance we want to skip).
137  // Since we will frequently only be skipping a short distance, this
138  // could well be worse.
139 
140  // If we've not started, it's OK to call skip_to().
141  Assert(!at_end() || !started);
142  started = true;
143  while (!at_end() && (*pos).did < did) {
144  (void) next(w_min);
145  }
146  return NULL;
147 }
148 
149 bool
151 {
153  return (pos == end);
154 }
155 
156 string
158 {
159  return "InMemoryPostList " + str(termfreq);
160 }
161 
164 {
166  return db->get_doclength(get_docid());
167 }
168 
171 {
172  return db->get_unique_terms(get_docid());
173 }
174 
175 PositionList *
177 {
179  mypositions.set_data(pos->positions);
180  return &mypositions;
181 }
182 
183 PositionList *
185 {
187  return new InMemoryPositionList(pos->positions);
188 }
189 
192 {
194  return (*pos).wdf;
195 }
196 
199 {
201  return wdf_upper_bound;
202 }
203 
205 // Termlist //
207 
209  Xapian::docid did_,
210  const InMemoryDoc & doc,
211  Xapian::termcount len)
212  : pos(doc.terms.begin()), end(doc.terms.end()), terms(doc.terms.size()),
213  started(false), db(db_), did(did_), document_length(len)
214 {
215  LOGLINE(DB, "InMemoryTermList::InMemoryTermList(): " <<
216  terms << " terms starting from " << pos->tname);
217 }
218 
221 {
223  Assert(started);
224  Assert(!at_end());
225  return (*pos).wdf;
226 }
227 
230 {
232  Assert(started);
233  Assert(!at_end());
234 
235  Xapian::doccount tf;
236  db->get_freqs((*pos).tname, &tf, NULL);
237  return tf;
238 }
239 
242 {
244  return terms;
245 }
246 
247 void
249 {
251  Assert(started);
252  Assert(!at_end());
253  stats.accumulate(shard_index,
256  db->get_doccount());
257 }
258 
259 string
261 {
263  Assert(started);
264  Assert(!at_end());
265  return (*pos).tname;
266 }
267 
268 TermList *
270 {
272  if (started) {
273  Assert(!at_end());
274  ++pos;
275  } else {
276  started = true;
277  }
278  return NULL;
279 }
280 
281 TermList *
282 InMemoryTermList::skip_to(const string & term)
283 {
284  if (rare(db->is_closed()))
286 
287  while (pos != end && pos->tname < term) {
288  ++pos;
289  }
290 
291  started = true;
292  return NULL;
293 }
294 
295 bool
297 {
299  Assert(started);
300  return (pos == end);
301 }
302 
305 {
307  return db->positionlist_count(did, (*pos).tname);
308 }
309 
312 {
314  return Xapian::PositionIterator(db->open_position_list(did, (*pos).tname));
315 }
316 
318 // InMemoryAllDocsPostList //
320 
322  : LeafPostList(std::string()), did(0), db(db_)
323 {
324 }
325 
328 {
330  return db->totdocs;
331 }
332 
335 {
337  Assert(did > 0);
338  Assert(did <= db->termlists.size());
339  Assert(db->termlists[did - 1].is_valid);
340  return did;
341 }
342 
345 {
347  return db->get_doclength(did);
348 }
349 
352 {
353  return db->get_unique_terms(did);
354 }
355 
358 {
359  return 1;
360 }
361 
362 PositionList *
364 {
365  throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
366 }
367 
368 PositionList *
370 {
371  throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
372 }
373 
374 PostList *
376 {
378  Assert(!at_end());
379  do {
380  ++did;
381  } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid);
382  return NULL;
383 }
384 
385 PostList *
387 {
389  Assert(!at_end());
390  if (did <= did_) {
391  did = did_;
392  while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) {
393  ++did;
394  }
395  }
396  return NULL;
397 }
398 
399 bool
401 {
403  return (did > db->termlists.size());
404 }
405 
408 {
409  return 1;
410 }
411 
412 string
414 {
415  return "InMemoryAllDocsPostList " + str(did);
416 }
417 
419 // Actual database class //
421 
423  : totdocs(0), totlen(0), positions_present(false), closed(false)
424 {
425  // Updates are applied immediately so we can't support transactions.
427 
428  // We keep an empty entry in postlists for convenience of implementing
429  // allterms iteration and returning a PostList for an absent term.
430  postlists.insert(make_pair(string(), InMemoryTerm()));
431 }
432 
434 {
435  dtor_called();
436 }
437 
438 bool
440 {
442  return false;
443 }
444 
445 void
447 {
448  // Free all the resources, and mark the db as closed.
449  postlists.clear();
450  termlists.clear();
451  doclists.clear();
452  valuelists.clear();
453  valuestats.clear();
454  doclengths.clear();
455  metadata.clear();
456  closed = true;
457 }
458 
459 LeafPostList *
460 InMemoryDatabase::open_post_list(const string & tname) const
461 {
463  if (tname.empty()) {
465  return new InMemoryAllDocsPostList(ptrtothis);
466  }
467  map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
468  if (i == postlists.end() || i->second.term_freq == 0) {
469  i = postlists.begin();
470  // Check that our dummy entry for string() is present.
471  Assert(i->first.empty());
472  }
474  return new InMemoryPostList(ptrtothis, i->second, tname);
475 }
476 
477 bool
479 {
481  return (did > 0 && did <= termlists.size() && termlists[did - 1].is_valid);
482 }
483 
484 void
485 InMemoryDatabase::get_freqs(const string & term,
486  Xapian::doccount * termfreq_ptr,
487  Xapian::termcount * collfreq_ptr) const
488 {
490  map<string, InMemoryTerm>::const_iterator i = postlists.find(term);
491  if (i != postlists.end()) {
492  if (termfreq_ptr)
493  *termfreq_ptr = i->second.term_freq;
494  if (collfreq_ptr)
495  *collfreq_ptr = i->second.collection_freq;
496  } else {
497  if (termfreq_ptr)
498  *termfreq_ptr = 0;
499  if (collfreq_ptr)
500  *collfreq_ptr = 0;
501  }
502 }
503 
506 {
508  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
509  if (i == valuestats.end()) return 0;
510  return i->second.freq;
511 }
512 
513 std::string
515 {
517  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
518  if (i == valuestats.end()) return string();
519  return i->second.lower_bound;
520 }
521 
522 std::string
524 {
526  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
527  if (i == valuestats.end()) return string();
528  return i->second.upper_bound;
529 }
530 
533 {
535  return totdocs;
536 }
537 
540 {
542  return termlists.size();
543 }
544 
547 {
548  return totlen;
549 }
550 
553 {
555  if (!doc_exists(did)) {
556  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
557  string(" not found"));
558  }
559  return doclengths[did - 1];
560 }
561 
564 {
566  if (did == 0 || did > termlists.size() || !termlists[did - 1].is_valid)
567  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
568  string(" not found"));
569  // get_unique_terms() really ought to only count terms with wdf > 0, but
570  // that's expensive to calculate on demand, so for now let's just ensure
571  // unique_terms <= doclen.
572  Xapian::termcount terms = termlists[did - 1].terms.size();
573  return std::min(terms, Xapian::termcount(doclengths[did - 1]));
574 }
575 
576 TermList *
578 {
580  Assert(did != 0);
581  if (!doc_exists(did)) {
582  // FIXME: the docid in this message will be local, not global
583  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
584  string(" not found"));
585  }
587  termlists[did - 1], doclengths[did - 1]);
588 }
589 
592 {
594  Assert(did != 0);
595  if (!lazy && !doc_exists(did)) {
596  // FIXME: the docid in this message will be local, not global
597  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
598  string(" not found"));
599  }
600  return new InMemoryDocument(this, did);
601 }
602 
603 std::string
604 InMemoryDatabase::get_metadata(const std::string & key) const
605 {
607  map<string, string>::const_iterator i = metadata.find(key);
608  if (i == metadata.end())
609  return string();
610  return i->second;
611 }
612 
613 TermList *
615 {
617  if (metadata.empty()) return NULL;
618  // FIXME: nobody implemented this yet...
619  throw Xapian::UnimplementedError("InMemory backend doesn't currently implement Database::metadata_keys_begin()");
620 }
621 
622 void
623 InMemoryDatabase::set_metadata(const std::string & key,
624  const std::string & value)
625 {
627  if (!value.empty()) {
628  metadata[key] = value;
629  } else {
630  metadata.erase(key);
631  }
632 }
633 
636  const string & tname) const
637 {
639  if (!doc_exists(did)) {
640  return 0;
641  }
642  const InMemoryDoc &doc = termlists[did - 1];
643 
644  InMemoryTermEntry temp;
645  temp.tname = tname;
646  auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
647  temp, InMemoryTermEntryLessThan());
648  if (t != doc.terms.end() && t->tname == tname) {
649  return t->positions.size();
650  }
651  return 0;
652 }
653 
654 PositionList *
656  const string & tname) const
657 {
659  if (usual(doc_exists(did))) {
660  const InMemoryDoc &doc = termlists[did - 1];
661 
662  InMemoryTermEntry temp;
663  temp.tname = tname;
664  auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
665  temp, InMemoryTermEntryLessThan());
666  if (t != doc.terms.end() && t->tname == tname) {
667  return new InMemoryPositionList(t->positions);
668  }
669  }
670  return new InMemoryPositionList(false);
671 }
672 
673 void
675  const map<Xapian::valueno, string> &values_)
676 {
678  if (did > valuelists.size()) {
679  valuelists.resize(did);
680  }
681  valuelists[did - 1] = values_;
682 
683  // Update the statistics.
684  map<Xapian::valueno, string>::const_iterator j;
685  for (j = values_.begin(); j != values_.end(); ++j) {
686  std::pair<map<Xapian::valueno, ValueStats>::iterator, bool> i;
687  i = valuestats.insert(make_pair(j->first, ValueStats()));
688 
689  // Now, modify the stored statistics.
690  if ((i.first->second.freq)++ == 0) {
691  // If the value count was previously zero, set the upper and lower
692  // bounds to the newly added value.
693  i.first->second.lower_bound = j->second;
694  i.first->second.upper_bound = j->second;
695  } else {
696  // Otherwise, simply make sure they reflect the new value.
697  if (j->second < i.first->second.lower_bound) {
698  i.first->second.lower_bound = j->second;
699  }
700  if (j->second > i.first->second.upper_bound) {
701  i.first->second.upper_bound = j->second;
702  }
703  }
704  }
705 }
706 
707 // We implicitly commit each modification right away, so nothing to do here.
708 void
710 {
711 }
712 
713 // We implicitly commit each modification right away, so nothing to do here.
714 void
716 {
717 }
718 
719 void
721 {
723  if (!doc_exists(did)) {
724  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
725  string(" not found"));
726  }
727  termlists[did - 1].is_valid = false;
728  doclists[did - 1] = string();
729  map<Xapian::valueno, string>::const_iterator j;
730  for (j = valuelists[did - 1].begin(); j != valuelists[did - 1].end(); ++j) {
731  map<Xapian::valueno, ValueStats>::iterator i;
732  i = valuestats.find(j->first);
733  if (--(i->second.freq) == 0) {
734  i->second.lower_bound.resize(0);
735  i->second.upper_bound.resize(0);
736  }
737  }
738  valuelists[did - 1].clear();
739 
740  totlen -= doclengths[did - 1];
741  doclengths[did - 1] = 0;
742  totdocs--;
743  // A crude check, but it's hard to be more precise with the current
744  // InMemory structure without being very inefficient.
745  if (totdocs == 0) positions_present = false;
746 
747  vector<InMemoryTermEntry>::const_iterator i;
748  for (i = termlists[did - 1].terms.begin();
749  i != termlists[did - 1].terms.end();
750  ++i) {
751  map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
752  Assert(t != postlists.end());
753  t->second.collection_freq -= i->wdf;
754  --t->second.term_freq;
755 
756  // Just invalidate erased doc ids - otherwise we need to erase
757  // in a vector (inefficient) and we break any posting lists
758  // iterating over this posting list.
759  InMemoryPosting temp;
760  temp.did = did;
761  auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
762  temp, InMemoryPostingLessThan());
763  if (p != t->second.docs.end() && p->did == did) {
764  p->valid = false;
765  }
766  }
767  termlists[did - 1].terms.clear();
768 }
769 
770 void
772  const Xapian::Document & document)
773 {
774  LOGCALL_VOID(DB, "InMemoryDatabase::replace_document", did | document);
775 
777 
778  if (doc_exists(did)) {
779  map<Xapian::valueno, string>::const_iterator j;
780  for (j = valuelists[did - 1].begin(); j != valuelists[did - 1].end(); ++j) {
781  map<Xapian::valueno, ValueStats>::iterator i;
782  i = valuestats.find(j->first);
783  if (--(i->second.freq) == 0) {
784  i->second.lower_bound.resize(0);
785  i->second.upper_bound.resize(0);
786  }
787  }
788 
789  totlen -= doclengths[did - 1];
790  totdocs--;
791  } else if (did > termlists.size()) {
792  termlists.resize(did);
793  termlists[did - 1].is_valid = true;
794  doclengths.resize(did);
795  doclists.resize(did);
796  valuelists.resize(did);
797  } else {
798  termlists[did - 1].is_valid = true;
799  }
800 
801  vector<InMemoryTermEntry>::const_iterator i;
802  for (i = termlists[did - 1].terms.begin();
803  i != termlists[did - 1].terms.end();
804  ++i) {
805  map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
806  Assert(t != postlists.end());
807  t->second.collection_freq -= i->wdf;
808  --t->second.term_freq;
809 
810  // Just invalidate erased doc ids - otherwise we need to erase
811  // in a vector (inefficient) and we break any posting lists
812  // iterating over this posting list.
813  InMemoryPosting temp;
814  temp.did = did;
815  auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
816  temp, InMemoryPostingLessThan());
817  if (p != t->second.docs.end() && p->did == did) {
818  p->valid = false;
819  }
820  }
821 
822  doclengths[did - 1] = 0;
823  doclists[did - 1] = document.get_data();
824 
825  finish_add_doc(did, document);
826 }
827 
830 {
831  LOGCALL(DB, Xapian::docid, "InMemoryDatabase::add_document", document);
833 
834  Xapian::docid did = make_doc(document.get_data());
835 
836  finish_add_doc(did, document);
837 
838  RETURN(did);
839 }
840 
841 void
843 {
844  {
845  map<Xapian::valueno, string> values;
846  Xapian::ValueIterator k = document.values_begin();
847  for ( ; k != document.values_end(); ++k) {
848  values.insert(make_pair(k.get_valueno(), *k));
849  LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding value " <<
850  k.get_valueno() << " -> " << *k);
851  }
852  add_values(did, values);
853  }
854 
855  InMemoryDoc doc(true);
856  Xapian::TermIterator i = document.termlist_begin();
857  for ( ; i != document.termlist_end(); ++i) {
858  make_term(*i);
859 
860  LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding term " << *i);
862  if (j == i.positionlist_end()) {
863  /* Make sure the posting exists, even without a position. */
864  make_posting(&doc, *i, did, 0, i.get_wdf(), false);
865  } else {
866  positions_present = true;
867  for ( ; j != i.positionlist_end(); ++j) {
868  make_posting(&doc, *i, did, *j, i.get_wdf());
869  }
870  }
871 
872  Assert(did > 0 && did <= doclengths.size());
873  doclengths[did - 1] += i.get_wdf();
874  totlen += i.get_wdf();
875  postlists[*i].collection_freq += i.get_wdf();
876  ++postlists[*i].term_freq;
877  }
878  swap(termlists[did - 1], doc);
879 
880  totdocs++;
881 }
882 
883 void
884 InMemoryDatabase::make_term(const string & tname)
885 {
886  postlists[tname]; // Initialise, if not already there.
887 }
888 
890 InMemoryDatabase::make_doc(const string & docdata)
891 {
892  termlists.push_back(InMemoryDoc(true));
893  doclengths.push_back(0);
894  doclists.push_back(docdata);
895 
896  AssertEqParanoid(termlists.size(), doclengths.size());
897 
898  return termlists.size();
899 }
900 
902  const string & tname,
903  Xapian::docid did,
904  Xapian::termpos position,
905  Xapian::termcount wdf,
906  bool use_position)
907 {
908  Assert(doc);
909  Assert(postlists.find(tname) != postlists.end());
910  Assert(did > 0 && did <= termlists.size());
911  Assert(did > 0 && did <= doclengths.size());
912  Assert(doc_exists(did));
913 
914  // Make the posting
915  InMemoryPosting posting;
916  posting.did = did;
917  if (use_position) {
918  posting.positions.push_back(position);
919  }
920  posting.wdf = wdf;
921  posting.valid = true;
922 
923  // Now record the posting
924  postlists[tname].add_posting(posting);
925 
926  // Make the termentry
927  InMemoryTermEntry termentry;
928  termentry.tname = tname;
929  if (use_position) {
930  termentry.positions.push_back(position);
931  }
932  termentry.wdf = wdf;
933 
934  // Now record the termentry
935  doc->add_posting(termentry);
936 }
937 
938 bool
939 InMemoryDatabase::term_exists(const string & tname) const
940 {
942  Assert(!tname.empty());
943  map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
944  if (i == postlists.end()) return false;
945  return (i->second.term_freq != 0);
946 }
947 
948 bool
950 {
952  return positions_present;
953 }
954 
955 TermList *
956 InMemoryDatabase::open_allterms(const string & prefix) const
957 {
959  return new InMemoryAllTermsList(&postlists,
961  prefix);
962 }
963 
964 void
966 {
967  throw Xapian::DatabaseClosedError("Database has been closed");
968 }
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
vector< InMemoryPosting >::const_iterator end
#define RETURN(A)
Definition: debuglog.h:482
#define Assert(COND)
Definition: omassert.h:122
vector< InMemoryTermEntry >::const_iterator end
string get_description() const
Return a string description of this object.
Class to hold statistics for a given slot.
Definition: valuestats.h:29
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
Abstract base class for postlists.
Definition: postlist.h:37
Xapian::docid make_doc(const string &docdata)
Statistics about values.
Indicates an attempt to access a closed database.
Definition: error.h:1097
vector< Xapian::termpos > positions
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Open a document.
Xapian::totallength get_total_length() const
Return the total length of all documents in this database.
void merge(const InMemoryPosting &post)
void make_posting(InMemoryDoc *doc, const string &tname, Xapian::docid did, Xapian::termpos position, Xapian::termcount wdf, bool use_position=true)
bool at_end() const
Return true if the current position is past the last entry in this list.
string get_description() const
Return a string description of this object.
Xapian::termcount wdf_upper_bound
void commit()
Implementation of virtual methods: see Database for details.
#define usual(COND)
Definition: config.h:544
Iterate all terms in an inmemory db.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
friend class InMemoryAllDocsPostList
bool is_closed() const
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139
Xapian::doccount get_termfreq() const
Return the exact term frequency.
ValueIterator values_begin() const
Iterator for the values in this document.
Definition: omdocument.cc:210
Xapian::termcount get_doclength() const
Return the length of current document.
Xapian::totallength totlen
Xapian::termcount get_wdf_upper_bound() const
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
vector< Xapian::termcount > doclengths
void finish_add_doc(Xapian::docid did, const Xapian::Document &document)
A document in the database, possibly plus modifications.
Definition: document.h:41
Class for iterating over document values.
Definition: valueiterator.h:40
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
A document read from an InMemoryDatabase.
Convert types to std::string.
Xapian::termcount terms
void cancel()
Implementation of virtual methods: see Database for details.
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
vector< InMemoryTermEntry >::const_iterator pos
Xapian::docid get_docid() const
Return the current docid.
LeafPostList * open_post_list(const string &tname) const
Open a posting list.
bool has_positions() const
Check whether this database contains any positional information.
TermList * open_allterms(const string &prefix) const
Open an allterms list.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:38
#define false
Definition: header.h:9
Xapian::doccount get_doccount() const
Return the number of docs in this (sub) database.
TermIterator termlist_end() const
Equivalent end iterator for termlist_begin().
Definition: document.h:260
void add_posting(const InMemoryPosting &post)
void accumulate_stats(Xapian::Internal::ExpandStats &stats) const
Collate weighting information for the current term.
vector< Xapian::termpos > positions
#define rare(COND)
Definition: config.h:543
InMemoryTermList(Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db, Xapian::docid did, const InMemoryDoc &doc, Xapian::termcount len)
Xapian::termcount document_length
Xapian::termcount wdf
PositionList * open_position_list(Xapian::docid did, const string &tname) const
Open a position list for the given term in the given document.
enum Xapian::Database::Internal::@2 transaction_state
Transaction state.
Hierarchy of classes which Xapian can throw as exceptions.
vector< InMemoryPosting > docs
Class for iterating over a list of terms.
Definition: termiterator.h:41
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
bool at_end() const
Return true if the current position is past the last entry in this list.
PositionIterator positionlist_end() const
Return an end PositionIterator for the current term.
Definition: termiterator.h:104
Class representing a document and the terms indexing it.
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
Definition: expandweight.h:76
InMemoryPositionList mypositions
List of positions of the current term.
Xapian::docid add_document(const Xapian::Document &document)
Implementation of virtual methods: see Database for details.
Xapian::docid get_docid() const
Return the current docid.
bool doc_exists(Xapian::docid did) const
Xapian::docid get_lastdocid() const
Return the last used document id of this (sub) database.
Collate statistics and calculate the term weights for the ESet.
vector< InMemoryTermEntry > terms
Xapian::termcount collection_freq
InMemoryDatabase()
Create and open an in-memory database.
string get_termname() const
Return the termname at the current position.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Implementation of virtual methods: see Database for details.
std::map< string, string > metadata
Xapian::doccount totdocs
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
A PostList in an inmemory database.
Xapian::termcount wdf
string str(int value)
Convert int to std::string.
Definition: str.cc:90
A position list in an inmemory database.
TermList * open_term_list(Xapian::docid did) const
Open a term list.
Class for iterating over document values.
C++ class definition for inmemory database access.
std::map< Xapian::valueno, ValueStats > valuestats
Class for iterating over term positions.
Xapian::termcount get_wdf_upper_bound() const
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
void close()
Close the database.
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
ValueIterator values_end() const
Equivalent end iterator for values_begin().
Definition: document.h:271
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
vector< std::map< Xapian::valueno, string > > valuelists
void get_freqs(const string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
Xapian::doccount termfreq
void add_values(Xapian::docid did, const map< Xapian::valueno, string > &values_)
static void throw_database_closed()
TermList * skip_to(const std::string &term)
Skip forward to the specified term.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in a document.
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
class for alltermslists over several databases
vector< std::string > doclists
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::termcount positionlist_count(Xapian::docid did, const string &tname) const
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
map< string, InMemoryTerm > postlists
void delete_document(Xapian::docid did)
Implementation of virtual methods: see Database for details.
size_t shard_index
Which shard of a multidatabase this is from.
Definition: termlist.h:114
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
bool term_exists(const string &tname) const
Check whether a given term is in the database.
Collates statistics while calculating term weight in an ESet.
Definition: expandweight.h:37
bool reopen()
Reopen the database to the latest available revision.
void make_term(const string &tname)
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
void dtor_called()
Internal method to perform cleanup when a writable database is destroyed with uncommitted changes...
Definition: database.cc:87
Xapian::termcount get_doclength(Xapian::docid did) const
Get the length of a given document.
vector< InMemoryPosting >::const_iterator pos
TermList * open_metadata_keylist(const std::string &prefix) const
Open a termlist returning each metadata key.
friend class InMemoryDocument
Xapian::termcount get_doclength() const
Return the length of current document.
Xapian::docid did
#define LOGLINE(a, b)
Definition: debuglog.h:483
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
std::string get_metadata(const std::string &key) const
Get the metadata associated with a given key.
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
InMemoryPostList(Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db, const InMemoryTerm &imterm, const std::string &term_)
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
void set_metadata(const std::string &key, const std::string &value)
Set the metadata associated with a given key.
Xapian::doccount get_termfreq() const
Return the exact term frequency.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
Xapian::PositionIterator positionlist_begin() const
Return a PositionIterator for the current position.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator termlist_begin() const
Iterator for the terms in this document.
Definition: omdocument.cc:197
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
InMemoryAllDocsPostList(Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db)
PositionIterator positionlist_begin() const
Return a PositionIterator for the current term.
A handle representing a document in a Xapian database.
Definition: document.h:61
bool at_end() const
Return true if the current position is past the last term in this list.
Debug logging macros.
TermList * next()
Advance the current position to the next term in the termlist.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
void set_data(const OmDocumentTerm::term_positions &positions_)
Fill list with data, and move the position to the start.
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Xapian::termcount term_freq
void add_posting(const InMemoryTermEntry &post)
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
vector< InMemoryDoc > termlists