xapian-core  1.4.27
inmemory_database.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002-2023 Olly Betts
7  * Copyright 2006,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "inmemory_database.h"
28 
29 #include "debuglog.h"
30 
31 #include "expand/expandweight.h"
32 #include "inmemory_document.h"
33 #include "inmemory_alltermslist.h"
34 #include "str.h"
35 #include "backends/valuestats.h"
36 
37 #include <algorithm>
38 #include <string>
39 #include <vector>
40 #include <map>
41 
42 #include <xapian/error.h>
43 #include <xapian/valueiterator.h>
44 
45 using std::make_pair;
47 
48 inline void
51  Xapian::termpos position,
52  bool use_position)
53 {
54  InMemoryPosting posting;
55  posting.did = did;
56 
57  // Find the right place in the sorted list.
58  vector<InMemoryPosting>::iterator p;
59  p = lower_bound(docs.begin(), docs.end(),
60  posting, InMemoryPostingLessThan());
61  if (p == docs.end() || InMemoryPostingLessThan()(posting, *p)) {
62  // Adding new entry.
63  if (use_position) {
64  posting.positions.push_back(position);
65  }
66  posting.wdf = wdf;
67  posting.valid = true;
68  docs.insert(p, std::move(posting));
69  } else if (!p->valid) {
70  // Resurrecting deleted entry.
71  p->did = did;
72  p->positions.clear();
73  if (use_position) {
74  p->positions.push_back(position);
75  }
76  p->wdf = wdf;
77  p->valid = true;
78  } else if (use_position) {
79  // Adding position to existing entry.
80  p->add_position(position);
81  }
82 }
83 
84 inline void
85 InMemoryDoc::add_posting(const string& tname,
87  Xapian::termpos position,
88  bool use_position)
89 {
90  InMemoryTermEntry termentry;
91  termentry.tname = tname;
92 
93  // Find the right place in the sorted list.
94  vector<InMemoryTermEntry>::iterator p;
95  p = lower_bound(terms.begin(), terms.end(),
96  termentry, InMemoryTermEntryLessThan());
97  if (p == terms.end() || InMemoryTermEntryLessThan()(termentry, *p)) {
98  // Adding new entry.
99  if (use_position) {
100  termentry.positions.push_back(position);
101  }
102  termentry.wdf = wdf;
103  terms.insert(p, std::move(termentry));
104  } else if (use_position) {
105  p->add_position(position);
106  }
107 }
108 
110 // Postlist //
112 
114  const InMemoryTerm & imterm,
115  const std::string & term_)
116  : LeafPostList(term_),
117  pos(imterm.docs.begin()),
118  end(imterm.docs.end()),
119  termfreq(imterm.term_freq),
120  started(false),
121  db(db_),
122  wdf_upper_bound(0)
123 {
124  while (pos != end && !pos->valid) ++pos;
125  if (pos != end) {
126  auto first_wdf = (*pos).wdf;
127  wdf_upper_bound = max(first_wdf, imterm.collection_freq - first_wdf);
128  }
129 }
130 
133 {
134  return termfreq;
135 }
136 
139 {
141  Assert(started);
142  Assert(!at_end());
143  return (*pos).did;
144 }
145 
/** Advance this postlist to the next posting that is still marked valid.
 *
 *  The first call only sets `started`, leaving `pos` where it already is;
 *  every later call steps `pos` forward once and then past any entries
 *  whose `valid` flag is false, stopping at `end`.
 *
 *  The weight threshold argument is unnamed and ignored.
 *
 *  NOTE(review): listing line 149 is absent from this extract, so a
 *  statement may have been dropped between the opening brace and the
 *  `if (started)` test - confirm against the original file.
 *
 *  @return Always NULL.
 */
146 PostList *
147 InMemoryPostList::next(double /*w_min*/)
148 {
150  if (started) {
151  Assert(!at_end());
152  ++pos;
 // Skip over entries which have been invalidated rather than erased.
153  while (pos != end && !pos->valid) ++pos;
154  } else {
 // First call: the current entry has not been consumed yet.
155  started = true;
156  }
157  return NULL;
158 }
159 
160 PostList *
162 {
164  // FIXME - see if we can make more efficient, perhaps using better
165  // data structure. Note, though, that a binary search of
166  // the remaining list may NOT be a good idea (search time is then
167  // O(log {length of list}), as opposed to O(distance we want to skip)
168  // Since we will frequently only be skipping a short distance, this
169  // could well be worse.
170 
171  // If we've not started, it's OK to call skip_to().
172  Assert(!at_end() || !started);
173  started = true;
174  while (!at_end() && (*pos).did < did) {
175  (void) next(w_min);
176  }
177  return NULL;
178 }
179 
180 bool
182 {
184  return (pos == end);
185 }
186 
187 string
189 {
190  return "InMemoryPostList " + str(termfreq);
191 }
192 
195 {
197  return db->get_doclength(get_docid());
198 }
199 
202 {
203  return db->get_unique_terms(get_docid());
204 }
205 
206 PositionList *
208 {
210  mypositions.set_data(pos->positions);
211  return &mypositions;
212 }
213 
214 PositionList *
216 {
218  return new InMemoryPositionList(pos->positions);
219 }
220 
223 {
225  return (*pos).wdf;
226 }
227 
230 {
232  return wdf_upper_bound;
233 }
234 
236 // Termlist //
238 
240  Xapian::docid did_,
241  const InMemoryDoc & doc,
242  Xapian::termcount len)
243  : pos(doc.terms.begin()), end(doc.terms.end()), terms(doc.terms.size()),
244  started(false), db(db_), did(did_), document_length(len)
245 {
246  LOGLINE(DB, "InMemoryTermList::InMemoryTermList(): " <<
247  terms << " terms starting from " << pos->tname);
248 }
249 
252 {
254  Assert(started);
255  Assert(!at_end());
256  return (*pos).wdf;
257 }
258 
261 {
263  Assert(started);
264  Assert(!at_end());
265 
266  Xapian::doccount tf;
267  db->get_freqs((*pos).tname, &tf, NULL);
268  return tf;
269 }
270 
273 {
275  return terms;
276 }
277 
278 void
280 {
282  Assert(started);
283  Assert(!at_end());
284  stats.accumulate(shard_index,
287  db->get_doccount());
288 }
289 
290 string
292 {
294  Assert(started);
295  Assert(!at_end());
296  return (*pos).tname;
297 }
298 
299 TermList *
301 {
303  if (started) {
304  Assert(!at_end());
305  ++pos;
306  } else {
307  started = true;
308  }
309  return NULL;
310 }
311 
312 TermList *
313 InMemoryTermList::skip_to(const string & term)
314 {
315  if (rare(db->is_closed()))
317 
318  while (pos != end && pos->tname < term) {
319  ++pos;
320  }
321 
322  started = true;
323  return NULL;
324 }
325 
326 bool
328 {
330  Assert(started);
331  return (pos == end);
332 }
333 
336 {
338  return db->positionlist_count(did, (*pos).tname);
339 }
340 
343 {
345  return Xapian::PositionIterator(db->open_position_list(did, (*pos).tname));
346 }
347 
349 // InMemoryAllDocsPostList //
351 
353  : LeafPostList(std::string()), did(0), db(db_)
354 {
355 }
356 
359 {
361  return db->totdocs;
362 }
363 
366 {
368  Assert(did > 0);
369  Assert(did <= db->termlists.size());
370  Assert(db->termlists[did - 1].is_valid);
371  return did;
372 }
373 
376 {
378  return db->get_doclength(did);
379 }
380 
383 {
384  return db->get_unique_terms(did);
385 }
386 
389 {
390  return 1;
391 }
392 
393 PositionList *
395 {
396  throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
397 }
398 
399 PositionList *
401 {
402  throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
403 }
404 
405 PostList *
407 {
409  Assert(!at_end());
410  do {
411  ++did;
412  } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid);
413  return NULL;
414 }
415 
416 PostList *
418 {
420  Assert(!at_end());
421  if (did <= did_) {
422  did = did_;
423  while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) {
424  ++did;
425  }
426  }
427  return NULL;
428 }
429 
430 bool
432 {
434  return (did > db->termlists.size());
435 }
436 
439 {
440  return 1;
441 }
442 
443 string
445 {
446  return "InMemoryAllDocsPostList " + str(did);
447 }
448 
450 // Actual database class //
452 
454  : totdocs(0), totlen(0), positions_present(false), closed(false)
455 {
456  // Updates are applied immediately so we can't support transactions.
458 
459  // We keep an empty entry in postlists for convenience of implementing
460  // allterms iteration and returning a PostList for an absent term.
461  postlists.insert(make_pair(string(), InMemoryTerm()));
462 }
463 
465 {
466  dtor_called();
467 }
468 
469 bool
471 {
473  return false;
474 }
475 
476 void
478 {
479  // Free all the resources, and mark the db as closed.
480  postlists.clear();
481  termlists.clear();
482  doclists.clear();
483  valuelists.clear();
484  valuestats.clear();
485  doclengths.clear();
486  metadata.clear();
487  closed = true;
488 }
489 
/** Open a posting list for a term.
 *
 *  An empty @a tname yields an InMemoryAllDocsPostList over this database.
 *  Otherwise the term is looked up in `postlists`; if it is absent, or
 *  present with a term frequency of zero, the first map entry is used
 *  instead - this is asserted to be the dummy entry keyed by the empty
 *  string.  The returned InMemoryPostList is constructed with @a tname.
 *
 *  NOTE(review): listing line 493 is absent from this extract, so a
 *  statement may be missing at the top of the body - confirm against the
 *  original file.
 *
 *  @param tname  The term name, or an empty string for all documents.
 *
 *  @return A postlist object allocated with `new`.
 */
490 LeafPostList *
491 InMemoryDatabase::open_post_list(const string & tname) const
492 {
494  if (tname.empty()) {
495  return new InMemoryAllDocsPostList(this);
496  }
497  map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
498  if (i == postlists.end() || i->second.term_freq == 0) {
 // Unknown or unused term: fall back to the first entry in the map.
499  i = postlists.begin();
500  // Check that our dummy entry for string() is present.
501  Assert(i->first.empty());
502  }
503  return new InMemoryPostList(this, i->second, tname);
504 }
505 
506 bool
508 {
510  return (did > 0 && did <= termlists.size() && termlists[did - 1].is_valid);
511 }
512 
/** Report the term frequency and collection frequency of a term.
 *
 *  Either output pointer may be NULL, in which case that statistic is not
 *  written.  A term with no entry in `postlists` reports zero for both.
 *
 *  NOTE(review): listing line 518 is absent from this extract, so a
 *  statement may be missing at the top of the body - confirm against the
 *  original file.
 *
 *  @param term          The term name to look up.
 *  @param termfreq_ptr  If non-NULL, receives the entry's `term_freq`.
 *  @param collfreq_ptr  If non-NULL, receives the entry's
 *                       `collection_freq`.
 */
513 void
514 InMemoryDatabase::get_freqs(const string & term,
515  Xapian::doccount * termfreq_ptr,
516  Xapian::termcount * collfreq_ptr) const
517 {
519  map<string, InMemoryTerm>::const_iterator i = postlists.find(term);
520  if (i != postlists.end()) {
521  if (termfreq_ptr)
522  *termfreq_ptr = i->second.term_freq;
523  if (collfreq_ptr)
524  *collfreq_ptr = i->second.collection_freq;
525  } else {
 // No entry for this term: both statistics are zero.
526  if (termfreq_ptr)
527  *termfreq_ptr = 0;
528  if (collfreq_ptr)
529  *collfreq_ptr = 0;
530  }
531 }
532 
535 {
537  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
538  if (i == valuestats.end()) return 0;
539  return i->second.freq;
540 }
541 
542 std::string
544 {
546  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
547  if (i == valuestats.end()) return string();
548  return i->second.lower_bound;
549 }
550 
551 std::string
553 {
555  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
556  if (i == valuestats.end()) return string();
557  return i->second.upper_bound;
558 }
559 
562 {
564  return totdocs;
565 }
566 
569 {
571  return Xapian::docid(termlists.size());
572 }
573 
576 {
577  return totlen;
578 }
579 
582 {
584  if (!doc_exists(did)) {
585  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
586  string(" not found"));
587  }
588  return doclengths[did - 1];
589 }
590 
593 {
595  if (did == 0 || did > termlists.size() || !termlists[did - 1].is_valid)
596  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
597  string(" not found"));
598  // get_unique_terms() really ought to only count terms with wdf > 0, but
599  // that's expensive to calculate on demand, so for now let's just ensure
600  // unique_terms <= doclen.
601  Xapian::termcount terms = termlists[did - 1].terms.size();
602  return std::min(terms, Xapian::termcount(doclengths[did - 1]));
603 }
604 
605 TermList *
607 {
609  Assert(did != 0);
610  if (!doc_exists(did)) {
611  // FIXME: the docid in this message will be local, not global
612  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
613  string(" not found"));
614  }
616  termlists[did - 1], doclengths[did - 1]);
617 }
618 
621 {
623  Assert(did != 0);
624  if (!lazy && !doc_exists(did)) {
625  // FIXME: the docid in this message will be local, not global
626  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
627  string(" not found"));
628  }
629  return new InMemoryDocument(this, did);
630 }
631 
/** Look up the metadata value stored under a key.
 *
 *  NOTE(review): listing line 635 is absent from this extract, so a
 *  statement may be missing at the top of the body - confirm against the
 *  original file.
 *
 *  @param key  The metadata key.
 *
 *  @return The stored value, or an empty string if @a key has no entry
 *	    in `metadata`.
 */
632 std::string
633 InMemoryDatabase::get_metadata(const std::string & key) const
634 {
636  map<string, string>::const_iterator i = metadata.find(key);
637  if (i == metadata.end())
638  return string();
639  return i->second;
640 }
641 
642 TermList *
644 {
646  if (metadata.empty()) return NULL;
647  // FIXME: nobody implemented this yet...
648  throw Xapian::UnimplementedError("InMemory backend doesn't currently implement Database::metadata_keys_begin()");
649 }
650 
/** Store, replace or remove a metadata entry.
 *
 *  A non-empty @a value is stored under @a key, overwriting any previous
 *  value.  An empty @a value removes the entry for @a key instead, so
 *  `metadata` never holds an empty value via this method.
 *
 *  NOTE(review): listing line 655 is absent from this extract, so a
 *  statement may be missing at the top of the body - confirm against the
 *  original file.
 *
 *  @param key    The metadata key.
 *  @param value  The new value; empty means "delete this key".
 */
651 void
652 InMemoryDatabase::set_metadata(const std::string & key,
653  const std::string & value)
654 {
656  if (!value.empty()) {
657  metadata[key] = value;
658  } else {
 // An empty value means the key should no longer be present.
659  metadata.erase(key);
660  }
661 }
662 
665  const string & tname) const
666 {
668  if (!doc_exists(did)) {
669  return 0;
670  }
671  const InMemoryDoc &doc = termlists[did - 1];
672 
673  InMemoryTermEntry temp;
674  temp.tname = tname;
675  auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
676  temp, InMemoryTermEntryLessThan());
677  if (t != doc.terms.end() && t->tname == tname) {
678  return t->positions.size();
679  }
680  return 0;
681 }
682 
683 PositionList *
685  const string & tname) const
686 {
688  if (usual(doc_exists(did))) {
689  const InMemoryDoc &doc = termlists[did - 1];
690 
691  InMemoryTermEntry temp;
692  temp.tname = tname;
693  auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
694  temp, InMemoryTermEntryLessThan());
695  if (t != doc.terms.end() && t->tname == tname) {
696  return new InMemoryPositionList(t->positions);
697  }
698  }
699  return new InMemoryPositionList(false);
700 }
701 
702 void
704  const map<Xapian::valueno, string> &values_)
705 {
707  if (did > valuelists.size()) {
708  valuelists.resize(did);
709  }
710  valuelists[did - 1] = values_;
711 
712  // Update the statistics.
713  map<Xapian::valueno, string>::const_iterator j;
714  for (j = values_.begin(); j != values_.end(); ++j) {
715  std::pair<map<Xapian::valueno, ValueStats>::iterator, bool> i;
716  i = valuestats.insert(make_pair(j->first, ValueStats()));
717 
718  // Now, modify the stored statistics.
719  if ((i.first->second.freq)++ == 0) {
720  // If the value count was previously zero, set the upper and lower
721  // bounds to the newly added value.
722  i.first->second.lower_bound = j->second;
723  i.first->second.upper_bound = j->second;
724  } else {
725  // Otherwise, simply make sure they reflect the new value.
726  if (j->second < i.first->second.lower_bound) {
727  i.first->second.lower_bound = j->second;
728  }
729  if (j->second > i.first->second.upper_bound) {
730  i.first->second.upper_bound = j->second;
731  }
732  }
733  }
734 }
735 
736 // We implicitly commit each modification right away, so nothing to do here.
737 void
739 {
740 }
741 
742 // We implicitly commit each modification right away, so nothing to do here.
743 void
745 {
746 }
747 
748 void
750 {
752  if (!doc_exists(did)) {
753  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
754  string(" not found"));
755  }
756  termlists[did - 1].is_valid = false;
757  doclists[did - 1] = string();
758  map<Xapian::valueno, string>::const_iterator j;
759  for (j = valuelists[did - 1].begin(); j != valuelists[did - 1].end(); ++j) {
760  map<Xapian::valueno, ValueStats>::iterator i;
761  i = valuestats.find(j->first);
762  if (--(i->second.freq) == 0) {
763  i->second.lower_bound.resize(0);
764  i->second.upper_bound.resize(0);
765  }
766  }
767  valuelists[did - 1].clear();
768 
769  totlen -= doclengths[did - 1];
770  doclengths[did - 1] = 0;
771  totdocs--;
772  // A crude check, but it's hard to be more precise with the current
773  // InMemory structure without being very inefficient.
774  if (totdocs == 0) positions_present = false;
775 
776  vector<InMemoryTermEntry>::const_iterator i;
777  for (i = termlists[did - 1].terms.begin();
778  i != termlists[did - 1].terms.end();
779  ++i) {
780  map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
781  Assert(t != postlists.end());
782  t->second.collection_freq -= i->wdf;
783  --t->second.term_freq;
784 
785  // Just invalidate erased doc ids - otherwise we need to erase
786  // in a vector (inefficient) and we break any posting lists
787  // iterating over this posting list.
788  InMemoryPosting temp;
789  temp.did = did;
790  auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
791  temp, InMemoryPostingLessThan());
792  if (p != t->second.docs.end() && p->did == did) {
793  p->valid = false;
794  }
795  }
796  termlists[did - 1].terms.clear();
797 }
798 
799 void
801  const Xapian::Document & document)
802 {
803  LOGCALL_VOID(DB, "InMemoryDatabase::replace_document", did | document);
804 
806 
807  if (doc_exists(did)) {
808  map<Xapian::valueno, string>::const_iterator j;
809  for (j = valuelists[did - 1].begin(); j != valuelists[did - 1].end(); ++j) {
810  map<Xapian::valueno, ValueStats>::iterator i;
811  i = valuestats.find(j->first);
812  if (--(i->second.freq) == 0) {
813  i->second.lower_bound.resize(0);
814  i->second.upper_bound.resize(0);
815  }
816  }
817 
818  totlen -= doclengths[did - 1];
819  totdocs--;
820  } else if (did > termlists.size()) {
821  termlists.resize(did);
822  termlists[did - 1].is_valid = true;
823  doclengths.resize(did);
824  doclists.resize(did);
825  valuelists.resize(did);
826  } else {
827  termlists[did - 1].is_valid = true;
828  }
829 
830  vector<InMemoryTermEntry>::const_iterator i;
831  for (i = termlists[did - 1].terms.begin();
832  i != termlists[did - 1].terms.end();
833  ++i) {
834  map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
835  Assert(t != postlists.end());
836  t->second.collection_freq -= i->wdf;
837  --t->second.term_freq;
838 
839  // Just invalidate erased doc ids - otherwise we need to erase
840  // in a vector (inefficient) and we break any posting lists
841  // iterating over this posting list.
842  InMemoryPosting temp;
843  temp.did = did;
844  auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
845  temp, InMemoryPostingLessThan());
846  if (p != t->second.docs.end() && p->did == did) {
847  p->valid = false;
848  }
849  }
850 
851  doclengths[did - 1] = 0;
852  doclists[did - 1] = document.get_data();
853 
854  finish_add_doc(did, document);
855 }
856 
859 {
860  LOGCALL(DB, Xapian::docid, "InMemoryDatabase::add_document", document);
862 
863  Xapian::docid did = make_doc(document.get_data());
864 
865  finish_add_doc(did, document);
866 
867  RETURN(did);
868 }
869 
870 void
872 {
873  {
874  map<Xapian::valueno, string> values;
875  Xapian::ValueIterator k = document.values_begin();
876  for ( ; k != document.values_end(); ++k) {
877  values.insert(make_pair(k.get_valueno(), *k));
878  LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding value " <<
879  k.get_valueno() << " -> " << *k);
880  }
881  add_values(did, values);
882  }
883 
884  InMemoryDoc doc(true);
885  Xapian::TermIterator i = document.termlist_begin();
886  for ( ; i != document.termlist_end(); ++i) {
887  make_term(*i);
888 
889  LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding term " << *i);
891  if (j == i.positionlist_end()) {
892  /* Make sure the posting exists, even without a position. */
893  make_posting(&doc, *i, did, 0, i.get_wdf(), false);
894  } else {
895  positions_present = true;
896  for ( ; j != i.positionlist_end(); ++j) {
897  make_posting(&doc, *i, did, *j, i.get_wdf());
898  }
899  }
900 
901  Assert(did > 0 && did <= doclengths.size());
902  doclengths[did - 1] += i.get_wdf();
903  totlen += i.get_wdf();
904  postlists[*i].collection_freq += i.get_wdf();
905  ++postlists[*i].term_freq;
906  }
907  swap(termlists[did - 1], doc);
908 
909  totdocs++;
910 }
911 
912 void
913 InMemoryDatabase::make_term(const string & tname)
914 {
915  postlists[tname]; // Initialise, if not already there.
916 }
917 
919 InMemoryDatabase::make_doc(const string & docdata)
920 {
921  if (rare(termlists.size() == Xapian::docid(-1))) {
922  // Really unlikely to actually happen for inmemory.
923  throw Xapian::DatabaseError("Run out of docids");
924  }
925  termlists.push_back(InMemoryDoc(true));
926  doclengths.push_back(0);
927  doclists.push_back(docdata);
928 
929  AssertEqParanoid(termlists.size(), doclengths.size());
930 
931  return Xapian::docid(termlists.size());
932 }
933 
935  const string & tname,
936  Xapian::docid did,
937  Xapian::termpos position,
938  Xapian::termcount wdf,
939  bool use_position)
940 {
941  Assert(doc);
942  Assert(postlists.find(tname) != postlists.end());
943  Assert(did > 0 && did <= termlists.size());
944  Assert(did > 0 && did <= doclengths.size());
945  Assert(doc_exists(did));
946 
947  postlists[tname].add_posting(did, wdf, position, use_position);
948  doc->add_posting(tname, wdf, position, use_position);
949 }
950 
/** Check whether a term currently indexes at least one document.
 *
 *  An entry in `postlists` alone is not enough: the entry must also have
 *  a non-zero `term_freq`.
 *
 *  NOTE(review): listing line 954 is absent from this extract, so a
 *  statement may be missing at the top of the body - confirm against the
 *  original file.
 *
 *  @param tname  The term name; asserted to be non-empty.
 *
 *  @return true if @a tname has an entry with a non-zero term frequency.
 */
951 bool
952 InMemoryDatabase::term_exists(const string & tname) const
953 {
955  Assert(!tname.empty());
956  map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
957  if (i == postlists.end()) return false;
958  return (i->second.term_freq != 0);
959 }
960 
961 bool
963 {
965  return positions_present;
966 }
967 
/** Open a list of the terms in the database, restricted by a prefix.
 *
 *  The returned InMemoryAllTermsList is given a pointer to `postlists`
 *  and the requested @a prefix.
 *
 *  NOTE(review): listing lines 971 and 973 are absent from this extract.
 *  Line 973 falls inside the constructor's argument list, so an argument
 *  between `&postlists` and `prefix` has probably been dropped - confirm
 *  against the original file.
 *
 *  @param prefix  The prefix passed on to the InMemoryAllTermsList.
 *
 *  @return A termlist object allocated with `new`.
 */
968 TermList *
969 InMemoryDatabase::open_allterms(const string & prefix) const
970 {
972  return new InMemoryAllTermsList(&postlists,
974  prefix);
975 }
976 
977 void
979 {
980  throw Xapian::DatabaseClosedError("Database has been closed");
981 }
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
vector< InMemoryPosting >::const_iterator end
#define RETURN(A)
Definition: debuglog.h:493
#define Assert(COND)
Definition: omassert.h:122
vector< InMemoryTermEntry >::const_iterator end
InMemoryPostList(const InMemoryDatabase *db, const InMemoryTerm &imterm, const std::string &term_)
string get_description() const
Return a string description of this object.
Class to hold statistics for a given slot.
Definition: valuestats.h:29
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
Abstract base class for postlists.
Definition: postlist.h:37
Xapian::docid make_doc(const string &docdata)
Statistics about values.
Indicates an attempt to access a closed database.
Definition: error.h:1097
vector< Xapian::termpos > positions
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
A database held entirely in memory.
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Open a document.
Xapian::totallength get_total_length() const
Return the total length of all documents in this database.
void make_posting(InMemoryDoc *doc, const string &tname, Xapian::docid did, Xapian::termpos position, Xapian::termcount wdf, bool use_position=true)
bool at_end() const
Return true if the current position is past the last entry in this list.
string get_description() const
Return a string description of this object.
Xapian::termcount wdf_upper_bound
void commit()
Implementation of virtual methods: see Database for details.
#define usual(COND)
Definition: config.h:576
Iterate all terms in an inmemory db.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
friend class InMemoryAllDocsPostList
bool is_closed() const
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139
Xapian::doccount get_termfreq() const
Return the exact term frequency.
ValueIterator values_begin() const
Iterator for the values in this document.
Definition: omdocument.cc:210
Xapian::termcount get_doclength() const
Return the length of current document.
Xapian::totallength totlen
Xapian::termcount get_wdf_upper_bound() const
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
vector< Xapian::termcount > doclengths
void finish_add_doc(Xapian::docid did, const Xapian::Document &document)
A document in the database, possibly plus modifications.
Definition: document.h:43
Class for iterating over document values.
Definition: valueiterator.h:40
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
A document read from an InMemoryDatabase.
Convert types to std::string.
Xapian::termcount terms
void cancel()
Implementation of virtual methods: see Database for details.
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
vector< InMemoryTermEntry >::const_iterator pos
Xapian::docid get_docid() const
Return the current docid.
LeafPostList * open_post_list(const string &tname) const
Open a posting list.
bool has_positions() const
Check whether this database contains any positional information.
TermList * open_allterms(const string &prefix) const
Open an allterms list.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:39
#define false
Definition: header.h:9
Xapian::doccount get_doccount() const
Return the number of docs in this (sub) database.
TermIterator termlist_end() const
Equivalent end iterator for termlist_begin().
Definition: document.h:270
void accumulate_stats(Xapian::Internal::ExpandStats &stats) const
Collate weighting information for the current term.
vector< Xapian::termpos > positions
#define rare(COND)
Definition: config.h:575
InMemoryTermList(Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db, Xapian::docid did, const InMemoryDoc &doc, Xapian::termcount len)
Xapian::termcount document_length
Xapian::termcount wdf
void add_posting(const std::string &tname, Xapian::termcount wdf, Xapian::termpos position, bool use_position)
PositionList * open_position_list(Xapian::docid did, const string &tname) const
Open a position list for the given term in the given document.
enum Xapian::Database::Internal::@2 transaction_state
Transaction state.
Hierarchy of classes which Xapian can throw as exceptions.
vector< InMemoryPosting > docs
Class for iterating over a list of terms.
Definition: termiterator.h:41
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
void add_posting(Xapian::docid did, Xapian::termcount wdf, Xapian::termpos position, bool use_position)
bool at_end() const
Return true if the current position is past the last entry in this list.
PositionIterator positionlist_end() const
Return an end PositionIterator for the current term.
Definition: termiterator.h:110
Class representing a document and the terms indexing it.
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
Definition: expandweight.h:76
InMemoryPositionList mypositions
List of positions of the current term.
Xapian::docid add_document(const Xapian::Document &document)
Implementation of virtual methods: see Database for details.
Xapian::docid get_docid() const
Return the current docid.
bool doc_exists(Xapian::docid did) const
Xapian::docid get_lastdocid() const
Return the last used document id of this (sub) database.
Collate statistics and calculate the term weights for the ESet.
vector< InMemoryTermEntry > terms
Xapian::termcount collection_freq
InMemoryDatabase()
Create and open an in-memory database.
string get_termname() const
Return the termname at the current position.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Implementation of virtual methods: see Database for details.
std::map< string, string > metadata
InMemoryAllDocsPostList(const InMemoryDatabase *db)
Xapian::doccount totdocs
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
A PostList in an inmemory database.
Xapian::termcount wdf
string str(int value)
Convert int to std::string.
Definition: str.cc:90
A position list in an inmemory database.
TermList * open_term_list(Xapian::docid did) const
Open a term list.
Class for iterating over document values.
C++ class definition for inmemory database access.
std::map< Xapian::valueno, ValueStats > valuestats
Class for iterating over term positions.
Xapian::termcount get_wdf_upper_bound() const
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
void close()
Close the database.
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
ValueIterator values_end() const
Equivalent end iterator for values_begin().
Definition: document.h:281
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
vector< std::map< Xapian::valueno, string > > valuelists
void get_freqs(const string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
Xapian::doccount termfreq
void add_values(Xapian::docid did, const map< Xapian::valueno, string > &values_)
static void throw_database_closed()
TermList * skip_to(const std::string &term)
Skip forward to the specified term.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in a document.
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
class for alltermslists over several databases
vector< std::string > doclists
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::termcount positionlist_count(Xapian::docid did, const string &tname) const
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
map< string, InMemoryTerm > postlists
void delete_document(Xapian::docid did)
Implementation of virtual methods: see Database for details.
size_t shard_index
Which shard of a multidatabase this is from.
Definition: termlist.h:114
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
bool term_exists(const string &tname) const
Check whether a given term is in the database.
Collates statistics while calculating term weight in an ESet.
Definition: expandweight.h:37
bool reopen()
Reopen the database to the latest available revision.
void make_term(const string &tname)
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
void dtor_called()
Internal method to perform cleanup when a writable database is destroyed with uncommitted changes...
Definition: database.cc:87
Xapian::termcount get_doclength(Xapian::docid did) const
Get the length of a given document.
vector< InMemoryPosting >::const_iterator pos
TermList * open_metadata_keylist(const std::string &prefix) const
Open a termlist returning each metadata key.
friend class InMemoryDocument
Xapian::termcount get_doclength() const
Return the length of current document.
Xapian::docid did
#define LOGLINE(a, b)
Definition: debuglog.h:494
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
std::string get_metadata(const std::string &key) const
Get the metadata associated with a given key.
DatabaseError indicates some sort of database related error.
Definition: error.h:367
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
void set_metadata(const std::string &key, const std::string &value)
Set the metadata associated with a given key.
Xapian::doccount get_termfreq() const
Return the exact term frequency.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
Xapian::PositionIterator positionlist_begin() const
Return a PositionIterator for the current position.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator termlist_begin() const
Start iterating the terms in this document.
Definition: omdocument.cc:197
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
PositionIterator positionlist_begin() const
Return a PositionIterator for the current term.
A handle representing a document in a Xapian database.
Definition: document.h:61
bool at_end() const
Return true if the current position is past the last term in this list.
Debug logging macros.
TermList * next()
Advance the current position to the next term in the termlist.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
void set_data(const OmDocumentTerm::term_positions &positions_)
Fill list with data, and move the position to the start.
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Xapian::termcount term_freq
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
vector< InMemoryDoc > termlists