xapian-core  2.0.0
inmemory_database.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002-2024 Olly Betts
7  * Copyright 2006,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, see
21  * <https://www.gnu.org/licenses/>.
22  */
23 
24 #include <config.h>
25 
26 #include "inmemory_database.h"
27 
28 #include "debuglog.h"
29 
31 #include "expand/expandweight.h"
32 #include "inmemory_document.h"
33 #include "inmemory_alltermslist.h"
34 #include "str.h"
35 #include "backends/valuestats.h"
36 
37 #include <algorithm>
38 #include <string>
39 #include <string_view>
40 #include <vector>
41 #include <map>
42 
43 #include <xapian/error.h>
44 #include <xapian/valueiterator.h>
45 
46 using namespace std;
48 
49 inline void
52  Xapian::termpos position,
53  bool use_position)
54 {
55  InMemoryPosting posting;
56  posting.did = did;
57 
58  // Find the right place in the sorted list.
59  vector<InMemoryPosting>::iterator p;
60  p = lower_bound(docs.begin(), docs.end(),
61  posting, InMemoryPostingLessThan());
62  if (p == docs.end() || InMemoryPostingLessThan()(posting, *p)) {
63  // Adding new entry.
64  if (use_position) {
65  posting.positions.push_back(position);
66  }
67  posting.wdf = wdf;
68  posting.valid = true;
69  docs.insert(p, std::move(posting));
70  } else if (!p->valid) {
71  // Resurrecting deleted entry.
72  p->did = did;
73  p->positions.clear();
74  if (use_position) {
75  p->positions.push_back(position);
76  }
77  p->wdf = wdf;
78  p->valid = true;
79  } else if (use_position) {
80  // Adding position to existing entry.
81  p->add_position(position);
82  }
83 }
84 
// Record an occurrence of term `tname` in this document's term list.
//
// `terms` is treated as sorted according to InMemoryTermEntryLessThan: the
// slot for `tname` is located with lower_bound().  If there is no entry for
// the term yet, a new one is inserted at that slot carrying `wdf` (and
// `position`, when `use_position` is true).  If an entry already exists, only
// `position` is added to it (again only when `use_position` is true); the wdf
// already stored in that entry is not modified.
//
// NOTE(review): `wdf` is used below but its parameter declaration is not
// visible here - this listing appears to have dropped a line between `tname`
// and `position`.  Confirm the full signature against the class declaration.
85 inline void
86 InMemoryDoc::add_posting(const string& tname,
88  Xapian::termpos position,
89  bool use_position)
90 {
   // Probe entry: only tname is set before the search.
91  InMemoryTermEntry termentry;
92  termentry.tname = tname;
93 
94  // Find the right place in the sorted list.
95  vector<InMemoryTermEntry>::iterator p;
96  p = lower_bound(terms.begin(), terms.end(),
97  termentry, InMemoryTermEntryLessThan());
   // lower_bound() gives the first entry not ordered before the probe, so the
   // term is absent if we hit the end or the probe orders before *p.
98  if (p == terms.end() || InMemoryTermEntryLessThan()(termentry, *p)) {
99  // Adding new entry.
100  if (use_position) {
101  termentry.positions.push_back(position);
102  }
103  termentry.wdf = wdf;
104  terms.insert(p, std::move(termentry));
105  } else if (use_position) {
   // Existing entry: just record the extra position.
106  p->add_position(position);
107  }
108 }
109 
111 // Postlist //
113 
115  const InMemoryTerm & imterm,
116  std::string_view term_)
117  : LeafPostList(term_),
118  pos(imterm.docs.begin()),
119  end(imterm.docs.end()),
120  started(false),
121  db(db_),
122  wdf_upper_bound(0)
123 {
124  termfreq = imterm.term_freq;
125  collfreq = imterm.collection_freq;
126  while (pos != end && !pos->valid) ++pos;
127  if (pos != end) {
128  auto first_wdf = (*pos).wdf;
129  wdf_upper_bound = max(first_wdf, imterm.collection_freq - first_wdf);
130  }
131 }
132 
135 {
137  Assert(started);
138  Assert(!at_end());
139  return (*pos).did;
140 }
141 
// Advance this postlist by one entry.
//
// The first call only sets `started`; `pos` is left where it already is.
// Each later call steps `pos` forward once and then keeps stepping past any
// entries whose `valid` flag is false, stopping at `end` if none remain.
//
// The w_min argument is unused.  Always returns NULL.
//
// NOTE(review): this listing skips a line right after the opening brace
// (listing line 145), so the first statement of the body may not be shown.
142 PostList *
143 InMemoryPostList::next(double /*w_min*/)
144 {
146  if (started) {
   // Must not be called again once the end has been reached.
147  Assert(!at_end());
148  ++pos;
149  while (pos != end && !pos->valid) ++pos;
150  } else {
151  started = true;
152  }
153  return NULL;
154 }
155 
156 PostList *
158 {
160  // FIXME - see if we can make more efficient, perhaps using better
161  // data structure. Note, though, that a binary search of
162  // the remaining list may NOT be a good idea (search time is then
163  // O(log {length of list}), as opposed to O(distance we want to skip)).
164  // Since we will frequently only be skipping a short distance, this
165  // could well be worse.
166 
167  // If we've not started, it's OK to call skip_to().
168  Assert(!at_end() || !started);
169  started = true;
170  while (!at_end() && (*pos).did < did) {
171  (void) next(w_min);
172  }
173  return NULL;
174 }
175 
176 bool
178 {
180  return (pos == end);
181 }
182 
183 void
185  Xapian::docid& last) const
186 {
187  Assert(!started);
188  if (pos != end) {
189  first = pos->did;
190  last = (end - 1)->did;
191  } else {
192  last = 0;
193  }
194 }
195 
196 string
198 {
199  return term + ":" + str(termfreq);
200 }
201 
204 {
205  return db->get_wdfdocmax(get_docid());
206 }
207 
208 PositionList *
210 {
212  mypositions.assign(pos->positions.copy());
213  return &mypositions;
214 }
215 
216 PositionList *
218 {
220  if (pos->positions.empty()) return nullptr;
221  return new InMemoryPositionList(pos->positions.copy());
222 }
223 
226 {
228  return (*pos).wdf;
229 }
230 
233 {
235  return wdf_upper_bound;
236 }
237 
239 // Termlist //
241 
243  Xapian::docid did_,
244  const InMemoryDoc & doc,
245  Xapian::termcount len)
246  : pos(doc.terms.begin()), end(doc.terms.end()), terms(doc.terms.size()),
247  started(false), db(db_), did(did_), document_length(len)
248 {
249  LOGLINE(DB, "InMemoryTermList::InMemoryTermList(): " <<
250  terms << " terms starting from " << pos->tname);
251 }
252 
255 {
257  Assert(started);
258  Assert(pos != end);
259  return (*pos).wdf;
260 }
261 
264 {
266  Assert(started);
267  Assert(pos != end);
268 
269  Xapian::doccount tf;
270  db->get_freqs((*pos).tname, &tf, NULL);
271  return tf;
272 }
273 
276 {
278  return terms;
279 }
280 
281 void
283 {
285  Assert(started);
286  Assert(pos != end);
287  stats.accumulate(shard_index,
290  db->get_doccount());
291 }
292 
293 TermList *
295 {
297  if (started) {
298  Assert(pos != end);
299  ++pos;
300  } else {
301  started = true;
302  }
303  if (pos == end)
304  return this;
305  current_term = pos->tname;
306  return NULL;
307 }
308 
309 TermList*
311 {
312  if (rare(db->is_closed()))
314 
315  while (pos != end && pos->tname < term) {
316  ++pos;
317  }
318 
319  started = true;
320  if (pos == end)
321  return this;
322  current_term = pos->tname;
323  return NULL;
324 }
325 
328 {
330  return db->positionlist_count(did, (*pos).tname);
331 }
332 
335 {
337  return db->open_position_list(did, (*pos).tname);
338 }
339 
341 // InMemoryAllDocsPostList //
343 
345  : LeafPostList({}), did(0), db(db_)
346 {
347  collfreq = termfreq = db->totdocs;
348 }
349 
352 {
354  Assert(did > 0);
355  Assert(did <= db->termlists.size());
356  Assert(db->termlists[did - 1].is_valid);
357  return did;
358 }
359 
362 {
363  return 1;
364 }
365 
366 PositionList *
368 {
369  throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
370 }
371 
372 PositionList *
374 {
375  throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
376 }
377 
378 PostList *
380 {
382  Assert(!at_end());
383  do {
384  ++did;
385  } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid);
386  return NULL;
387 }
388 
389 PostList *
391 {
393  Assert(!at_end());
394  if (did <= did_) {
395  did = did_;
396  while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) {
397  ++did;
398  }
399  }
400  return NULL;
401 }
402 
403 bool
405 {
407  return (did > db->termlists.size());
408 }
409 
412 {
413  return 1;
414 }
415 
416 string
418 {
419  return "InMemoryAllDocsPostList " + str(did);
420 }
421 
423 // Actual database class //
425 
426 // Updates are applied immediately so we can't support transactions.
428  : Xapian::Database::Internal(TRANSACTION_UNIMPLEMENTED),
429  totdocs(0), totlen(0), positions_present(false), closed(false)
430 {
431  // We keep an empty entry in postlists for convenience of implementing
432  // allterms iteration.
433  postlists.insert(make_pair(string(), InMemoryTerm()));
434 }
435 
437 {
438  dtor_called();
439 }
440 
441 bool
443 {
445  return false;
446 }
447 
448 void
450 {
451  // Free all the resources, and mark the db as closed.
452  postlists.clear();
453  termlists.clear();
454  doclists.clear();
455  valuelists.clear();
456  valuestats.clear();
457  doclengths.clear();
458  metadata.clear();
459  closed = true;
460 }
461 
462 PostList*
464 {
466 }
467 
470  bool need_read_pos) const
471 {
472  (void)need_read_pos;
474  if (term.empty()) {
475  Assert(!need_read_pos);
477  if (rare(doccount == 0)) {
478  return nullptr;
479  }
480  if (doccount == termlists.size()) {
481  // The used docid range is exactly 1 to doccount inclusive.
483  }
484  return new InMemoryAllDocsPostList(this);
485  }
486  auto i = postlists.find(term);
487  if (i == postlists.end() || i->second.term_freq == 0) {
488  return nullptr;
489  }
490  return new InMemoryPostList(this, i->second, term);
491 }
492 
493 bool
495 {
497  return (did > 0 && did <= termlists.size() && termlists[did - 1].is_valid);
498 }
499 
500 void
502  Xapian::doccount* termfreq_ptr,
503  Xapian::termcount* collfreq_ptr) const
504 {
506  auto i = postlists.find(term);
507  if (i != postlists.end()) {
508  if (termfreq_ptr)
509  *termfreq_ptr = i->second.term_freq;
510  if (collfreq_ptr)
511  *collfreq_ptr = i->second.collection_freq;
512  } else {
513  if (termfreq_ptr)
514  *termfreq_ptr = 0;
515  if (collfreq_ptr)
516  *collfreq_ptr = 0;
517  }
518 }
519 
522 {
524  auto i = valuestats.find(slot);
525  if (i == valuestats.end()) return 0;
526  return i->second.freq;
527 }
528 
529 std::string
531 {
533  auto i = valuestats.find(slot);
534  if (i == valuestats.end()) return string();
535  return i->second.lower_bound;
536 }
537 
538 std::string
540 {
542  auto i = valuestats.find(slot);
543  if (i == valuestats.end()) return string();
544  return i->second.upper_bound;
545 }
546 
549 {
550  // A zero-length document can't contain any terms, so we ignore such
551  // documents for the purposes of this lower bound.
552  return 1;
553 }
554 
557 {
558  // Not a very tight bound in general, but InMemory isn't really built for
559  // performance.
561 }
562 
565 {
566  // Not a very tight bound in general, but InMemory isn't really built for
567  // performance.
569  get_freqs(term, NULL, &cf);
570  return cf;
571 }
572 
575 {
577  return totdocs;
578 }
579 
582 {
584  return Xapian::docid(termlists.size());
585 }
586 
589 {
590  return totlen;
591 }
592 
595 {
597  if (!doc_exists(did)) {
598  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
599  string(" not found"));
600  }
601  return doclengths[did - 1];
602 }
603 
606 {
608  if (did == 0 || did > termlists.size() || !termlists[did - 1].is_valid)
609  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
610  string(" not found"));
611  // get_unique_terms() really ought to only count terms with wdf > 0, but
612  // that's expensive to calculate on demand, so for now let's just ensure
613  // unique_terms <= doclen.
614  Xapian::termcount terms = termlists[did - 1].terms.size();
615  return std::min(terms, Xapian::termcount(doclengths[did - 1]));
616 }
617 
620 {
622  if (did == 0 || did > termlists.size() || !termlists[did - 1].is_valid)
623  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
624  string(" not found"));
625  Xapian::termcount max_wdf = 0;
626  for (auto&& i : termlists[did - 1].terms) {
627  if (i.wdf > max_wdf) max_wdf = i.wdf;
628  }
629  return max_wdf;
630 }
631 
632 TermList *
634 {
636  Assert(did != 0);
637  if (!doc_exists(did)) {
638  // FIXME: the docid in this message will be local, not global
639  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
640  string(" not found"));
641  }
643  termlists[did - 1], doclengths[did - 1]);
644 }
645 
646 TermList *
648 {
650 }
651 
654 {
656  Assert(did != 0);
657  if (!lazy && !doc_exists(did)) {
658  // FIXME: the docid in this message will be local, not global
659  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
660  string(" not found"));
661  }
662  return new InMemoryDocument(this, did);
663 }
664 
// Look up the metadata value stored under `key`.
//
// Returns a copy of the stored value, or an empty string when `metadata` has
// no entry for `key`.
//
// NOTE(review): this listing skips a line right after the opening brace
// (listing line 668), so the first statement of the body may not be shown.
665 std::string
666 InMemoryDatabase::get_metadata(std::string_view key) const
667 {
669  auto i = metadata.find(key);
670  if (i == metadata.end())
671  return string();
672  return i->second;
673 }
674 
675 TermList *
677 {
679  if (metadata.empty()) return NULL;
680  // FIXME: nobody implemented this yet...
681  throw Xapian::UnimplementedError("InMemory backend doesn't currently implement Database::metadata_keys_begin()");
682 }
683 
// Set, replace or remove the metadata entry for `key`.
//
// A non-empty `value` is stored under `key`, overwriting any value already
// there.  An empty `value` erases the entry for `key` instead, so empty values
// are never stored by this function.
//
// The preprocessor branches select between passing the string_view key
// straight to the container (when the standard library advertises the
// corresponding feature-test macro) and building a temporary std::string key
// first.
//
// NOTE(review): this listing skips a line right after the opening brace
// (listing line 688), so the first statement of the body may not be shown.
684 void
685 InMemoryDatabase::set_metadata(std::string_view key,
686  std::string_view value)
687 {
689  if (!value.empty()) {
   // Insert a new entry or overwrite the existing one.
690 #ifdef __cpp_lib_associative_heterogeneous_insertion // C++26
691  metadata.insert_or_assign(key, value);
692 #else
693  metadata.insert_or_assign(string(key), value);
694 #endif
695  } else {
   // Empty value: drop the entry (a no-op if the key is absent).
696 #ifdef __cpp_lib_associative_heterogeneous_erasure // C++23
697  metadata.erase(key);
698 #else
699  metadata.erase(string(key));
700 #endif
701  }
702 }
703 
706  string_view tname) const
707 {
709  if (!doc_exists(did)) {
710  return 0;
711  }
712  const InMemoryDoc &doc = termlists[did - 1];
713 
714  InMemoryTermEntry temp;
715  temp.tname = tname;
716  auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
717  temp, InMemoryTermEntryLessThan());
718  if (t != doc.terms.end() && t->tname == tname) {
719  return t->positions.size();
720  }
721  return 0;
722 }
723 
726  string_view tname) const
727 {
729  if (usual(doc_exists(did))) {
730  const InMemoryDoc &doc = termlists[did - 1];
731 
732  InMemoryTermEntry temp;
733  temp.tname = tname;
734  auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
735  temp, InMemoryTermEntryLessThan());
736  if (t != doc.terms.end() && t->tname == tname) {
737  return new InMemoryPositionList(t->positions);
738  }
739  }
740  return nullptr;
741 }
742 
743 void
745  const map<Xapian::valueno, string> &values_)
746 {
748  if (did > valuelists.size()) {
749  valuelists.resize(did);
750  }
751  valuelists[did - 1] = values_;
752 
753  // Update the statistics.
754  for (auto&& j : values_) {
755  auto i = valuestats.insert(make_pair(j.first, ValueStats()));
756 
757  // Now, modify the stored statistics.
758  if ((i.first->second.freq)++ == 0) {
759  // If the value count was previously zero, set the upper and lower
760  // bounds to the newly added value.
761  i.first->second.lower_bound = j.second;
762  i.first->second.upper_bound = j.second;
763  } else {
764  // Otherwise, simply make sure they reflect the new value.
765  if (j.second < i.first->second.lower_bound) {
766  i.first->second.lower_bound = j.second;
767  }
768  if (j.second > i.first->second.upper_bound) {
769  i.first->second.upper_bound = j.second;
770  }
771  }
772  }
773 }
774 
775 // We implicitly commit each modification right away, so nothing to do here.
776 void
778 {
779 }
780 
781 // We implicitly commit each modification right away, so nothing to do here.
782 void
784 {
785 }
786 
787 void
789 {
791  if (!doc_exists(did)) {
792  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
793  string(" not found"));
794  }
795  termlists[did - 1].is_valid = false;
796  doclists[did - 1] = string();
797  for (auto&& j : valuelists[did - 1]) {
798  auto i = valuestats.find(j.first);
799  if (--(i->second.freq) == 0) {
800  i->second.lower_bound.resize(0);
801  i->second.upper_bound.resize(0);
802  }
803  }
804  valuelists[did - 1].clear();
805 
806  totlen -= doclengths[did - 1];
807  doclengths[did - 1] = 0;
808  totdocs--;
809  // A crude check, but it's hard to be more precise with the current
810  // InMemory structure without being very inefficient.
811  if (totdocs == 0) positions_present = false;
812 
813  for (auto&& i : termlists[did - 1].terms) {
814  auto t = postlists.find(i.tname);
815  Assert(t != postlists.end());
816  t->second.collection_freq -= i.wdf;
817  --t->second.term_freq;
818 
819  // Just invalidate erased doc ids - otherwise we need to erase
820  // in a vector (inefficient) and we break any posting lists
821  // iterating over this posting list.
822  InMemoryPosting temp;
823  temp.did = did;
824  auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
825  temp, InMemoryPostingLessThan());
826  if (p != t->second.docs.end() && p->did == did) {
827  p->valid = false;
828  }
829  }
830  termlists[did - 1].terms.clear();
831 }
832 
833 void
835  const Xapian::Document & document)
836 {
837  LOGCALL_VOID(DB, "InMemoryDatabase::replace_document", did | document);
838 
840 
841  if (doc_exists(did)) {
842  for (auto&& j : valuelists[did - 1]) {
843  auto i = valuestats.find(j.first);
844  if (--(i->second.freq) == 0) {
845  i->second.lower_bound.resize(0);
846  i->second.upper_bound.resize(0);
847  }
848  }
849 
850  totlen -= doclengths[did - 1];
851  totdocs--;
852  } else if (did > termlists.size()) {
853  termlists.resize(did);
854  termlists[did - 1].is_valid = true;
855  doclengths.resize(did);
856  doclists.resize(did);
857  valuelists.resize(did);
858  } else {
859  termlists[did - 1].is_valid = true;
860  }
861 
862  for (auto&& i : termlists[did - 1].terms) {
863  auto t = postlists.find(i.tname);
864  Assert(t != postlists.end());
865  t->second.collection_freq -= i.wdf;
866  --t->second.term_freq;
867 
868  // Just invalidate erased doc ids - otherwise we need to erase
869  // in a vector (inefficient) and we break any posting lists
870  // iterating over this posting list.
871  InMemoryPosting temp;
872  temp.did = did;
873  auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
874  temp, InMemoryPostingLessThan());
875  if (p != t->second.docs.end() && p->did == did) {
876  p->valid = false;
877  }
878  }
879 
880  doclengths[did - 1] = 0;
881  doclists[did - 1] = document.get_data();
882 
883  finish_add_doc(did, document);
884 }
885 
888 {
889  LOGCALL(DB, Xapian::docid, "InMemoryDatabase::add_document", document);
891 
892  Xapian::docid did = make_doc(document.get_data());
893 
894  finish_add_doc(did, document);
895 
896  RETURN(did);
897 }
898 
899 void
901 {
902  {
903  map<Xapian::valueno, string> values;
904  Xapian::ValueIterator k = document.values_begin();
905  for ( ; k != document.values_end(); ++k) {
906  values.insert(make_pair(k.get_valueno(), *k));
907  LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding value " <<
908  k.get_valueno() << " -> " << *k);
909  }
910  add_values(did, values);
911  }
912 
913  InMemoryDoc doc(true);
914  Xapian::TermIterator i = document.termlist_begin();
915  for ( ; i != document.termlist_end(); ++i) {
916  make_term(*i);
917 
918  LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding term " << *i);
920  if (j == i.positionlist_end()) {
921  /* Make sure the posting exists, even without a position. */
922  make_posting(&doc, *i, did, 0, i.get_wdf(), false);
923  } else {
924  positions_present = true;
925  for ( ; j != i.positionlist_end(); ++j) {
926  make_posting(&doc, *i, did, *j, i.get_wdf());
927  }
928  }
929 
930  Assert(did > 0 && did <= doclengths.size());
931  doclengths[did - 1] += i.get_wdf();
932  totlen += i.get_wdf();
933  postlists[*i].collection_freq += i.get_wdf();
934  ++postlists[*i].term_freq;
935  }
936  swap(termlists[did - 1], doc);
937 
938  totdocs++;
939 }
940 
// Make sure `postlists` holds an entry for `tname`.  When the term is not
// present yet a default-constructed entry is created for it; an entry that
// already exists is left untouched.
941 void
942 InMemoryDatabase::make_term(const string & tname)
943 {
944  postlists.try_emplace(tname);
945 }
946 
// Append storage for one new document and return its docid.
//
// Pushes a fresh InMemoryDoc onto `termlists`, a zero onto `doclengths` and
// `docdata` onto `doclists`.  The returned docid is the size of `termlists`
// after the push, i.e. the new document occupies index (docid - 1).
//
// Throws Xapian::DatabaseError if `termlists` already holds
// Xapian::docid(-1) entries, since no larger docid can be represented.
//
// NOTE(review): the return-type line is not visible in this listing (listing
// line 947 is missing); the symbol index on this page gives the declaration
// as `Xapian::docid make_doc(const std::string &docdata)`.
948 InMemoryDatabase::make_doc(const string & docdata)
949 {
950  if (rare(termlists.size() == Xapian::docid(-1))) {
951  // Really unlikely to actually happen for inmemory.
952  throw Xapian::DatabaseError("Run out of docids");
953  }
   // NOTE(review): the `true` argument presumably marks the new slot as
   // valid - confirm against the InMemoryDoc constructor.
954  termlists.push_back(InMemoryDoc(true));
955  doclengths.push_back(0);
956  doclists.push_back(docdata);
957 
   // termlists and doclengths are expected to stay the same size.
958  AssertEqParanoid(termlists.size(), doclengths.size());
959 
960  return Xapian::docid(termlists.size());
961 }
962 
964  const string & tname,
965  Xapian::docid did,
966  Xapian::termpos position,
967  Xapian::termcount wdf,
968  bool use_position)
969 {
970  Assert(doc);
971  Assert(postlists.find(tname) != postlists.end());
972  Assert(did > 0 && did <= termlists.size());
973  Assert(did > 0 && did <= doclengths.size());
974  Assert(doc_exists(did));
975 
976  postlists[tname].add_posting(did, wdf, position, use_position);
977  doc->add_posting(tname, wdf, position, use_position);
978 }
979 
980 bool
982 {
984  if (term.empty()) {
985  return totdocs != 0;
986  }
987  auto i = postlists.find(term);
988  if (i == postlists.end()) return false;
989  return (i->second.term_freq != 0);
990 }
991 
992 bool
994 {
996  return positions_present;
997 }
998 
// Create a term list over the terms in `postlists`.
//
// Returns a heap-allocated InMemoryAllTermsList constructed from the address
// of `postlists` and `prefix`.
// NOTE(review): presumably the list is restricted to terms starting with
// `prefix` - confirm against InMemoryAllTermsList.
//
// NOTE(review): this listing has dropped lines inside this function (listing
// lines 1002 and 1004).  In particular a constructor argument between
// `&postlists` and `prefix` is not visible here - consult the original source
// before changing this call.
999 TermList*
1000 InMemoryDatabase::open_allterms(string_view prefix) const
1001 {
1003  return new InMemoryAllTermsList(&postlists,
1005  prefix);
1006 }
1007 
1008 void
1010  Xapian::docid& last) const
1011 {
1013  first = 1;
1014  last = Xapian::docid(termlists.size());
1015  if (last == 0 || last == totdocs) {
1016  // Empty database or contiguous range starting at 1.
1017  return;
1018  }
1019  while (!termlists[first - 1].is_valid) ++first;
1020  while (!termlists[last - 1].is_valid) --last;
1021 }
1022 
1025 {
1026  // InMemoryDatabase doesn't really distinguish writable and read-only.
1027  return this;
1028 }
1029 
1030 void
1032 {
1033  throw Xapian::DatabaseClosedError("Database has been closed");
1034 }
1035 
1036 string
1038 {
1039  return "InMemory";
1040 }
1041 
1042 #ifdef DISABLE_GPL_LIBXAPIAN
1043 # error GPL source we cannot relicense included in libxapian
1044 #endif
A PostList iterating all docids when they form a contiguous range.
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
Xapian::docid get_docid() const
Return the current docid.
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
bool at_end() const
Return true if the current position is past the last entry in this list.
InMemoryAllDocsPostList(const InMemoryDatabase *db)
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
std::string get_description() const
Return a string description of this object.
Xapian::termcount get_wdf_upper_bound() const
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
class for alltermslists over several databases
A database held entirely in memory.
Xapian::termcount get_wdfdocmax(Xapian::docid did) const
Get the max wdf in document.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
static void throw_database_closed()
TermList * open_term_list_direct(Xapian::docid did) const
Like open_term_list() but without MultiTermList wrapper.
Xapian::termcount get_doclength(Xapian::docid did) const
TermList * open_term_list(Xapian::docid did) const
Xapian::termcount get_wdf_upper_bound(std::string_view term) const
Get an upper bound on the wdf of term term.
std::vector< InMemoryDoc > termlists
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
TermList * open_metadata_keylist(std::string_view prefix) const
Open a termlist returning each metadata key.
void commit()
Implementation of virtual methods: see Database for details.
std::vector< Xapian::termcount > doclengths
void cancel()
Cancel pending modifications to the database.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
Find lowest and highest docids actually in use.
std::string get_description() const
Return a string describing this object.
void add_values(Xapian::docid did, const std::map< Xapian::valueno, std::string > &values_)
std::map< Xapian::valueno, ValueStats > valuestats
PostList * open_post_list(std::string_view tname) const
Return a PostList suitable for use in a PostingIterator.
Xapian::docid add_document(const Xapian::Document &document)
void make_term(const std::string &tname)
Xapian::docid get_lastdocid() const
Return the last used document id of this (sub) database.
void delete_document(Xapian::docid did)
LeafPostList * open_leaf_post_list(std::string_view term, bool need_read_pos) const
Create a LeafPostList for use during a match.
PositionList * open_position_list(Xapian::docid did, std::string_view tname) const
Xapian::docid make_doc(const std::string &docdata)
Xapian::termcount positionlist_count(Xapian::docid did, std::string_view tname) const
bool reopen()
Reopen the database to the latest available revision.
Xapian::Database::Internal * update_lock(int flags)
Lock a read-only database for writing or unlock a writable database.
void make_posting(InMemoryDoc *doc, const std::string &tname, Xapian::docid did, Xapian::termpos position, Xapian::termcount wdf, bool use_position=true)
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in document.
bool doc_exists(Xapian::docid did) const
std::vector< std::string > doclists
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Open a handle on a document.
Xapian::doccount get_doccount() const
bool term_exists(std::string_view term) const
void close()
Close the database.
bool is_closed() const
TermList * open_allterms(std::string_view prefix) const
friend class InMemoryDocument
std::map< std::string, std::string, std::less<> > metadata
Xapian::totallength totlen
std::map< std::string, InMemoryTerm, std::less<> > postlists
Xapian::doccount totdocs
std::vector< std::map< Xapian::valueno, std::string > > valuelists
friend class InMemoryAllDocsPostList
void finish_add_doc(Xapian::docid did, const Xapian::Document &document)
void set_metadata(std::string_view key, std::string_view value)
Set the metadata associated with a given key.
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
std::string get_metadata(std::string_view key) const
Get the metadata associated with a given key.
Xapian::totallength get_total_length() const
Return the total length of all documents in this database.
bool has_positions() const
Check whether this database contains any positional information.
InMemoryDatabase()
Create and open an in-memory database.
Class representing a document and the terms indexing it.
void add_posting(const std::string &tname, Xapian::termcount wdf, Xapian::termpos position, bool use_position)
std::vector< InMemoryTermEntry > terms
PositionList from an InMemory DB or a Document object.
void assign(Xapian::VecCOW< Xapian::termpos > &&positions_)
Move assign positional data.
A PostList in an inmemory database.
std::string get_description() const
Return a string description of this object.
Xapian::termcount wdf_upper_bound
Xapian::docid get_docid() const
Return the current docid.
InMemoryPositionList mypositions
List of positions of the current term.
InMemoryPostList(const InMemoryDatabase *db, const InMemoryTerm &imterm, std::string_view term_)
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Xapian::termcount get_wdfdocmax() const
std::vector< InMemoryPosting >::const_iterator pos
bool at_end() const
Return true if the current position is past the last entry in this list.
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
Xapian::termcount get_wdf_upper_bound() const
std::vector< InMemoryPosting >::const_iterator end
Xapian::VecCOW< Xapian::termpos > positions
Xapian::docid did
Xapian::termcount wdf
Xapian::VecCOW< Xapian::termpos > positions
Xapian::termcount wdf
InMemoryTermList(Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db, Xapian::docid did, const InMemoryDoc &doc, Xapian::termcount len)
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
void accumulate_stats(Xapian::Internal::ExpandStats &stats) const
Collate weighting information for the current term.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
TermList * next()
Advance the current position to the next term in the termlist.
std::vector< InMemoryTermEntry >::const_iterator pos
Xapian::termcount terms
TermList * skip_to(std::string_view term)
Skip forward to the specified term.
Xapian::termcount document_length
std::vector< InMemoryTermEntry >::const_iterator end
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
PositionList * positionlist_begin() const
Return PositionList for the current position.
Xapian::termcount collection_freq
void add_posting(Xapian::docid did, Xapian::termcount wdf, Xapian::termpos position, bool use_position)
Xapian::termcount term_freq
Abstract base class for leaf postlists.
Definition: leafpostlist.h:40
Xapian::termcount collfreq
The collection frequency of the term.
Definition: leafpostlist.h:57
std::string term
The term name for this postlist (empty for an alldocs postlist).
Definition: leafpostlist.h:51
Indicates an attempt to access a closed database.
Definition: error.h:1085
DatabaseError indicates some sort of database related error.
Definition: error.h:355
Virtual base class for Database internals.
void dtor_called()
Helper to process uncommitted changes when a writable db is destroyed.
Indicates an attempt to access a document not present in the database.
Definition: error.h:662
Abstract base class for a document.
Class representing a document.
Definition: document.h:64
std::string get_data() const
Get the document data.
Definition: document.cc:75
ValueIterator values_begin() const
Start iterating the values in this document.
Definition: document.cc:208
TermIterator termlist_end() const noexcept
End iterator corresponding to termlist_begin().
Definition: document.h:219
TermIterator termlist_begin() const
Start iterating the terms in this document.
Definition: document.cc:179
ValueIterator values_end() const noexcept
End iterator corresponding to values_begin().
Definition: document.h:259
Collates statistics while calculating term weight in an ESet.
Definition: expandweight.h:37
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
Definition: expandweight.h:71
Abstract base class for postlists.
Definition: postlist.h:40
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
Xapian::doccount termfreq
Estimate of the number of documents this PostList will return.
Definition: postlist.h:52
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:83
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
Class for iterating over term positions.
Abstract base class for termlists.
Definition: termlist.h:42
std::string current_term
The current term.
Definition: termlist.h:54
size_t shard_index
Which shard of a multidatabase this is from.
Definition: termlist.h:126
Class for iterating over a list of terms.
Definition: termiterator.h:41
PositionIterator positionlist_end() const noexcept
Return an end PositionIterator for the current term.
Definition: termiterator.h:109
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
PositionIterator positionlist_begin() const
Return a PositionIterator for the current term.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:313
Class for iterating over document values.
Definition: valueiterator.h:39
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
#define usual(COND)
Definition: config.h:608
#define rare(COND)
Definition: config.h:607
Iterate all document ids when they form a contiguous range.
string term
PositionList * p
Xapian::termpos pos
Debug logging macros.
#define RETURN(...)
Definition: debuglog.h:484
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:478
#define LOGLINE(a, b)
Definition: debuglog.h:485
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:479
Hierarchy of classes which Xapian can throw as exceptions.
Collate statistics and calculate the term weights for the ESet.
#define false
Definition: header.h:9
Iterate all terms in an inmemory db.
C++ class definition for inmemory database access.
A document read from an InMemoryDatabase.
string str(int value)
Convert int to std::string.
Definition: str.cc:91
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:114
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
#define Assert(COND)
Definition: omassert.h:122
Convert types to std::string.
Class to hold statistics for a given slot.
Definition: valuestats.h:28
Class for iterating over document values.
Statistics about values.