xapian-core  1.4.21
inmemory_database.cc
Go to the documentation of this file.
1 
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2014,2017 Olly Betts
7  * Copyright 2006,2009 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 
27 #include "inmemory_database.h"
28 
29 #include "debuglog.h"
30 
31 #include "expand/expandweight.h"
32 #include "inmemory_document.h"
33 #include "inmemory_alltermslist.h"
34 #include "str.h"
35 #include "backends/valuestats.h"
36 
37 #include <algorithm>
38 #include <string>
39 #include <vector>
40 #include <map>
41 
42 #include <xapian/error.h>
43 #include <xapian/valueiterator.h>
44 
45 using std::make_pair;
47 
48 inline void
50 {
51  // Add document to right place in list
52  vector<InMemoryPosting>::iterator p;
53  p = lower_bound(docs.begin(), docs.end(),
54  post, InMemoryPostingLessThan());
55  if (p == docs.end() || InMemoryPostingLessThan()(post, *p)) {
56  docs.insert(p, post);
57  } else if (!p->valid) {
58  *p = post;
59  } else {
60  (*p).merge(post);
61  }
62 }
63 
64 inline void
66 {
67  // Add document to right place in list
68  vector<InMemoryTermEntry>::iterator p;
69  p = lower_bound(terms.begin(), terms.end(),
71  if (p == terms.end() || InMemoryTermEntryLessThan()(post, *p)) {
72  terms.insert(p, post);
73  } else {
74  (*p).merge(post);
75  }
76 }
77 
79 // Postlist //
81 
83  const InMemoryTerm & imterm,
84  const std::string & term_)
85  : LeafPostList(term_),
86  pos(imterm.docs.begin()),
87  end(imterm.docs.end()),
88  termfreq(imterm.term_freq),
89  started(false),
90  db(db_),
91  wdf_upper_bound(0)
92 {
93  while (pos != end && !pos->valid) ++pos;
94  if (pos != end) {
95  auto first_wdf = (*pos).wdf;
96  wdf_upper_bound = max(first_wdf, imterm.collection_freq - first_wdf);
97  }
98 }
99 
102 {
103  return termfreq;
104 }
105 
108 {
110  Assert(started);
111  Assert(!at_end());
112  return (*pos).did;
113 }
114 
115 PostList *
116 InMemoryPostList::next(double /*w_min*/)
117 {
119  if (started) {
120  Assert(!at_end());
121  ++pos;
122  while (pos != end && !pos->valid) ++pos;
123  } else {
124  started = true;
125  }
126  return NULL;
127 }
128 
129 PostList *
131 {
133  // FIXME - see if we can make this more efficient, perhaps using a better
134  // data structure. Note, though, that a binary search of
135  // the remaining list may NOT be a good idea: search time is then
136  // O(log {length of list}), as opposed to O(distance we want to skip).
137  // Since we will frequently only be skipping a short distance, this
138  // could well be worse.
139 
140  // If we've not started, it's OK to call skip_to().
141  Assert(!at_end() || !started);
142  started = true;
143  while (!at_end() && (*pos).did < did) {
144  (void) next(w_min);
145  }
146  return NULL;
147 }
148 
149 bool
151 {
153  return (pos == end);
154 }
155 
156 string
158 {
159  return "InMemoryPostList " + str(termfreq);
160 }
161 
164 {
166  return db->get_doclength(get_docid());
167 }
168 
171 {
172  return db->get_unique_terms(get_docid());
173 }
174 
175 PositionList *
177 {
179  mypositions.set_data(pos->positions);
180  return &mypositions;
181 }
182 
183 PositionList *
185 {
187  return new InMemoryPositionList(pos->positions);
188 }
189 
192 {
194  return (*pos).wdf;
195 }
196 
199 {
201  return wdf_upper_bound;
202 }
203 
205 // Termlist //
207 
209  Xapian::docid did_,
210  const InMemoryDoc & doc,
211  Xapian::termcount len)
212  : pos(doc.terms.begin()), end(doc.terms.end()), terms(doc.terms.size()),
213  started(false), db(db_), did(did_), document_length(len)
214 {
215  LOGLINE(DB, "InMemoryTermList::InMemoryTermList(): " <<
216  terms << " terms starting from " << pos->tname);
217 }
218 
221 {
223  Assert(started);
224  Assert(!at_end());
225  return (*pos).wdf;
226 }
227 
230 {
232  Assert(started);
233  Assert(!at_end());
234 
235  Xapian::doccount tf;
236  db->get_freqs((*pos).tname, &tf, NULL);
237  return tf;
238 }
239 
242 {
244  return terms;
245 }
246 
247 void
249 {
251  Assert(started);
252  Assert(!at_end());
253  stats.accumulate(shard_index,
256  db->get_doccount());
257 }
258 
259 string
261 {
263  Assert(started);
264  Assert(!at_end());
265  return (*pos).tname;
266 }
267 
268 TermList *
270 {
272  if (started) {
273  Assert(!at_end());
274  ++pos;
275  } else {
276  started = true;
277  }
278  return NULL;
279 }
280 
281 TermList *
282 InMemoryTermList::skip_to(const string & term)
283 {
284  if (rare(db->is_closed()))
286 
287  while (pos != end && pos->tname < term) {
288  ++pos;
289  }
290 
291  started = true;
292  return NULL;
293 }
294 
295 bool
297 {
299  Assert(started);
300  return (pos == end);
301 }
302 
305 {
307  return db->positionlist_count(did, (*pos).tname);
308 }
309 
312 {
314  return Xapian::PositionIterator(db->open_position_list(did, (*pos).tname));
315 }
316 
318 // InMemoryAllDocsPostList //
320 
322  : LeafPostList(std::string()), did(0), db(db_)
323 {
324 }
325 
328 {
330  return db->totdocs;
331 }
332 
335 {
337  Assert(did > 0);
338  Assert(did <= db->termlists.size());
339  Assert(db->termlists[did - 1].is_valid);
340  return did;
341 }
342 
345 {
347  return db->get_doclength(did);
348 }
349 
352 {
353  return db->get_unique_terms(did);
354 }
355 
358 {
359  return 1;
360 }
361 
362 PositionList *
364 {
365  throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
366 }
367 
368 PositionList *
370 {
371  throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
372 }
373 
374 PostList *
376 {
378  Assert(!at_end());
379  do {
380  ++did;
381  } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid);
382  return NULL;
383 }
384 
385 PostList *
387 {
389  Assert(!at_end());
390  if (did <= did_) {
391  did = did_;
392  while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) {
393  ++did;
394  }
395  }
396  return NULL;
397 }
398 
399 bool
401 {
403  return (did > db->termlists.size());
404 }
405 
408 {
409  return 1;
410 }
411 
412 string
414 {
415  return "InMemoryAllDocsPostList " + str(did);
416 }
417 
419 // Actual database class //
421 
423  : totdocs(0), totlen(0), positions_present(false), closed(false)
424 {
425  // Updates are applied immediately so we can't support transactions.
427 
428  // We keep an empty entry in postlists for convenience of implementing
429  // allterms iteration and returning a PostList for an absent term.
430  postlists.insert(make_pair(string(), InMemoryTerm()));
431 }
432 
434 {
435  dtor_called();
436 }
437 
438 bool
440 {
442  return false;
443 }
444 
445 void
447 {
448  // Free all the resources, and mark the db as closed.
449  postlists.clear();
450  termlists.clear();
451  doclists.clear();
452  valuelists.clear();
453  valuestats.clear();
454  doclengths.clear();
455  metadata.clear();
456  closed = true;
457 }
458 
459 LeafPostList *
460 InMemoryDatabase::open_post_list(const string & tname) const
461 {
463  if (tname.empty()) {
464  return new InMemoryAllDocsPostList(this);
465  }
466  map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
467  if (i == postlists.end() || i->second.term_freq == 0) {
468  i = postlists.begin();
469  // Check that our dummy entry for string() is present.
470  Assert(i->first.empty());
471  }
472  return new InMemoryPostList(this, i->second, tname);
473 }
474 
475 bool
477 {
479  return (did > 0 && did <= termlists.size() && termlists[did - 1].is_valid);
480 }
481 
482 void
483 InMemoryDatabase::get_freqs(const string & term,
484  Xapian::doccount * termfreq_ptr,
485  Xapian::termcount * collfreq_ptr) const
486 {
488  map<string, InMemoryTerm>::const_iterator i = postlists.find(term);
489  if (i != postlists.end()) {
490  if (termfreq_ptr)
491  *termfreq_ptr = i->second.term_freq;
492  if (collfreq_ptr)
493  *collfreq_ptr = i->second.collection_freq;
494  } else {
495  if (termfreq_ptr)
496  *termfreq_ptr = 0;
497  if (collfreq_ptr)
498  *collfreq_ptr = 0;
499  }
500 }
501 
504 {
506  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
507  if (i == valuestats.end()) return 0;
508  return i->second.freq;
509 }
510 
511 std::string
513 {
515  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
516  if (i == valuestats.end()) return string();
517  return i->second.lower_bound;
518 }
519 
520 std::string
522 {
524  map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
525  if (i == valuestats.end()) return string();
526  return i->second.upper_bound;
527 }
528 
531 {
533  return totdocs;
534 }
535 
538 {
540  return termlists.size();
541 }
542 
545 {
546  return totlen;
547 }
548 
551 {
553  if (!doc_exists(did)) {
554  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
555  string(" not found"));
556  }
557  return doclengths[did - 1];
558 }
559 
562 {
564  if (did == 0 || did > termlists.size() || !termlists[did - 1].is_valid)
565  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
566  string(" not found"));
567  // get_unique_terms() really ought to only count terms with wdf > 0, but
568  // that's expensive to calculate on demand, so for now let's just ensure
569  // unique_terms <= doclen.
570  Xapian::termcount terms = termlists[did - 1].terms.size();
571  return std::min(terms, Xapian::termcount(doclengths[did - 1]));
572 }
573 
574 TermList *
576 {
578  Assert(did != 0);
579  if (!doc_exists(did)) {
580  // FIXME: the docid in this message will be local, not global
581  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
582  string(" not found"));
583  }
585  termlists[did - 1], doclengths[did - 1]);
586 }
587 
590 {
592  Assert(did != 0);
593  if (!lazy && !doc_exists(did)) {
594  // FIXME: the docid in this message will be local, not global
595  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
596  string(" not found"));
597  }
598  return new InMemoryDocument(this, did);
599 }
600 
601 std::string
602 InMemoryDatabase::get_metadata(const std::string & key) const
603 {
605  map<string, string>::const_iterator i = metadata.find(key);
606  if (i == metadata.end())
607  return string();
608  return i->second;
609 }
610 
611 TermList *
613 {
615  if (metadata.empty()) return NULL;
616  // FIXME: nobody implemented this yet...
617  throw Xapian::UnimplementedError("InMemory backend doesn't currently implement Database::metadata_keys_begin()");
618 }
619 
620 void
621 InMemoryDatabase::set_metadata(const std::string & key,
622  const std::string & value)
623 {
625  if (!value.empty()) {
626  metadata[key] = value;
627  } else {
628  metadata.erase(key);
629  }
630 }
631 
634  const string & tname) const
635 {
637  if (!doc_exists(did)) {
638  return 0;
639  }
640  const InMemoryDoc &doc = termlists[did - 1];
641 
642  InMemoryTermEntry temp;
643  temp.tname = tname;
644  auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
645  temp, InMemoryTermEntryLessThan());
646  if (t != doc.terms.end() && t->tname == tname) {
647  return t->positions.size();
648  }
649  return 0;
650 }
651 
652 PositionList *
654  const string & tname) const
655 {
657  if (usual(doc_exists(did))) {
658  const InMemoryDoc &doc = termlists[did - 1];
659 
660  InMemoryTermEntry temp;
661  temp.tname = tname;
662  auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
663  temp, InMemoryTermEntryLessThan());
664  if (t != doc.terms.end() && t->tname == tname) {
665  return new InMemoryPositionList(t->positions);
666  }
667  }
668  return new InMemoryPositionList(false);
669 }
670 
671 void
673  const map<Xapian::valueno, string> &values_)
674 {
676  if (did > valuelists.size()) {
677  valuelists.resize(did);
678  }
679  valuelists[did - 1] = values_;
680 
681  // Update the statistics.
682  map<Xapian::valueno, string>::const_iterator j;
683  for (j = values_.begin(); j != values_.end(); ++j) {
684  std::pair<map<Xapian::valueno, ValueStats>::iterator, bool> i;
685  i = valuestats.insert(make_pair(j->first, ValueStats()));
686 
687  // Now, modify the stored statistics.
688  if ((i.first->second.freq)++ == 0) {
689  // If the value count was previously zero, set the upper and lower
690  // bounds to the newly added value.
691  i.first->second.lower_bound = j->second;
692  i.first->second.upper_bound = j->second;
693  } else {
694  // Otherwise, simply make sure they reflect the new value.
695  if (j->second < i.first->second.lower_bound) {
696  i.first->second.lower_bound = j->second;
697  }
698  if (j->second > i.first->second.upper_bound) {
699  i.first->second.upper_bound = j->second;
700  }
701  }
702  }
703 }
704 
705 // We implicitly commit each modification right away, so nothing to do here.
706 void
708 {
709 }
710 
711 // We implicitly commit each modification right away, so nothing to do here.
712 void
714 {
715 }
716 
717 void
719 {
721  if (!doc_exists(did)) {
722  throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
723  string(" not found"));
724  }
725  termlists[did - 1].is_valid = false;
726  doclists[did - 1] = string();
727  map<Xapian::valueno, string>::const_iterator j;
728  for (j = valuelists[did - 1].begin(); j != valuelists[did - 1].end(); ++j) {
729  map<Xapian::valueno, ValueStats>::iterator i;
730  i = valuestats.find(j->first);
731  if (--(i->second.freq) == 0) {
732  i->second.lower_bound.resize(0);
733  i->second.upper_bound.resize(0);
734  }
735  }
736  valuelists[did - 1].clear();
737 
738  totlen -= doclengths[did - 1];
739  doclengths[did - 1] = 0;
740  totdocs--;
741  // A crude check, but it's hard to be more precise with the current
742  // InMemory structure without being very inefficient.
743  if (totdocs == 0) positions_present = false;
744 
745  vector<InMemoryTermEntry>::const_iterator i;
746  for (i = termlists[did - 1].terms.begin();
747  i != termlists[did - 1].terms.end();
748  ++i) {
749  map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
750  Assert(t != postlists.end());
751  t->second.collection_freq -= i->wdf;
752  --t->second.term_freq;
753 
754  // Just invalidate erased doc ids - otherwise we need to erase
755  // in a vector (inefficient) and we break any posting lists
756  // iterating over this posting list.
757  InMemoryPosting temp;
758  temp.did = did;
759  auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
760  temp, InMemoryPostingLessThan());
761  if (p != t->second.docs.end() && p->did == did) {
762  p->valid = false;
763  }
764  }
765  termlists[did - 1].terms.clear();
766 }
767 
768 void
770  const Xapian::Document & document)
771 {
772  LOGCALL_VOID(DB, "InMemoryDatabase::replace_document", did | document);
773 
775 
776  if (doc_exists(did)) {
777  map<Xapian::valueno, string>::const_iterator j;
778  for (j = valuelists[did - 1].begin(); j != valuelists[did - 1].end(); ++j) {
779  map<Xapian::valueno, ValueStats>::iterator i;
780  i = valuestats.find(j->first);
781  if (--(i->second.freq) == 0) {
782  i->second.lower_bound.resize(0);
783  i->second.upper_bound.resize(0);
784  }
785  }
786 
787  totlen -= doclengths[did - 1];
788  totdocs--;
789  } else if (did > termlists.size()) {
790  termlists.resize(did);
791  termlists[did - 1].is_valid = true;
792  doclengths.resize(did);
793  doclists.resize(did);
794  valuelists.resize(did);
795  } else {
796  termlists[did - 1].is_valid = true;
797  }
798 
799  vector<InMemoryTermEntry>::const_iterator i;
800  for (i = termlists[did - 1].terms.begin();
801  i != termlists[did - 1].terms.end();
802  ++i) {
803  map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
804  Assert(t != postlists.end());
805  t->second.collection_freq -= i->wdf;
806  --t->second.term_freq;
807 
808  // Just invalidate erased doc ids - otherwise we need to erase
809  // in a vector (inefficient) and we break any posting lists
810  // iterating over this posting list.
811  InMemoryPosting temp;
812  temp.did = did;
813  auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
814  temp, InMemoryPostingLessThan());
815  if (p != t->second.docs.end() && p->did == did) {
816  p->valid = false;
817  }
818  }
819 
820  doclengths[did - 1] = 0;
821  doclists[did - 1] = document.get_data();
822 
823  finish_add_doc(did, document);
824 }
825 
828 {
829  LOGCALL(DB, Xapian::docid, "InMemoryDatabase::add_document", document);
831 
832  Xapian::docid did = make_doc(document.get_data());
833 
834  finish_add_doc(did, document);
835 
836  RETURN(did);
837 }
838 
839 void
841 {
842  {
843  map<Xapian::valueno, string> values;
844  Xapian::ValueIterator k = document.values_begin();
845  for ( ; k != document.values_end(); ++k) {
846  values.insert(make_pair(k.get_valueno(), *k));
847  LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding value " <<
848  k.get_valueno() << " -> " << *k);
849  }
850  add_values(did, values);
851  }
852 
853  InMemoryDoc doc(true);
854  Xapian::TermIterator i = document.termlist_begin();
855  for ( ; i != document.termlist_end(); ++i) {
856  make_term(*i);
857 
858  LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding term " << *i);
860  if (j == i.positionlist_end()) {
861  /* Make sure the posting exists, even without a position. */
862  make_posting(&doc, *i, did, 0, i.get_wdf(), false);
863  } else {
864  positions_present = true;
865  for ( ; j != i.positionlist_end(); ++j) {
866  make_posting(&doc, *i, did, *j, i.get_wdf());
867  }
868  }
869 
870  Assert(did > 0 && did <= doclengths.size());
871  doclengths[did - 1] += i.get_wdf();
872  totlen += i.get_wdf();
873  postlists[*i].collection_freq += i.get_wdf();
874  ++postlists[*i].term_freq;
875  }
876  swap(termlists[did - 1], doc);
877 
878  totdocs++;
879 }
880 
881 void
882 InMemoryDatabase::make_term(const string & tname)
883 {
884  postlists[tname]; // Initialise, if not already there.
885 }
886 
888 InMemoryDatabase::make_doc(const string & docdata)
889 {
890  termlists.push_back(InMemoryDoc(true));
891  doclengths.push_back(0);
892  doclists.push_back(docdata);
893 
894  AssertEqParanoid(termlists.size(), doclengths.size());
895 
896  return termlists.size();
897 }
898 
900  const string & tname,
901  Xapian::docid did,
902  Xapian::termpos position,
903  Xapian::termcount wdf,
904  bool use_position)
905 {
906  Assert(doc);
907  Assert(postlists.find(tname) != postlists.end());
908  Assert(did > 0 && did <= termlists.size());
909  Assert(did > 0 && did <= doclengths.size());
910  Assert(doc_exists(did));
911 
912  // Make the posting
913  InMemoryPosting posting;
914  posting.did = did;
915  if (use_position) {
916  posting.positions.push_back(position);
917  }
918  posting.wdf = wdf;
919  posting.valid = true;
920 
921  // Now record the posting
922  postlists[tname].add_posting(posting);
923 
924  // Make the termentry
925  InMemoryTermEntry termentry;
926  termentry.tname = tname;
927  if (use_position) {
928  termentry.positions.push_back(position);
929  }
930  termentry.wdf = wdf;
931 
932  // Now record the termentry
933  doc->add_posting(termentry);
934 }
935 
936 bool
937 InMemoryDatabase::term_exists(const string & tname) const
938 {
940  Assert(!tname.empty());
941  map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
942  if (i == postlists.end()) return false;
943  return (i->second.term_freq != 0);
944 }
945 
946 bool
948 {
950  return positions_present;
951 }
952 
953 TermList *
954 InMemoryDatabase::open_allterms(const string & prefix) const
955 {
957  return new InMemoryAllTermsList(&postlists,
959  prefix);
960 }
961 
962 void
964 {
965  throw Xapian::DatabaseClosedError("Database has been closed");
966 }
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
vector< InMemoryPosting >::const_iterator end
#define RETURN(A)
Definition: debuglog.h:482
#define Assert(COND)
Definition: omassert.h:122
vector< InMemoryTermEntry >::const_iterator end
InMemoryPostList(const InMemoryDatabase *db, const InMemoryTerm &imterm, const std::string &term_)
string get_description() const
Return a string description of this object.
Class to hold statistics for a given slot.
Definition: valuestats.h:29
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
Abstract base class for postlists.
Definition: postlist.h:37
Xapian::docid make_doc(const string &docdata)
Statistics about values.
Indicates an attempt to access a closed database.
Definition: error.h:1097
vector< Xapian::termpos > positions
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
A database held entirely in memory.
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Open a document.
Xapian::totallength get_total_length() const
Return the total length of all documents in this database.
void merge(const InMemoryPosting &post)
void make_posting(InMemoryDoc *doc, const string &tname, Xapian::docid did, Xapian::termpos position, Xapian::termcount wdf, bool use_position=true)
bool at_end() const
Return true if the current position is past the last entry in this list.
string get_description() const
Return a string description of this object.
Xapian::termcount wdf_upper_bound
void commit()
Implementation of virtual methods: see Database for details.
#define usual(COND)
Definition: config.h:574
Iterate all terms in an inmemory db.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
friend class InMemoryAllDocsPostList
bool is_closed() const
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139
Xapian::doccount get_termfreq() const
Return the exact term frequency.
ValueIterator values_begin() const
Iterator for the values in this document.
Definition: omdocument.cc:210
Xapian::termcount get_doclength() const
Return the length of current document.
Xapian::totallength totlen
Xapian::termcount get_wdf_upper_bound() const
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
vector< Xapian::termcount > doclengths
void finish_add_doc(Xapian::docid did, const Xapian::Document &document)
A document in the database, possibly plus modifications.
Definition: document.h:41
Class for iterating over document values.
Definition: valueiterator.h:40
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
A document read from an InMemoryDatabase.
Convert types to std::string.
Xapian::termcount terms
void cancel()
Implementation of virtual methods: see Database for details.
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
vector< InMemoryTermEntry >::const_iterator pos
Xapian::docid get_docid() const
Return the current docid.
LeafPostList * open_post_list(const string &tname) const
Open a posting list.
bool has_positions() const
Check whether this database contains any positional information.
TermList * open_allterms(const string &prefix) const
Open an allterms list.
Abstract base class for leaf postlists.
Definition: leafpostlist.h:38
#define false
Definition: header.h:9
Xapian::doccount get_doccount() const
Return the number of docs in this (sub) database.
TermIterator termlist_end() const
Equivalent end iterator for termlist_begin().
Definition: document.h:260
void add_posting(const InMemoryPosting &post)
void accumulate_stats(Xapian::Internal::ExpandStats &stats) const
Collate weighting information for the current term.
vector< Xapian::termpos > positions
#define rare(COND)
Definition: config.h:573
InMemoryTermList(Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db, Xapian::docid did, const InMemoryDoc &doc, Xapian::termcount len)
Xapian::termcount document_length
Xapian::termcount wdf
PositionList * open_position_list(Xapian::docid did, const string &tname) const
Open a position list for the given term in the given document.
enum Xapian::Database::Internal::@2 transaction_state
Transaction state.
Hierarchy of classes which Xapian can throw as exceptions.
vector< InMemoryPosting > docs
Class for iterating over a list of terms.
Definition: termiterator.h:41
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
bool at_end() const
Return true if the current position is past the last entry in this list.
PositionIterator positionlist_end() const
Return an end PositionIterator for the current term.
Definition: termiterator.h:104
Class representing a document and the terms indexing it.
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
Definition: expandweight.h:76
InMemoryPositionList mypositions
List of positions of the current term.
Xapian::docid add_document(const Xapian::Document &document)
Implementation of virtual methods: see Database for details.
Xapian::docid get_docid() const
Return the current docid.
bool doc_exists(Xapian::docid did) const
Xapian::docid get_lastdocid() const
Return the last used document id of this (sub) database.
Collate statistics and calculate the term weights for the ESet.
vector< InMemoryTermEntry > terms
Xapian::termcount collection_freq
InMemoryDatabase()
Create and open an in-memory database.
string get_termname() const
Return the termname at the current position.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Implementation of virtual methods: see Database for details.
std::map< string, string > metadata
InMemoryAllDocsPostList(const InMemoryDatabase *db)
Xapian::doccount totdocs
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
A PostList in an inmemory database.
Xapian::termcount wdf
string str(int value)
Convert int to std::string.
Definition: str.cc:90
A position list in an inmemory database.
TermList * open_term_list(Xapian::docid did) const
Open a term list.
Class for iterating over document values.
C++ class definition for inmemory database access.
std::map< Xapian::valueno, ValueStats > valuestats
Class for iterating over term positions.
Xapian::termcount get_wdf_upper_bound() const
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
void close()
Close the database.
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
ValueIterator values_end() const
Equivalent end iterator for values_begin().
Definition: document.h:271
Indicates an attempt to access a document not present in the database.
Definition: error.h:674
vector< std::map< Xapian::valueno, string > > valuelists
void get_freqs(const string &term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
Xapian::doccount termfreq
void add_values(Xapian::docid did, const map< Xapian::valueno, string > &values_)
static void throw_database_closed()
TermList * skip_to(const std::string &term)
Skip forward to the specified term.
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in a document.
#define AssertEqParanoid(A, B)
Definition: omassert.h:131
class for alltermslists over several databases
vector< std::string > doclists
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian::termcount positionlist_count(Xapian::docid did, const string &tname) const
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
map< string, InMemoryTerm > postlists
void delete_document(Xapian::docid did)
Implementation of virtual methods: see Database for details.
size_t shard_index
Which shard of a multidatabase this is from.
Definition: termlist.h:114
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
bool term_exists(const string &tname) const
Check whether a given term is in the database.
Collates statistics while calculating term weight in an ESet.
Definition: expandweight.h:37
bool reopen()
Reopen the database to the latest available revision.
void make_term(const string &tname)
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:83
void dtor_called()
Internal method to perform cleanup when a writable database is destroyed with uncommitted changes...
Definition: database.cc:87
Xapian::termcount get_doclength(Xapian::docid did) const
Get the length of a given document.
vector< InMemoryPosting >::const_iterator pos
TermList * open_metadata_keylist(const std::string &prefix) const
Open a termlist returning each metadata key.
friend class InMemoryDocument
Xapian::termcount get_doclength() const
Return the length of current document.
Xapian::docid did
#define LOGLINE(a, b)
Definition: debuglog.h:483
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
std::string get_metadata(const std::string &key) const
Get the metadata associated with a given key.
std::string get_data() const
Get data stored in the document.
Definition: omdocument.cc:71
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
void set_metadata(const std::string &key, const std::string &value)
Set the metadata associated with a given key.
Xapian::doccount get_termfreq() const
Return the exact term frequency.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
Xapian::PositionIterator positionlist_begin() const
Return a PositionIterator for the current position.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
TermIterator termlist_begin() const
Iterator for the terms in this document.
Definition: omdocument.cc:197
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
PositionIterator positionlist_begin() const
Return a PositionIterator for the current term.
A handle representing a document in a Xapian database.
Definition: document.h:61
bool at_end() const
Return true if the current position is past the last term in this list.
Debug logging macros.
TermList * next()
Advance the current position to the next term in the termlist.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
void set_data(const OmDocumentTerm::term_positions &positions_)
Fill list with data, and move the position to the start.
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Xapian::termcount term_freq
void add_posting(const InMemoryTermEntry &post)
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
vector< InMemoryDoc > termlists