xapian-core  1.4.31
omdatabase.cc
Go to the documentation of this file.
1 /* omdatabase.cc: External interface for running queries
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2001,2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2016,2026 Olly Betts
6  * Copyright 2006,2008 Lemur Consulting Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "autoptr.h"
27 
28 #include <xapian/constants.h>
29 #include <xapian/error.h>
31 #include <xapian/postingiterator.h>
32 #include <xapian/termiterator.h>
33 #include <xapian/unicode.h>
34 
35 #include "omassert.h"
36 #include "debuglog.h"
37 #include "backends/alltermslist.h"
42 #include "backends/database.h"
43 #include "editdistance.h"
44 #include "expand/ortermlist.h"
45 #include "internaltypes.h"
46 #include "noreturn.h"
47 #include "pack.h"
48 
49 #include <algorithm>
50 #include <cstdlib> // For abs().
51 #include <cstring>
52 #include <vector>
53 
54 using namespace std;
56 
57 XAPIAN_NORETURN(static void docid_zero_invalid());
58 static void docid_zero_invalid()
59 {
60  throw Xapian::InvalidArgumentError("Document ID 0 is invalid");
61 }
62 
63 XAPIAN_NORETURN(static void no_subdatabases());
64 static void no_subdatabases()
65 {
66  throw Xapian::InvalidOperationError("No subdatabases");
67 }
68 
69 XAPIAN_NORETURN(static void empty_metadata_key());
70 static void empty_metadata_key()
71 {
72  throw Xapian::InvalidArgumentError("Empty metadata keys are invalid");
73 }
74 
75 inline size_t
76 sub_db(Xapian::docid did, size_t n_dbs)
77 {
78  return (did - 1) % n_dbs;
79 }
80 
81 inline size_t
82 sub_docid(Xapian::docid did, size_t n_dbs)
83 {
84  return (did - 1) / n_dbs + 1;
85 }
86 
87 namespace Xapian {
88 
89 Database::Database(Database&&) = default;
90 
91 Database&
92 Database::operator=(Database&&) = default;
93 
94 Database::Database()
95 {
96  LOGCALL_CTOR(API, "Database", NO_ARGS);
97 }
98 
99 Database::Database(Database::Internal *internal_)
100 {
101  LOGCALL_CTOR(API, "Database", internal_);
102  intrusive_ptr<Database::Internal> newi(internal_);
103  internal.push_back(newi);
104 }
105 
106 Database::Database(const Database &other)
107 {
108  LOGCALL_CTOR(API, "Database", other);
109  internal = other.internal;
110 }
111 
112 void
113 Database::operator=(const Database &other)
114 {
115  LOGCALL_VOID(API, "Database::operator=", other);
116  internal = other.internal;
117 }
118 
119 Database::~Database()
120 {
121  LOGCALL_DTOR(API, "Database");
122 }
123 
124 bool
125 Database::reopen()
126 {
127  LOGCALL(API, bool, "Database::reopen", NO_ARGS);
128  bool maybe_changed = false;
129  vector<intrusive_ptr<Database::Internal> >::iterator i;
130  for (i = internal.begin(); i != internal.end(); ++i) {
131  if ((*i)->reopen())
132  maybe_changed = true;
133  }
134  RETURN(maybe_changed);
135 }
136 
137 void
139 {
140  LOGCALL_VOID(API, "Database::close", NO_ARGS);
141  vector<intrusive_ptr<Database::Internal> >::iterator i;
142  for (i = internal.begin(); i != internal.end(); ++i) {
143  (*i)->close();
144  }
145 }
146 
147 void
148 Database::add_database(const Database & database)
149 {
150  LOGCALL_VOID(API, "Database::add_database", database);
151  if (this == &database) {
152  LOGLINE(API, "Database added to itself");
153  throw Xapian::InvalidArgumentError("Can't add a Database to itself");
154  }
155  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
156  for (i = database.internal.begin(); i != database.internal.end(); ++i) {
157  internal.push_back(*i);
158  }
159 }
160 
162 Database::postlist_begin(const string &tname) const
163 {
164  LOGCALL(API, PostingIterator, "Database::postlist_begin", tname);
165 
166  // Don't bother checking that the term exists first. If it does, we
167  // just end up doing more work, and if it doesn't, we save very little
168  // work.
169 
170  // Handle the common case of a single database specially.
171  if (internal.size() == 1)
172  RETURN(PostingIterator(internal[0]->open_post_list(tname)));
173 
174  if (rare(internal.empty()))
176 
177  vector<LeafPostList *> pls;
178  try {
179  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
180  for (i = internal.begin(); i != internal.end(); ++i) {
181  pls.push_back((*i)->open_post_list(tname));
182  pls.back()->next();
183  }
184  Assert(pls.begin() != pls.end());
185  } catch (...) {
186  vector<LeafPostList *>::iterator i;
187  for (i = pls.begin(); i != pls.end(); ++i) {
188  delete *i;
189  *i = 0;
190  }
191  throw;
192  }
193 
194  RETURN(PostingIterator(new MultiPostList(pls, *this)));
195 }
196 
198 Database::termlist_begin(Xapian::docid did) const
199 {
200  LOGCALL(API, TermIterator, "Database::termlist_begin", did);
201  if (did == 0)
203 
204  unsigned int multiplier = internal.size();
205  if (rare(multiplier == 0))
206  no_subdatabases();
207  TermList *tl;
208  if (multiplier == 1) {
209  // There's no need for the MultiTermList wrapper in the common case
210  // where we're only dealing with a single database.
211  tl = internal[0]->open_term_list(did);
212  } else {
213  Assert(multiplier != 0);
214  Xapian::doccount n = (did - 1) % multiplier; // which actual database
215  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
216 
217  tl = new MultiTermList(internal[n]->open_term_list(m), *this, n);
218  }
219  RETURN(TermIterator(tl));
220 }
221 
223 Database::allterms_begin(const std::string & prefix) const
224 {
225  LOGCALL(API, TermIterator, "Database::allterms_begin", NO_ARGS);
226  TermList * tl;
227  if (rare(internal.size() == 0)) {
228  tl = NULL;
229  } else if (internal.size() == 1) {
230  tl = internal[0]->open_allterms(prefix);
231  } else {
232  tl = new MultiAllTermsList(internal, prefix);
233  }
234  RETURN(TermIterator(tl));
235 }
236 
237 bool
238 Database::has_positions() const
239 {
240  LOGCALL(API, bool, "Database::has_positions", NO_ARGS);
241  // If any sub-database has positions, the combined database does.
242  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
243  for (i = internal.begin(); i != internal.end(); ++i) {
244  if ((*i)->has_positions()) RETURN(true);
245  }
246  RETURN(false);
247 }
248 
250 Database::positionlist_begin(Xapian::docid did, const string &tname) const
251 {
252  LOGCALL(API, PositionIterator, "Database::positionlist_begin", did | tname);
253  if (tname.empty())
254  throw InvalidArgumentError("Zero length terms are invalid");
255  if (did == 0)
257 
258  unsigned int multiplier = internal.size();
259  if (rare(multiplier == 0))
260  no_subdatabases();
261  Xapian::doccount n = (did - 1) % multiplier; // which actual database
262  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
263  RETURN(PositionIterator(internal[n]->open_position_list(m, tname)));
264 }
265 
267 Database::get_doccount() const
268 {
269  LOGCALL(API, Xapian::doccount, "Database::get_doccount", NO_ARGS);
270  Xapian::doccount docs = 0;
271  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
272  for (i = internal.begin(); i != internal.end(); ++i) {
273  docs += (*i)->get_doccount();
274  }
275  RETURN(docs);
276 }
277 
279 Database::get_lastdocid() const
280 {
281  LOGCALL(API, Xapian::docid, "Database::get_lastdocid", NO_ARGS);
282  Xapian::docid did = 0;
283 
284  unsigned int multiplier = internal.size();
285  for (Xapian::doccount i = 0; i < multiplier; ++i) {
286  Xapian::docid did_i = internal[i]->get_lastdocid();
287  if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
288  }
289  RETURN(did);
290 }
291 
293 Database::get_avlength() const
294 {
295  LOGCALL(API, Xapian::doclength, "Database::get_avlength", NO_ARGS);
296  Xapian::doccount docs = 0;
297  Xapian::totallength totlen = 0;
298 
299  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
300  for (i = internal.begin(); i != internal.end(); ++i) {
301  docs += (*i)->get_doccount();
302  totlen += (*i)->get_total_length();
303  }
304  LOGLINE(UNKNOWN, "get_avlength() = " << totlen << " / " << docs <<
305  " (from " << internal.size() << " dbs)");
306 
307  if (docs == 0) RETURN(0.0);
308  RETURN(totlen / double(docs));
309 }
310 
312 Database::get_total_length() const
313 {
314  LOGCALL(API, Xapian::totallength, "Database::get_total_length", NO_ARGS);
315  Xapian::totallength total_length = 0;
316  for (auto&& sub_db : internal) {
317  total_length += sub_db->get_total_length();
318  }
319  RETURN(total_length);
320 }
321 
323 Database::get_termfreq(const string & tname) const
324 {
325  LOGCALL(API, Xapian::doccount, "Database::get_termfreq", tname);
326  if (tname.empty()) RETURN(get_doccount());
327 
328  Xapian::doccount tf = 0;
329  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
330  for (i = internal.begin(); i != internal.end(); ++i) {
331  Xapian::doccount sub_tf;
332  (*i)->get_freqs(tname, &sub_tf, NULL);
333  tf += sub_tf;
334  }
335  RETURN(tf);
336 }
337 
339 Database::get_collection_freq(const string & tname) const
340 {
341  LOGCALL(API, Xapian::termcount, "Database::get_collection_freq", tname);
342  if (tname.empty()) RETURN(get_doccount());
343 
344  Xapian::termcount cf = 0;
345  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
346  for (i = internal.begin(); i != internal.end(); ++i) {
347  Xapian::termcount sub_cf;
348  (*i)->get_freqs(tname, NULL, &sub_cf);
349  cf += sub_cf;
350  }
351  RETURN(cf);
352 }
353 
355 Database::get_value_freq(Xapian::valueno slot) const
356 {
357  LOGCALL(API, Xapian::doccount, "Database::get_value_freq", slot);
358 
359  Xapian::doccount vf = 0;
360  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
361  for (i = internal.begin(); i != internal.end(); ++i) {
362  vf += (*i)->get_value_freq(slot);
363  }
364  RETURN(vf);
365 }
366 
367 string
368 Database::get_value_lower_bound(Xapian::valueno slot) const
369 {
370  LOGCALL(API, string, "Database::get_value_lower_bound", slot);
371 
372  if (rare(internal.empty())) RETURN(string());
373 
374  string full_lb;
375  for (auto&& subdb : internal) {
376  string lb = subdb->get_value_lower_bound(slot);
377  if (lb.empty())
378  continue;
379  if (full_lb.empty() || lb < full_lb)
380  full_lb = std::move(lb);
381  }
382  RETURN(full_lb);
383 }
384 
385 std::string
386 Database::get_value_upper_bound(Xapian::valueno slot) const
387 {
388  LOGCALL(API, std::string, "Database::get_value_upper_bound", slot);
389 
390  std::string full_ub;
391  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
392  for (i = internal.begin(); i != internal.end(); ++i) {
393  std::string ub = (*i)->get_value_upper_bound(slot);
394  if (ub > full_ub)
395  full_ub = ub;
396  }
397  RETURN(full_ub);
398 }
399 
401 Database::get_doclength_lower_bound() const
402 {
403  LOGCALL(API, Xapian::termcount, "Database::get_doclength_lower_bound", NO_ARGS);
404 
405  if (rare(internal.empty())) RETURN(0);
406 
407  Xapian::termcount full_lb = 0;
408  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
409  for (i = internal.begin(); i != internal.end(); ++i) {
410  // Skip sub-databases which are empty or only contain documents with
411  // doclen==0.
412  if ((*i)->get_total_length() != 0) {
413  Xapian::termcount lb = (*i)->get_doclength_lower_bound();
414  if (full_lb == 0 || lb < full_lb) full_lb = lb;
415  }
416  }
417  RETURN(full_lb);
418 }
419 
421 Database::get_doclength_upper_bound() const
422 {
423  LOGCALL(API, Xapian::termcount, "Database::get_doclength_upper_bound", NO_ARGS);
424 
425  Xapian::termcount full_ub = 0;
426  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
427  for (i = internal.begin(); i != internal.end(); ++i) {
428  Xapian::termcount ub = (*i)->get_doclength_upper_bound();
429  if (ub > full_ub) full_ub = ub;
430  }
431  RETURN(full_ub);
432 }
433 
435 Database::get_wdf_upper_bound(const string & term) const
436 {
437  LOGCALL(API, Xapian::termcount, "Database::get_wdf_upper_bound", term);
438  if (term.empty()) RETURN(0);
439 
440  Xapian::termcount full_ub = 0;
441  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
442  for (i = internal.begin(); i != internal.end(); ++i) {
443  Xapian::termcount ub = (*i)->get_wdf_upper_bound(term);
444  if (ub > full_ub) full_ub = ub;
445  }
446  RETURN(full_ub);
447 }
448 
450 Database::valuestream_begin(Xapian::valueno slot) const
451 {
452  LOGCALL(API, ValueIterator, "Database::valuestream_begin", slot);
453  if (internal.size() == 0)
455  if (internal.size() != 1)
456  RETURN(ValueIterator(new MultiValueList(internal, slot)));
457  RETURN(ValueIterator(internal[0]->open_value_list(slot)));
458 }
459 
461 Database::get_doclength(Xapian::docid did) const
462 {
463  LOGCALL(API, Xapian::termcount, "Database::get_doclength", did);
464  if (did == 0)
466 
467  unsigned int multiplier = internal.size();
468  if (rare(multiplier == 0))
469  no_subdatabases();
470  Xapian::doccount n = (did - 1) % multiplier; // which actual database
471  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
472  RETURN(internal[n]->get_doclength(m));
473 }
474 
476 Database::get_unique_terms(Xapian::docid did) const
477 {
478  LOGCALL(API, Xapian::termcount, "Database::get_unique_terms", did);
479  if (did == 0)
481  unsigned int multiplier = internal.size();
482  if (rare(multiplier == 0))
483  no_subdatabases();
484  Xapian::doccount n = (did - 1) % multiplier; // which actual database
485  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
486  RETURN(internal[n]->get_unique_terms(m));
487 }
488 
489 Document
490 Database::get_document(Xapian::docid did) const
491 {
492  LOGCALL(API, Document, "Database::get_document", did);
493  if (did == 0)
495 
496  unsigned int multiplier = internal.size();
497  if (rare(multiplier == 0))
498  no_subdatabases();
499  Xapian::doccount n = (did - 1) % multiplier; // which actual database
500  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
501 
502  // Open non-lazily so we throw DocNotFoundError if the doc doesn't exist.
503  RETURN(Document(internal[n]->open_document(m, false)));
504 }
505 
506 Document
507 Database::get_document(Xapian::docid did, unsigned flags) const
508 {
509  LOGCALL(API, Document, "Database::get_document", did|flags);
510  if (did == 0)
512 
513  unsigned int multiplier = internal.size();
514  if (rare(multiplier == 0))
515  no_subdatabases();
516  Xapian::doccount n = (did - 1) % multiplier; // which actual database
517  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
518 
519  bool assume_valid = flags & Xapian::DOC_ASSUME_VALID;
520  RETURN(Document(internal[n]->open_document(m, assume_valid)));
521 }
522 
523 bool
524 Database::term_exists(const string & tname) const
525 {
526  LOGCALL(API, bool, "Database::term_exists", tname);
527  if (tname.empty()) {
528  RETURN(get_doccount() != 0);
529  }
530  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
531  for (i = internal.begin(); i != internal.end(); ++i) {
532  if ((*i)->term_exists(tname)) RETURN(true);
533  }
534  RETURN(false);
535 }
536 
537 void
538 Database::keep_alive()
539 {
540  LOGCALL_VOID(API, "Database::keep_alive", NO_ARGS);
541  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
542  for (i = internal.begin(); i != internal.end(); ++i) {
543  (*i)->keep_alive();
544  }
545 }
546 
547 string
548 Database::get_description() const
549 {
551  return "Database()";
552 }
553 
// We sum the character frequency histogram absolute differences to compute a
// lower bound on the edit distance.  Rather than counting each Unicode code
// point uniquely, we use an array with VEC_SIZE elements and tally code points
// modulo VEC_SIZE which can only reduce the bound we calculate.
//
// There will be a trade-off between how good the bound is and how large an
// array is used (a larger array takes more time to clear and sum over).  The
// value 64 is somewhat arbitrary - it works as well as 128 for the testsuite
// but that may not reflect real world performance.  FIXME: profile and tune.

#define VEC_SIZE 64

// Compute a cheap lower bound on the edit distance between the code point
// sequences `a` and `b`, based only on how often each code point (modulo
// VEC_SIZE) occurs in each.
static int
freq_edit_lower_bound(const std::vector<unsigned> & a, const std::vector<unsigned> & b)
{
    // Signed tallies: +1 for each code point in a, -1 for each one in b.
    int tally[VEC_SIZE] = {};
    for (unsigned ch : a)
        ++tally[ch % VEC_SIZE];
    for (unsigned ch : b)
        --tally[ch % VEC_SIZE];

    unsigned int total = 0;
    for (int count : tally)
        total += std::abs(count);

    // Each insertion or deletion adds at most 1 to total.  Each transposition
    // doesn't change it at all.  But each substitution can change it by 2 so
    // we need to divide it by 2.  Rounding up is OK, since the odd change must
    // be due to an actual edit.
    return (total + 1) / 2;
}
588 
589 string
590 Database::get_spelling_suggestion(const string &word,
591  unsigned max_edit_distance) const
592 {
593  LOGCALL(API, string, "Database::get_spelling_suggestion", word | max_edit_distance);
594  if (word.size() <= 1 || max_edit_distance == 0) return string();
595 
596  max_edit_distance = min(max_edit_distance, unsigned(word.size() - 1));
597 
598  AutoPtr<TermList> merger;
599  for (size_t i = 0; i < internal.size(); ++i) {
600  TermList * tl = internal[i]->open_spelling_termlist(word);
601  LOGLINE(SPELLING, "Sub db " << i << " tl = " << (void*)tl);
602  if (tl) {
603  if (merger.get()) {
604  merger.reset(new OrTermList(merger.release(), tl));
605  } else {
606  merger.reset(tl);
607  }
608  }
609  }
610  if (!merger.get()) RETURN(string());
611 
612  // Convert word to UTF-32.
613  // Extra brackets needed to avoid this being misparsed as a function
614  // prototype.
615  vector<unsigned> utf32_word((Utf8Iterator(word)), Utf8Iterator());
616 
617  vector<unsigned> utf32_term;
618 
619  string result;
620  int edist_best = max_edit_distance;
621  Xapian::doccount freq_best = 0;
622  Xapian::doccount freq_exact = 0;
623  while (true) {
624  TermList *ret = merger->next();
625  if (rare(ret)) merger.reset(ret);
626 
627  if (rare(merger->at_end())) break;
628 
629  const string& term = merger->get_termname();
630 
631  LOGLINE(SPELLING, "Term \"" << term << "\"");
632 
633  // There's no point considering a word where the difference
634  // in length is greater than the smallest number of edits we've
635  // found so far.
636 
637  // First check the length of the encoded UTF-8 version of term.
638  // Each UTF-32 character is 1-4 bytes in UTF-8.
639  if (abs(long(term.size()) - long(word.size())) > edist_best * 4) {
640  LOGLINE(SPELLING, "Lengths much too different");
641  continue;
642  }
643 
644  // Now convert to UTF-32, and compare the true lengths more
645  // strictly.
646  utf32_term.assign(Utf8Iterator(term), Utf8Iterator());
647 
648  if (abs(long(utf32_term.size()) - long(utf32_word.size()))
649  > edist_best) {
650  LOGLINE(SPELLING, "Lengths too different");
651  continue;
652  }
653 
654  if (freq_edit_lower_bound(utf32_term, utf32_word) > edist_best) {
655  LOGLINE(SPELLING, "Rejected by character frequency test");
656  continue;
657  }
658 
659  int edist = edit_distance_unsigned(&utf32_term[0],
660  int(utf32_term.size()),
661  &utf32_word[0],
662  int(utf32_word.size()),
663  edist_best);
664  LOGLINE(SPELLING, "Edit distance " << edist);
665 
666  if (edist <= edist_best) {
667  Xapian::doccount freq = 0;
668  for (size_t j = 0; j < internal.size(); ++j)
669  freq += internal[j]->get_spelling_frequency(term);
670 
671  LOGLINE(SPELLING, "Freq " << freq << " best " << freq_best);
672  // Even if we have an exact match, there may be a much more
673  // frequent potential correction which will still be
674  // interesting.
675  if (edist == 0) {
676  freq_exact = freq;
677  continue;
678  }
679 
680  if (edist < edist_best || freq > freq_best) {
681  LOGLINE(SPELLING, "Best so far: \"" << term <<
682  "\" edist " << edist << " freq " << freq);
683  result = term;
684  edist_best = edist;
685  freq_best = freq;
686  }
687  }
688  }
689  if (freq_best < freq_exact)
690  RETURN(string());
691  RETURN(result);
692 }
693 
695 Database::spellings_begin() const
696 {
697  LOGCALL(API, TermIterator, "Database::spellings_begin", NO_ARGS);
698  AutoPtr<TermList> merger;
699  for (size_t i = 0; i < internal.size(); ++i) {
700  TermList * tl = internal[i]->open_spelling_wordlist();
701  if (tl) {
702  if (merger.get()) {
703  merger.reset(new FreqAdderOrTermList(merger.release(), tl));
704  } else {
705  merger.reset(tl);
706  }
707  }
708  }
709  RETURN(TermIterator(merger.release()));
710 }
711 
713 Database::synonyms_begin(const std::string &term) const
714 {
715  LOGCALL(API, TermIterator, "Database::synonyms_begin", term);
716  AutoPtr<TermList> merger;
717  for (size_t i = 0; i < internal.size(); ++i) {
718  TermList * tl = internal[i]->open_synonym_termlist(term);
719  if (tl) {
720  if (merger.get()) {
721  merger.reset(new OrTermList(merger.release(), tl));
722  } else {
723  merger.reset(tl);
724  }
725  }
726  }
727  RETURN(TermIterator(merger.release()));
728 }
729 
731 Database::synonym_keys_begin(const std::string &prefix) const
732 {
733  LOGCALL(API, TermIterator, "Database::synonym_keys_begin", prefix);
734  AutoPtr<TermList> merger;
735  for (size_t i = 0; i < internal.size(); ++i) {
736  TermList * tl = internal[i]->open_synonym_keylist(prefix);
737  if (tl) {
738  if (merger.get()) {
739  merger.reset(new OrTermList(merger.release(), tl));
740  } else {
741  merger.reset(tl);
742  }
743  }
744  }
745  RETURN(TermIterator(merger.release()));
746 }
747 
748 string
749 Database::get_metadata(const string & key) const
750 {
751  LOGCALL(API, string, "Database::get_metadata", key);
752  if (rare(key.empty()))
754  if (internal.empty()) RETURN(std::string());
755  RETURN(internal[0]->get_metadata(key));
756 }
757 
759 Database::metadata_keys_begin(const std::string &prefix) const
760 {
761  LOGCALL(API, Xapian::TermIterator, "Database::metadata_keys_begin", NO_ARGS);
762  if (internal.empty()) RETURN(TermIterator());
763  RETURN(TermIterator(internal[0]->open_metadata_keylist(prefix)));
764 }
765 
766 std::string
767 Database::get_uuid() const
768 {
769  LOGCALL(API, std::string, "Database::get_uuid", NO_ARGS);
770  string uuid;
771  for (size_t i = 0; i < internal.size(); ++i) {
772  string sub_uuid = internal[i]->get_uuid();
773  // If any of the sub-databases have no uuid, we can't make a uuid for
774  // the combined database.
775  if (sub_uuid.empty())
776  RETURN(sub_uuid);
777  if (!uuid.empty()) uuid += ':';
778  uuid += sub_uuid;
779  }
780  RETURN(uuid);
781 }
782 
783 bool
784 Database::locked() const
785 {
786  LOGCALL(API, bool, "Database::locked", NO_ARGS);
787  for (const auto & subdb : internal) {
788  // If any of the sub-databases is locked, return true.
789  if (subdb->locked())
790  RETURN(true);
791  }
792  RETURN(false);
793 }
794 
796 Database::get_revision() const
797 {
798  LOGCALL(API, Xapian::rev, "Database::get_revision", NO_ARGS);
799  size_t n_dbs = internal.size();
800  if (rare(n_dbs != 1)) {
801  if (n_dbs == 0)
802  return 0;
803  throw Xapian::InvalidOperationError("Database::get_revision() requires "
804  "exactly one subdatabase");
805  }
806  const string& s = internal[0]->get_revision_info();
807  const char* p = s.data();
808  const char* end = p + s.size();
810  if (!unpack_uint(&p, end, &revision))
811  throw Xapian::UnimplementedError("Database::get_revision() only "
812  "supported for chert and glass");
813  return revision;
814 }
815 
817 
818 WritableDatabase::WritableDatabase() : Database()
819 {
820  LOGCALL_CTOR(API, "WritableDatabase", NO_ARGS);
821 }
822 
824  : Database(internal_)
825 {
826  LOGCALL_CTOR(API, "WritableDatabase", internal_);
827 }
828 
830  : Database(other)
831 {
832  LOGCALL_CTOR(API, "WritableDatabase", other);
833 }
834 
835 void
837 {
838  LOGCALL_VOID(API, "WritableDatabase::operator=", other);
839  Database::operator=(other);
840 }
841 
843 {
844  LOGCALL_DTOR(API, "WritableDatabase");
845 }
846 
847 void
849 {
850  LOGCALL_VOID(API, "WritableDatabase::commit", NO_ARGS);
851  size_t n_dbs = internal.size();
852  if (rare(n_dbs == 0))
853  no_subdatabases();
854  for (size_t i = 0; i != n_dbs; ++i)
855  internal[i]->commit();
856 }
857 
858 void
860 {
861  LOGCALL_VOID(API, "WritableDatabase::begin_transaction", flushed);
862  size_t n_dbs = internal.size();
863  if (rare(n_dbs == 0))
864  no_subdatabases();
865  for (size_t i = 0; i != n_dbs; ++i)
866  internal[i]->begin_transaction(flushed);
867 }
868 
869 void
871 {
872  LOGCALL_VOID(API, "WritableDatabase::commit_transaction", NO_ARGS);
873  size_t n_dbs = internal.size();
874  if (rare(n_dbs == 0))
875  no_subdatabases();
876  for (size_t i = 0; i != n_dbs; ++i)
877  internal[i]->commit_transaction();
878 }
879 
880 void
882 {
883  LOGCALL_VOID(API, "WritableDatabase::cancel_transaction", NO_ARGS);
884  size_t n_dbs = internal.size();
885  if (rare(n_dbs == 0))
886  no_subdatabases();
887  for (size_t i = 0; i != n_dbs; ++i)
888  internal[i]->cancel_transaction();
889 }
890 
891 
894 {
895  LOGCALL(API, Xapian::docid, "WritableDatabase::add_document", document);
896  size_t n_dbs = internal.size();
897  if (rare(n_dbs == 0))
898  no_subdatabases();
899  if (n_dbs == 1)
900  RETURN(internal[0]->add_document(document));
901 
902  // Which database will the next never used docid be in?
903  Xapian::docid did = get_lastdocid() + 1;
904  if (rare(did == 0)) {
905  throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
906  }
907  // We want exactly did to be used, not a lower docid if that subdb isn't
908  // using the docid before it, so call replace_document() not
909  // add_document().
910  size_t i = sub_db(did, n_dbs);
911  internal[i]->replace_document(sub_docid(did, n_dbs), document);
912  RETURN(did);
913 }
914 
915 void
917 {
918  LOGCALL_VOID(API, "WritableDatabase::delete_document", did);
919  if (rare(did == 0))
921 
922  size_t n_dbs = internal.size();
923  if (rare(n_dbs == 0))
924  no_subdatabases();
925  size_t i = sub_db(did, n_dbs);
926  internal[i]->delete_document(sub_docid(did, n_dbs));
927 }
928 
929 void
930 WritableDatabase::delete_document(const std::string & unique_term)
931 {
932  LOGCALL_VOID(API, "WritableDatabase::delete_document", unique_term);
933  if (unique_term.empty())
934  throw InvalidArgumentError("Empty termnames are invalid");
935  size_t n_dbs = internal.size();
936  if (rare(n_dbs == 0))
937  no_subdatabases();
938  for (size_t i = 0; i != n_dbs; ++i)
939  internal[i]->delete_document(unique_term);
940 }
941 
942 void
944 {
945  LOGCALL_VOID(API, "WritableDatabase::replace_document", did | document);
946  if (did == 0)
948  size_t n_dbs = internal.size();
949  if (rare(n_dbs == 0))
950  no_subdatabases();
951  size_t i = sub_db(did, n_dbs);
952  internal[i]->replace_document(sub_docid(did, n_dbs), document);
953 }
954 
956 WritableDatabase::replace_document(const std::string & unique_term,
957  const Document & document)
958 {
959  LOGCALL(API, Xapian::docid, "WritableDatabase::replace_document", unique_term | document);
960  if (unique_term.empty())
961  throw InvalidArgumentError("Empty termnames are invalid");
962  size_t n_dbs = internal.size();
963  if (rare(n_dbs == 0))
964  no_subdatabases();
965  if (n_dbs == 1)
966  RETURN(internal[0]->replace_document(unique_term, document));
967 
968  Xapian::PostingIterator postit = postlist_begin(unique_term);
969  // If no unique_term in the database, this is just an add_document().
970  if (postit == postlist_end(unique_term)) {
971  // Which database will the next never used docid be in?
972  Xapian::docid did = get_lastdocid() + 1;
973  if (rare(did == 0)) {
974  throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
975  }
976  size_t i = sub_db(did, n_dbs);
977  RETURN(internal[i]->add_document(document));
978  }
979 
980  Xapian::docid retval = *postit;
981  size_t i = sub_db(retval, n_dbs);
982  internal[i]->replace_document(sub_docid(retval, n_dbs), document);
983 
984  // Delete any other occurrences of unique_term.
985  while (++postit != postlist_end(unique_term)) {
986  Xapian::docid did = *postit;
987  i = sub_db(did, n_dbs);
988  internal[i]->delete_document(sub_docid(did, n_dbs));
989  }
990 
991  return retval;
992 }
993 
994 void
995 WritableDatabase::add_spelling(const std::string & word,
996  Xapian::termcount freqinc) const
997 {
998  LOGCALL_VOID(API, "WritableDatabase::add_spelling", word | freqinc);
999  if (rare(internal.empty()))
1000  no_subdatabases();
1001  // FIXME: Is adding to the first subdatabase sensible?
1002  internal[0]->add_spelling(word, freqinc);
1003 }
1004 
1005 void
1006 WritableDatabase::remove_spelling(const std::string & word,
1007  Xapian::termcount freqdec) const
1008 {
1009  LOGCALL_VOID(API, "WritableDatabase::remove_spelling", word | freqdec);
1010  size_t n_dbs = internal.size();
1011  if (rare(n_dbs == 0))
1012  no_subdatabases();
1013  for (size_t i = 0; i < n_dbs; ++i) {
1014  internal[i]->remove_spelling(word, freqdec);
1015  }
1016 }
1017 
1018 void
1019 WritableDatabase::add_synonym(const std::string & term,
1020  const std::string & synonym) const
1021 {
1022  LOGCALL_VOID(API, "WritableDatabase::add_synonym", term | synonym);
1023  if (rare(internal.empty()))
1024  no_subdatabases();
1025  // FIXME: Is adding to the first subdatabase sensible?
1026  internal[0]->add_synonym(term, synonym);
1027 }
1028 
1029 void
1030 WritableDatabase::remove_synonym(const std::string & term,
1031  const std::string & synonym) const
1032 {
1033  LOGCALL_VOID(API, "WritableDatabase::remove_synonym", term | synonym);
1034  size_t n_dbs = internal.size();
1035  if (rare(n_dbs == 0))
1036  no_subdatabases();
1037  for (size_t i = 0; i < n_dbs; ++i) {
1038  internal[i]->remove_synonym(term, synonym);
1039  }
1040 }
1041 
1042 void
1043 WritableDatabase::clear_synonyms(const std::string & term) const
1044 {
1045  LOGCALL_VOID(API, "WritableDatabase::clear_synonyms", term);
1046  size_t n_dbs = internal.size();
1047  if (rare(n_dbs == 0))
1048  no_subdatabases();
1049  for (size_t i = 0; i < n_dbs; ++i) {
1050  internal[i]->clear_synonyms(term);
1051  }
1052 }
1053 
1054 void
1055 WritableDatabase::set_metadata(const string & key, const string & value)
1056 {
1057  LOGCALL_VOID(API, "WritableDatabase::set_metadata", key | value);
1058  if (rare(key.empty()))
1060  if (rare(internal.empty()))
1061  no_subdatabases();
1062  internal[0]->set_metadata(key, value);
1063 }
1064 
1065 string
1067 {
1069  return "WritableDatabase()";
1070 }
1071 
1072 }
Abstract base class for iterating all terms in a database.
Wrapper around standard unique_ptr template.
database class declarations
A termlist which ORs two termlists together, adding term frequencies.
Definition: ortermlist.h:81
Class for merging AllTermsList objects from subdatabases.
Class for merging ValueList objects from subdatabases.
DatabaseError indicates some sort of database related error.
Definition: error.h:367
Base class for databases.
Definition: database.h:57
This class is used to access a database, or a group of databases.
Definition: database.h:68
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:230
void operator=(const Database &other)
Assignment is allowed.
Definition: omdatabase.cc:113
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
Definition: database.h:81
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
A handle representing a document in a Xapian database.
Definition: document.h:61
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:82
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
Class for iterating over term positions.
Class for iterating over a list of terms.
Abstract base class for termlists.
Definition: termlist.h:39
virtual std::string get_termname() const =0
Return the termname at the current position.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
Class for iterating over a list of terms.
Definition: termiterator.h:41
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
An iterator which returns Unicode character values from a UTF-8 encoded string.
Definition: unicode.h:38
Class for iterating over document values.
Definition: valueiterator.h:40
This class provides read/write access to a database.
Definition: database.h:795
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:916
void begin_transaction(bool flushed=true)
Begin a transaction.
Definition: omdatabase.cc:859
void remove_synonym(const std::string &term, const std::string &synonym) const
Remove a synonym for a term.
Definition: omdatabase.cc:1030
void clear_synonyms(const std::string &term) const
Remove all synonyms for a term.
Definition: omdatabase.cc:1043
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:943
void commit_transaction()
Complete the transaction currently in progress.
Definition: omdatabase.cc:870
void remove_spelling(const std::string &word, Xapian::termcount freqdec=1) const
Remove a word from the spelling dictionary.
Definition: omdatabase.cc:1006
void cancel_transaction()
Abort the transaction currently in progress, discarding the pending modifications made to the database.
Definition: omdatabase.cc:881
virtual ~WritableDatabase()
Destroy this handle on the database.
Definition: omdatabase.cc:842
std::string get_description() const
Return a string describing this object.
Definition: omdatabase.cc:1066
void set_metadata(const std::string &key, const std::string &metadata)
Set the user-specified metadata associated with a given key.
Definition: omdatabase.cc:1055
WritableDatabase()
Create a WritableDatabase with no subdatabases.
Definition: omdatabase.cc:818
void operator=(const WritableDatabase &other)
Assignment is allowed.
Definition: omdatabase.cc:836
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:848
void add_spelling(const std::string &word, Xapian::termcount freqinc=1) const
Add a word to the spelling dictionary.
Definition: omdatabase.cc:995
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:893
void add_synonym(const std::string &term, const std::string &synonym) const
Add a synonym for a term.
Definition: omdatabase.cc:1019
#define rare(COND)
Definition: config.h:578
Constants in the Xapian namespace.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
#define LOGLINE(a, b)
Definition: debuglog.h:494
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:489
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
#define RETURN(A)
Definition: debuglog.h:493
int edit_distance_unsigned(const unsigned *ptr1, int len1, const unsigned *ptr2, int len2, int max_distance)
Calculate the edit distance between two sequences.
Edit distance calculation algorithm.
Hierarchy of classes which Xapian can throw as exceptions.
int close(FD &fd)
Definition: fd.h:63
Types used internally.
Class for merging AllTermsList objects from subdatabases.
Class for merging PostList objects from subdatabases.
C++ class declaration for multiple database access.
Class for merging ValueList objects from subdatabases.
@ SPELLING
Definition: glass_defs.h:58
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
const int DOC_ASSUME_VALID
Assume document id is valid.
Definition: constants.h:280
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
double doclength
A normalised document length.
Definition: types.h:59
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139
static int freq_edit_lower_bound(const vector< unsigned > &a, const vector< unsigned > &b)
Definition: omdatabase.cc:567
Define the XAPIAN_NORETURN macro.
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
size_t sub_docid(Xapian::docid did, size_t n_dbs)
Definition: omdatabase.cc:82
static void docid_zero_invalid()
Definition: omdatabase.cc:58
#define VEC_SIZE
Definition: omdatabase.cc:564
static void empty_metadata_key()
Definition: omdatabase.cc:70
size_t sub_db(Xapian::docid did, size_t n_dbs)
Definition: omdatabase.cc:76
static void no_subdatabases()
Definition: omdatabase.cc:64
Merge two TermList objects using an OR operation.
Pack types into strings and unpack them again.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
Class for iterating over term positions.
Class for iterating over a list of document ids.
Class for iterating over a list of terms.
Unicode and UTF-8 related classes and functions.