xapian-core  1.4.20
omdatabase.cc
Go to the documentation of this file.
1 /* omdatabase.cc: External interface for running queries
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2001,2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2016 Olly Betts
6  * Copyright 2006,2008 Lemur Consulting Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "autoptr.h"
27 
28 #include <xapian/constants.h>
29 #include <xapian/error.h>
31 #include <xapian/postingiterator.h>
32 #include <xapian/termiterator.h>
33 #include <xapian/unicode.h>
34 
35 #include "omassert.h"
36 #include "debuglog.h"
37 #include "backends/alltermslist.h"
42 #include "backends/database.h"
43 #include "editdistance.h"
44 #include "expand/ortermlist.h"
45 #include "internaltypes.h"
46 #include "noreturn.h"
47 #include "pack.h"
48 
49 #include <algorithm>
50 #include <cstdlib> // For abs().
51 #include <cstring>
52 #include <vector>
53 
54 using namespace std;
56 
57 XAPIAN_NORETURN(static void docid_zero_invalid());
58 static void docid_zero_invalid()
59 {
60  throw Xapian::InvalidArgumentError("Document ID 0 is invalid");
61 }
62 
63 XAPIAN_NORETURN(static void no_subdatabases());
64 static void no_subdatabases()
65 {
66  throw Xapian::InvalidOperationError("No subdatabases");
67 }
68 
69 XAPIAN_NORETURN(static void empty_metadata_key());
70 static void empty_metadata_key()
71 {
72  throw Xapian::InvalidArgumentError("Empty metadata keys are invalid");
73 }
74 
75 inline size_t
76 sub_db(Xapian::docid did, size_t n_dbs)
77 {
78  return (did - 1) % n_dbs;
79 }
80 
81 inline size_t
82 sub_docid(Xapian::docid did, size_t n_dbs)
83 {
84  return (did - 1) / n_dbs + 1;
85 }
86 
87 namespace Xapian {
88 
89 Database::Database(Database&&) = default;
90 
91 Database&
92 Database::operator=(Database&&) = default;
93 
94 Database::Database()
95 {
96  LOGCALL_CTOR(API, "Database", NO_ARGS);
97 }
98 
99 Database::Database(Database::Internal *internal_)
100 {
101  LOGCALL_CTOR(API, "Database", internal_);
102  intrusive_ptr<Database::Internal> newi(internal_);
103  internal.push_back(newi);
104 }
105 
106 Database::Database(const Database &other)
107 {
108  LOGCALL_CTOR(API, "Database", other);
109  internal = other.internal;
110 }
111 
112 void
113 Database::operator=(const Database &other)
114 {
115  LOGCALL_VOID(API, "Database::operator=", other);
116  internal = other.internal;
117 }
118 
119 Database::~Database()
120 {
121  LOGCALL_DTOR(API, "Database");
122 }
123 
124 bool
125 Database::reopen()
126 {
127  LOGCALL(API, bool, "Database::reopen", NO_ARGS);
128  bool maybe_changed = false;
129  vector<intrusive_ptr<Database::Internal> >::iterator i;
130  for (i = internal.begin(); i != internal.end(); ++i) {
131  if ((*i)->reopen())
132  maybe_changed = true;
133  }
134  RETURN(maybe_changed);
135 }
136 
137 void
139 {
140  LOGCALL_VOID(API, "Database::close", NO_ARGS);
141  vector<intrusive_ptr<Database::Internal> >::iterator i;
142  for (i = internal.begin(); i != internal.end(); ++i) {
143  (*i)->close();
144  }
145 }
146 
147 void
148 Database::add_database(const Database & database)
149 {
150  LOGCALL_VOID(API, "Database::add_database", database);
151  if (this == &database) {
152  LOGLINE(API, "Database added to itself");
153  throw Xapian::InvalidArgumentError("Can't add a Database to itself");
154  }
155  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
156  for (i = database.internal.begin(); i != database.internal.end(); ++i) {
157  internal.push_back(*i);
158  }
159 }
160 
162 Database::postlist_begin(const string &tname) const
163 {
164  LOGCALL(API, PostingIterator, "Database::postlist_begin", tname);
165 
166  // Don't bother checking that the term exists first. If it does, we
167  // just end up doing more work, and if it doesn't, we save very little
168  // work.
169 
170  // Handle the common case of a single database specially.
171  if (internal.size() == 1)
172  RETURN(PostingIterator(internal[0]->open_post_list(tname)));
173 
174  if (rare(internal.empty()))
176 
177  vector<LeafPostList *> pls;
178  try {
179  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
180  for (i = internal.begin(); i != internal.end(); ++i) {
181  pls.push_back((*i)->open_post_list(tname));
182  pls.back()->next();
183  }
184  Assert(pls.begin() != pls.end());
185  } catch (...) {
186  vector<LeafPostList *>::iterator i;
187  for (i = pls.begin(); i != pls.end(); ++i) {
188  delete *i;
189  *i = 0;
190  }
191  throw;
192  }
193 
194  RETURN(PostingIterator(new MultiPostList(pls, *this)));
195 }
196 
198 Database::termlist_begin(Xapian::docid did) const
199 {
200  LOGCALL(API, TermIterator, "Database::termlist_begin", did);
201  if (did == 0)
203 
204  unsigned int multiplier = internal.size();
205  if (rare(multiplier == 0))
206  no_subdatabases();
207  TermList *tl;
208  if (multiplier == 1) {
209  // There's no need for the MultiTermList wrapper in the common case
210  // where we're only dealing with a single database.
211  tl = internal[0]->open_term_list(did);
212  } else {
213  Assert(multiplier != 0);
214  Xapian::doccount n = (did - 1) % multiplier; // which actual database
215  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
216 
217  tl = new MultiTermList(internal[n]->open_term_list(m), *this, n);
218  }
219  RETURN(TermIterator(tl));
220 }
221 
223 Database::allterms_begin(const std::string & prefix) const
224 {
225  LOGCALL(API, TermIterator, "Database::allterms_begin", NO_ARGS);
226  TermList * tl;
227  if (rare(internal.size() == 0)) {
228  tl = NULL;
229  } else if (internal.size() == 1) {
230  tl = internal[0]->open_allterms(prefix);
231  } else {
232  tl = new MultiAllTermsList(internal, prefix);
233  }
234  RETURN(TermIterator(tl));
235 }
236 
237 bool
238 Database::has_positions() const
239 {
240  LOGCALL(API, bool, "Database::has_positions", NO_ARGS);
241  // If any sub-database has positions, the combined database does.
242  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
243  for (i = internal.begin(); i != internal.end(); ++i) {
244  if ((*i)->has_positions()) RETURN(true);
245  }
246  RETURN(false);
247 }
248 
250 Database::positionlist_begin(Xapian::docid did, const string &tname) const
251 {
252  LOGCALL(API, PositionIterator, "Database::positionlist_begin", did | tname);
253  if (tname.empty())
254  throw InvalidArgumentError("Zero length terms are invalid");
255  if (did == 0)
257 
258  unsigned int multiplier = internal.size();
259  if (rare(multiplier == 0))
260  no_subdatabases();
261  Xapian::doccount n = (did - 1) % multiplier; // which actual database
262  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
263  RETURN(PositionIterator(internal[n]->open_position_list(m, tname)));
264 }
265 
267 Database::get_doccount() const
268 {
269  LOGCALL(API, Xapian::doccount, "Database::get_doccount", NO_ARGS);
270  Xapian::doccount docs = 0;
271  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
272  for (i = internal.begin(); i != internal.end(); ++i) {
273  docs += (*i)->get_doccount();
274  }
275  RETURN(docs);
276 }
277 
279 Database::get_lastdocid() const
280 {
281  LOGCALL(API, Xapian::docid, "Database::get_lastdocid", NO_ARGS);
282  Xapian::docid did = 0;
283 
284  unsigned int multiplier = internal.size();
285  for (Xapian::doccount i = 0; i < multiplier; ++i) {
286  Xapian::docid did_i = internal[i]->get_lastdocid();
287  if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
288  }
289  RETURN(did);
290 }
291 
293 Database::get_avlength() const
294 {
295  LOGCALL(API, Xapian::doclength, "Database::get_avlength", NO_ARGS);
296  Xapian::doccount docs = 0;
297  Xapian::totallength totlen = 0;
298 
299  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
300  for (i = internal.begin(); i != internal.end(); ++i) {
301  docs += (*i)->get_doccount();
302  totlen += (*i)->get_total_length();
303  }
304  LOGLINE(UNKNOWN, "get_avlength() = " << totlen << " / " << docs <<
305  " (from " << internal.size() << " dbs)");
306 
307  if (docs == 0) RETURN(0.0);
308  RETURN(totlen / double(docs));
309 }
310 
312 Database::get_total_length() const
313 {
314  LOGCALL(API, Xapian::totallength, "Database::get_total_length", NO_ARGS);
315  Xapian::totallength total_length = 0;
316  for (auto&& sub_db : internal) {
317  total_length += sub_db->get_total_length();
318  }
319  RETURN(total_length);
320 }
321 
323 Database::get_termfreq(const string & tname) const
324 {
325  LOGCALL(API, Xapian::doccount, "Database::get_termfreq", tname);
326  if (tname.empty()) RETURN(get_doccount());
327 
328  Xapian::doccount tf = 0;
329  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
330  for (i = internal.begin(); i != internal.end(); ++i) {
331  Xapian::doccount sub_tf;
332  (*i)->get_freqs(tname, &sub_tf, NULL);
333  tf += sub_tf;
334  }
335  RETURN(tf);
336 }
337 
339 Database::get_collection_freq(const string & tname) const
340 {
341  LOGCALL(API, Xapian::termcount, "Database::get_collection_freq", tname);
342  if (tname.empty()) RETURN(get_doccount());
343 
344  Xapian::termcount cf = 0;
345  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
346  for (i = internal.begin(); i != internal.end(); ++i) {
347  Xapian::termcount sub_cf;
348  (*i)->get_freqs(tname, NULL, &sub_cf);
349  cf += sub_cf;
350  }
351  RETURN(cf);
352 }
353 
355 Database::get_value_freq(Xapian::valueno slot) const
356 {
357  LOGCALL(API, Xapian::doccount, "Database::get_value_freq", slot);
358 
359  Xapian::doccount vf = 0;
360  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
361  for (i = internal.begin(); i != internal.end(); ++i) {
362  vf += (*i)->get_value_freq(slot);
363  }
364  RETURN(vf);
365 }
366 
367 string
368 Database::get_value_lower_bound(Xapian::valueno slot) const
369 {
370  LOGCALL(API, string, "Database::get_value_lower_bound", slot);
371 
372  if (rare(internal.empty())) RETURN(string());
373 
374  string full_lb;
375  for (auto&& subdb : internal) {
376  string lb = subdb->get_value_lower_bound(slot);
377  if (lb.empty())
378  continue;
379  if (full_lb.empty() || lb < full_lb)
380  full_lb = std::move(lb);
381  }
382  RETURN(full_lb);
383 }
384 
385 std::string
386 Database::get_value_upper_bound(Xapian::valueno slot) const
387 {
388  LOGCALL(API, std::string, "Database::get_value_upper_bound", slot);
389 
390  std::string full_ub;
391  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
392  for (i = internal.begin(); i != internal.end(); ++i) {
393  std::string ub = (*i)->get_value_upper_bound(slot);
394  if (ub > full_ub)
395  full_ub = ub;
396  }
397  RETURN(full_ub);
398 }
399 
401 Database::get_doclength_lower_bound() const
402 {
403  LOGCALL(API, Xapian::termcount, "Database::get_doclength_lower_bound", NO_ARGS);
404 
405  if (rare(internal.empty())) RETURN(0);
406 
407  Xapian::termcount full_lb = 0;
408  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
409  for (i = internal.begin(); i != internal.end(); ++i) {
410  // Skip sub-databases which are empty or only contain documents with
411  // doclen==0.
412  if ((*i)->get_total_length() != 0) {
413  Xapian::termcount lb = (*i)->get_doclength_lower_bound();
414  if (full_lb == 0 || lb < full_lb) full_lb = lb;
415  }
416  }
417  RETURN(full_lb);
418 }
419 
421 Database::get_doclength_upper_bound() const
422 {
423  LOGCALL(API, Xapian::termcount, "Database::get_doclength_upper_bound", NO_ARGS);
424 
425  Xapian::termcount full_ub = 0;
426  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
427  for (i = internal.begin(); i != internal.end(); ++i) {
428  Xapian::termcount ub = (*i)->get_doclength_upper_bound();
429  if (ub > full_ub) full_ub = ub;
430  }
431  RETURN(full_ub);
432 }
433 
435 Database::get_wdf_upper_bound(const string & term) const
436 {
437  LOGCALL(API, Xapian::termcount, "Database::get_wdf_upper_bound", term);
438  if (term.empty()) RETURN(0);
439 
440  Xapian::termcount full_ub = 0;
441  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
442  for (i = internal.begin(); i != internal.end(); ++i) {
443  Xapian::termcount ub = (*i)->get_wdf_upper_bound(term);
444  if (ub > full_ub) full_ub = ub;
445  }
446  RETURN(full_ub);
447 }
448 
450 Database::valuestream_begin(Xapian::valueno slot) const
451 {
452  LOGCALL(API, ValueIterator, "Database::valuestream_begin", slot);
453  if (internal.size() == 0)
455  if (internal.size() != 1)
456  RETURN(ValueIterator(new MultiValueList(internal, slot)));
457  RETURN(ValueIterator(internal[0]->open_value_list(slot)));
458 }
459 
461 Database::get_doclength(Xapian::docid did) const
462 {
463  LOGCALL(API, Xapian::termcount, "Database::get_doclength", did);
464  if (did == 0)
466 
467  unsigned int multiplier = internal.size();
468  if (rare(multiplier == 0))
469  no_subdatabases();
470  Xapian::doccount n = (did - 1) % multiplier; // which actual database
471  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
472  RETURN(internal[n]->get_doclength(m));
473 }
474 
476 Database::get_unique_terms(Xapian::docid did) const
477 {
478  LOGCALL(API, Xapian::termcount, "Database::get_unique_terms", did);
479  if (did == 0)
481  unsigned int multiplier = internal.size();
482  if (rare(multiplier == 0))
483  no_subdatabases();
484  Xapian::doccount n = (did - 1) % multiplier; // which actual database
485  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
486  RETURN(internal[n]->get_unique_terms(m));
487 }
488 
489 Document
490 Database::get_document(Xapian::docid did) const
491 {
492  LOGCALL(API, Document, "Database::get_document", did);
493  if (did == 0)
495 
496  unsigned int multiplier = internal.size();
497  if (rare(multiplier == 0))
498  no_subdatabases();
499  Xapian::doccount n = (did - 1) % multiplier; // which actual database
500  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
501 
502  // Open non-lazily so we throw DocNotFoundError if the doc doesn't exist.
503  RETURN(Document(internal[n]->open_document(m, false)));
504 }
505 
506 Document
507 Database::get_document(Xapian::docid did, unsigned flags) const
508 {
509  LOGCALL(API, Document, "Database::get_document", did|flags);
510  if (did == 0)
512 
513  unsigned int multiplier = internal.size();
514  if (rare(multiplier == 0))
515  no_subdatabases();
516  Xapian::doccount n = (did - 1) % multiplier; // which actual database
517  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
518 
519  bool assume_valid = flags & Xapian::DOC_ASSUME_VALID;
520  RETURN(Document(internal[n]->open_document(m, assume_valid)));
521 }
522 
523 bool
524 Database::term_exists(const string & tname) const
525 {
526  LOGCALL(API, bool, "Database::term_exists", tname);
527  if (tname.empty()) {
528  RETURN(get_doccount() != 0);
529  }
530  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
531  for (i = internal.begin(); i != internal.end(); ++i) {
532  if ((*i)->term_exists(tname)) RETURN(true);
533  }
534  RETURN(false);
535 }
536 
537 void
538 Database::keep_alive()
539 {
540  LOGCALL_VOID(API, "Database::keep_alive", NO_ARGS);
541  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
542  for (i = internal.begin(); i != internal.end(); ++i) {
543  (*i)->keep_alive();
544  }
545 }
546 
547 string
548 Database::get_description() const
549 {
551  return "Database()";
552 }
553 
// We sum the character frequency histogram absolute differences to compute a
// lower bound on the edit distance.  Rather than counting each Unicode code
// point uniquely, we use an array with VEC_SIZE elements and tally code points
// modulo VEC_SIZE which can only reduce the bound we calculate.
//
// There will be a trade-off between how good the bound is and how large an
// array is used (a larger array takes more time to clear and sum over).  The
// value 64 is somewhat arbitrary - it works as well as 128 for the testsuite
// but that may not reflect real world performance.  FIXME: profile and tune.

#define VEC_SIZE 64

/** Compute a cheap lower bound on the edit distance between two strings.
 *
 *  @param a  First string as a sequence of code points.
 *  @param b  Second string as a sequence of code points.
 *
 *  @return Half (rounded up) of the summed absolute differences between the
 *	    two strings' code point histograms, with code points bucketed
 *	    modulo VEC_SIZE.
 */
static int
freq_edit_lower_bound(const std::vector<unsigned> & a,
		      const std::vector<unsigned> & b)
{
    // Signed per-bucket balance: +1 for each code point of a, -1 for each
    // code point of b.
    int balance[VEC_SIZE] = {};
    for (unsigned ch : a) {
	++balance[ch % VEC_SIZE];
    }
    for (unsigned ch : b) {
	--balance[ch % VEC_SIZE];
    }
    unsigned int total = 0;
    for (int diff : balance) {
	total += std::abs(diff);
    }
    // Each insertion or deletion adds at most 1 to total.  Each transposition
    // doesn't change it at all.  But each substitution can change it by 2 so
    // we need to divide it by 2.  Rounding up is OK, since the odd change must
    // be due to an actual edit.
    return (total + 1) / 2;
}
588 
589 // Word must have a trigram score at least this close to the best score seen
590 // so far.
591 #define TRIGRAM_SCORE_THRESHOLD 2
592 
593 string
594 Database::get_spelling_suggestion(const string &word,
595  unsigned max_edit_distance) const
596 {
597  LOGCALL(API, string, "Database::get_spelling_suggestion", word | max_edit_distance);
598  if (word.size() <= 1 || max_edit_distance == 0) return string();
599 
600  max_edit_distance = min(max_edit_distance, unsigned(word.size() - 1));
601 
602  AutoPtr<TermList> merger;
603  for (size_t i = 0; i < internal.size(); ++i) {
604  TermList * tl = internal[i]->open_spelling_termlist(word);
605  LOGLINE(SPELLING, "Sub db " << i << " tl = " << (void*)tl);
606  if (tl) {
607  if (merger.get()) {
608  merger.reset(new OrTermList(merger.release(), tl));
609  } else {
610  merger.reset(tl);
611  }
612  }
613  }
614  if (!merger.get()) RETURN(string());
615 
616  // Convert word to UTF-32.
617  // Extra brackets needed to avoid this being misparsed as a function
618  // prototype.
619  vector<unsigned> utf32_word((Utf8Iterator(word)), Utf8Iterator());
620 
621  vector<unsigned> utf32_term;
622 
623  Xapian::termcount best = 1;
624  string result;
625  int edist_best = max_edit_distance;
626  Xapian::doccount freq_best = 0;
627  Xapian::doccount freq_exact = 0;
628  while (true) {
629  TermList *ret = merger->next();
630  if (ret) merger.reset(ret);
631 
632  if (merger->at_end()) break;
633 
634  string term = merger->get_termname();
635  Xapian::termcount score = merger->get_wdf();
636 
637  LOGLINE(SPELLING, "Term \"" << term << "\" ngram score " << score);
638  if (score + TRIGRAM_SCORE_THRESHOLD >= best) {
639  if (score > best) best = score;
640 
641  // There's no point considering a word where the difference
642  // in length is greater than the smallest number of edits we've
643  // found so far.
644 
645  // First check the length of the encoded UTF-8 version of term.
646  // Each UTF-32 character is 1-4 bytes in UTF-8.
647  if (abs(long(term.size()) - long(word.size())) > edist_best * 4) {
648  LOGLINE(SPELLING, "Lengths much too different");
649  continue;
650  }
651 
652  // Now convert to UTF-32, and compare the true lengths more
653  // strictly.
654  utf32_term.assign(Utf8Iterator(term), Utf8Iterator());
655 
656  if (abs(long(utf32_term.size()) - long(utf32_word.size()))
657  > edist_best) {
658  LOGLINE(SPELLING, "Lengths too different");
659  continue;
660  }
661 
662  if (freq_edit_lower_bound(utf32_term, utf32_word) > edist_best) {
663  LOGLINE(SPELLING, "Rejected by character frequency test");
664  continue;
665  }
666 
667  int edist = edit_distance_unsigned(&utf32_term[0],
668  int(utf32_term.size()),
669  &utf32_word[0],
670  int(utf32_word.size()),
671  edist_best);
672  LOGLINE(SPELLING, "Edit distance " << edist);
673 
674  if (edist <= edist_best) {
675  Xapian::doccount freq = 0;
676  for (size_t j = 0; j < internal.size(); ++j)
677  freq += internal[j]->get_spelling_frequency(term);
678 
679  LOGLINE(SPELLING, "Freq " << freq << " best " << freq_best);
680  // Even if we have an exact match, there may be a much more
681  // frequent potential correction which will still be
682  // interesting.
683  if (edist == 0) {
684  freq_exact = freq;
685  continue;
686  }
687 
688  if (edist < edist_best || freq > freq_best) {
689  LOGLINE(SPELLING, "Best so far: \"" << term <<
690  "\" edist " << edist << " freq " << freq);
691  result = term;
692  edist_best = edist;
693  freq_best = freq;
694  }
695  }
696  }
697  }
698  if (freq_best < freq_exact)
699  RETURN(string());
700  RETURN(result);
701 }
702 
704 Database::spellings_begin() const
705 {
706  LOGCALL(API, TermIterator, "Database::spellings_begin", NO_ARGS);
707  AutoPtr<TermList> merger;
708  for (size_t i = 0; i < internal.size(); ++i) {
709  TermList * tl = internal[i]->open_spelling_wordlist();
710  if (tl) {
711  if (merger.get()) {
712  merger.reset(new FreqAdderOrTermList(merger.release(), tl));
713  } else {
714  merger.reset(tl);
715  }
716  }
717  }
718  RETURN(TermIterator(merger.release()));
719 }
720 
722 Database::synonyms_begin(const std::string &term) const
723 {
724  LOGCALL(API, TermIterator, "Database::synonyms_begin", term);
725  AutoPtr<TermList> merger;
726  for (size_t i = 0; i < internal.size(); ++i) {
727  TermList * tl = internal[i]->open_synonym_termlist(term);
728  if (tl) {
729  if (merger.get()) {
730  merger.reset(new OrTermList(merger.release(), tl));
731  } else {
732  merger.reset(tl);
733  }
734  }
735  }
736  RETURN(TermIterator(merger.release()));
737 }
738 
740 Database::synonym_keys_begin(const std::string &prefix) const
741 {
742  LOGCALL(API, TermIterator, "Database::synonym_keys_begin", prefix);
743  AutoPtr<TermList> merger;
744  for (size_t i = 0; i < internal.size(); ++i) {
745  TermList * tl = internal[i]->open_synonym_keylist(prefix);
746  if (tl) {
747  if (merger.get()) {
748  merger.reset(new OrTermList(merger.release(), tl));
749  } else {
750  merger.reset(tl);
751  }
752  }
753  }
754  RETURN(TermIterator(merger.release()));
755 }
756 
757 string
758 Database::get_metadata(const string & key) const
759 {
760  LOGCALL(API, string, "Database::get_metadata", key);
761  if (rare(key.empty()))
763  if (internal.empty()) RETURN(std::string());
764  RETURN(internal[0]->get_metadata(key));
765 }
766 
768 Database::metadata_keys_begin(const std::string &prefix) const
769 {
770  LOGCALL(API, Xapian::TermIterator, "Database::metadata_keys_begin", NO_ARGS);
771  if (internal.empty()) RETURN(TermIterator());
772  RETURN(TermIterator(internal[0]->open_metadata_keylist(prefix)));
773 }
774 
775 std::string
776 Database::get_uuid() const
777 {
778  LOGCALL(API, std::string, "Database::get_uuid", NO_ARGS);
779  string uuid;
780  for (size_t i = 0; i < internal.size(); ++i) {
781  string sub_uuid = internal[i]->get_uuid();
782  // If any of the sub-databases have no uuid, we can't make a uuid for
783  // the combined database.
784  if (sub_uuid.empty())
785  RETURN(sub_uuid);
786  if (!uuid.empty()) uuid += ':';
787  uuid += sub_uuid;
788  }
789  RETURN(uuid);
790 }
791 
792 bool
793 Database::locked() const
794 {
795  LOGCALL(API, bool, "Database::locked", NO_ARGS);
796  for (const auto & subdb : internal) {
797  // If any of the sub-databases is locked, return true.
798  if (subdb->locked())
799  RETURN(true);
800  }
801  RETURN(false);
802 }
803 
805 Database::get_revision() const
806 {
807  LOGCALL(API, Xapian::rev, "Database::get_revision", NO_ARGS);
808  size_t n_dbs = internal.size();
809  if (rare(n_dbs != 1)) {
810  if (n_dbs == 0)
811  return 0;
812  throw Xapian::InvalidOperationError("Database::get_revision() requires "
813  "exactly one subdatabase");
814  }
815  const string& s = internal[0]->get_revision_info();
816  const char* p = s.data();
817  const char* end = p + s.size();
819  if (!unpack_uint(&p, end, &revision))
820  throw Xapian::UnimplementedError("Database::get_revision() only "
821  "supported for chert and glass");
822  return revision;
823 }
824 
826 
827 WritableDatabase::WritableDatabase() : Database()
828 {
829  LOGCALL_CTOR(API, "WritableDatabase", NO_ARGS);
830 }
831 
833  : Database(internal_)
834 {
835  LOGCALL_CTOR(API, "WritableDatabase", internal_);
836 }
837 
839  : Database(other)
840 {
841  LOGCALL_CTOR(API, "WritableDatabase", other);
842 }
843 
844 void
846 {
847  LOGCALL_VOID(API, "WritableDatabase::operator=", other);
848  Database::operator=(other);
849 }
850 
852 {
853  LOGCALL_DTOR(API, "WritableDatabase");
854 }
855 
856 void
858 {
859  LOGCALL_VOID(API, "WritableDatabase::commit", NO_ARGS);
860  size_t n_dbs = internal.size();
861  if (rare(n_dbs == 0))
862  no_subdatabases();
863  for (size_t i = 0; i != n_dbs; ++i)
864  internal[i]->commit();
865 }
866 
867 void
869 {
870  LOGCALL_VOID(API, "WritableDatabase::begin_transaction", flushed);
871  size_t n_dbs = internal.size();
872  if (rare(n_dbs == 0))
873  no_subdatabases();
874  for (size_t i = 0; i != n_dbs; ++i)
875  internal[i]->begin_transaction(flushed);
876 }
877 
878 void
880 {
881  LOGCALL_VOID(API, "WritableDatabase::commit_transaction", NO_ARGS);
882  size_t n_dbs = internal.size();
883  if (rare(n_dbs == 0))
884  no_subdatabases();
885  for (size_t i = 0; i != n_dbs; ++i)
886  internal[i]->commit_transaction();
887 }
888 
889 void
891 {
892  LOGCALL_VOID(API, "WritableDatabase::cancel_transaction", NO_ARGS);
893  size_t n_dbs = internal.size();
894  if (rare(n_dbs == 0))
895  no_subdatabases();
896  for (size_t i = 0; i != n_dbs; ++i)
897  internal[i]->cancel_transaction();
898 }
899 
900 
903 {
904  LOGCALL(API, Xapian::docid, "WritableDatabase::add_document", document);
905  size_t n_dbs = internal.size();
906  if (rare(n_dbs == 0))
907  no_subdatabases();
908  if (n_dbs == 1)
909  RETURN(internal[0]->add_document(document));
910 
911  // Which database will the next never used docid be in?
912  Xapian::docid did = get_lastdocid() + 1;
913  if (rare(did == 0)) {
914  throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
915  }
916  // We want exactly did to be used, not a lower docid if that subdb isn't
917  // using the docid before it, so call replace_document() not
918  // add_document().
919  size_t i = sub_db(did, n_dbs);
920  internal[i]->replace_document(sub_docid(did, n_dbs), document);
921  RETURN(did);
922 }
923 
924 void
926 {
927  LOGCALL_VOID(API, "WritableDatabase::delete_document", did);
928  if (rare(did == 0))
930 
931  size_t n_dbs = internal.size();
932  if (rare(n_dbs == 0))
933  no_subdatabases();
934  size_t i = sub_db(did, n_dbs);
935  internal[i]->delete_document(sub_docid(did, n_dbs));
936 }
937 
938 void
939 WritableDatabase::delete_document(const std::string & unique_term)
940 {
941  LOGCALL_VOID(API, "WritableDatabase::delete_document", unique_term);
942  if (unique_term.empty())
943  throw InvalidArgumentError("Empty termnames are invalid");
944  size_t n_dbs = internal.size();
945  if (rare(n_dbs == 0))
946  no_subdatabases();
947  for (size_t i = 0; i != n_dbs; ++i)
948  internal[i]->delete_document(unique_term);
949 }
950 
951 void
953 {
954  LOGCALL_VOID(API, "WritableDatabase::replace_document", did | document);
955  if (did == 0)
957  size_t n_dbs = internal.size();
958  if (rare(n_dbs == 0))
959  no_subdatabases();
960  size_t i = sub_db(did, n_dbs);
961  internal[i]->replace_document(sub_docid(did, n_dbs), document);
962 }
963 
965 WritableDatabase::replace_document(const std::string & unique_term,
966  const Document & document)
967 {
968  LOGCALL(API, Xapian::docid, "WritableDatabase::replace_document", unique_term | document);
969  if (unique_term.empty())
970  throw InvalidArgumentError("Empty termnames are invalid");
971  size_t n_dbs = internal.size();
972  if (rare(n_dbs == 0))
973  no_subdatabases();
974  if (n_dbs == 1)
975  RETURN(internal[0]->replace_document(unique_term, document));
976 
977  Xapian::PostingIterator postit = postlist_begin(unique_term);
978  // If no unique_term in the database, this is just an add_document().
979  if (postit == postlist_end(unique_term)) {
980  // Which database will the next never used docid be in?
981  Xapian::docid did = get_lastdocid() + 1;
982  if (rare(did == 0)) {
983  throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
984  }
985  size_t i = sub_db(did, n_dbs);
986  RETURN(internal[i]->add_document(document));
987  }
988 
989  Xapian::docid retval = *postit;
990  size_t i = sub_db(retval, n_dbs);
991  internal[i]->replace_document(sub_docid(retval, n_dbs), document);
992 
993  // Delete any other occurrences of unique_term.
994  while (++postit != postlist_end(unique_term)) {
995  Xapian::docid did = *postit;
996  i = sub_db(did, n_dbs);
997  internal[i]->delete_document(sub_docid(did, n_dbs));
998  }
999 
1000  return retval;
1001 }
1002 
1003 void
1004 WritableDatabase::add_spelling(const std::string & word,
1005  Xapian::termcount freqinc) const
1006 {
1007  LOGCALL_VOID(API, "WritableDatabase::add_spelling", word | freqinc);
1008  if (rare(internal.empty()))
1009  no_subdatabases();
1010  // FIXME: Is adding to the first subdatabase sensible?
1011  internal[0]->add_spelling(word, freqinc);
1012 }
1013 
1014 void
1015 WritableDatabase::remove_spelling(const std::string & word,
1016  Xapian::termcount freqdec) const
1017 {
1018  LOGCALL_VOID(API, "WritableDatabase::remove_spelling", word | freqdec);
1019  size_t n_dbs = internal.size();
1020  if (rare(n_dbs == 0))
1021  no_subdatabases();
1022  for (size_t i = 0; i < n_dbs; ++i) {
1023  internal[i]->remove_spelling(word, freqdec);
1024  }
1025 }
1026 
1027 void
1028 WritableDatabase::add_synonym(const std::string & term,
1029  const std::string & synonym) const
1030 {
1031  LOGCALL_VOID(API, "WritableDatabase::add_synonym", term | synonym);
1032  if (rare(internal.empty()))
1033  no_subdatabases();
1034  // FIXME: Is adding to the first subdatabase sensible?
1035  internal[0]->add_synonym(term, synonym);
1036 }
1037 
1038 void
1039 WritableDatabase::remove_synonym(const std::string & term,
1040  const std::string & synonym) const
1041 {
1042  LOGCALL_VOID(API, "WritableDatabase::remove_synonym", term | synonym);
1043  size_t n_dbs = internal.size();
1044  if (rare(n_dbs == 0))
1045  no_subdatabases();
1046  for (size_t i = 0; i < n_dbs; ++i) {
1047  internal[i]->remove_synonym(term, synonym);
1048  }
1049 }
1050 
1051 void
1052 WritableDatabase::clear_synonyms(const std::string & term) const
1053 {
1054  LOGCALL_VOID(API, "WritableDatabase::clear_synonyms", term);
1055  size_t n_dbs = internal.size();
1056  if (rare(n_dbs == 0))
1057  no_subdatabases();
1058  for (size_t i = 0; i < n_dbs; ++i) {
1059  internal[i]->clear_synonyms(term);
1060  }
1061 }
1062 
1063 void
1064 WritableDatabase::set_metadata(const string & key, const string & value)
1065 {
1066  LOGCALL_VOID(API, "WritableDatabase::set_metadata", key | value);
1067  if (rare(key.empty()))
1069  if (rare(internal.empty()))
1070  no_subdatabases();
1071  internal[0]->set_metadata(key, value);
1072 }
1073 
1074 string
1076 {
1078  return "WritableDatabase()";
1079 }
1080 
1081 }
static void no_subdatabases()
Definition: omdatabase.cc:64
Unicode and UTF-8 related classes and functions.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
int close(FD &fd)
Definition: fd.h:63
#define RETURN(A)
Definition: debuglog.h:482
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
#define Assert(COND)
Definition: omassert.h:122
#define VEC_SIZE
Definition: omdatabase.cc:564
Define the XAPIAN_NORETURN macro.
size_t sub_db(Xapian::docid did, size_t n_dbs)
Definition: omdatabase.cc:76
void cancel_transaction()
Abort the transaction currently in progress, discarding the pending modifications made to the database.
Definition: omdatabase.cc:890
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
This class is used to access a database, or a group of databases.
Definition: database.h:68
void remove_spelling(const std::string &word, Xapian::termcount freqdec=1) const
Remove a word from the spelling dictionary.
Definition: omdatabase.cc:1015
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
Merge two TermList objects using an OR operation.
Base class for databases.
Definition: database.h:56
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139
#define TRIGRAM_SCORE_THRESHOLD
Definition: omdatabase.cc:591
Constants in the Xapian namespace.
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
size_t sub_docid(Xapian::docid did, size_t n_dbs)
Definition: omdatabase.cc:82
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:479
void begin_transaction(bool flushed=true)
Begin a transaction.
Definition: omdatabase.cc:868
Class for iterating over document values.
Definition: valueiterator.h:40
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:477
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
WritableDatabase()
Create a WritableDatabase with no subdatabases.
Definition: omdatabase.cc:827
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
std::string get_description() const
Return a string describing this object.
Definition: omdatabase.cc:1075
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
void set_metadata(const std::string &key, const std::string &metadata)
Set the user-specified metadata associated with a given key.
Definition: omdatabase.cc:1064
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
Definition: database.h:81
const int DOC_ASSUME_VALID
Assume document id is valid.
Definition: constants.h:280
Class for iterating over term positions.
Class for merging AllTermsList objects from subdatabases.
#define rare(COND)
Definition: config.h:562
Hierarchy of classes which Xapian can throw as exceptions.
Class for iterating over a list of terms.
Definition: termiterator.h:41
Class for merging AllTermsList objects from subdatabases.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Class for iterating over a list of terms.
void operator=(const WritableDatabase &other)
Assignment is allowed.
Definition: omdatabase.cc:845
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
void remove_synonym(const std::string &term, const std::string &synonym) const
Remove a synonym for a term.
Definition: omdatabase.cc:1039
virtual ~WritableDatabase()
Destroy this handle on the database.
Definition: omdatabase.cc:851
Class for merging ValueList objects from subdatabases.
This class provides read/write access to a database.
Definition: database.h:785
Class for merging ValueList objects from subdatabases.
static int freq_edit_lower_bound(const vector< unsigned > &a, const vector< unsigned > &b)
Definition: omdatabase.cc:567
double doclength
A normalised document length.
Definition: types.h:59
Edit distance calculation algorithm.
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:925
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
Abstract base class for iterating all terms in a database.
Class for iterating over term positions.
virtual std::string get_termname() const =0
Return the termname at the current position.
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:478
A termlist which ORs two termlists together, adding term frequencies.
Definition: ortermlist.h:81
void clear_synonyms(const std::string &term) const
Remove all synonyms for a term.
Definition: omdatabase.cc:1052
An iterator which returns Unicode character values from a UTF-8 encoded string.
Definition: unicode.h:38
C++ class declaration for multiple database access.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
static void empty_metadata_key()
Definition: omdatabase.cc:70
Class for merging PostList objects from subdatabases.
Pack types into strings and unpack them again.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
void commit_transaction()
Complete the transaction currently in progress.
Definition: omdatabase.cc:879
void operator=(const Database &other)
Assignment is allowed.
Definition: omdatabase.cc:113
Class for iterating over a list of document ids.
Various assertion macros.
#define LOGLINE(a, b)
Definition: debuglog.h:483
void add_synonym(const std::string &term, const std::string &synonym) const
Add a synonym for a term.
Definition: omdatabase.cc:1028
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
DatabaseError indicates some sort of database related error.
Definition: error.h:367
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:225
int edit_distance_unsigned(const unsigned *ptr1, int len1, const unsigned *ptr2, int len2, int max_distance)
Calculate the edit distance between two sequences.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
Class for iterating over a list of terms.
static void docid_zero_invalid()
Definition: omdatabase.cc:58
A handle representing a document in a Xapian database.
Definition: document.h:61
Types used internally.
Wrapper around standard unique_ptr template.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:476
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
void add_spelling(const std::string &word, Xapian::termcount freqinc=1) const
Add a word to the spelling dictionary.
Definition: omdatabase.cc:1004
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162