omdatabase.cc
Go to the documentation of this file.
1 /* omdatabase.cc: External interface for running queries
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2001,2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2016 Olly Betts
6  * Copyright 2006,2008 Lemur Consulting Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "autoptr.h"
27 
28 #include <xapian/constants.h>
29 #include <xapian/error.h>
31 #include <xapian/postingiterator.h>
32 #include <xapian/termiterator.h>
33 #include <xapian/unicode.h>
34 
35 #include "omassert.h"
36 #include "debuglog.h"
37 #include "backends/alltermslist.h"
42 #include "backends/database.h"
43 #include "editdistance.h"
44 #include "expand/ortermlist.h"
45 #include "internaltypes.h"
46 #include "noreturn.h"
47 #include "pack.h"
48 
49 #include <algorithm>
50 #include <cstdlib> // For abs().
51 #include <cstring>
52 #include <vector>
53 
54 using namespace std;
56 
57 XAPIAN_NORETURN(static void docid_zero_invalid());
58 static void docid_zero_invalid()
59 {
60  throw Xapian::InvalidArgumentError("Document ID 0 is invalid");
61 }
62 
63 XAPIAN_NORETURN(static void no_subdatabases());
64 static void no_subdatabases()
65 {
66  throw Xapian::InvalidOperationError("No subdatabases");
67 }
68 
69 XAPIAN_NORETURN(static void empty_metadata_key());
70 static void empty_metadata_key()
71 {
72  throw Xapian::InvalidArgumentError("Empty metadata keys are invalid");
73 }
74 
75 inline size_t
76 sub_db(Xapian::docid did, size_t n_dbs)
77 {
78  return (did - 1) % n_dbs;
79 }
80 
81 inline size_t
82 sub_docid(Xapian::docid did, size_t n_dbs)
83 {
84  return (did - 1) / n_dbs + 1;
85 }
86 
87 namespace Xapian {
88 
89 Database::Database(Database&&) = default;
90 
91 Database&
92 Database::operator=(Database&&) = default;
93 
94 Database::Database()
95 {
96  LOGCALL_CTOR(API, "Database", NO_ARGS);
97 }
98 
99 Database::Database(Database::Internal *internal_)
100 {
101  LOGCALL_CTOR(API, "Database", internal_);
102  intrusive_ptr<Database::Internal> newi(internal_);
103  internal.push_back(newi);
104 }
105 
106 Database::Database(const Database &other)
107 {
108  LOGCALL_CTOR(API, "Database", other);
109  internal = other.internal;
110 }
111 
112 void
113 Database::operator=(const Database &other)
114 {
115  LOGCALL_VOID(API, "Database::operator=", other);
116  internal = other.internal;
117 }
118 
119 Database::~Database()
120 {
121  LOGCALL_DTOR(API, "Database");
122 }
123 
124 bool
125 Database::reopen()
126 {
127  LOGCALL(API, bool, "Database::reopen", NO_ARGS);
128  bool maybe_changed = false;
129  vector<intrusive_ptr<Database::Internal> >::iterator i;
130  for (i = internal.begin(); i != internal.end(); ++i) {
131  if ((*i)->reopen())
132  maybe_changed = true;
133  }
134  RETURN(maybe_changed);
135 }
136 
137 void
139 {
140  LOGCALL_VOID(API, "Database::close", NO_ARGS);
141  vector<intrusive_ptr<Database::Internal> >::iterator i;
142  for (i = internal.begin(); i != internal.end(); ++i) {
143  (*i)->close();
144  }
145 }
146 
147 void
148 Database::add_database(const Database & database)
149 {
150  LOGCALL_VOID(API, "Database::add_database", database);
151  if (this == &database) {
152  LOGLINE(API, "Database added to itself");
153  throw Xapian::InvalidArgumentError("Can't add a Database to itself");
154  }
155  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
156  for (i = database.internal.begin(); i != database.internal.end(); ++i) {
157  internal.push_back(*i);
158  }
159 }
160 
162 Database::postlist_begin(const string &tname) const
163 {
164  LOGCALL(API, PostingIterator, "Database::postlist_begin", tname);
165 
166  // Don't bother checking that the term exists first. If it does, we
167  // just end up doing more work, and if it doesn't, we save very little
168  // work.
169 
170  // Handle the common case of a single database specially.
171  if (internal.size() == 1)
172  RETURN(PostingIterator(internal[0]->open_post_list(tname)));
173 
174  if (rare(internal.empty()))
176 
177  vector<LeafPostList *> pls;
178  try {
179  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
180  for (i = internal.begin(); i != internal.end(); ++i) {
181  pls.push_back((*i)->open_post_list(tname));
182  pls.back()->next();
183  }
184  Assert(pls.begin() != pls.end());
185  } catch (...) {
186  vector<LeafPostList *>::iterator i;
187  for (i = pls.begin(); i != pls.end(); ++i) {
188  delete *i;
189  *i = 0;
190  }
191  throw;
192  }
193 
194  RETURN(PostingIterator(new MultiPostList(pls, *this)));
195 }
196 
198 Database::termlist_begin(Xapian::docid did) const
199 {
200  LOGCALL(API, TermIterator, "Database::termlist_begin", did);
201  if (did == 0)
203 
204  unsigned int multiplier = internal.size();
205  if (rare(multiplier == 0))
206  no_subdatabases();
207  TermList *tl;
208  if (multiplier == 1) {
209  // There's no need for the MultiTermList wrapper in the common case
210  // where we're only dealing with a single database.
211  tl = internal[0]->open_term_list(did);
212  } else {
213  Assert(multiplier != 0);
214  Xapian::doccount n = (did - 1) % multiplier; // which actual database
215  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
216 
217  tl = new MultiTermList(internal[n]->open_term_list(m), *this, n);
218  }
219  RETURN(TermIterator(tl));
220 }
221 
223 Database::allterms_begin(const std::string & prefix) const
224 {
225  LOGCALL(API, TermIterator, "Database::allterms_begin", NO_ARGS);
226  TermList * tl;
227  if (rare(internal.size() == 0)) {
228  tl = NULL;
229  } else if (internal.size() == 1) {
230  tl = internal[0]->open_allterms(prefix);
231  } else {
232  tl = new MultiAllTermsList(internal, prefix);
233  }
234  RETURN(TermIterator(tl));
235 }
236 
237 bool
238 Database::has_positions() const
239 {
240  LOGCALL(API, bool, "Database::has_positions", NO_ARGS);
241  // If any sub-database has positions, the combined database does.
242  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
243  for (i = internal.begin(); i != internal.end(); ++i) {
244  if ((*i)->has_positions()) RETURN(true);
245  }
246  RETURN(false);
247 }
248 
250 Database::positionlist_begin(Xapian::docid did, const string &tname) const
251 {
252  LOGCALL(API, PositionIterator, "Database::positionlist_begin", did | tname);
253  if (tname.empty())
254  throw InvalidArgumentError("Zero length terms are invalid");
255  if (did == 0)
257 
258  unsigned int multiplier = internal.size();
259  if (rare(multiplier == 0))
260  no_subdatabases();
261  Xapian::doccount n = (did - 1) % multiplier; // which actual database
262  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
263  RETURN(PositionIterator(internal[n]->open_position_list(m, tname)));
264 }
265 
267 Database::get_doccount() const
268 {
269  LOGCALL(API, Xapian::doccount, "Database::get_doccount", NO_ARGS);
270  Xapian::doccount docs = 0;
271  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
272  for (i = internal.begin(); i != internal.end(); ++i) {
273  docs += (*i)->get_doccount();
274  }
275  RETURN(docs);
276 }
277 
279 Database::get_lastdocid() const
280 {
281  LOGCALL(API, Xapian::docid, "Database::get_lastdocid", NO_ARGS);
282  Xapian::docid did = 0;
283 
284  unsigned int multiplier = internal.size();
285  for (Xapian::doccount i = 0; i < multiplier; ++i) {
286  Xapian::docid did_i = internal[i]->get_lastdocid();
287  if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
288  }
289  RETURN(did);
290 }
291 
293 Database::get_avlength() const
294 {
295  LOGCALL(API, Xapian::doclength, "Database::get_avlength", NO_ARGS);
296  Xapian::doccount docs = 0;
297  Xapian::totallength totlen = 0;
298 
299  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
300  for (i = internal.begin(); i != internal.end(); ++i) {
301  docs += (*i)->get_doccount();
302  totlen += (*i)->get_total_length();
303  }
304  LOGLINE(UNKNOWN, "get_avlength() = " << totlen << " / " << docs <<
305  " (from " << internal.size() << " dbs)");
306 
307  if (docs == 0) RETURN(0.0);
308  RETURN(totlen / double(docs));
309 }
310 
312 Database::get_total_length() const
313 {
314  LOGCALL(API, Xapian::totallength, "Database::get_total_length", NO_ARGS);
315  Xapian::totallength total_length = 0;
316  for (auto&& sub_db : internal) {
317  total_length += sub_db->get_total_length();
318  }
319  RETURN(total_length);
320 }
321 
323 Database::get_termfreq(const string & tname) const
324 {
325  LOGCALL(API, Xapian::doccount, "Database::get_termfreq", tname);
326  if (tname.empty()) RETURN(get_doccount());
327 
328  Xapian::doccount tf = 0;
329  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
330  for (i = internal.begin(); i != internal.end(); ++i) {
331  Xapian::doccount sub_tf;
332  (*i)->get_freqs(tname, &sub_tf, NULL);
333  tf += sub_tf;
334  }
335  RETURN(tf);
336 }
337 
339 Database::get_collection_freq(const string & tname) const
340 {
341  LOGCALL(API, Xapian::termcount, "Database::get_collection_freq", tname);
342  if (tname.empty()) RETURN(get_doccount());
343 
344  Xapian::termcount cf = 0;
345  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
346  for (i = internal.begin(); i != internal.end(); ++i) {
347  Xapian::termcount sub_cf;
348  (*i)->get_freqs(tname, NULL, &sub_cf);
349  cf += sub_cf;
350  }
351  RETURN(cf);
352 }
353 
355 Database::get_value_freq(Xapian::valueno slot) const
356 {
357  LOGCALL(API, Xapian::doccount, "Database::get_value_freq", slot);
358 
359  Xapian::doccount vf = 0;
360  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
361  for (i = internal.begin(); i != internal.end(); ++i) {
362  vf += (*i)->get_value_freq(slot);
363  }
364  RETURN(vf);
365 }
366 
367 string
368 Database::get_value_lower_bound(Xapian::valueno slot) const
369 {
370  LOGCALL(API, string, "Database::get_value_lower_bound", slot);
371 
372  if (rare(internal.empty())) RETURN(string());
373 
374  string full_lb;
375  for (auto&& subdb : internal) {
376  string lb = subdb->get_value_lower_bound(slot);
377  if (lb.empty())
378  continue;
379  if (full_lb.empty() || lb < full_lb)
380  full_lb = std::move(lb);
381  }
382  RETURN(full_lb);
383 }
384 
385 std::string
386 Database::get_value_upper_bound(Xapian::valueno slot) const
387 {
388  LOGCALL(API, std::string, "Database::get_value_upper_bound", slot);
389 
390  std::string full_ub;
391  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
392  for (i = internal.begin(); i != internal.end(); ++i) {
393  std::string ub = (*i)->get_value_upper_bound(slot);
394  if (ub > full_ub)
395  full_ub = ub;
396  }
397  RETURN(full_ub);
398 }
399 
401 Database::get_doclength_lower_bound() const
402 {
403  LOGCALL(API, Xapian::termcount, "Database::get_doclength_lower_bound", NO_ARGS);
404 
405  if (rare(internal.empty())) RETURN(0);
406 
407  Xapian::termcount full_lb = 0;
408  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
409  for (i = internal.begin(); i != internal.end(); ++i) {
410  // Skip sub-databases which are empty or only contain documents with
411  // doclen==0.
412  if ((*i)->get_total_length() != 0) {
413  Xapian::termcount lb = (*i)->get_doclength_lower_bound();
414  if (full_lb == 0 || lb < full_lb) full_lb = lb;
415  }
416  }
417  RETURN(full_lb);
418 }
419 
421 Database::get_doclength_upper_bound() const
422 {
423  LOGCALL(API, Xapian::termcount, "Database::get_doclength_upper_bound", NO_ARGS);
424 
425  Xapian::termcount full_ub = 0;
426  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
427  for (i = internal.begin(); i != internal.end(); ++i) {
428  Xapian::termcount ub = (*i)->get_doclength_upper_bound();
429  if (ub > full_ub) full_ub = ub;
430  }
431  RETURN(full_ub);
432 }
433 
435 Database::get_wdf_upper_bound(const string & term) const
436 {
437  LOGCALL(API, Xapian::termcount, "Database::get_wdf_upper_bound", term);
438  if (term.empty()) RETURN(0);
439 
440  Xapian::termcount full_ub = 0;
441  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
442  for (i = internal.begin(); i != internal.end(); ++i) {
443  Xapian::termcount ub = (*i)->get_wdf_upper_bound(term);
444  if (ub > full_ub) full_ub = ub;
445  }
446  RETURN(full_ub);
447 }
448 
450 Database::valuestream_begin(Xapian::valueno slot) const
451 {
452  LOGCALL(API, ValueIterator, "Database::valuestream_begin", slot);
453  if (internal.size() == 0)
455  if (internal.size() != 1)
456  RETURN(ValueIterator(new MultiValueList(internal, slot)));
457  RETURN(ValueIterator(internal[0]->open_value_list(slot)));
458 }
459 
461 Database::get_doclength(Xapian::docid did) const
462 {
463  LOGCALL(API, Xapian::termcount, "Database::get_doclength", did);
464  if (did == 0)
466 
467  unsigned int multiplier = internal.size();
468  if (rare(multiplier == 0))
469  no_subdatabases();
470  Xapian::doccount n = (did - 1) % multiplier; // which actual database
471  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
472  RETURN(internal[n]->get_doclength(m));
473 }
474 
476 Database::get_unique_terms(Xapian::docid did) const
477 {
478  LOGCALL(API, Xapian::termcount, "Database::get_unique_terms", did);
479  if (did == 0)
481  unsigned int multiplier = internal.size();
482  if (rare(multiplier == 0))
483  no_subdatabases();
484  Xapian::doccount n = (did - 1) % multiplier; // which actual database
485  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
486  RETURN(internal[n]->get_unique_terms(m));
487 }
488 
489 Document
490 Database::get_document(Xapian::docid did) const
491 {
492  LOGCALL(API, Document, "Database::get_document", did);
493  if (did == 0)
495 
496  unsigned int multiplier = internal.size();
497  if (rare(multiplier == 0))
498  no_subdatabases();
499  Xapian::doccount n = (did - 1) % multiplier; // which actual database
500  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
501 
502  // Open non-lazily so we throw DocNotFoundError if the doc doesn't exist.
503  RETURN(Document(internal[n]->open_document(m, false)));
504 }
505 
506 Document
507 Database::get_document(Xapian::docid did, unsigned flags) const
508 {
509  LOGCALL(API, Document, "Database::get_document", did|flags);
510  if (did == 0)
512 
513  unsigned int multiplier = internal.size();
514  if (rare(multiplier == 0))
515  no_subdatabases();
516  Xapian::doccount n = (did - 1) % multiplier; // which actual database
517  Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
518 
519  bool assume_valid = flags & Xapian::DOC_ASSUME_VALID;
520  RETURN(Document(internal[n]->open_document(m, assume_valid)));
521 }
522 
523 bool
524 Database::term_exists(const string & tname) const
525 {
526  LOGCALL(API, bool, "Database::term_exists", tname);
527  if (tname.empty()) {
528  RETURN(get_doccount() != 0);
529  }
530  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
531  for (i = internal.begin(); i != internal.end(); ++i) {
532  if ((*i)->term_exists(tname)) RETURN(true);
533  }
534  RETURN(false);
535 }
536 
537 void
538 Database::keep_alive()
539 {
540  LOGCALL_VOID(API, "Database::keep_alive", NO_ARGS);
541  vector<intrusive_ptr<Database::Internal> >::const_iterator i;
542  for (i = internal.begin(); i != internal.end(); ++i) {
543  (*i)->keep_alive();
544  }
545 }
546 
547 string
548 Database::get_description() const
549 {
551  return "Database()";
552 }
553 
554 // We sum the character frequency histogram absolute differences to compute a
555 // lower bound on the edit distance. Rather than counting each Unicode code
556 // point uniquely, we use an array with VEC_SIZE elements and tally code points
557 // modulo VEC_SIZE which can only reduce the bound we calculate.
558 //
559 // There will be a trade-off between how good the bound is and how large an
560 // array is used (a larger array takes more time to clear and sum over). The
561 // value 64 is somewhat arbitrary - it works as well as 128 for the testsuite
562 // but that may not reflect real world performance. FIXME: profile and tune.
563 
#define VEC_SIZE 64

/** Compute a cheap lower bound on the edit distance between @a a and @a b.
 *
 *  Code points from each sequence are tallied modulo VEC_SIZE; the summed
 *  absolute differences of the tallies, halved and rounded up, is returned.
 *
 *  @param a  First sequence of code points.
 *  @param b  Second sequence of code points.
 */
static int
freq_edit_lower_bound(const std::vector<unsigned> & a,
		      const std::vector<unsigned> & b)
{
    // Signed per-bucket tally: incremented for a, decremented for b.
    int histogram[VEC_SIZE] = {};
    for (unsigned ch : a) {
	++histogram[ch % VEC_SIZE];
    }
    for (unsigned ch : b) {
	--histogram[ch % VEC_SIZE];
    }
    unsigned int total = 0;
    for (int diff : histogram) {
	total += std::abs(diff);
    }
    // Each insertion or deletion adds at most 1 to total.  Each transposition
    // doesn't change it at all.  But each substitution can change it by 2 so
    // we need to divide it by 2.  Rounding up is OK, since the odd change must
    // be due to an actual edit.
    return (total + 1) / 2;
}
588 
589 // Word must have a trigram score at least this close to the best score seen
590 // so far.
591 #define TRIGRAM_SCORE_THRESHOLD 2
592 
593 string
594 Database::get_spelling_suggestion(const string &word,
595  unsigned max_edit_distance) const
596 {
597  LOGCALL(API, string, "Database::get_spelling_suggestion", word | max_edit_distance);
598  if (word.size() <= 1) return string();
599  AutoPtr<TermList> merger;
600  for (size_t i = 0; i < internal.size(); ++i) {
601  TermList * tl = internal[i]->open_spelling_termlist(word);
602  LOGLINE(SPELLING, "Sub db " << i << " tl = " << (void*)tl);
603  if (tl) {
604  if (merger.get()) {
605  merger.reset(new OrTermList(merger.release(), tl));
606  } else {
607  merger.reset(tl);
608  }
609  }
610  }
611  if (!merger.get()) RETURN(string());
612 
613  // Convert word to UTF-32.
614  // Extra brackets needed to avoid this being misparsed as a function
615  // prototype.
616  vector<unsigned> utf32_word((Utf8Iterator(word)), Utf8Iterator());
617 
618  vector<unsigned> utf32_term;
619 
620  Xapian::termcount best = 1;
621  string result;
622  int edist_best = max_edit_distance;
623  Xapian::doccount freq_best = 0;
624  Xapian::doccount freq_exact = 0;
625  while (true) {
626  TermList *ret = merger->next();
627  if (ret) merger.reset(ret);
628 
629  if (merger->at_end()) break;
630 
631  string term = merger->get_termname();
632  Xapian::termcount score = merger->get_wdf();
633 
634  LOGLINE(SPELLING, "Term \"" << term << "\" ngram score " << score);
635  if (score + TRIGRAM_SCORE_THRESHOLD >= best) {
636  if (score > best) best = score;
637 
638  // There's no point considering a word where the difference
639  // in length is greater than the smallest number of edits we've
640  // found so far.
641 
642  // First check the length of the encoded UTF-8 version of term.
643  // Each UTF-32 character is 1-4 bytes in UTF-8.
644  if (abs(long(term.size()) - long(word.size())) > edist_best * 4) {
645  LOGLINE(SPELLING, "Lengths much too different");
646  continue;
647  }
648 
649  // Now convert to UTF-32, and compare the true lengths more
650  // strictly.
651  utf32_term.assign(Utf8Iterator(term), Utf8Iterator());
652 
653  if (abs(long(utf32_term.size()) - long(utf32_word.size()))
654  > edist_best) {
655  LOGLINE(SPELLING, "Lengths too different");
656  continue;
657  }
658 
659  if (freq_edit_lower_bound(utf32_term, utf32_word) > edist_best) {
660  LOGLINE(SPELLING, "Rejected by character frequency test");
661  continue;
662  }
663 
664  int edist = edit_distance_unsigned(&utf32_term[0],
665  int(utf32_term.size()),
666  &utf32_word[0],
667  int(utf32_word.size()),
668  edist_best);
669  LOGLINE(SPELLING, "Edit distance " << edist);
670 
671  if (edist <= edist_best) {
672  Xapian::doccount freq = 0;
673  for (size_t j = 0; j < internal.size(); ++j)
674  freq += internal[j]->get_spelling_frequency(term);
675 
676  LOGLINE(SPELLING, "Freq " << freq << " best " << freq_best);
677  // Even if we have an exact match, there may be a much more
678  // frequent potential correction which will still be
679  // interesting.
680  if (edist == 0) {
681  freq_exact = freq;
682  continue;
683  }
684 
685  if (edist < edist_best || freq > freq_best) {
686  LOGLINE(SPELLING, "Best so far: \"" << term <<
687  "\" edist " << edist << " freq " << freq);
688  result = term;
689  edist_best = edist;
690  freq_best = freq;
691  }
692  }
693  }
694  }
695  if (freq_best < freq_exact)
696  RETURN(string());
697  RETURN(result);
698 }
699 
701 Database::spellings_begin() const
702 {
703  LOGCALL(API, TermIterator, "Database::spellings_begin", NO_ARGS);
704  AutoPtr<TermList> merger;
705  for (size_t i = 0; i < internal.size(); ++i) {
706  TermList * tl = internal[i]->open_spelling_wordlist();
707  if (tl) {
708  if (merger.get()) {
709  merger.reset(new FreqAdderOrTermList(merger.release(), tl));
710  } else {
711  merger.reset(tl);
712  }
713  }
714  }
715  RETURN(TermIterator(merger.release()));
716 }
717 
719 Database::synonyms_begin(const std::string &term) const
720 {
721  LOGCALL(API, TermIterator, "Database::synonyms_begin", term);
722  AutoPtr<TermList> merger;
723  for (size_t i = 0; i < internal.size(); ++i) {
724  TermList * tl = internal[i]->open_synonym_termlist(term);
725  if (tl) {
726  if (merger.get()) {
727  merger.reset(new OrTermList(merger.release(), tl));
728  } else {
729  merger.reset(tl);
730  }
731  }
732  }
733  RETURN(TermIterator(merger.release()));
734 }
735 
737 Database::synonym_keys_begin(const std::string &prefix) const
738 {
739  LOGCALL(API, TermIterator, "Database::synonym_keys_begin", prefix);
740  AutoPtr<TermList> merger;
741  for (size_t i = 0; i < internal.size(); ++i) {
742  TermList * tl = internal[i]->open_synonym_keylist(prefix);
743  if (tl) {
744  if (merger.get()) {
745  merger.reset(new OrTermList(merger.release(), tl));
746  } else {
747  merger.reset(tl);
748  }
749  }
750  }
751  RETURN(TermIterator(merger.release()));
752 }
753 
754 string
755 Database::get_metadata(const string & key) const
756 {
757  LOGCALL(API, string, "Database::get_metadata", key);
758  if (rare(key.empty()))
760  if (internal.empty()) RETURN(std::string());
761  RETURN(internal[0]->get_metadata(key));
762 }
763 
765 Database::metadata_keys_begin(const std::string &prefix) const
766 {
767  LOGCALL(API, Xapian::TermIterator, "Database::metadata_keys_begin", NO_ARGS);
768  if (internal.empty()) RETURN(TermIterator());
769  RETURN(TermIterator(internal[0]->open_metadata_keylist(prefix)));
770 }
771 
772 std::string
773 Database::get_uuid() const
774 {
775  LOGCALL(API, std::string, "Database::get_uuid", NO_ARGS);
776  string uuid;
777  for (size_t i = 0; i < internal.size(); ++i) {
778  string sub_uuid = internal[i]->get_uuid();
779  // If any of the sub-databases have no uuid, we can't make a uuid for
780  // the combined database.
781  if (sub_uuid.empty())
782  RETURN(sub_uuid);
783  if (!uuid.empty()) uuid += ':';
784  uuid += sub_uuid;
785  }
786  RETURN(uuid);
787 }
788 
789 bool
790 Database::locked() const
791 {
792  LOGCALL(API, bool, "Database::locked", NO_ARGS);
793  for (const auto & subdb : internal) {
794  // If any of the sub-databases is locked, return true.
795  if (subdb->locked())
796  RETURN(true);
797  }
798  RETURN(false);
799 }
800 
802 Database::get_revision() const
803 {
804  LOGCALL(API, Xapian::rev, "Database::get_revision", NO_ARGS);
805  size_t n_dbs = internal.size();
806  if (rare(n_dbs != 1))
807  throw Xapian::InvalidOperationError("Database::get_revision() requires "
808  "exactly one subdatabase");
809  const string& s = internal[0]->get_revision_info();
810  const char* p = s.data();
811  const char* end = p + s.size();
813  if (!unpack_uint(&p, end, &revision))
814  throw Xapian::UnimplementedError("Database::get_revision() only "
815  "supported for chert and glass");
816  return revision;
817 }
818 
820 
821 WritableDatabase::WritableDatabase() : Database()
822 {
823  LOGCALL_CTOR(API, "WritableDatabase", NO_ARGS);
824 }
825 
827  : Database(internal_)
828 {
829  LOGCALL_CTOR(API, "WritableDatabase", internal_);
830 }
831 
833  : Database(other)
834 {
835  LOGCALL_CTOR(API, "WritableDatabase", other);
836 }
837 
838 void
840 {
841  LOGCALL_VOID(API, "WritableDatabase::operator=", other);
842  Database::operator=(other);
843 }
844 
846 {
847  LOGCALL_DTOR(API, "WritableDatabase");
848 }
849 
850 void
852 {
853  LOGCALL_VOID(API, "WritableDatabase::commit", NO_ARGS);
854  size_t n_dbs = internal.size();
855  if (rare(n_dbs == 0))
856  no_subdatabases();
857  for (size_t i = 0; i != n_dbs; ++i)
858  internal[i]->commit();
859 }
860 
861 void
863 {
864  LOGCALL_VOID(API, "WritableDatabase::begin_transaction", flushed);
865  size_t n_dbs = internal.size();
866  if (rare(n_dbs == 0))
867  no_subdatabases();
868  for (size_t i = 0; i != n_dbs; ++i)
869  internal[i]->begin_transaction(flushed);
870 }
871 
872 void
874 {
875  LOGCALL_VOID(API, "WritableDatabase::commit_transaction", NO_ARGS);
876  size_t n_dbs = internal.size();
877  if (rare(n_dbs == 0))
878  no_subdatabases();
879  for (size_t i = 0; i != n_dbs; ++i)
880  internal[i]->commit_transaction();
881 }
882 
883 void
885 {
886  LOGCALL_VOID(API, "WritableDatabase::cancel_transaction", NO_ARGS);
887  size_t n_dbs = internal.size();
888  if (rare(n_dbs == 0))
889  no_subdatabases();
890  for (size_t i = 0; i != n_dbs; ++i)
891  internal[i]->cancel_transaction();
892 }
893 
894 
897 {
898  LOGCALL(API, Xapian::docid, "WritableDatabase::add_document", document);
899  size_t n_dbs = internal.size();
900  if (rare(n_dbs == 0))
901  no_subdatabases();
902  if (n_dbs == 1)
903  RETURN(internal[0]->add_document(document));
904 
905  // Which database will the next never used docid be in?
906  Xapian::docid did = get_lastdocid() + 1;
907  if (rare(did == 0)) {
908  throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
909  }
910  // We want exactly did to be used, not a lower docid if that subdb isn't
911  // using the docid before it, so call replace_document() not
912  // add_document().
913  size_t i = sub_db(did, n_dbs);
914  internal[i]->replace_document(sub_docid(did, n_dbs), document);
915  RETURN(did);
916 }
917 
918 void
920 {
921  LOGCALL_VOID(API, "WritableDatabase::delete_document", did);
922  if (rare(did == 0))
924 
925  size_t n_dbs = internal.size();
926  if (rare(n_dbs == 0))
927  no_subdatabases();
928  size_t i = sub_db(did, n_dbs);
929  internal[i]->delete_document(sub_docid(did, n_dbs));
930 }
931 
932 void
933 WritableDatabase::delete_document(const std::string & unique_term)
934 {
935  LOGCALL_VOID(API, "WritableDatabase::delete_document", unique_term);
936  if (unique_term.empty())
937  throw InvalidArgumentError("Empty termnames are invalid");
938  size_t n_dbs = internal.size();
939  if (rare(n_dbs == 0))
940  no_subdatabases();
941  for (size_t i = 0; i != n_dbs; ++i)
942  internal[i]->delete_document(unique_term);
943 }
944 
945 void
947 {
948  LOGCALL_VOID(API, "WritableDatabase::replace_document", did | document);
949  if (did == 0)
951  size_t n_dbs = internal.size();
952  if (rare(n_dbs == 0))
953  no_subdatabases();
954  size_t i = sub_db(did, n_dbs);
955  internal[i]->replace_document(sub_docid(did, n_dbs), document);
956 }
957 
959 WritableDatabase::replace_document(const std::string & unique_term,
960  const Document & document)
961 {
962  LOGCALL(API, Xapian::docid, "WritableDatabase::replace_document", unique_term | document);
963  if (unique_term.empty())
964  throw InvalidArgumentError("Empty termnames are invalid");
965  size_t n_dbs = internal.size();
966  if (rare(n_dbs == 0))
967  no_subdatabases();
968  if (n_dbs == 1)
969  RETURN(internal[0]->replace_document(unique_term, document));
970 
971  Xapian::PostingIterator postit = postlist_begin(unique_term);
972  // If no unique_term in the database, this is just an add_document().
973  if (postit == postlist_end(unique_term)) {
974  // Which database will the next never used docid be in?
975  Xapian::docid did = get_lastdocid() + 1;
976  if (rare(did == 0)) {
977  throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
978  }
979  size_t i = sub_db(did, n_dbs);
980  RETURN(internal[i]->add_document(document));
981  }
982 
983  Xapian::docid retval = *postit;
984  size_t i = sub_db(retval, n_dbs);
985  internal[i]->replace_document(sub_docid(retval, n_dbs), document);
986 
987  // Delete any other occurrences of unique_term.
988  while (++postit != postlist_end(unique_term)) {
989  Xapian::docid did = *postit;
990  i = sub_db(did, n_dbs);
991  internal[i]->delete_document(sub_docid(did, n_dbs));
992  }
993 
994  return retval;
995 }
996 
997 void
998 WritableDatabase::add_spelling(const std::string & word,
999  Xapian::termcount freqinc) const
1000 {
1001  LOGCALL_VOID(API, "WritableDatabase::add_spelling", word | freqinc);
1002  if (rare(internal.empty()))
1003  no_subdatabases();
1004  // FIXME: Is adding to the first subdatabase sensible?
1005  internal[0]->add_spelling(word, freqinc);
1006 }
1007 
1008 void
1009 WritableDatabase::remove_spelling(const std::string & word,
1010  Xapian::termcount freqdec) const
1011 {
1012  LOGCALL_VOID(API, "WritableDatabase::remove_spelling", word | freqdec);
1013  size_t n_dbs = internal.size();
1014  if (rare(n_dbs == 0))
1015  no_subdatabases();
1016  for (size_t i = 0; i < n_dbs; ++i) {
1017  internal[i]->remove_spelling(word, freqdec);
1018  }
1019 }
1020 
1021 void
1022 WritableDatabase::add_synonym(const std::string & term,
1023  const std::string & synonym) const
1024 {
1025  LOGCALL_VOID(API, "WritableDatabase::add_synonym", term | synonym);
1026  if (rare(internal.empty()))
1027  no_subdatabases();
1028  // FIXME: Is adding to the first subdatabase sensible?
1029  internal[0]->add_synonym(term, synonym);
1030 }
1031 
1032 void
1033 WritableDatabase::remove_synonym(const std::string & term,
1034  const std::string & synonym) const
1035 {
1036  LOGCALL_VOID(API, "WritableDatabase::remove_synonym", term | synonym);
1037  size_t n_dbs = internal.size();
1038  if (rare(n_dbs == 0))
1039  no_subdatabases();
1040  for (size_t i = 0; i < n_dbs; ++i) {
1041  internal[i]->remove_synonym(term, synonym);
1042  }
1043 }
1044 
1045 void
1046 WritableDatabase::clear_synonyms(const std::string & term) const
1047 {
1048  LOGCALL_VOID(API, "WritableDatabase::clear_synonyms", term);
1049  size_t n_dbs = internal.size();
1050  if (rare(n_dbs == 0))
1051  no_subdatabases();
1052  for (size_t i = 0; i < n_dbs; ++i) {
1053  internal[i]->clear_synonyms(term);
1054  }
1055 }
1056 
1057 void
1058 WritableDatabase::set_metadata(const string & key, const string & value)
1059 {
1060  LOGCALL_VOID(API, "WritableDatabase::set_metadata", key | value);
1061  if (rare(key.empty()))
1063  if (rare(internal.empty()))
1064  no_subdatabases();
1065  internal[0]->set_metadata(key, value);
1066 }
1067 
1068 string
1070 {
1072  return "WritableDatabase()";
1073 }
1074 
1075 }
static void no_subdatabases()
Definition: omdatabase.cc:64
Unicode and UTF-8 related classes and functions.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
int close(FD &fd)
Definition: fd.h:63
#define RETURN(A)
Definition: debuglog.h:459
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:896
#define Assert(COND)
Definition: omassert.h:122
#define VEC_SIZE
Definition: omdatabase.cc:564
Define the XAPIAN_NORETURN macro.
size_t sub_db(Xapian::docid did, size_t n_dbs)
Definition: omdatabase.cc:76
void cancel_transaction()
Abort the transaction currently in progress, discarding the pending modifications made to the database.
Definition: omdatabase.cc:884
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:133
This class is used to access a database, or a group of databases.
Definition: database.h:68
void remove_spelling(const std::string &word, Xapian::termcount freqdec=1) const
Remove a word from the spelling dictionary.
Definition: omdatabase.cc:1009
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:274
Merge two TermList objects using an OR operation.
Base class for databases.
Definition: database.h:56
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
Definition: types.h:139
#define TRIGRAM_SCORE_THRESHOLD
Definition: omdatabase.cc:591
Constants in the Xapian namespace.
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
Definition: omdatabase.cc:279
size_t sub_docid(Xapian::docid did, size_t n_dbs)
Definition: omdatabase.cc:82
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:456
void begin_transaction(bool flushed=true)
Begin a transaction.
Definition: omdatabase.cc:862
Class for iterating over document values.
Definition: valueiterator.h:40
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:454
Abstract base class for termlists.
Definition: termlist.h:39
STL namespace.
WritableDatabase()
Create a WritableDatabase with no subdatabases.
Definition: omdatabase.cc:821
int revision()
Report the revision of the library which the program is linked with.
Definition: xapian.h:142
std::string get_description() const
Return a string describing this object.
Definition: omdatabase.cc:1069
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:946
void set_metadata(const std::string &key, const std::string &metadata)
Set the user-specified metadata associated with a given key.
Definition: omdatabase.cc:1058
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
Definition: database.h:81
const int DOC_ASSUME_VALID
Assume document id is valid.
Definition: constants.h:280
Class for iterating over term positions.
Class for merging AllTermsList objects from subdatabases.
#define rare(COND)
Definition: config.h:518
Hierarchy of classes which Xapian can throw as exceptions.
Class for iterating over a list of terms.
Definition: termiterator.h:41
Class for merging AllTermsList objects from subdatabases.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Class for iterating over a list of terms.
void operator=(const WritableDatabase &other)
Assignment is allowed.
Definition: omdatabase.cc:839
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:232
void remove_synonym(const std::string &term, const std::string &synonym) const
Remove a synonym for a term.
Definition: omdatabase.cc:1033
virtual ~WritableDatabase()
Destroy this handle on the database.
Definition: omdatabase.cc:845
Class for merging ValueList objects from subdatabases.
This class provides read/write access to a database.
Definition: database.h:772
Class for merging ValueList objects from subdatabases.
static int freq_edit_lower_bound(const vector< unsigned > &a, const vector< unsigned > &b)
Definition: omdatabase.cc:567
double doclength
A normalised document length.
Definition: types.h:59
Edit distance calculation algorithm.
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:919
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:851
Abstract base class for iterating all terms in a database.
Class for iterating over term positions.
virtual std::string get_termname() const =0
Return the termname at the current position.
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:455
A termlist which ORs two termlists together, adding term frequencies.
Definition: ortermlist.h:81
void clear_synonyms(const std::string &term) const
Remove all synonyms for a term.
Definition: omdatabase.cc:1046
An iterator which returns Unicode character values from a UTF-8 encoded string.
Definition: unicode.h:38
C++ class declaration for multiple database access.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
static void empty_metadata_key()
Definition: omdatabase.cc:70
C++ class definition for multiple database access.
Pack types into strings and unpack them again.
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Definition: pack.h:413
void commit_transaction()
Complete the transaction currently in progress.
Definition: omdatabase.cc:873
void operator=(const Database &other)
Assignment is allowed.
Definition: omdatabase.cc:113
Class for iterating over a list of document ids.
Various assertion macros.
#define LOGLINE(a, b)
Definition: debuglog.h:460
void add_synonym(const std::string &term, const std::string &synonym) const
Add a synonym for a term.
Definition: omdatabase.cc:1022
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
DatabaseError indicates some sort of database related error.
Definition: error.h:358
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
Definition: database.h:220
int edit_distance_unsigned(const unsigned *ptr1, int len1, const unsigned *ptr2, int len2, int max_distance)
Calculate the edit distance between two sequences.
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:81
Class for iterating over a list of terms.
static void docid_zero_invalid()
Definition: omdatabase.cc:58
A handle representing a document in a Xapian database.
Definition: document.h:61
Types used internally.
Wrapper around standard unique_ptr template.
Debug logging macros.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:453
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:316
void add_spelling(const std::string &word, Xapian::termcount freqinc=1) const
Add a word to the spelling dictionary.
Definition: omdatabase.cc:998
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162

Documentation for Xapian (version 1.4.11).
Generated on Wed Mar 6 2019 by Doxygen 1.8.13.