xapian-core  2.0.0
databaseinternal.cc
Go to the documentation of this file.
1 
4 /* Copyright 2003-2024 Olly Betts
5  * Copyright 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "databaseinternal.h"
25 
26 #include "api/termlist.h"
27 #include "heap.h"
28 #include "omassert.h"
29 #include "postlist.h"
30 #include "slowvaluelist.h"
31 #include "stringutils.h"
32 #include "xapian/error.h"
33 
#include <algorithm>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
39 
40 using namespace std;
42 
43 namespace Xapian {
44 
45 [[noreturn]]
46 static void invalid_operation(const char* msg)
47 {
48  throw InvalidOperationError(msg);
49 }
50 
51 Database::Internal::size_type
52 Database::Internal::size() const
53 {
54  return 1;
55 }
56 
57 void
58 Database::Internal::keep_alive()
59 {
60  // No-op except for remote databases.
61 }
62 
63 void
64 Database::Internal::readahead_for_query(const Xapian::Query &) const
65 {
66 }
67 
69 Database::Internal::get_unique_terms_lower_bound() const
70 {
71  return get_doclength_upper_bound() ? 1 : 0;
72 }
73 
75 Database::Internal::get_unique_terms_upper_bound() const
76 {
77  return get_doclength_upper_bound();
78 }
79 
80 // Discard any exceptions - we're called from the destructors of derived
81 // classes so we can't safely throw.
82 void
83 Database::Internal::dtor_called_()
84 {
85  try {
86  if (transaction_active()) {
87  end_transaction(false);
88  } else {
89  // TRANSACTION_READONLY and TRANSACTION_UNIMPLEMENTED should be
90  // handled by the inlined dtor_called() wrapper.
91  AssertEq(state, TRANSACTION_NONE);
92  commit();
93  }
94  } catch (...) {
95  // We can't safely throw exceptions from a destructor in case an
96  // exception is already active and causing us to be destroyed.
97  }
98 }
99 
100 void
101 Database::Internal::commit()
102 {
103  // Writable databases should override this method, but this can get called
104  // if a read-only shard gets added to a WritableDatabase.
105  invalid_operation("WritableDatabase::commit() called with a read-only shard");
106 }
107 
108 void
109 Database::Internal::cancel()
110 {
111  // Writable databases should override this method, but this can get called
112  // if a read-only shard gets added to a WritableDatabase.
113  invalid_operation("WritableDatabase::cancel() called with a read-only shard");
114 }
115 
116 void
117 Database::Internal::begin_transaction(bool flushed)
118 {
119  if (state != TRANSACTION_NONE) {
120  if (transaction_active()) {
121  invalid_operation("WritableDatabase::begin_transaction(): already "
122  "in a transaction");
123  }
124  if (is_read_only()) {
125  invalid_operation("WritableDatabase::begin_transaction(): called "
126  "with a read-only shard");
127  }
128  throw UnimplementedError("This backend doesn't implement transactions");
129  }
130  if (flushed) {
131  // N.B. Call commit() before we set state since commit() isn't allowed
132  // during a transaction.
133  commit();
134  state = TRANSACTION_FLUSHED;
135  } else {
136  state = TRANSACTION_UNFLUSHED;
137  }
138 }
139 
140 void
141 Database::Internal::end_transaction(bool do_commit)
142 {
143  if (!transaction_active()) {
144  if (state != TRANSACTION_NONE) {
145  if (is_read_only()) {
146  invalid_operation(do_commit ?
147  "WritableDatabase::commit_transaction(): "
148  "called with a read-only shard" :
149  "WritableDatabase::cancel_transaction(): "
150  "called with a read-only shard");
151  }
152  throw UnimplementedError("This backend doesn't implement transactions");
153  }
154  invalid_operation(do_commit ?
155  "WritableDatabase::commit_transaction(): not in a "
156  "transaction" :
157  "WritableDatabase::cancel_transaction(): not in a "
158  "transaction");
159  }
160 
161  auto old_state = state;
162  state = TRANSACTION_NONE;
163  if (!do_commit) {
164  cancel();
165  } else if (old_state == TRANSACTION_FLUSHED) {
166  // N.B. Call commit() after we clear state since commit() isn't
167  // allowed during a transaction.
168  commit();
169  }
170 }
171 
173 Database::Internal::add_document(const Xapian::Document &)
174 {
175  // Writable databases should override this method, but this can get called
176  // if a read-only shard gets added to a WritableDatabase.
177  invalid_operation("WritableDatabase::add_document() called with a "
178  "read-only shard");
179 }
180 
181 void
182 Database::Internal::delete_document(Xapian::docid)
183 {
184  // Writable databases should override this method, but this can get called
185  // if a read-only shard gets added to a WritableDatabase.
186  invalid_operation("WritableDatabase::delete_document() called with a "
187  "read-only shard");
188 }
189 
190 void
191 Database::Internal::delete_document(string_view unique_term)
192 {
193  // Default implementation - overridden for remote and sharded databases.
194 
195  if (is_read_only()) {
196  // This can happen if a read-only shard gets added to a
197  // WritableDatabase.
198  invalid_operation("WritableDatabase::delete_document() called with a "
199  "read-only shard");
200  }
201 
202  unique_ptr<PostList> pl(open_post_list(unique_term));
203  if (!pl) {
204  // unique_term doesn't index any documents.
205  return;
206  }
207 
208  // We want this operation to be atomic if possible, so if we aren't in a
209  // transaction and the backend supports transactions, temporarily enter an
210  // unflushed transaction.
211  auto old_state = state;
212  if (state != TRANSACTION_UNIMPLEMENTED)
213  state = TRANSACTION_UNFLUSHED;
214  try {
215  while (pl->next(), !pl->at_end()) {
216  delete_document(pl->get_docid());
217  }
218  } catch (...) {
219  state = old_state;
220  throw;
221  }
222  state = old_state;
223 }
224 
225 void
226 Database::Internal::replace_document(Xapian::docid, const Xapian::Document &)
227 {
228  // Writable databases should override this method, but this can get called
229  // if a read-only shard gets added to a WritableDatabase.
230  invalid_operation("WritableDatabase::replace_document() called with a "
231  "read-only shard");
232 }
233 
235 Database::Internal::replace_document(string_view unique_term,
236  const Xapian::Document& document)
237 {
238  // Default implementation - overridden for remote and sharded databases.
239 
240  if (is_read_only()) {
241  // This can happen if a read-only shard gets added to a
242  // WritableDatabase.
243  invalid_operation("WritableDatabase::replace_document() called with a "
244  "read-only shard");
245  }
246 
247  unique_ptr<PostList> pl(open_post_list(unique_term));
248  if (!pl || (pl->next(), pl->at_end())) {
249  // unique_term doesn't index any documents.
250  return add_document(document);
251  }
252  Xapian::docid did = pl->get_docid();
253 
254  // We want this operation to be atomic if possible, so if we aren't in a
255  // transaction and the backend supports transactions, temporarily enter an
256  // unflushed transaction.
257  auto old_state = state;
258  if (state != TRANSACTION_UNIMPLEMENTED)
259  state = TRANSACTION_UNFLUSHED;
260  try {
261  replace_document(did, document);
262  while (pl->next(), !pl->at_end()) {
263  delete_document(pl->get_docid());
264  }
265  } catch (...) {
266  state = old_state;
267  throw;
268  }
269  state = old_state;
270  return did;
271 }
272 
273 ValueList *
274 Database::Internal::open_value_list(Xapian::valueno slot) const
275 {
276  return new SlowValueList(this, slot);
277 }
278 
279 TermList *
280 Database::Internal::open_spelling_termlist(string_view) const
281 {
282  // Only implemented for some database backends - others will just not
283  // suggest spelling corrections (or not contribute to them in a multiple
284  // database situation).
285  return NULL;
286 }
287 
288 TermList *
289 Database::Internal::open_spelling_wordlist() const
290 {
291  // Only implemented for some database backends - others will just not
292  // suggest spelling corrections (or not contribute to them in a multiple
293  // database situation).
294  return NULL;
295 }
296 
298 Database::Internal::get_spelling_frequency(string_view) const
299 {
300  // Only implemented for some database backends - others will just not
301  // suggest spelling corrections (or not contribute to them in a multiple
302  // database situation).
303  return 0;
304 }
305 
306 void
307 Database::Internal::add_spelling(string_view, Xapian::termcount) const
308 {
309  throw Xapian::UnimplementedError("This backend doesn't implement spelling correction");
310 }
311 
313 Database::Internal::remove_spelling(string_view, Xapian::termcount) const
314 {
315  throw Xapian::UnimplementedError("This backend doesn't implement spelling correction");
316 }
317 
318 TermList *
319 Database::Internal::open_synonym_termlist(string_view) const
320 {
321  // Only implemented for some database backends - others will just not
322  // expand synonyms (or not contribute to them in a multiple database
323  // situation).
324  return NULL;
325 }
326 
327 TermList *
328 Database::Internal::open_synonym_keylist(string_view) const
329 {
330  // Only implemented for some database backends - others will just not
331  // expand synonyms (or not contribute to them in a multiple database
332  // situation).
333  return NULL;
334 }
335 
336 void
337 Database::Internal::add_synonym(string_view, string_view) const
338 {
339  throw Xapian::UnimplementedError("This backend doesn't implement synonyms");
340 }
341 
342 void
343 Database::Internal::remove_synonym(string_view, string_view) const
344 {
345  throw Xapian::UnimplementedError("This backend doesn't implement synonyms");
346 }
347 
348 void
349 Database::Internal::clear_synonyms(string_view) const
350 {
351  throw Xapian::UnimplementedError("This backend doesn't implement synonyms");
352 }
353 
354 string
355 Database::Internal::get_metadata(string_view) const
356 {
357  return string();
358 }
359 
360 TermList*
361 Database::Internal::open_metadata_keylist(string_view) const
362 {
363  // Only implemented for some database backends - others will simply report
364  // there being no metadata keys.
365  return NULL;
366 }
367 
368 void
369 Database::Internal::set_metadata(string_view, string_view)
370 {
371  throw Xapian::UnimplementedError("This backend doesn't implement metadata");
372 }
373 
374 bool
375 Database::Internal::reopen()
376 {
377  // Database backends which don't support simultaneous update and reading
378  // probably don't need to do anything here. And since we didn't do
379  // anything we should return false to indicate that nothing has changed.
380  return false;
381 }
382 
383 void
384 Database::Internal::request_document(Xapian::docid) const
385 {
386 }
387 
388 void
389 Database::Internal::write_changesets_to_fd(int, string_view, bool,
391 {
392  throw Xapian::UnimplementedError("This backend doesn't provide changesets");
393 }
394 
396 Database::Internal::get_revision() const
397 {
398  throw Xapian::UnimplementedError("This backend doesn't provide access to revision information");
399 }
400 
401 string
402 Database::Internal::get_uuid() const
403 {
404  return string();
405 }
406 
407 void
408 Database::Internal::invalidate_doc_object(Xapian::Document::Internal*) const
409 {
410  // Do nothing, by default.
411 }
412 
413 void
414 Database::Internal::get_used_docid_range(Xapian::docid &,
415  Xapian::docid &) const
416 {
417  throw Xapian::UnimplementedError("This backend doesn't implement get_used_docid_range()");
418 }
419 
420 bool
421 Database::Internal::locked() const
422 {
423  return false;
424 }
425 
427 Database::Internal::update_lock(int flags)
428 {
429  if (flags == Xapian::DB_READONLY_) return this;
430  throw Xapian::DatabaseLockError("Not possible to lock for writing");
431 }
432 
433 namespace {
434  class Pos {
436 
438 
439  string term;
440 
441  public:
442  Pos(string&& term_, PositionList* p_)
443  : p(p_), term(term_) {
444  pos = p->get_position();
445  }
446 
447  ~Pos() { delete p; }
448 
449  Xapian::termpos get_pos() const { return pos; }
450 
451  const string& get_term() const { return term; }
452 
453  bool next() {
454  if (!p->next()) {
455  return false;
456  }
457  pos = p->get_position();
458  return true;
459  }
460  };
461 }
462 
463 static void
465  Xapian::termpos start_pos,
466  Xapian::termpos end_pos,
467  string_view end,
468  vector<unique_ptr<Pos>>& heap,
469  size_t prefix_size = 0)
470 {
471  constexpr Xapian::termpos LAST_POS = Xapian::termpos(-1);
472  do {
473  const string& term = termlist->get_termname();
474  if (!end.empty() && term >= end) {
475  break;
476  }
477  PositionList* poslist = termlist->positionlist_begin();
478  if (poslist &&
479  (start_pos ? poslist->skip_to(start_pos) : poslist->next()) &&
480  (end_pos == LAST_POS || poslist->get_position() <= end_pos)) {
481  heap.emplace_back(new Pos(term.substr(prefix_size), poslist));
482  } else {
483  delete poslist;
484  }
485  } while (termlist->next() == NULL);
486 }
487 
488 string
489 Database::Internal::reconstruct_text(Xapian::docid did,
490  size_t length,
491  std::string_view prefix,
492  Xapian::termpos start_pos,
493  Xapian::termpos end_pos) const
494 {
495  if (end_pos == 0) {
496  // Set to largest possible value.
497  end_pos = numeric_limits<decltype(end_pos)>::max();
498  }
499 
500  if (length == 0) {
501  // Set to largest possible value.
502  length = numeric_limits<decltype(length)>::max();
503  }
504 
505  struct PosCmp {
506  bool operator()(const unique_ptr<Pos>& a, const unique_ptr<Pos>& b) {
507  if (a->get_pos() != b->get_pos()) {
508  return a->get_pos() > b->get_pos();
509  }
510  return a->get_term() > b->get_term();
511  }
512  };
513 
514  vector<unique_ptr<Pos>> heap;
515 
516  unique_ptr<TermList> termlist(open_term_list_direct(did));
517  if (usual(termlist)) {
518  if (prefix.empty()) {
519  if (termlist->next() == NULL) {
520  reconstruct_open_poslists(termlist.get(), start_pos, end_pos,
521  "A", heap);
522  if (termlist->skip_to("[") == NULL) {
523  reconstruct_open_poslists(termlist.get(),
524  start_pos, end_pos,
525  prefix, heap);
526  }
527  }
528  } else {
529  if (termlist->skip_to(prefix) == NULL) {
530  // Calculate the first possible term without the specified
531  // prefix.
532  string term_ub{prefix};
533  size_t i = term_ub.find_last_not_of('\xff');
534  term_ub.resize(i + 1);
535  if (i != string::npos) {
536  term_ub[i] = (unsigned char)term_ub[i] + 1;
537  }
538  reconstruct_open_poslists(termlist.get(), start_pos, end_pos,
539  term_ub, heap, prefix.size());
540  }
541  }
542 
543  Heap::make(heap.begin(), heap.end(), PosCmp());
544  }
545 
546  string result;
547 
548  Xapian::termpos old_pos = UNSIGNED_OVERFLOW_OK(start_pos - 1);
549  while (!heap.empty()) {
550  Pos* tip = heap.front().get();
551  Xapian::termpos pos = tip->get_pos();
552  if (pos > end_pos) break;
553 
554  Xapian::termpos delta = UNSIGNED_OVERFLOW_OK(pos - old_pos);
555  // Ignore additional terms at the same position.
556  if (delta) {
557  if (usual(!result.empty())) {
558  // Insert newline for gap in used positions.
559  result += (delta == 1 ? ' ' : '\n');
560  }
561  result += tip->get_term();
562  }
563 
564  if (result.size() >= length) break;
565 
566  old_pos = pos;
567 
568  if (tip->next()) {
569  Heap::replace(heap.begin(), heap.end(), PosCmp());
570  } else {
571  Heap::pop(heap.begin(), heap.end(), PosCmp());
572  heap.resize(heap.size() - 1);
573  }
574  }
575 
576  return result;
577 }
578 
579 }
static void invalid_operation(const char *msg)
Pos(const string &term_, const Xapian::PositionIterator &p_)
Definition: xapian-pos.cc:66
Slow implementation for backends which don't support streamed values.
Definition: slowvaluelist.h:32
DatabaseLockError indicates failure to lock a database.
Definition: error.h:481
Virtual base class for Database internals.
Abstract base class for a document.
Class representing a document.
Definition: document.h:64
A smart pointer that uses intrusive reference counting.
Definition: intrusive_ptr.h:83
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:271
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
virtual bool next()=0
Advance to the next entry in the positionlist.
virtual bool skip_to(Xapian::termpos termpos)=0
Skip forward to the specified position.
virtual Xapian::termpos get_position() const =0
Return the current position.
Class representing a query.
Definition: query.h:45
Abstract base class for termlists.
Definition: termlist.h:42
virtual PositionList * positionlist_begin() const =0
Return PositionList for the current position.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
const std::string & get_termname() const
Return the termname at the current position.
Definition: termlist.h:69
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:313
Abstract base class for value streams.
Definition: valuelist.h:31
#define UNSIGNED_OVERFLOW_OK(X)
Definition: config.h:626
#define usual(COND)
Definition: config.h:608
string term
PositionList * p
Xapian::termpos pos
Virtual base class for Database internals.
Hierarchy of classes which Xapian can throw as exceptions.
C++ STL heap implementation with extensions.
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:213
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:230
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:259
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
XAPIAN_REVISION_TYPE rev
Revision number of a database.
Definition: types.h:108
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
static void reconstruct_open_poslists(TermList *termlist, Xapian::termpos start_pos, Xapian::termpos end_pos, string_view end, vector< unique_ptr< Pos >> &heap, size_t prefix_size=0)
Various assertion macros.
#define AssertEq(A, B)
Definition: omassert.h:124
Abstract base class for postlists.
Slow implementation for backends which don't support streamed values.
Various handy string-related helpers.
Information about the steps involved in performing a replication.
Definition: replication.h:32
Abstract base class for termlists.