xapian-core  2.0.0
documentinternal.h
Go to the documentation of this file.
1 
4 /* Copyright 2017,2018,2019,2023,2024 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef XAPIAN_INCLUDED_DOCUMENTINTERNAL_H
22 #define XAPIAN_INCLUDED_DOCUMENTINTERNAL_H
23 
24 #include <xapian/document.h>
25 #include <xapian/intrusive_ptr.h>
26 #include <xapian/types.h>
27 
28 #include "api/terminfo.h"
29 #include "api/termlist.h"
31 #include "overflow.h"
32 
33 #include <functional>
34 #include <limits>
35 #include <map>
36 #include <memory>
37 #include <string>
38 #include <string_view>
39 
40 class DocumentTermList;
41 class DocumentValueList;
42 class GlassValueManager;
43 class HoneyValueManager;
45 
46 namespace Xapian {
47 
50  friend class ::DocumentTermList;
51  friend class ::DocumentValueList;
52  // For ensure_values_fetched():
53  friend class ::GlassValueManager;
54  friend class ::HoneyValueManager;
55  friend class ::ValueStreamDocument;
56 
/// Don't allow assignment.
58  void operator=(const Internal &) = delete;
59 
/// Don't allow copying.
61  Internal(const Internal &) = delete;
62 
/// The document data.
///
/// data_modified() reports true whenever this pointer is non-null.
67  std::unique_ptr<std::string> data;
68 
/// Terms in the document and their associated metadata.
///
/// terms_modified() reports true whenever this pointer is non-null.  The
/// std::less<> comparator lets find() be called with a std::string_view key.
77  mutable
78  std::unique_ptr<std::map<std::string, TermInfo, std::less<>>> terms;
79 
87 
104 
/// Are there any changes to term positions in terms?
115  mutable bool positions_modified_ : 1;
116 
/// Ensure terms have been fetched from database.
122  void ensure_terms_fetched() const;
123 
/// Ensure values have been fetched from database.
129  void ensure_values_fetched() const;
130 
131  protected:
/// Document value slots and their contents.
///
/// values_modified() reports true whenever this pointer is non-null.
140  mutable std::unique_ptr<std::map<Xapian::valueno, std::string>> values;
141 
147 
156 
159  Xapian::docid did_)
160  : index(), positions_modified_(false), database(database_), did(did_) {}
161 
164  Xapian::docid did_,
165  std::string&& data_,
166  std::map<Xapian::valueno, std::string>&& values_)
167  : data(new std::string(std::move(data_))),
169  values(new std::map<Xapian::valueno, std::string>(std::move(values_))),
170  database(database_),
171  did(did_) {}
172 
/// Fetch the document data from the database.
///
/// Called by get_data() when no data is held in memory.
178  virtual std::string fetch_data() const;
179 
/// Fetch all set values from the database.
///
/// @param values_  Map, keyed by value slot number, passed by reference to
///                 receive the fetched values.
185  virtual void fetch_all_values(std::map<Xapian::valueno,
186  std::string>& values_) const;
187 
/// Fetch a single value from the database.
///
/// Called by get_value() when no values map is held in memory.
///
/// @param slot  The value slot to fetch.
193  virtual std::string fetch_value(Xapian::valueno slot) const;
194 
195  public:
198 
/// Virtual so that derived classes can be deleted through a pointer to this
/// base class.
202  virtual ~Internal();
203 
210  bool data_modified() const { return data != NULL; }
211 
218  bool terms_modified() const { return terms != NULL; }
219 
226  bool values_modified() const { return values != NULL; }
227 
234  bool modified() const {
235  return data_modified() || terms_modified() || values_modified();
236  }
237 
244  bool positions_modified() const { return positions_modified_; }
245 
253  Xapian::docid get_docid() const { return did; }
254 
256  Xapian::doccount get_index() const { return index; }
257 
259  void set_index(Xapian::doccount new_index) { index = new_index; }
260 
262  std::string get_data() const {
263  if (data)
264  return *data;
265  return fetch_data();
266  }
267 
269  void set_data(std::string_view data_) {
270  data.reset(new std::string(data_));
271  }
272 
/// Add a term to this document.
///
/// @param term     Term to add, or whose existing entry is updated.
/// @param wdf_inc  Passed to TermInfo's constructor for a new entry, or to
///                 increase_wdf() for an existing one.
274  void add_term(std::string_view term, Xapian::termcount wdf_inc) {
     // NOTE(review): `terms` is dereferenced unconditionally below, so it must
     // be non-null by this point.  Listing line 275 is not shown in this view -
     // presumably the ensure_terms_fetched() call; confirm.
276 
277  auto i = terms->find(term);
278  if (i == terms->end()) {
     // No entry for this term yet, so the distinct term count grows by one.
279  ++termlist_size;
280  terms->emplace(term, TermInfo(wdf_inc));
281  } else {
     // A true result from increase_wdf() also counts as a new distinct term -
     // presumably the entry had been marked as removed; confirm in
     // api/terminfo.h.
282  if (i->second.increase_wdf(wdf_inc))
283  ++termlist_size;
284  }
285  }
286 
/// Remove a term from this document.
///
/// @param term  Term to remove.
///
/// @return false if there is no entry for @a term or TermInfo::remove()
///         returns false; true otherwise.
288  bool remove_term(std::string_view term) {
     // NOTE(review): `terms` is dereferenced unconditionally below.  Listing
     // line 289 is not shown in this view - presumably the
     // ensure_terms_fetched() call; confirm.
290 
291  auto i = terms->find(term);
292  if (i == terms->end()) {
293  return false;
294  }
     // The flag is set before remove() is attempted, so it is set even when
     // remove() then returns false.
295  if (i->second.has_positions()) {
296  positions_modified_ = true;
297  }
     // NOTE(review): a false result presumably means the entry was already
     // marked as removed; confirm in api/terminfo.h.
298  if (!i->second.remove()) {
299  return false;
300  }
301  --termlist_size;
302  return true;
303  }
304 
/// Add a posting for a term.
///
/// @param term      Term the posting belongs to.
/// @param term_pos  Position to record for the term.
/// @param wdf_inc   Passed with @a term_pos to TermInfo's constructor for a
///                  new entry, or to add_position() for an existing one.
306  void add_posting(std::string_view term,
307  Xapian::termpos term_pos,
308  Xapian::termcount wdf_inc) {
     // NOTE(review): listing line 309 is not shown in this view - presumably
     // the ensure_terms_fetched() call which makes `terms` safe to
     // dereference; confirm.
     // The flag is set unconditionally, before the term is looked up.
310  positions_modified_ = true;
311 
312  auto i = terms->find(term);
313  if (i == terms->end()) {
     // No entry for this term yet, so the distinct term count grows by one.
314  ++termlist_size;
315  terms->emplace(term, TermInfo(wdf_inc, term_pos));
316  return;
317  }
     // A true result from add_position() also counts as a new distinct term -
     // presumably the entry had been marked as removed; confirm in
     // api/terminfo.h.
318  if (i->second.add_position(wdf_inc, term_pos))
319  ++termlist_size;
320  }
321 
323 
/// Remove a posting for a term.
///
/// @param term      Term the posting belongs to.
/// @param term_pos  Position to remove.
/// @param wdf_dec   Amount passed to decrease_wdf() once the position has
///                  been removed.
///
/// @return NO_TERM if @a term has no entry or its entry reports
///         is_deleted(); NO_POS if remove_position() returns false;
///         OK otherwise.
326  remove_posting(std::string_view term,
327  Xapian::termpos term_pos,
328  Xapian::termcount wdf_dec) {
     // NOTE(review): listing line 329 is not shown in this view - presumably
     // the ensure_terms_fetched() call which makes `terms` safe to
     // dereference; confirm.
330 
331  auto i = terms->find(term);
332  if (i == terms->end() || i->second.is_deleted()) {
333  return remove_posting_result::NO_TERM;
334  }
335  if (!i->second.remove_position(term_pos)) {
336  return remove_posting_result::NO_POS;
337  }
     // A true result from decrease_wdf() reduces the distinct term count -
     // presumably the entry became deleted; confirm in api/terminfo.h.
338  if (i->second.decrease_wdf(wdf_dec))
339  --termlist_size;
     // Only reached when a position was actually removed.
340  positions_modified_ = true;
341  return remove_posting_result::OK;
342  }
343 
/// Remove a range of postings for a term.
///
/// @param term            Term the postings belong to.
/// @param term_pos_first  First bound passed to remove_positions().
/// @param term_pos_last   Second bound passed to remove_positions().
/// @param wdf_dec         Amount to decrease the wdf by for each position
///                        removed.
/// @param[out] n_removed  Set to the result of remove_positions().  Not
///                        written when NO_TERM is returned.
///
/// @return NO_TERM if @a term has no entry or its entry reports
///         is_deleted(); OK otherwise, including when nothing was removed.
349  remove_postings(std::string_view term,
350  Xapian::termpos term_pos_first,
351  Xapian::termpos term_pos_last,
352  Xapian::termcount wdf_dec,
353  Xapian::termpos& n_removed) {
     // NOTE(review): listing line 354 is not shown in this view - presumably
     // the ensure_terms_fetched() call which makes `terms` safe to
     // dereference; confirm.
355 
356  auto i = terms->find(term);
357  if (i == terms->end() || i->second.is_deleted()) {
358  return remove_posting_result::NO_TERM;
359  }
360  n_removed = i->second.remove_positions(term_pos_first,
361  term_pos_last);
362  if (n_removed) {
363  positions_modified_ = true;
     // Total decrease is n_removed * wdf_dec, with the multiplication checked
     // for overflow.
364  Xapian::termcount wdf_delta;
365  if (mul_overflows(n_removed, wdf_dec, wdf_delta)) {
366  // Decreasing by the maximum value will zero the wdf.
367  wdf_delta = std::numeric_limits<Xapian::termcount>::max();
368  }
     // A true result from decrease_wdf() reduces the distinct term count -
     // presumably the entry became deleted; confirm in api/terminfo.h.
369  if (i->second.decrease_wdf(wdf_delta))
370  --termlist_size;
371  }
372  return remove_posting_result::OK;
373  }
374 
/// Clear all terms from the document.
///
/// Does nothing when no terms map is held and there is no database.
/// Otherwise `terms` ends up as an empty map and termlist_size is reset to 0.
376  void clear_terms() {
377  if (!terms) {
378  if (!database) {
379  // We didn't come from a database, so there are no unfetched
380  // terms to clear.
381  return;
382  }
     // Install an empty map; `terms` is now non-null, so terms_modified()
     // reports true from here on.
383  terms.reset(new std::map<std::string, TermInfo, std::less<>>());
384  } else {
385  terms->clear();
386  }
387  termlist_size = 0;
388  // Assume there was positional data if there's any in the database.
     // NOTE(review): listing line 389, the statement the comment above
     // describes, is not shown in this view.
390  }
391 
394  if (terms)
395  return termlist_size;
396 
397  if (!database)
398  return 0;
399 
400  std::unique_ptr<TermList> tl(database->open_term_list(did));
401  // get_approx_size() is exact for TermList from a database.
402  return tl->get_approx_size();
403  }
404 
/// Start iterating the terms in this document.
///
/// NOTE(review): returns a raw TermList pointer; who owns it is not visible
/// from this header - confirm against the definition and callers.
410  TermList* open_term_list() const;
411 
416  std::string get_value(Xapian::valueno slot) const {
417  if (values) {
418  auto i = values->find(slot);
419  if (i != values->end())
420  return i->second;
421  return std::string();
422  }
423 
424  return fetch_value(slot);
425  }
426 
/// Add a value to a slot in this document.
///
/// @param slot   The value slot to set.
/// @param value  New contents for the slot.  An empty value removes any
///               existing entry for @a slot instead of storing it.
428  void add_value(Xapian::valueno slot, std::string_view value) {
     // NOTE(review): `values` is dereferenced unconditionally below.  Listing
     // line 429 is not shown in this view - presumably the
     // ensure_values_fetched() call; confirm.
430 
431  if (!value.empty()) {
     // operator[] creates the entry if needed, then the value overwrites it.
432  (*values)[slot] = value;
433  } else {
434  // Empty values aren't stored, but replace any existing value by
435  // removing it.
436  values->erase(slot);
437  }
438  }
439 
441  void clear_values() {
442  if (!values) {
443  if (database) {
444  values.reset(new std::map<Xapian::valueno, std::string>());
445  } else {
446  // We didn't come from a database, so there are no unfetched
447  // values to clear.
448  }
449  } else {
450  values->clear();
451  }
452  }
453 
457  return values->size();
458  }
459 
461 
/// Return a string describing this object.
463  std::string get_description() const;
464 };
465 
466 }
467 
468 #endif // XAPIAN_INCLUDED_DOCUMENTINTERNAL_H
Iteration over terms in a document.
Iteration over values in a document.
Metadata for a term in a document.
Definition: terminfo.h:28
A document which gets its values from a ValueStreamManager.
Virtual base class for Database internals.
virtual TermList * open_term_list(docid did) const =0
virtual bool has_positions() const =0
Check whether this database contains any positional information.
Abstract base class for a document.
void set_index(Xapian::doccount new_index)
Internal method used by MSet::diversify().
Xapian::docid did
The document ID this document came from in database.
std::unique_ptr< std::map< std::string, TermInfo, std::less<> > > terms
Terms in the document and their associated metadata.
void add_value(Xapian::valueno slot, std::string_view value)
Add a value to a slot in this document.
Xapian::termcount termlist_count() const
Return the number of distinct terms in this document.
virtual void fetch_all_values(std::map< Xapian::valueno, std::string > &values_) const
Fetch all set values from the database.
Internal(Xapian::Internal::intrusive_ptr< const Xapian::Database::Internal > database_, Xapian::docid did_)
Constructor used by subclasses.
void ensure_values_fetched() const
Ensure values have been fetched from database.
std::unique_ptr< std::map< Xapian::valueno, std::string > > values
Document value slots and their contents.
std::unique_ptr< std::string > data
The document data.
void ensure_terms_fetched() const
Ensure terms have been fetched from database.
void add_term(std::string_view term, Xapian::termcount wdf_inc)
Add a term to this document.
Xapian::docid get_docid() const
Get the document ID this document came from.
bool modified() const
Return true if the document might have been modified in any way.
bool data_modified() const
Return true if the document data might have been modified.
bool positions_modified_
Are there any changes to term positions in terms?
bool remove_term(std::string_view term)
Remove a term from this document.
void add_posting(std::string_view term, Xapian::termpos term_pos, Xapian::termcount wdf_inc)
Add a posting for a term.
Xapian::doccount index
An index value, unused by Document itself.
void set_data(std::string_view data_)
Set the document data.
virtual std::string fetch_data() const
Fetch the document data from the database.
Xapian::doccount get_index() const
Internal method used by MSet::diversify().
Xapian::ValueIterator values_begin() const
bool values_modified() const
Return true if the document's values might have been modified.
bool positions_modified() const
Return true if the document's term positions might have been modified.
std::string get_description() const
Return a string describing this object.
Xapian::termcount termlist_size
The number of distinct terms in terms.
virtual std::string fetch_value(Xapian::valueno slot) const
Fetch a single value from the database.
remove_posting_result remove_postings(std::string_view term, Xapian::termpos term_pos_first, Xapian::termpos term_pos_last, Xapian::termcount wdf_dec, Xapian::termpos &n_removed)
Remove a range of postings for a term.
Internal()
Construct an empty document.
remove_posting_result remove_posting(std::string_view term, Xapian::termpos term_pos, Xapian::termcount wdf_dec)
Remove a posting for a term.
Internal(const Internal &)=delete
Don't allow copying.
std::string get_data() const
Get the document data.
void operator=(const Internal &)=delete
Don't allow assignment.
Xapian::valueno values_count() const
Count the value slots used in this document.
void clear_terms()
Clear all terms from the document.
std::string get_value(Xapian::valueno slot) const
Read a value slot in this document.
TermList * open_term_list() const
Start iterating the terms in this document.
virtual ~Internal()
We have virtual methods and want to be able to delete derived classes using a pointer to the base class, so the destructor is virtual.
void clear_values()
Clear all value slots in this document.
Xapian::Internal::intrusive_ptr< const Xapian::Database::Internal > database
Database this document came from.
bool terms_modified() const
Return true if the document's terms might have been modified.
Internal(const Xapian::Database::Internal *database_, Xapian::docid did_, std::string &&data_, std::map< Xapian::valueno, std::string > &&values_)
Constructor used by RemoteDocument subclass.
Base class for objects managed by intrusive_ptr.
Definition: intrusive_ptr.h:50
Abstract base class for termlists.
Definition: termlist.h:42
Class for iterating over document values.
Definition: valueiterator.h:39
string term
Virtual base class for Database internals.
Class representing a document.
#define false
Definition: header.h:9
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:64
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
Definition: types.h:75
Arithmetic operations with overflow checks.
std::enable_if_t< std::is_unsigned_v< T1 > &&std::is_unsigned_v< T2 > &&std::is_unsigned_v< R >, bool > mul_overflows(T1 a, T2 b, R &res)
Multiplication with overflow checking.
Definition: overflow.h:188
Metadata for a term in a document.
Abstract base class for termlists.
typedefs for Xapian