xapian-core  2.0.0
postingsource.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007-2026 Olly Betts
5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_POSTINGSOURCE_H
23 #define XAPIAN_INCLUDED_POSTINGSOURCE_H
24 
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error Never use <xapian/postingsource.h> directly; include <xapian.h> instead.
27 #endif
28 
29 #include <xapian/attributes.h>
30 #include <xapian/database.h>
31 #include <xapian/intrusive_ptr.h>
32 #include <xapian/postingiterator.h>
33 #include <xapian/types.h>
34 #include <xapian/valueiterator.h>
35 #include <xapian/visibility.h>
36 
37 #include <string>
38 #include <map>
39 
40 namespace Xapian {
41 
42 class Registry;
43 
/// Don't allow assignment (copy-assignment is explicitly deleted).
49  void operator=(const PostingSource &) = delete;
50 
/// Don't allow copying (the copy constructor is explicitly deleted).
52  PostingSource(const PostingSource &) = delete;
53 
55  double max_weight_ = 0.0;
56 
58  bool* max_weight_cached_flag_ptr = nullptr;
59 
60  public:
/// Allow subclasses to be instantiated.
///
/// The body is empty: max_weight_ and max_weight_cached_flag_ptr keep the
/// values from their in-class initialisers (0.0 and nullptr).
62  PostingSource() noexcept { }
63 
/// Register the bool that set_maxweight() should clear.
///
/// Stores @a flag_ptr in max_weight_cached_flag_ptr.  While that pointer is
/// non-null, each call to set_maxweight() writes false through it.
///
/// NOTE(review): the trailing underscore suggests this is meant for internal
/// library use rather than for user subclasses - confirm.
///
/// @param flag_ptr  Flag to clear on set_maxweight(); nullptr is allowed and
///                  makes set_maxweight() skip the write.
71  void set_max_weight_cached_flag_ptr_(bool* flag_ptr) {
72  max_weight_cached_flag_ptr = flag_ptr;
73  }
74 
75  // Destructor.
76  virtual ~PostingSource();
77 
/// A lower bound on the number of documents this object can return.
///
/// Pure virtual: every concrete subclass must implement it.
83  virtual Xapian::doccount get_termfreq_min() const = 0;
84 
/// An estimate of the number of documents this object can return.
///
/// Pure virtual: every concrete subclass must implement it.
94  virtual Xapian::doccount get_termfreq_est() const = 0;
95 
/// An upper bound on the number of documents this object can return.
///
/// Pure virtual: every concrete subclass must implement it.
101  virtual Xapian::doccount get_termfreq_max() const = 0;
102 
/// Specify an upper bound on what get_weight() will return from now on.
///
/// The bound is stored in max_weight_ and can be read back with
/// get_maxweight().
///
/// @param max_weight  The new upper bound.
125  void set_maxweight(double max_weight) {
126  max_weight_ = max_weight;
// If a flag was registered via set_max_weight_cached_flag_ptr_(), clear it.
// NOTE(review): presumably this tells the flag's owner that a max weight it
// cached is now stale - confirm against the code which registers the flag.
127  if (max_weight_cached_flag_ptr) {
128  *max_weight_cached_flag_ptr = false;
129  }
130  }
131 
/// Return the currently set upper bound on what get_weight() can return.
///
/// This is the value last passed to set_maxweight(), or 0.0 if it has never
/// been called (the in-class initialiser of max_weight_).
133  double get_maxweight() const noexcept { return max_weight_; }
134 
148  virtual double get_weight() const;
149 
/// Return the current docid.
///
/// Pure virtual: every concrete subclass must implement it.
159  virtual Xapian::docid get_docid() const = 0;
160 
/// Advance the current position to the next matching document.
///
/// Pure virtual: every concrete subclass must implement it.
///
/// @param min_wt  NOTE(review): presumably the minimum weight the caller is
///                interested in - confirm against the full API docs.
173  virtual void next(double min_wt) = 0;
174 
202  virtual void skip_to(Xapian::docid did, double min_wt);
203 
239  virtual bool check(Xapian::docid did, double min_wt);
240 
/// Return true if the current position is past the last entry in this list.
///
/// Pure virtual: every concrete subclass must implement it.
246  virtual bool at_end() const = 0;
247 
269  virtual PostingSource * clone() const;
270 
287  virtual std::string name() const;
288 
298  virtual std::string serialise() const;
299 
314  virtual PostingSource * unserialise(const std::string &serialised) const;
315 
334  virtual PostingSource * unserialise_with_registry(const std::string &serialised,
335  const Registry & registry) const;
336 
368  virtual void reset(const Database& db, Xapian::doccount shard_index);
369 
381  virtual void init(const Database& db);
382 
390  virtual std::string get_description() const;
391 
400  opt_intrusive_base::release();
401  return this;
402  }
403 
/// Start reference counting this object (const overload).
///
/// Calls opt_intrusive_base::release() and then returns this object, so the
/// call can be used inline where a pointer to the object is needed.
///
/// @return  this, as a pointer to const PostingSource.
411  const PostingSource * release() const {
412  opt_intrusive_base::release();
413  return this;
414  }
415 };
416 
417 
430 
432 
434 
435  bool started;
436 
438 
440 
442 
443  public:
/// Construct a ValuePostingSource.
///
/// Only the slot member is initialised here, from @a slot_.
///
/// @param slot_  The number of the value slot to read values from.
448  explicit ValuePostingSource(Xapian::valueno slot_) noexcept
449  : slot(slot_) {}
450 
451  Xapian::doccount get_termfreq_min() const;
452  Xapian::doccount get_termfreq_est() const;
453  Xapian::doccount get_termfreq_max() const;
454 
455  void next(double min_wt);
456  void skip_to(Xapian::docid min_docid, double min_wt);
457  bool check(Xapian::docid min_docid, double min_wt);
458 
459  bool at_end() const;
460 
461  Xapian::docid get_docid() const;
462 
463  void reset(const Database& db_, Xapian::doccount shard_index);
464 
/// The database we're reading values from.
///
/// Returns the db member by value (as a Xapian::Database object).
469  Xapian::Database get_database() const { return db; }
470 
/// The slot we're reading values from (as passed to the constructor).
475  Xapian::valueno get_slot() const { return slot; }
476 
/// Read current value.
///
/// Dereferences value_it and returns the result as a std::string.
/// NOTE(review): presumably only meaningful while value_it is positioned on
/// an entry (i.e. not after done()) - confirm.
481  std::string get_value() const { return *value_it; }
482 
/// End the iteration.
///
/// Moves value_it to the end iterator of the value stream for this slot and
/// sets the started flag.
489  void done() {
490  value_it = db.valuestream_end(slot);
491  started = true;
492  }
493 
/// Flag indicating if we've started (true if we have).
498  bool get_started() const { return started; }
499 
/// Set a lower bound on the term frequency.
///
/// @param termfreq_min_  The new lower bound; stored in termfreq_min.
507  void set_termfreq_min(Xapian::doccount termfreq_min_) {
508  termfreq_min = termfreq_min_;
509  }
510 
/// Set an estimate of the term frequency.
///
/// @param termfreq_est_  The new estimate; stored in termfreq_est.
518  void set_termfreq_est(Xapian::doccount termfreq_est_) {
519  termfreq_est = termfreq_est_;
520  }
521 
/// Set an upper bound on the term frequency.
///
/// @param termfreq_max_  The new upper bound; stored in termfreq_max.
529  void set_termfreq_max(Xapian::doccount termfreq_max_) {
530  termfreq_max = termfreq_max_;
531  }
532 
533  std::string get_description() const;
534 };
535 
536 
556  : public ValuePostingSource {
557  public:
563 
564  double get_weight() const;
565  ValueWeightPostingSource * clone() const;
566  std::string name() const;
567  std::string serialise() const;
568  ValueWeightPostingSource * unserialise(const std::string &serialised) const;
569  void reset(const Database& db_, Xapian::doccount shard_index);
570 
571  std::string get_description() const;
572 };
573 
574 
595  protected:
601 
607 
609  double curr_weight;
610 
613 
615  void skip_if_in_range(double min_wt);
616 
617  public:
627  Xapian::docid range_start_ = 0,
628  Xapian::docid range_end_ = 0);
629 
630  double get_weight() const override;
631  DecreasingValueWeightPostingSource* clone() const override;
632  std::string name() const override;
633  std::string serialise() const override;
635  unserialise(const std::string& serialised) const override;
636  void reset(const Database& db_, Xapian::doccount shard_index) override;
637 
638  void next(double min_wt) override;
639  void skip_to(Xapian::docid min_docid, double min_wt) override;
640  bool check(Xapian::docid min_docid, double min_wt) override;
641 
642  std::string get_description() const override;
643 };
644 
645 
655  : public ValuePostingSource {
658 
661 
663  std::map<std::string, double> weight_map;
664 
665  public:
670  explicit ValueMapPostingSource(Xapian::valueno slot_);
671 
677  void add_mapping(const std::string &key, double wt);
678 
680  void clear_mappings();
681 
686  void set_default_weight(double wt);
687 
688  double get_weight() const override;
689  ValueMapPostingSource* clone() const override;
690  std::string name() const override;
691  std::string serialise() const override;
693  unserialise(const std::string& serialised) const override;
694  void reset(const Database& db_, Xapian::doccount shard_index) override;
695 
696  std::string get_description() const override;
697 };
698 
699 
712 
715 
718 
720  bool started;
721 
724 
725  public:
730  explicit FixedWeightPostingSource(double wt);
731 
732  Xapian::doccount get_termfreq_min() const override;
733  Xapian::doccount get_termfreq_est() const override;
734  Xapian::doccount get_termfreq_max() const override;
735 
736  double get_weight() const override;
737 
738  void next(double min_wt) override;
739  void skip_to(Xapian::docid min_docid, double min_wt) override;
740  bool check(Xapian::docid min_docid, double min_wt) override;
741 
742  bool at_end() const override;
743 
744  Xapian::docid get_docid() const override;
745 
746  FixedWeightPostingSource* clone() const override;
747  std::string name() const override;
748  std::string serialise() const override;
750  unserialise(const std::string& serialised) const override;
751  void reset(const Database& db_, Xapian::doccount shard_index) override;
752 
753  std::string get_description() const override;
754 };
755 
756 }
757 
758 #endif // XAPIAN_INCLUDED_POSTINGSOURCE_H
Compiler attribute macros.
char name[9]
Definition: dbcheck.cc:57
An indexed database of documents.
Definition: database.h:75
ValueIterator valuestream_end(Xapian::valueno) const noexcept
Return end iterator corresponding to valuestream_begin().
Definition: database.h:421
Read weights from a value which is known to decrease as docid increases.
Xapian::docid range_end
End of range of docids for which weights are known to be decreasing.
double curr_weight
Weight at current position.
Xapian::docid range_start
Start of range of docids for which weights are known to be decreasing.
bool items_at_end
Flag, set to true if there are docs after the end of the range.
A posting source which returns a fixed weight for all documents.
Xapian::PostingIterator it
Iterator over all documents.
bool started
Flag indicating if we've started (true if we have).
Xapian::docid check_docid
The docid last passed to check() (0 if check() wasn't the last move).
Xapian::doccount termfreq
Number of documents in the posting source.
Xapian::Database db
The database we're reading documents from.
Base class for objects managed by opt_intrusive_ptr.
Class for iterating over a list of terms.
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
PostingSource(const PostingSource &)=delete
Don't allow copying.
virtual Xapian::doccount get_termfreq_est() const =0
An estimate of the number of documents this object can return.
void set_max_weight_cached_flag_ptr_(bool *flag_ptr)
Definition: postingsource.h:71
virtual void next(double min_wt)=0
Advance the current position to the next matching document.
const PostingSource * release() const
Start reference counting this object.
virtual Xapian::doccount get_termfreq_max() const =0
An upper bound on the number of documents this object can return.
PostingSource * release()
Start reference counting this object.
PostingSource() noexcept
Allow subclasses to be instantiated.
Definition: postingsource.h:62
virtual Xapian::doccount get_termfreq_min() const =0
A lower bound on the number of documents this object can return.
void set_maxweight(double max_weight)
Specify an upper bound on what get_weight() will return from now on.
virtual Xapian::docid get_docid() const =0
Return the current docid.
virtual bool at_end() const =0
Return true if the current position is past the last entry in this list.
void operator=(const PostingSource &)=delete
Don't allow assignment.
double get_maxweight() const noexcept
Return the currently set upper bound on what get_weight() can return.
Registry for user subclasses.
Definition: registry.h:47
Class for iterating over document values.
Definition: valueiterator.h:39
A posting source which looks up weights in a map using values as the key.
std::map< std::string, double > weight_map
The value -> weight map.
double max_weight_in_map
The maximum weight in weight_map.
double default_weight
The default weight.
A posting source which generates weights from a value slot.
Xapian::doccount termfreq_min
void set_termfreq_min(Xapian::doccount termfreq_min_)
Set a lower bound on the term frequency.
Xapian::doccount termfreq_max
Xapian::Database get_database() const
The database we're reading values from.
void set_termfreq_est(Xapian::doccount termfreq_est_)
An estimate of the term frequency.
bool get_started() const
Flag indicating if we've started (true if we have).
Xapian::doccount termfreq_est
ValuePostingSource(Xapian::valueno slot_) noexcept
Construct a ValuePostingSource.
void set_termfreq_max(Xapian::doccount termfreq_max_)
An upper bound on the term frequency.
std::string get_value() const
Read current value.
Xapian::ValueIterator value_it
Xapian::valueno get_slot() const
The slot we're reading values from.
void done()
End the iteration.
A posting source which reads weights from a value slot.
An indexed database of documents.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
unsigned valueno
The number for a value slot in a document.
Definition: types.h:90
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
Class for iterating over a list of document ids.
typedefs for Xapian
Class for iterating over document values.
Define XAPIAN_VISIBILITY_* macros.
#define XAPIAN_VISIBILITY_DEFAULT
Definition: visibility.h:28
#define XAPIAN_VISIBILITY_INTERNAL
Definition: visibility.h:29