xapian-core  1.4.25
postingsource.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_POSTINGSOURCE_H
23 #define XAPIAN_INCLUDED_POSTINGSOURCE_H
24 
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error Never use <xapian/postingsource.h> directly; include <xapian.h> instead.
27 #endif
28 
29 #include <xapian/attributes.h>
30 #include <xapian/database.h>
31 #include <xapian/deprecated.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/postingiterator.h>
34 #include <xapian/types.h>
35 #include <xapian/valueiterator.h>
36 #include <xapian/visibility.h>
37 
38 #include <string>
39 #include <map>
40 
41 namespace Xapian {
42 
43 class Registry;
44 
50  void operator=(const PostingSource &);
51 
54 
56  double max_weight_;
57 
63  void * matcher_;
64 
65  public:
// Allow subclasses to be instantiated.
//
// Starts with a maximum weight of 0 and no matcher registered.
67  XAPIAN_NOTHROW(PostingSource())
68  : max_weight_(0), matcher_(NULL) { }
69 
// Record the object to inform of maxweight changes (stored in matcher_).
76  void register_matcher_(void * matcher) { matcher_ = matcher; }
77 
78  // Destructor.
79  virtual ~PostingSource();
80 
86  virtual Xapian::doccount get_termfreq_min() const = 0;
87 
97  virtual Xapian::doccount get_termfreq_est() const = 0;
98 
104  virtual Xapian::doccount get_termfreq_max() const = 0;
105 
128  void set_maxweight(double max_weight);
129 
// Return the currently set upper bound on what get_weight() can return.
131  double XAPIAN_NOTHROW(get_maxweight() const) { return max_weight_; }
132 
146  virtual double get_weight() const;
147 
157  virtual Xapian::docid get_docid() const = 0;
158 
171  virtual void next(double min_wt) = 0;
172 
200  virtual void skip_to(Xapian::docid did, double min_wt);
201 
237  virtual bool check(Xapian::docid did, double min_wt);
238 
244  virtual bool at_end() const = 0;
245 
267  virtual PostingSource * clone() const;
268 
285  virtual std::string name() const;
286 
296  virtual std::string serialise() const;
297 
312  virtual PostingSource * unserialise(const std::string &serialised) const;
313 
332  virtual PostingSource * unserialise_with_registry(const std::string &serialised,
333  const Registry & registry) const;
334 
354  virtual void init(const Database & db) = 0;
355 
363  virtual std::string get_description() const;
364 
373  opt_intrusive_base::release();
374  return this;
375  }
376 
// Start reference counting this object (const overload).
//
// Calls opt_intrusive_base::release() and then returns this.
384  const PostingSource * release() const {
385  opt_intrusive_base::release();
386  return this;
387  }
388 };
389 
390 
402  // We want to give a deprecation warning for uses of the members from user
403  // code, but we also want to be able to inline functions to access them,
404  // without those functions generating deprecated warnings. To achieve
405  // this, we make the old names references to members with a "real_" prefix
406  // and then use the latter in the inlined accessor functions. The
407  // constructor initialises all the references to point to their "real_"
408  // counterparts.
410 
412 
414 
416 
418 
420 
422 
423  protected:
429 
435 
443 
448  XAPIAN_DEPRECATED(bool& started);
449 
458 
467 
476 
477  public:
482  explicit ValuePostingSource(Xapian::valueno slot_);
483 
484  Xapian::doccount get_termfreq_min() const;
485  Xapian::doccount get_termfreq_est() const;
486  Xapian::doccount get_termfreq_max() const;
487 
488  void next(double min_wt);
489  void skip_to(Xapian::docid min_docid, double min_wt);
490  bool check(Xapian::docid min_docid, double min_wt);
491 
492  bool at_end() const;
493 
494  Xapian::docid get_docid() const;
495 
496  void init(const Database & db_);
497 
// Return the database we're reading values from (a copy of real_db).
502  Xapian::Database get_database() const { return real_db; }
503 
// Return the slot we're reading values from.
508  Xapian::valueno get_slot() const { return real_slot; }
509 
// Read current value, obtained by dereferencing real_value_it.
514  std::string get_value() const { return *real_value_it; }
515 
// End the iteration.
//
// Moves real_value_it to the end of the value stream for real_slot and
// sets the started flag.
522  void done() {
523  real_value_it = real_db.valuestream_end(real_slot);
524  real_started = true;
525  }
526 
// Flag indicating if we've started (true if we have).
531  bool get_started() const { return real_started; }
532 
// Set a lower bound on the term frequency.
540  void set_termfreq_min(Xapian::doccount termfreq_min_) {
541  real_termfreq_min = termfreq_min_;
542  }
543 
// Set an estimate of the term frequency.
551  void set_termfreq_est(Xapian::doccount termfreq_est_) {
552  real_termfreq_est = termfreq_est_;
553  }
554 
// Set an upper bound on the term frequency.
562  void set_termfreq_max(Xapian::doccount termfreq_max_) {
563  real_termfreq_max = termfreq_max_;
564  }
565 };
566 
567 
587  : public ValuePostingSource {
588  public:
594 
595  double get_weight() const;
596  ValueWeightPostingSource * clone() const;
597  std::string name() const;
598  std::string serialise() const;
599  ValueWeightPostingSource * unserialise(const std::string &serialised) const;
600  void init(const Database & db_);
601 
602  std::string get_description() const;
603 };
604 
605 
627  protected:
633 
639 
641  double curr_weight;
642 
645 
647  void skip_if_in_range(double min_wt);
648 
649  public:
659  Xapian::docid range_start_ = 0,
660  Xapian::docid range_end_ = 0);
661 
662  double get_weight() const;
663  DecreasingValueWeightPostingSource * clone() const;
664  std::string name() const;
665  std::string serialise() const;
666  DecreasingValueWeightPostingSource * unserialise(const std::string &serialised) const;
667  void init(const Xapian::Database & db_);
668 
669  void next(double min_wt);
670  void skip_to(Xapian::docid min_docid, double min_wt);
671  bool check(Xapian::docid min_docid, double min_wt);
672 
673  std::string get_description() const;
674 };
675 
676 
686  : public ValuePostingSource {
689 
692 
694  std::map<std::string, double> weight_map;
695 
696  public:
701  explicit ValueMapPostingSource(Xapian::valueno slot_);
702 
708  void add_mapping(const std::string &key, double wt);
709 
711  void clear_mappings();
712 
717  void set_default_weight(double wt);
718 
719  double get_weight() const;
720  ValueMapPostingSource * clone() const;
721  std::string name() const;
722  std::string serialise() const;
723  ValueMapPostingSource * unserialise(const std::string &serialised) const;
724  void init(const Database & db_);
725 
726  std::string get_description() const;
727 };
728 
729 
738 
741 
744 
746  bool started;
747 
750 
751  public:
756  explicit FixedWeightPostingSource(double wt);
757 
758  Xapian::doccount get_termfreq_min() const;
759  Xapian::doccount get_termfreq_est() const;
760  Xapian::doccount get_termfreq_max() const;
761 
762  double get_weight() const;
763 
764  void next(double min_wt);
765  void skip_to(Xapian::docid min_docid, double min_wt);
766  bool check(Xapian::docid min_docid, double min_wt);
767 
768  bool at_end() const;
769 
770  Xapian::docid get_docid() const;
771 
772  FixedWeightPostingSource * clone() const;
773  std::string name() const;
774  std::string serialise() const;
775  FixedWeightPostingSource * unserialise(const std::string &serialised) const;
776  void init(const Database & db_);
777 
778  std::string get_description() const;
779 };
780 
781 }
782 
783 #endif // XAPIAN_INCLUDED_POSTINGSOURCE_H
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
typedefs for Xapian
double max_weight_in_map
The maximum weight in weight_map.
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::PostingIterator it
Iterator over all documents.
void done()
End the iteration.
A posting source which looks up weights in a map using values as the key.
Compiler attribute macros.
Define XAPIAN_DEPRECATED() and related macros.
Class for iterating over document values.
Definition: valueiterator.h:40
void set_termfreq_est(Xapian::doccount termfreq_est_)
An estimate of the term frequency.
#define XAPIAN_DEPRECATED(X)
#define XAPIAN_VISIBILITY_DEFAULT
Definition: visibility.h:28
void set_termfreq_max(Xapian::doccount termfreq_max_)
An upper bound on the term frequency.
const PostingSource * release() const
Start reference counting this object.
Read weights from a value which is known to decrease as docid increases.
double max_weight_
The current upper bound on what get_weight() can return.
Definition: postingsource.h:56
Class for iterating over a list of terms.
PostingSource()
Allow subclasses to be instantiated.
Definition: postingsource.h:67
std::string get_value() const
Read current value.
ValueIterator valuestream_end(Xapian::valueno) const
Return end iterator corresponding to valuestream_begin().
Definition: database.h:363
bool get_started() const
Flag indicating if we've started (true if we have).
void register_matcher_(void *matcher)
Definition: postingsource.h:76
Define XAPIAN_VISIBILITY_* macros.
Registry for user subclasses.
Definition: registry.h:47
Xapian::doccount termfreq
Number of documents in the posting source.
Xapian::doccount real_termfreq_min
bool items_at_end
Flag, set to true if there are docs after the end of the range.
A posting source which returns a fixed weight for all documents.
API for working with Xapian databases.
Xapian::docid range_end
End of range of docids for which weights are known to be decreasing.
Class for iterating over document values.
bool started
Flag indicating if we've started (true if we have).
Base class which provides an "external" source of postings.
Definition: postingsource.h:47
std::map< std::string, double > weight_map
The value -> weight map.
Base class for objects managed by opt_intrusive_ptr.
A posting source which generates weights from a value slot.
Xapian::ValueIterator real_value_it
A posting source which reads weights from a value slot.
char name[9]
Definition: dbcheck.cc:55
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
Xapian::doccount real_termfreq_est
PostingSource * release()
Start reference counting this object.
Xapian::Database db
The database we're reading documents from.
Class for iterating over a list of document ids.
double get_maxweight() const
Return the currently set upper bound on what get_weight() can return.
double default_weight
The default weight.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Xapian::doccount real_termfreq_max
Xapian::docid check_docid
The docid last passed to check() (0 if check() wasn't the last move).
Xapian::Database real_db
Xapian::docid range_start
Start of range of docids for which weights are known to be decreasing.
double curr_weight
Weight at current position.
void * matcher_
The object to inform of maxweight changes.
Definition: postingsource.h:63
Xapian::valueno get_slot() const
The slot we're reading values from.
void set_termfreq_min(Xapian::doccount termfreq_min_)
Set a lower bound on the term frequency.
Xapian::Database get_database() const
The database we're reading values from.