xapian-core  1.4.26
maxpostlist.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2010,2011,2012,2013 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_MAXPOSTLIST_H
23 #define XAPIAN_INCLUDED_MAXPOSTLIST_H
24 
25 #include "multimatch.h"
26 #include "api/postlist.h"
27 #include <algorithm>
28 
29 class MultiMatch;
30 
/// N-way OR postlist with wt=max(wt_i).
///
/// Derives from PostList (the abstract base class for postlists).
//
// NOTE(review): this listing is incomplete.  Listing lines 40, 46, 52 and 55
// are absent; the cross-reference index later in this file places the members
// `Xapian::docid did`, `PostList ** plist`, `Xapian::doccount db_size` and
// `MultiMatch * matcher` on those lines.  The same index lists
// get_termfreq_min(), get_termfreq_max(), get_termfreq_est(), get_doclength(),
// get_unique_terms() and count_matching_subqs() as defined in maxpostlist.cc,
// but their declarations are not visible here either - confirm against the
// real header before relying on this view.
32 class MaxPostList : public PostList {
 /// Don't allow assignment.
34  void operator=(const MaxPostList &);
35 
 /// Don't allow copying.
37  MaxPostList(const MaxPostList &);
38 
41 
 /// The number of sub-postlists.
43  size_t n_kids;
44 
47 
 /// Cached answer to get_maxweight.
49  double max_cached;
50 
53 
56 
 /// Erase a sub-postlist.
 ///
 /// Deletes the postlist stored at index @a i, closes the gap by moving
 /// every later entry down one slot, and then tells the matcher to
 /// recalculate its maximum weight.
58  void erase_sublist(size_t i) {
59  delete plist[i];
 // Decrement first so the loop bound below is the new, smaller count and
 // plist[j + 1] never reads past the old last element.
60  --n_kids;
61  for (size_t j = i; j < n_kids; ++j) {
62  plist[j] = plist[j + 1];
63  }
64  matcher->recalc_maxweight();
65  }
66 
67  public:
 /// Construct from 2 random-access iterators to a container of PostList*,
 /// a pointer to the matcher, and the database size.
 ///
 /// The number of sub-postlists is taken as (pl_end - pl_begin); an array
 /// of that many pointers is allocated and the range is copied into it.
71  template<class RandomItor>
72  MaxPostList(RandomItor pl_begin, RandomItor pl_end,
73  MultiMatch * matcher_, Xapian::doccount db_size_)
74  : did(0), n_kids(pl_end - pl_begin), plist(NULL),
75  max_cached(0), db_size(db_size_), matcher(matcher_)
76  {
77  plist = new PostList * [n_kids];
78  std::copy(pl_begin, pl_end, plist);
79  }
80 
 // Destructor is declared here and defined out of line.
81  ~MaxPostList();
82 
84 
86 
88 
 /// Return an upper bound on what get_weight() can return.
89  double get_maxweight() const;
90 
 /// Return the current docid.
91  Xapian::docid get_docid() const;
92 
94 
96 
 /// Return the weight contribution for the current position.
97  double get_weight() const;
98 
 /// Return true if the current position is past the last entry in this list.
99  bool at_end() const;
100 
 /// Recalculate the upper bound on what get_weight() can return.
101  double recalc_maxweight();
102 
 // NOTE(review): the signature line for the inline body below (listing line
 // 103) is missing from this view.  The index later in this file attributes
 // maxpostlist.h:103 to `PositionList * read_position_list()` - confirm.
104  return NULL;
105  }
106 
 // NOTE(review): presumably the w_min-taking form of PostList::next()
 // ("Advance the current position to the next document in the postlist");
 // verify against maxpostlist.cc.
107  PostList* next(double w_min);
108 
 /// Skip forward to the specified docid.
109  PostList* skip_to(Xapian::docid, double w_min);
110 
 /// Return a string description of this object.
111  std::string get_description() const;
112 
 /// Returns the sum of the wdfs of the sub-postlists which match at the
 /// current position (the index entry describing this is truncated).
120  Xapian::termcount get_wdf() const;
121 
122  // Note - we don't need to implement get_termfreq_est_using_stats() because
123  // an OP_MAX when used as a child of a synonym will be optimised to an OR.
124 
126 };
127 
128 #endif // XAPIAN_INCLUDED_MAXPOSTLIST_H
double max_cached
Cached answer to get_maxweight.
Definition: maxpostlist.h:49
N-way OR postlist with wt=max(wt_i).
Definition: maxpostlist.h:32
Abstract base class for postlists.
Definition: postlist.h:37
Xapian::docid get_docid() const
Return the current docid.
Definition: maxpostlist.cc:89
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
Definition: maxpostlist.h:103
class for performing a match
Xapian::termcount get_unique_terms() const
Return the number of unique terms in the current document.
Definition: maxpostlist.cc:115
PostList ** plist
Array of pointers to sub-postlists.
Definition: maxpostlist.h:46
void operator=(const MaxPostList &)
Don't allow assignment.
void erase_sublist(size_t i)
Erase a sub-postlist.
Definition: maxpostlist.h:58
Xapian::docid did
The current docid, or zero if we haven't started or are at_end.
Definition: maxpostlist.h:40
Abstract base class for postlists.
MultiMatch * matcher
Pointer to the matcher object, so we can report pruning.
Definition: maxpostlist.h:55
double get_weight() const
Return the weight contribution for the current position.
Definition: maxpostlist.cc:135
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
Xapian::doccount db_size
The number of documents in the database.
Definition: maxpostlist.h:52
size_t n_kids
The number of sub-postlists.
Definition: maxpostlist.h:43
Xapian::termcount get_doclength() const
Return the length of current document.
Definition: maxpostlist.cc:95
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
Definition: maxpostlist.cc:208
Internal * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:194
Xapian::doccount get_termfreq_min() const
Get a lower bound on the number of documents indexed by this term.
Definition: maxpostlist.cc:42
void recalc_maxweight()
Called by postlists to indicate that they've rearranged themselves and the maxweight now possible is ...
Definition: multimatch.h:136
MaxPostList(const MaxPostList &)
Don't allow copying.
Xapian::doccount get_termfreq_est() const
Get an estimate of the number of documents indexed by this term.
Definition: maxpostlist.cc:65
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
std::string get_description() const
Return a string description of this object.
Definition: maxpostlist.cc:248
Xapian::doccount get_termfreq_max() const
Get an upper bound on the number of documents indexed by this term.
Definition: maxpostlist.cc:52
double get_maxweight() const
Return an upper bound on what get_weight() can return.
Definition: maxpostlist.cc:83
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
Definition: maxpostlist.cc:153
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:31
bool at_end() const
Return true if the current position is past the last entry in this list.
Definition: maxpostlist.cc:147
MaxPostList(RandomItor pl_begin, RandomItor pl_end, MultiMatch *matcher_, Xapian::doccount db_size_)
Construct from 2 random-access iterators to a container of PostList*, a pointer to the matcher...
Definition: maxpostlist.h:72
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Definition: maxpostlist.cc:272
Xapian::termcount get_wdf() const
get_wdf() for MaxPostList returns the sum of the wdfs of the sub postlists which match the current do...
Definition: maxpostlist.cc:261