xapian-core  2.0.0
xorpostlist.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2009,2010,2011,2012,2017 Olly Betts
5  * Copyright (C) 2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_XORPOSTLIST_H
23 #define XAPIAN_INCLUDED_XORPOSTLIST_H
24 
25 #include "backends/postlist.h"
26 #include "postlisttree.h"
27 
28 #include <algorithm>
29 
31 class XorPostList : public PostList {
33  XorPostList& operator=(const XorPostList&) = delete;
34 
36  XorPostList(const XorPostList&) = delete;
37 
40 
42  size_t n_kids;
43 
45  PostList** plist = nullptr;
46 
49 
51  void erase_sublist(size_t i) {
52  delete plist[i];
53  --n_kids;
54  for (size_t j = i; j < n_kids; ++j) {
55  plist[j] = plist[j + 1];
56  }
58  }
59 
60  public:
64  template<class RandomItor>
65  XorPostList(RandomItor pl_begin, RandomItor pl_end,
66  PostListTree* matcher_, Xapian::doccount db_size)
67  : n_kids(pl_end - pl_begin), matcher(matcher_)
68  {
69  plist = new PostList * [n_kids];
70  std::copy(pl_begin, pl_end, plist);
71 
72  // We shortcut an empty shard and avoid creating a postlist tree for it.
73  Assert(db_size);
74  // We calculate the estimate assuming independence. The simplest
75  // way to calculate this seems to be a series of (n_kids - 1) pairwise
76  // calculations, which gives the same answer regardless of the order.
77  double scale = 1.0 / db_size;
78  double P_est = plist[0]->get_termfreq() * scale;
79  for (size_t i = 1; i < n_kids; ++i) {
80  double P_i = plist[i]->get_termfreq() * scale;
81  P_est += P_i - 2.0 * P_est * P_i;
82  }
83  termfreq = static_cast<Xapian::doccount>(P_est * db_size + 0.5);
84  }
85 
86  ~XorPostList();
87 
88  Xapian::docid get_docid() const;
89 
90  double get_weight(Xapian::termcount doclen,
91  Xapian::termcount unique_terms,
92  Xapian::termcount wdfdocmax) const;
93 
94  bool at_end() const;
95 
96  double recalc_maxweight();
97 
99  return NULL;
100  }
101 
102  PostList* next(double w_min);
103 
104  PostList* skip_to(Xapian::docid, double w_min);
105 
106  void get_docid_range(Xapian::docid& first, Xapian::docid& last) const;
107 
108  std::string get_description() const;
109 
119  Xapian::termcount get_wdf() const;
120 
122 };
123 
124 #endif // XAPIAN_INCLUDED_XORPOSTLIST_H
void force_recalc()
Definition: postlisttree.h:125
Abstract base class for postlists.
Definition: postlist.h:40
Xapian::doccount get_termfreq() const
Get an estimate of the number of documents this PostList will return.
Definition: postlist.h:67
PostList * next()
Advance the current position to the next document in the postlist.
Definition: postlist.h:168
Xapian::doccount termfreq
Estimate of the number of documents this PostList will return.
Definition: postlist.h:52
Abstract base class for iterating term positions in a document.
Definition: positionlist.h:32
N-way XOR postlist.
Definition: xorpostlist.h:31
PostListTree * matcher
Pointer to the matcher object, so we can report pruning.
Definition: xorpostlist.h:48
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
Definition: xorpostlist.cc:193
Xapian::docid did
The current docid, or zero if we haven't started or are at_end.
Definition: xorpostlist.h:39
size_t n_kids
The number of sub-postlists.
Definition: xorpostlist.h:42
XorPostList & operator=(const XorPostList &)=delete
Don't allow assignment.
double get_weight(Xapian::termcount doclen, Xapian::termcount unique_terms, Xapian::termcount wdfdocmax) const
Return the weight contribution for the current position.
Definition: xorpostlist.cc:50
Xapian::termcount count_matching_subqs() const
Count the number of leaf subqueries which match at the current position.
Definition: xorpostlist.cc:229
std::string get_description() const
Return a string description of this object.
Definition: xorpostlist.cc:205
Xapian::docid get_docid() const
Return the current docid.
Definition: xorpostlist.cc:44
double recalc_maxweight()
Recalculate the upper bound on what get_weight() can return.
Definition: xorpostlist.cc:70
XorPostList(const XorPostList &)=delete
Don't allow copying.
bool at_end() const
Return true if the current position is past the last entry in this list.
Definition: xorpostlist.cc:64
Xapian::termcount get_wdf() const
Get the within-document frequency.
Definition: xorpostlist.cc:218
XorPostList(RandomItor pl_begin, RandomItor pl_end, PostListTree *matcher_, Xapian::doccount db_size)
Construct from 2 random-access iterators to a container of PostList*, a pointer to the matcher,...
Definition: xorpostlist.h:65
void erase_sublist(size_t i)
Erase a sub-postlist.
Definition: xorpostlist.h:51
PostList * skip_to(Xapian::docid, double w_min)
Skip forward to the specified docid.
Definition: xorpostlist.cc:143
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
Definition: xorpostlist.h:98
PostList ** plist
Array of pointers to sub-postlists.
Definition: xorpostlist.h:45
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A counts of terms.
Definition: types.h:64
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
#define Assert(COND)
Definition: omassert.h:122
Abstract base class for postlists.
Class for managing a tree of PostList objects.