xapian-core  2.0.0
termlistmerger.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2008,2010,2011,2013,2016,2017,2018 Olly Betts
5  * Copyright (C) 2011 Action Without Borders
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_TERMLISTMERGER_H
23 #define XAPIAN_INCLUDED_TERMLISTMERGER_H
24 
25 #include "api/termlist.h"
26 #include "heap.h"
27 #include "omassert.h"
28 #include "ortermlist.h"
29 
31  bool operator()(const TermList* a, const TermList* b) const {
32  return a->get_approx_size() > b->get_approx_size();
33  }
34 };
35 
36 template<class ORTERMLIST = OrTermList>
37 inline TermList*
38 make_termlist_merger(std::vector<TermList*>& termlists)
39 {
40  if (termlists.size() <= 1) {
41  return termlists.size() == 1 ? termlists[0] : NULL;
42  }
43 
44  // Make termlists into a heap so that the longest termlist is at the
45  // top of the heap.
46  Heap::make(termlists.begin(), termlists.end(),
48 
49  // Now build a tree of binary TermList objects. The algorithm used to
50  // build the tree is like that used to build an optimal Huffman coding
51  // tree. If we called next() repeatedly, this arrangement would
52  // minimise the number of method calls. Generally we don't actually do
53  // that, but this arrangement is still likely to be a good one, and it
54  // does minimise the work in the worst case.
55  while (true) {
56  AssertRel(termlists.size(), >=, 2);
57  // We build the tree such that at each branch:
58  //
59  // l.get_approx_size() >= r.get_approx_size()
60  //
61  // We do this so that the OrTermList class can be optimised
62  // assuming that this is the case.
63  TermList* r = termlists.front();
64  Heap::pop(termlists.begin(), termlists.end(),
66  termlists.pop_back();
67  TermList* l = termlists.front();
68 
69  TermList* tl = new ORTERMLIST(l, r);
70 
71  if (termlists.size() == 1)
72  return tl;
73 
74  termlists.front() = tl;
75  Heap::replace(termlists.begin(), termlists.end(),
77  }
78 }
79 
80 #endif // XAPIAN_INCLUDED_TERMLISTMERGER_H
Abstract base class for termlists.
Definition: termlist.h:42
virtual Xapian::termcount get_approx_size() const =0
Return approximate size of this termlist.
C++ STL heap implementation with extensions.
void pop(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:213
void replace(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:230
void make(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
Definition: heap.h:259
Various assertion macros.
#define AssertRel(A, REL, B)
Definition: omassert.h:123
Merge two TermList objects using an OR operation.
bool operator()(const TermList *a, const TermList *b) const
Abstract base class for termlists.
TermList * make_termlist_merger(std::vector< TermList * > &termlists)