xapian-core  2.0.0
mset.h
Go to the documentation of this file.
1 
4 /* Copyright (C) 2015,2016,2017,2019,2023,2024,2026 Olly Betts
5  * Copyright (C) 2018 Uppinder Chugh
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XAPIAN_INCLUDED_MSET_H
23 #define XAPIAN_INCLUDED_MSET_H
24 
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error Never use <xapian/mset.h> directly; include <xapian.h> instead.
27 #endif
28 
29 #include <iterator>
30 #include <string>
31 #include <string_view>
32 
33 #include <xapian/attributes.h>
34 #include <xapian/document.h>
35 #include <xapian/error.h>
36 #include <xapian/intrusive_ptr.h>
37 #include <xapian/stem.h>
38 #include <xapian/types.h>
39 #include <xapian/visibility.h>
40 
41 namespace Xapian {
42 
43 class MSetIterator;
44 
47  friend class MSetIterator;
48 
49  // Helper function for fetch() methods.
50  void fetch_(Xapian::doccount first, Xapian::doccount last) const;
51 
66  void set_item_weight(Xapian::doccount i, double wt);
67 
68 #if 0 // FIXME: Need work before release.
70  void diversify_(Xapian::doccount k,
72  double factor1,
73  double factor2);
74 #endif
75 
76  public:
78  class Internal;
81 
86  MSet(const MSet & o);
87 
92  MSet & operator=(const MSet & o);
93 
95  MSet(MSet && o);
96 
98  MSet & operator=(MSet && o);
99 
104  MSet();
105 
108  explicit MSet(Internal* internal_);
109 
111  ~MSet();
112 
129  template<typename Iterator>
130  void replace_weights(Iterator first, Iterator last)
131  {
132  auto distance = last - first;
133  // Take care to compare signed and unsigned types both safely and
134  // without triggering compiler warnings.
135  if (distance < 0 ||
136  (sizeof(distance) <= sizeof(Xapian::doccount) ?
137  Xapian::doccount(distance) != size() :
138  distance != static_cast<decltype(distance)>(size()))) {
139  throw Xapian::InvalidArgumentError("Number of weights assigned "
140  "doesn't match the number of "
141  "items");
142  }
143  Xapian::doccount i = 0;
144  while (first != last) {
145  set_item_weight(i, *first);
146  ++i;
147  ++first;
148  }
149  }
150 
159  void sort_by_relevance();
160 
161 #if 0 // FIXME: Need work before release.
181  void diversify(Xapian::doccount k,
183  double lambda = 0.5,
184  double b = 5.0,
185  double sigma_sqr = 1e-3) {
186  // Inline the argument value checks and the calculation of the scale
187  // factor for score_2 so the compiler can optimise in the case where
188  // some or all parameter values are compile-time constants.
189  if (r == 0)
190  throw InvalidArgumentError("r must be > 0");
191  if (lambda < 0.0 || lambda > 1.0)
192  throw InvalidArgumentError("lambda must be between 0 and 1");
193  if (k > 1)
194  diversify_(k, r, lambda, (1.0 - lambda) * b * sigma_sqr * 2.0);
195  }
196 #endif
197 
222  int convert_to_percent(double weight) const;
223 
248  int convert_to_percent(const MSetIterator & it) const;
249 
264  Xapian::doccount get_termfreq(std::string_view term) const;
265 
278  double get_termweight(std::string_view term) const;
279 
284  Xapian::doccount get_firstitem() const;
285 
287  Xapian::doccount get_matches_lower_bound() const;
289  Xapian::doccount get_matches_estimated() const;
291  Xapian::doccount get_matches_upper_bound() const;
292 
298  Xapian::doccount get_uncollapsed_matches_lower_bound() const;
304  Xapian::doccount get_uncollapsed_matches_estimated() const;
310  Xapian::doccount get_uncollapsed_matches_upper_bound() const;
311 
313  double get_max_attained() const;
315  double get_max_possible() const;
316 
317  enum {
324  SNIPPET_BACKGROUND_MODEL = 1,
331  SNIPPET_EXHAUSTIVE = 2,
338  SNIPPET_EMPTY_WITHOUT_MATCH = 4,
339 
363  SNIPPET_NGRAMS = 2048,
364 
372  SNIPPET_CJK_NGRAM = SNIPPET_NGRAMS,
373 
387  SNIPPET_WORD_BREAKS = 4096
388  };
389 
419  std::string snippet(std::string_view text,
420  size_t length = 500,
421  const Xapian::Stem & stemmer = Xapian::Stem(),
422  unsigned flags = SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE,
423  std::string_view hi_start = "<b>",
424  std::string_view hi_end = "</b>",
425  std::string_view omit = "...") const;
426 
437  void fetch(const MSetIterator &begin, const MSetIterator &end) const;
438 
449  void fetch(const MSetIterator &item) const;
450 
461  void fetch() const { fetch_(0, Xapian::doccount(-1)); }
462 
464  Xapian::doccount size() const;
465 
467  bool empty() const { return size() == 0; }
468 
470  void swap(MSet & o) { internal.swap(o.internal); }
471 
473  MSetIterator begin() const;
474 
476  MSetIterator end() const;
477 
479  MSetIterator operator[](Xapian::doccount i) const;
480 
482  MSetIterator back() const;
483 
485  std::string get_description() const;
486 
498  // @{
510  typedef value_type * pointer;
512  typedef const value_type * const_pointer;
517  // @}
518  //
528  // @{
529  // The size is fixed once created.
530  Xapian::doccount max_size() const { return size(); }
531  // @}
532 };
533 
536  friend class MSet;
537 
538  MSetIterator(const Xapian::MSet & mset_, Xapian::doccount off_from_end_)
539  : mset(mset_), off_from_end(off_from_end_) { }
540 
541  public:
544 
551 
553  MSetIterator() : off_from_end(0) { }
554 
556  Xapian::docid operator*() const;
557 
560  --off_from_end;
561  return *this;
562  }
563 
566  MSetIterator retval = *this;
567  --off_from_end;
568  return retval;
569  }
570 
573  ++off_from_end;
574  return *this;
575  }
576 
579  MSetIterator retval = *this;
580  ++off_from_end;
581  return retval;
582  }
583 
594  // @{
596  typedef std::random_access_iterator_tag iterator_category;
602  typedef value_type* pointer;
605  // @}
606 
609  off_from_end -= n;
610  return *this;
611  }
612 
615  off_from_end += n;
616  return *this;
617  }
618 
624  return MSetIterator(mset, off_from_end - n);
625  }
626 
632  return MSetIterator(mset, off_from_end + n);
633  }
634 
637  return difference_type(o.off_from_end) - difference_type(off_from_end);
638  }
639 
645  return mset.get_firstitem() + (mset.size() - off_from_end);
646  }
647 
649  Xapian::Document get_document() const;
650 
652  double get_weight() const;
653 
658  std::string get_collapse_key() const;
659 
677  Xapian::doccount get_collapse_count() const;
678 
685  std::string get_sort_key() const;
686 
711  int get_percent() const {
712  return mset.convert_to_percent(get_weight());
713  }
714 
716  std::string get_description() const;
717 };
718 
720 inline bool
721 operator==(const MSetIterator& a, const MSetIterator& b) noexcept
722 {
723  return a.off_from_end == b.off_from_end;
724 }
725 
727 inline bool
728 operator!=(const MSetIterator& a, const MSetIterator& b) noexcept
729 {
730  return !(a == b);
731 }
732 
734 inline bool
735 operator<(const MSetIterator& a, const MSetIterator& b) noexcept
736 {
737  return a.off_from_end > b.off_from_end;
738 }
739 
741 inline bool
742 operator>(const MSetIterator& a, const MSetIterator& b) noexcept
743 {
744  return b < a;
745 }
746 
748 inline bool
749 operator>=(const MSetIterator& a, const MSetIterator& b) noexcept
750 {
751  return !(a < b);
752 }
753 
755 inline bool
756 operator<=(const MSetIterator& a, const MSetIterator& b) noexcept
757 {
758  return !(b < a);
759 }
760 
765 inline MSetIterator
767 {
768  return it + n;
769 }
770 
771 // Inlined methods of MSet which need MSetIterator to have been defined:
772 
773 inline void
774 MSet::fetch(const MSetIterator &begin_it, const MSetIterator &end_it) const
775 {
776  fetch_(begin_it.off_from_end, end_it.off_from_end);
777 }
778 
779 inline void
780 MSet::fetch(const MSetIterator &item) const
781 {
782  fetch_(item.off_from_end, item.off_from_end);
783 }
784 
785 inline MSetIterator
786 MSet::begin() const {
787  return MSetIterator(*this, size());
788 }
789 
790 inline MSetIterator
791 MSet::end() const {
792  // Decrementing the result of end() needs to work, so we must pass in
793  // *this here.
794  return MSetIterator(*this, 0);
795 }
796 
797 inline MSetIterator
799  return MSetIterator(*this, size() - i);
800 }
801 
802 inline MSetIterator
803 MSet::back() const {
804  return MSetIterator(*this, 1);
805 }
806 
807 inline int
809  return convert_to_percent(it.get_weight());
810 }
811 
812 }
813 
814 #endif // XAPIAN_INCLUDED_MSET_H
Compiler attribute macros.
Class representing a document.
Definition: document.h:64
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:229
Iterator over a Xapian::MSet.
Definition: mset.h:535
MSetIterator operator-(difference_type n) const
Return the iterator decremented by n positions.
Definition: mset.h:631
Xapian::docid value_type
Definition: mset.h:598
MSetIterator & operator+=(difference_type n)
Move the iterator forwards by n positions.
Definition: mset.h:608
MSetIterator()
Create an unpositioned MSetIterator.
Definition: mset.h:553
std::random_access_iterator_tag iterator_category
Definition: mset.h:596
Xapian::doccount get_rank() const
Return the MSet rank for the current position.
Definition: mset.h:644
MSetIterator & operator--()
Move the iterator to the previous position.
Definition: mset.h:572
int get_percent() const
Convert the weight of the current iterator position to a percentage.
Definition: mset.h:711
Xapian::MSet::size_type off_from_end
Definition: mset.h:550
MSetIterator & operator-=(difference_type n)
Move the iterator back by n positions.
Definition: mset.h:614
Xapian::termcount_diff difference_type
Definition: mset.h:600
MSetIterator operator+(difference_type n) const
Return the iterator incremented by n positions.
Definition: mset.h:623
value_type * pointer
Definition: mset.h:602
double get_weight() const
Get the weight for the current position.
Definition: msetiterator.cc:55
MSetIterator(const Xapian::MSet &mset_, Xapian::doccount off_from_end_)
Definition: mset.h:538
Xapian::MSet mset
Definition: mset.h:543
MSetIterator & operator++()
Advance the iterator to the next position.
Definition: mset.h:559
MSetIterator operator--(int)
Move the iterator to the previous position (postfix version).
Definition: mset.h:578
difference_type operator-(const MSetIterator &o) const
Return the number of positions between o and this iterator.
Definition: mset.h:636
value_type reference
Definition: mset.h:604
MSetIterator operator++(int)
Advance the iterator to the next position (postfix version).
Definition: mset.h:565
Xapian::MSet internals.
Definition: msetinternal.h:44
Class representing a list of search results.
Definition: mset.h:46
Xapian::Internal::intrusive_ptr_nonnull< Internal > internal
Definition: mset.h:78
value_type * pointer
Definition: mset.h:510
MSet(MSet &&o)
Move constructor.
Xapian::doccount_diff difference_type
Definition: mset.h:504
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: mset.cc:374
value_type reference
Definition: mset.h:514
MSet(const MSet &o)
Copying is allowed.
void fetch() const
Prefetch hint the whole MSet.
Definition: mset.h:461
void replace_weights(Iterator first, Iterator last)
Assigns new weights and updates MSet.
Definition: mset.h:130
Xapian::doccount max_size() const
Definition: mset.h:530
void fetch_(Xapian::doccount first, Xapian::doccount last) const
Definition: mset.cc:66
Xapian::doccount size_type
Definition: mset.h:502
Xapian::MSetIterator value_type
Definition: mset.h:500
friend class MSetIterator
Definition: mset.h:47
MSet & operator=(MSet &&o)
Move assignment operator.
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: mset.cc:275
Xapian::MSetIterator iterator
Definition: mset.h:506
bool empty() const
Return true if this MSet object is empty.
Definition: mset.h:467
MSet & operator=(const MSet &o)
Copying is allowed.
Xapian::doccount get_firstitem() const
Rank of first item in this MSet.
Definition: mset.cc:312
MSetIterator operator[](Xapian::doccount i) const
Return iterator pointing to the i-th object in this MSet.
Definition: mset.h:798
MSetIterator back() const
Return iterator pointing to the last object in this MSet.
Definition: mset.h:803
void swap(MSet &o)
Efficiently swap this MSet object with another.
Definition: mset.h:470
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:786
const value_type const_reference
Definition: mset.h:516
Xapian::MSetIterator const_iterator
Definition: mset.h:508
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:791
const value_type * const_pointer
Definition: mset.h:512
Class representing a stemming algorithm.
Definition: stem.h:74
string term
Class representing a document.
Hierarchy of classes which Xapian can throw as exceptions.
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:82
XAPIAN_DOCID_BASE_TYPE doccount_diff
A signed difference between two counts of documents.
Definition: types.h:44
bool operator>(const ESetIterator &a, const ESetIterator &b) noexcept
Inequality test for ESetIterator objects.
Definition: eset.h:292
XAPIAN_TERMCOUNT_BASE_TYPE termcount_diff
A signed difference between two counts of terms.
Definition: types.h:71
bool operator>=(const ESetIterator &a, const ESetIterator &b) noexcept
Inequality test for ESetIterator objects.
Definition: eset.h:299
bool operator<=(const ESetIterator &a, const ESetIterator &b) noexcept
Inequality test for ESetIterator objects.
Definition: eset.h:306
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:37
bool operator==(const ESetIterator &a, const ESetIterator &b) noexcept
Equality test for ESetIterator objects.
Definition: eset.h:271
bool operator!=(const ESetIterator &a, const ESetIterator &b) noexcept
Inequality test for ESetIterator objects.
Definition: eset.h:278
ESetIterator operator+(ESetIterator::difference_type n, const ESetIterator &it)
Return ESetIterator it incremented by n positions.
Definition: eset.h:316
bool operator<(const ESetIterator &a, const ESetIterator &b) noexcept
Inequality test for ESetIterator objects.
Definition: eset.h:285
const Query operator*(double factor, const Query &q)
Scale a Xapian::Query object using OP_SCALE_WEIGHT.
Definition: query.h:827
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:51
stemming algorithms
static Xapian::Stem stemmer
Definition: stemtest.cc:42
typedefs for Xapian
Define XAPIAN_VISIBILITY_* macros.
#define XAPIAN_VISIBILITY_DEFAULT
Definition: visibility.h:28
#define XAPIAN_VISIBILITY_INTERNAL
Definition: visibility.h:29