xapian-core  1.4.27
omenquire.cc
Go to the documentation of this file.
1 /* omenquire.cc: External interface for running queries
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2001,2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2015,2016,2017 Olly Betts
6  * Copyright 2007,2009 Lemur Consulting Ltd
7  * Copyright 2011, Action Without Borders
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22  * USA
23  */
24 
25 #include <config.h>
26 #include "xapian/enquire.h"
27 
28 #include "xapian/document.h"
29 #include "xapian/error.h"
30 #include "xapian/expanddecider.h"
31 #include "xapian/matchspy.h"
32 #include "xapian/termiterator.h"
33 #include "xapian/weight.h"
34 
35 #include "vectortermlist.h"
36 
37 #include "backends/database.h"
38 #include "debuglog.h"
39 #include "expand/esetinternal.h"
40 #include "expand/expandweight.h"
41 #include "exp10.h"
42 #include "matcher/msetcmp.h"
43 #include "matcher/multimatch.h"
44 #include "omassert.h"
45 #include "api/omenquireinternal.h"
46 #include "roundestimate.h"
47 #include "str.h"
48 #include "weight/weightinternal.h"
49 
50 #include <algorithm>
51 #include "autoptr.h"
52 #include <cfloat>
53 #include <cmath>
54 #include <vector>
55 
56 using namespace std;
57 
61 
62 namespace Xapian {
63 
// Out-of-line definition of the MatchDecider destructor; the body is empty.
64 MatchDecider::~MatchDecider() { }
65 
66 // Methods for Xapian::RSet
67 
// Default constructor: allocates a new RSet::Internal and stores it in
// `internal`.  Nothing else is done here.
68 RSet::RSet() : internal(new RSet::Internal)
69 {
70 }
71 
// Copy constructor: initialises `internal` from other.internal.  No
// per-document copying is performed in this function.
72 RSet::RSet(const RSet &other) : internal(other.internal)
73 {
74 }
75 
// Copy assignment: replaces this object's `internal` with other.internal.
// Note the return type is void, so RSet assignments cannot be chained
// (contrast with the move assignment operator below, which returns RSet&).
76 void
77 RSet::operator=(const RSet &other)
78 {
79  internal = other.internal;
80 }
81 
// Move constructor: uses the compiler-generated (defaulted) implementation.
82 RSet::RSet(RSet &&) = default;
83 
// Move assignment: uses the compiler-generated (defaulted) implementation
// and returns a reference to this object.
84 RSet&
85 RSet::operator=(RSet &&) = default;
86 
88 {
89 }
90 
92 RSet::size() const
93 {
94  return internal->items.size();
95 }
96 
// Report whether the set holds no documents.
//
// Returns true when internal->items (the container add_document() inserts
// document ids into) is empty, false otherwise.
97 bool
98 RSet::empty() const
99 {
100  return internal->items.empty();
101 }
102 
103 void
105 {
106  if (did == 0) throw Xapian::InvalidArgumentError("Docid 0 not valid");
107  internal->items.insert(did);
108 }
109 
110 void
112 {
113  internal->items.erase(did);
114 }
115 
116 bool
118 {
119  return internal->items.find(did) != internal->items.end();
120 }
121 
122 string
124 {
125  return "RSet(" + internal->get_description() + ")";
126 }
127 
128 string
130 {
131  string description("RSet::Internal(");
132 
133  set<Xapian::docid>::const_iterator i;
134  for (i = items.begin(); i != items.end(); ++i) {
135  if (i != items.begin()) description += ", ";
136  description += str(*i);
137  }
138 
139  description += ')';
140 
141  return description;
142 }
143 
144 namespace Internal {
145 
146 // Methods for Xapian::MSetItem
147 
// Return a string describing this item.
//
// The result has the form "Xapian::MSetItem(<did>, <wt>, <collapse_key>)":
// the document id and weight are converted with str(), and the collapse key
// is appended as-is.
148 string
149 MSetItem::get_description() const
150 {
151  string description;
152 
// Build the comma-separated field list first...
153  description = str(did) + ", " + str(wt) + ", " +
154  collapse_key;
155 
// ...then wrap it in the class name.
156  description = "Xapian::MSetItem(" + description + ")";
157 
158  return description;
159 }
160 
161 }
162 
163 // Methods for Xapian::MSet
164 
166 {
167 }
168 
170 {
171 }
172 
// Copy constructor: initialises `internal` from other.internal.  The
// declaration quoted later in this listing (mset.h) shows `internal` is an
// intrusive_ptr<Internal>, so the new MSet refers to the same Internal
// object rather than to a duplicate of the results.
173 MSet::MSet(const MSet & other) : internal(other.internal)
174 {
175 }
176 
// Copy assignment: makes this MSet use other's `internal` pointer and
// returns *this so assignments can be chained.
177 MSet &
178 MSet::operator=(const MSet &other)
179 {
180  internal = other.internal;
181  return *this;
182 }
183 
// Move constructor: uses the compiler-generated (defaulted) implementation.
184 MSet::MSet(MSet&&) = default;
185 
// Move assignment: uses the compiler-generated (defaulted) implementation
// and returns a reference to this object.
186 MSet&
187 MSet::operator=(MSet&&) = default;
188 
189 void
191 {
192  LOGCALL_VOID(API, "Xapian::MSet::fetch_", first | last);
193  Assert(internal.get() != 0);
194  internal->fetch_items(first, last);
195 }
196 
// Convert a weight to a percentage.
//
// Thin public wrapper: logs the call, asserts that `internal` is set, and
// returns whatever Internal::convert_to_percent_internal(wt) produces.
//
// wt: the weight to convert.
197 int
198 MSet::convert_to_percent(double wt) const
199 {
200  LOGCALL(API, int, "Xapian::MSet::convert_to_percent", wt);
201  Assert(internal.get() != 0);
202  RETURN(internal->convert_to_percent_internal(wt));
203 }
204 
206 MSet::get_termfreq(const string &tname) const
207 {
208  LOGCALL(API, Xapian::doccount, "Xapian::MSet::get_termfreq", tname);
209  Assert(internal.get() != 0);
210  if (usual(internal->stats)) {
211  Xapian::doccount termfreq;
212  if (internal->stats->get_stats(tname, termfreq))
213  RETURN(termfreq);
214  }
215  if (internal->enquire.get() == 0) {
216  throw InvalidOperationError("Can't get termfreq from an MSet which is not derived from a query.");
217  }
218  RETURN(internal->enquire->get_termfreq(tname));
219 }
220 
// Get the term weight of a term.
//
// tname: the term to look up in the statistics attached to this MSet.
//
// Returns the weight reported by internal->stats->get_termweight().
//
// Throws InvalidOperationError if this MSet has no stats object, and
// InvalidArgumentError (message "<tname>: termweight not available") if the
// stats object has no weight for the term.
221 double
222 MSet::get_termweight(const string &tname) const
223 {
224  LOGCALL(API, double, "Xapian::MSet::get_termweight", tname);
225  Assert(internal.get() != 0);
// Without stats there is nothing to look the term up in.
226  if (!internal->stats) {
227  throw InvalidOperationError("Can't get termweight from an MSet which is not derived from a query.");
228  }
229  double termweight;
// get_termweight() fills in `termweight` and returns false on failure.
230  if (!internal->stats->get_termweight(tname, termweight)) {
231  string msg = tname;
232  msg += ": termweight not available";
233  throw InvalidArgumentError(msg);
234  }
235  RETURN(termweight);
236 }
237 
240 {
241  Assert(internal.get() != 0);
242  return internal->firstitem;
243 }
244 
247 {
248  Assert(internal.get() != 0);
249  return internal->matches_lower_bound;
250 }
251 
254 {
255  Assert(internal.get() != 0);
256  return round_estimate(internal->matches_lower_bound,
257  internal->matches_upper_bound,
258  internal->matches_estimated);
259 }
260 
263 {
264  Assert(internal.get() != 0);
265  return internal->matches_upper_bound;
266 }
267 
270 {
271  Assert(internal.get() != 0);
272  return internal->uncollapsed_lower_bound;
273 }
274 
277 {
278  Assert(internal.get() != 0);
279  return internal->uncollapsed_estimated;
280 }
281 
284 {
285  Assert(internal.get() != 0);
286  return internal->uncollapsed_upper_bound;
287 }
288 
289 double
291 {
292  Assert(internal.get() != 0);
293  return internal->max_possible;
294 }
295 
296 double
298 {
299  Assert(internal.get() != 0);
300  return internal->max_attained;
301 }
302 
// Generate a snippet.
//
// Public wrapper which asserts that `internal` is set and forwards every
// argument, unchanged and in the same order, to Internal::snippet().
// The snippet logic itself is not in this function.
//
// Returns the string produced by internal->snippet().
303 string
304 MSet::snippet(const string & text,
305  size_t length,
306  const Xapian::Stem & stemmer,
307  unsigned flags,
308  const string & hi_start,
309  const string & hi_end,
310  const string & omit) const
311 {
312  Assert(internal.get() != 0);
313  return internal->snippet(text, length, stemmer, flags,
314  hi_start, hi_end, omit);
315 }
316 
318 MSet::size() const
319 {
320  Assert(internal.get() != 0);
321  return internal->items.size();
322 }
323 
324 string
326 {
327  Assert(internal.get() != 0);
328  return "Xapian::MSet(" + internal->get_description() + ")";
329 }
330 
331 int
333 {
334  LOGCALL(MATCH, int, "Xapian::MSet::Internal::convert_to_percent_internal", wt);
335  if (percent_factor == 0) RETURN(100);
336 
337  // Excess precision on x86 can result in a difference here.
338  double v = wt * percent_factor + 100.0 * DBL_EPSILON;
339  int pcent = static_cast<int>(v);
340  LOGLINE(MATCH, "wt = " << wt << ", max_possible = " << max_possible <<
341  " => pcent = " << pcent);
342  if (pcent > 100) pcent = 100;
343  if (pcent < 0) pcent = 0;
344  if (pcent == 0 && wt > 0) pcent = 1;
345 
346  RETURN(pcent);
347 }
348 
349 Document
351 {
352  LOGCALL(MATCH, Document, "Xapian::MSet::Internal::get_doc_by_index", index);
353  index += firstitem;
354  map<Xapian::doccount, Document>::const_iterator doc;
355  doc = indexeddocs.find(index);
356  if (doc != indexeddocs.end()) {
357  RETURN(doc->second);
358  }
359  if (index < firstitem || index >= firstitem + items.size()) {
360  throw RangeError("The mset returned from the match does not contain the document at index " + str(index));
361  }
362  Assert(enquire.get());
363  if (!requested_docs.empty()) {
364  // There's already a pending request, so handle that.
365  read_docs();
366  // Maybe we just fetched the doc we want.
367  doc = indexeddocs.find(index);
368  if (doc != indexeddocs.end()) {
369  RETURN(doc->second);
370  }
371  }
372 
373  RETURN(enquire->get_document(items[index - firstitem]));
374 }
375 
376 void
378 {
379  LOGCALL_VOID(MATCH, "Xapian::MSet::Internal::fetch_items", first | last);
380  if (enquire.get() == 0) {
381  throw InvalidOperationError("Can't fetch documents from an MSet which is not derived from a query.");
382  }
383  if (items.empty()) return;
384  if (last > items.size() - 1)
385  last = items.size() - 1;
386  for (Xapian::doccount i = first; i <= last; ++i) {
387  map<Xapian::doccount, Document>::const_iterator doc;
388  doc = indexeddocs.find(i);
389  if (doc == indexeddocs.end()) {
390  /* We don't have the document cached */
391  set<Xapian::doccount>::const_iterator s;
392  s = requested_docs.find(i);
393  if (s == requested_docs.end()) {
394  /* We haven't even requested it yet - do so now. */
395  enquire->request_doc(items[i - firstitem]);
396  requested_docs.insert(i);
397  }
398  }
399  }
400 }
401 
402 string
404 {
405  string description = "Xapian::MSet::Internal(";
406 
407  description += "firstitem=" + str(firstitem) + ", " +
408  "matches_lower_bound=" + str(matches_lower_bound) + ", " +
409  "matches_estimated=" + str(matches_estimated) + ", " +
410  "matches_upper_bound=" + str(matches_upper_bound) + ", " +
411  "max_possible=" + str(max_possible) + ", " +
412  "max_attained=" + str(max_attained);
413 
414  for (vector<Xapian::Internal::MSetItem>::const_iterator i = items.begin();
415  i != items.end(); ++i) {
416  if (!description.empty()) description += ", ";
417  description += i->get_description();
418  }
419 
420  description += ")";
421 
422  return description;
423 }
424 
425 void
427 {
428  set<Xapian::doccount>::const_iterator i;
429  for (i = requested_docs.begin(); i != requested_docs.end(); ++i) {
430  indexeddocs[*i] = enquire->read_doc(items[*i - firstitem]);
431  LOGLINE(MATCH, "stored doc at index " << *i << " is " << indexeddocs[*i]);
432  }
433  /* Clear list of requested but not fetched documents. */
434  requested_docs.clear();
435 }
436 
437 // MSetIterator
438 
441 {
442  Assert(mset.internal.get());
443  Xapian::doccount size = mset.internal->items.size();
444  Xapian::doccount index = size - off_from_end;
445  AssertRel(index,<,size);
446  return mset.internal->items[index].did;
447 }
448 
449 Document
451 {
452  Assert(mset.internal.get());
453  Xapian::doccount size = mset.internal->items.size();
454  Xapian::doccount index = size - off_from_end;
455  AssertRel(index,<,size);
456  return mset.internal->get_doc_by_index(index);
457 }
458 
459 double
461 {
462  Assert(mset.internal.get());
463  Xapian::doccount size = mset.internal->items.size();
464  Xapian::doccount index = size - off_from_end;
465  AssertRel(index,<,size);
466  return mset.internal->items[index].wt;
467 }
468 
469 std::string
471 {
472  Assert(mset.internal.get());
473  Xapian::doccount size = mset.internal->items.size();
474  Xapian::doccount index = size - off_from_end;
475  AssertRel(index,<,size);
476  return mset.internal->items[index].collapse_key;
477 }
478 
481 {
482  Assert(mset.internal.get());
483  Xapian::doccount size = mset.internal->items.size();
484  Xapian::doccount index = size - off_from_end;
485  AssertRel(index,<,size);
486  return mset.internal->items[index].collapse_count;
487 }
488 
489 string
491 {
492  Assert(mset.internal.get());
493  Xapian::doccount size = mset.internal->items.size();
494  Xapian::doccount index = size - off_from_end;
495  AssertRel(index,<,size);
496  return mset.internal->items[index].sort_key;
497 }
498 
499 string
501 {
502  return "Xapian::MSetIterator(" + str(mset.size() - off_from_end) + ")";
503 }
504 
505 // Methods for Xapian::Enquire::Internal
506 
508  : db(db_), query(), collapse_key(Xapian::BAD_VALUENO), collapse_max(0),
509  order(Enquire::ASCENDING), percent_cutoff(0), weight_cutoff(0),
510  sort_key(Xapian::BAD_VALUENO), sort_by(REL), sort_value_forward(true),
511  sorter(), time_limit(0.0), weight(0),
512  eweightname("trad"), expand_k(1.0)
513 {
514  if (db.internal.empty()) {
515  throw InvalidArgumentError("Can't make an Enquire object from an uninitialised Database object.");
516  }
517 }
518 
520 {
521  delete weight;
522  weight = 0;
523 }
524 
525 void
527 {
528  query = query_;
529  qlen = qlen_ ? qlen_ : query.get_length();
530 }
531 
532 const Query &
534 {
535  return query;
536 }
537 
538 MSet
540  Xapian::doccount check_at_least, const RSet *rset,
541  const MatchDecider *mdecider) const
542 {
543  LOGCALL(MATCH, MSet, "Enquire::Internal::get_mset", first | maxitems | check_at_least | rset | mdecider);
544 
545  if (percent_cutoff && (sort_by == VAL || sort_by == VAL_REL)) {
546  throw Xapian::UnimplementedError("Use of a percentage cutoff while sorting primary by value isn't currently supported");
547  }
548 
549  if (weight == 0) {
550  weight = new BM25Weight;
551  }
552 
553  Xapian::doccount first_orig = first;
554  {
556  first = min(first, docs);
557  maxitems = min(maxitems, docs - first);
558  check_at_least = min(check_at_least, docs);
559  check_at_least = max(check_at_least, first + maxitems);
560  }
561 
562  AutoPtr<Xapian::Weight::Internal> stats(new Xapian::Weight::Internal);
563  ::MultiMatch match(db, query, qlen, rset,
567  time_limit, *(stats.get()), weight, spies,
568  (sorter.get() != NULL),
569  (mdecider != NULL));
570  // Run query and put results into supplied Xapian::MSet object.
571  MSet retval;
572  match.get_mset(first, maxitems, check_at_least, retval,
573  *(stats.get()), mdecider, sorter.get());
574  if (first_orig != first && retval.internal.get()) {
575  retval.internal->firstitem = first_orig;
576  }
577 
578  Assert(weight->name() != "bool" || retval.get_max_possible() == 0);
579 
580  // The Xapian::MSet needs to have a pointer to ourselves, so that it can
581  // retrieve the documents. This is set here explicitly to avoid having
582  // to pass it into the matcher, which gets messy particularly in the
583  // networked case.
584  retval.internal->enquire = this;
585 
586  if (!retval.internal->stats) {
587  retval.internal->stats = stats.release();
588  }
589 
590  RETURN(retval);
591 }
592 
593 ESet
595  const RSet & rset, int flags,
596  const ExpandDecider * edecider_,
597  double min_wt) const
598 {
599  LOGCALL(MATCH, ESet, "Enquire::Internal::get_eset", maxitems | rset | flags | edecider_ | min_wt);
600 
602  opt_intrusive_ptr<const ExpandDecider> edecider(edecider_);
603  if (maxitems == 0 || rset.empty()) {
604  // Either we were asked for no results, or wouldn't produce any
605  // because no documents were marked as relevant.
606  RETURN(ESet());
607  }
608 
609  LOGVALUE(MATCH, rset.size());
610 
611  if (!query.empty() && !(flags & Enquire::INCLUDE_QUERY_TERMS)) {
612  opt_intrusive_ptr<const ExpandDecider> decider_noquery(
614  query.get_terms_end()))->release());
615  if (edecider.get()) {
616  edecider = (new ExpandDeciderAnd(decider_noquery.get(),
617  edecider.get()))->release();
618  } else {
619  edecider = decider_noquery;
620  }
621  }
622 
623  bool use_exact_termfreq(flags & Enquire::USE_EXACT_TERMFREQ);
624  Xapian::ESet eset;
625  eset.internal = new Xapian::ESet::Internal;
626 
627  if (eweightname == "bo1") {
628  Bo1EWeight bo1eweight(db, rset.size(), use_exact_termfreq);
629  eset.internal->expand(maxitems, db, rset, edecider.get(), bo1eweight,
630  min_wt);
631  } else {
632  TradEWeight tradeweight(db, rset.size(), use_exact_termfreq, expand_k);
633  eset.internal->expand(maxitems, db, rset, edecider.get(), tradeweight,
634  min_wt);
635  }
636 
637  RETURN(eset);
638 }
639 
641  private:
642  typedef map<string, unsigned int> tmap_t;
643  const tmap_t &tmap;
644 
645  public:
646  explicit ByQueryIndexCmp(const tmap_t &tmap_) : tmap(tmap_) {}
647  bool operator()(const string &left,
648  const string &right) const {
649  tmap_t::const_iterator l, r;
650  l = tmap.find(left);
651  r = tmap.find(right);
652  Assert((l != tmap.end()) && (r != tmap.end()));
653 
654  return l->second < r->second;
655  }
656 };
657 
660 {
661  if (query.empty())
662  return TermIterator();
663 
664  // The ordered list of terms in the query.
666 
667  // copy the list of query terms into a map for faster access.
668  // FIXME: a hash would be faster than a map, if this becomes
669  // a problem.
670  map<string, unsigned int> tmap;
671  unsigned int index = 1;
672  for ( ; qt != query.get_terms_end(); ++qt) {
673  if (tmap.find(*qt) == tmap.end())
674  tmap[*qt] = index++;
675  }
676 
677  vector<string> matching_terms;
678 
679  TermIterator docterms = db.termlist_begin(did);
680  TermIterator docterms_end = db.termlist_end(did);
681  while (docterms != docterms_end) {
682  string term = *docterms;
683  map<string, unsigned int>::iterator t = tmap.find(term);
684  if (t != tmap.end()) matching_terms.push_back(term);
685  ++docterms;
686  }
687 
688  // sort the resulting list by query position.
689  sort(matching_terms.begin(), matching_terms.end(), ByQueryIndexCmp(tmap));
690 
691  return TermIterator(new VectorTermList(matching_terms.begin(),
692  matching_terms.end()));
693 }
694 
697 {
698  // FIXME: take advantage of MSetIterator to ensure that database
699  // doesn't get modified underneath us.
700  return get_matching_terms(*it);
701 }
702 
704 Enquire::Internal::get_termfreq(const string &tname) const
705 {
706  return db.get_termfreq(tname);
707 }
708 
709 string
711 {
712  string description = db.get_description();
713  description += ", ";
714  description += query.get_description();
715  return description;
716 }
717 
718 // Private methods for Xapian::Enquire::Internal
719 
720 void
722 {
723  unsigned int multiplier = db.internal.size();
724 
725  Xapian::docid realdid = (item.did - 1) / multiplier + 1;
726  Xapian::doccount dbnumber = (item.did - 1) % multiplier;
727 
728  db.internal[dbnumber]->request_document(realdid);
729 }
730 
731 Document
733 {
734  unsigned int multiplier = db.internal.size();
735 
736  Xapian::docid realdid = (item.did - 1) / multiplier + 1;
737  Xapian::doccount dbnumber = (item.did - 1) % multiplier;
738 
740  doc = db.internal[dbnumber]->collect_document(realdid);
741  return Document(doc);
742 }
743 
744 Document
746 {
747  unsigned int multiplier = db.internal.size();
748 
749  Xapian::docid realdid = (item.did - 1) / multiplier + 1;
750  Xapian::doccount dbnumber = (item.did - 1) % multiplier;
751 
752  // We know the doc exists, so open lazily.
753  return Document(db.internal[dbnumber]->open_document(realdid, true));
754 }
755 
756 // Methods of Xapian::Enquire
757 
// Copy constructor: copying is allowed (and is cheap).
//
// Only the `internal` pointer is copied (an intrusive_ptr<Internal> per the
// enquire.h declaration quoted later in this listing), so both Enquire
// objects refer to the same Internal state.  The construction is logged.
758 Enquire::Enquire(const Enquire & other) : internal(other.internal)
759 {
760  LOGCALL_CTOR(API, "Enquire", other);
761 }
762 
763 void
765 {
766  LOGCALL_VOID(API, "Xapian::Enquire::operator=", other);
767  internal = other.internal;
768 }
769 
// Move constructor: uses the compiler-generated (defaulted) implementation.
770 Enquire::Enquire(Enquire&&) = default;
771 
// Move assignment: uses the compiler-generated (defaulted) implementation
// and returns a reference to this object.
772 Enquire&
773 Enquire::operator=(Enquire&&) = default;
774 
// Construct an Enquire object for running queries against `databases`.
//
// Allocates a new Enquire::Internal bound to the given Database and logs the
// construction.
// NOTE(review): the Internal constructor in this file appears to throw
// InvalidArgumentError for an uninitialised Database - confirm, since its
// signature line is not visible in this listing.
775 Enquire::Enquire(const Database &databases)
776  : internal(new Internal(databases))
777 {
778  LOGCALL_CTOR(API, "Enquire", databases);
779 }
780 
782  : internal(new Internal(databases))
783 {
784  LOGCALL_CTOR(API, "Enquire", databases | Literal("errorhandler"));
785 }
786 
788 {
789  LOGCALL_DTOR(API, "Enquire");
790 }
791 
792 void
794 {
795  LOGCALL_VOID(API, "Xapian::Enquire::set_query", query | len);
796  internal->set_query(query, len);
797 }
798 
799 const Query &
801 {
802  LOGCALL(API, const Xapian::Query &, "Xapian::Enquire::get_query", NO_ARGS);
803  RETURN(internal->get_query());
804 }
805 
806 void
808  LOGCALL_VOID(API, "Xapian::Enquire::add_matchspy", spy);
809  internal->spies.push_back(spy);
810 }
811 
812 void
814  LOGCALL_VOID(API, "Xapian::Enquire::clear_matchspies", NO_ARGS);
815  internal->spies.clear();
816 }
817 
818 void
820 {
821  LOGCALL_VOID(API, "Xapian::Enquire::set_weighting_scheme", weight_);
822  // Clone first in case doing so throws an exception.
823  Weight * wt = weight_.clone();
824  swap(wt, internal->weight);
825  delete wt;
826 }
827 
// Set the weighting scheme to use for expansion.
//
// eweightname_: scheme name.  "bo1" and "trad" are accepted as-is; "prob" is
//               accepted and stored as "trad".
// expand_k_:    stored in internal->expand_k for every accepted name; in
//               Internal::get_eset() it is only passed to TradEWeight.
//
// Throws InvalidArgumentError for any other scheme name; in that case the
// stored settings are left untouched.
//
// The method is declared const but still updates the shared Internal object
// through the `internal` pointer.
828 void
829 Enquire::set_expansion_scheme(const std::string &eweightname_, double expand_k_) const
830 {
831  LOGCALL_VOID(API, "Xapian::Enquire::set_expansion_scheme", eweightname_ | expand_k_);
832 
// "prob" is treated as another name for the "trad" scheme.
833  if (eweightname_ == "prob") {
834  internal->eweightname = "trad";
835  internal->expand_k = expand_k_;
836  return;
837  }
838 
// Validate before modifying anything.
839  if (eweightname_ != "bo1" && eweightname_ != "trad") {
840  throw InvalidArgumentError("Invalid name for query expansion scheme.");
841  }
842 
843  internal->eweightname = eweightname_;
844  internal->expand_k = expand_k_;
845 }
846 
847 void
849 {
850  if (collapse_key == Xapian::BAD_VALUENO) collapse_max = 0;
851  internal->collapse_key = collapse_key;
852  internal->collapse_max = collapse_max;
853 }
854 
855 void
857 {
858  internal->order = order;
859 }
860 
// Set the percentage and/or weight cutoffs.
//
// Both values are stored unchanged on the Internal object; no validation is
// done here.  Internal::get_mset() in this file rejects a non-zero
// percent_cutoff when the primary sort is by value (VAL or VAL_REL) by
// throwing UnimplementedError.
//
// percent_cutoff: percentage cutoff to store.
// weight_cutoff:  weight cutoff to store.
861 void
862 Enquire::set_cutoff(int percent_cutoff, double weight_cutoff)
863 {
864  internal->percent_cutoff = percent_cutoff;
865  internal->weight_cutoff = weight_cutoff;
866 }
867 
868 void
870 {
871  internal->sort_by = Internal::REL;
872 }
873 
// Set the sorting to be by value only.
//
// sort_key:  value slot stored in internal->sort_key.
// ascending: stored unchanged in internal->sort_value_forward.
//
// Any previously set KeyMaker sorter is cleared, so the value slot is used.
// NOTE(review): the declaration quoted later in this listing names the second
// parameter `reverse`, whereas this definition names it `ascending` - confirm
// the intended sense against enquire.h before relying on either name.
874 void
875 Enquire::set_sort_by_value(valueno sort_key, bool ascending)
876 {
877  internal->sorter = NULL;
878  internal->sort_key = sort_key;
879  internal->sort_by = Internal::VAL;
880  internal->sort_value_forward = ascending;
881 }
882 
883 void
885 {
886  internal->sorter = NULL;
887  internal->sort_key = sort_key;
888  internal->sort_by = Internal::VAL_REL;
889  internal->sort_value_forward = ascending;
890 }
891 
892 void
894 {
895  internal->sorter = NULL;
896  internal->sort_key = sort_key;
897  internal->sort_by = Internal::REL_VAL;
898  internal->sort_value_forward = ascending;
899 }
900 
// Set the sorting to be by a key produced by a KeyMaker only.
//
// sorter:    the KeyMaker to use; must not be NULL.
// ascending: stored unchanged in internal->sort_value_forward.
//
// Throws InvalidArgumentError if sorter is NULL (nothing is modified in that
// case).  internal->sort_key is left as it was; the sort mode becomes VAL,
// the same mode set_sort_by_value() selects.
901 void
902 Enquire::set_sort_by_key(KeyMaker * sorter, bool ascending)
903 {
904  if (sorter == NULL)
905  throw InvalidArgumentError("sorter can't be NULL");
906  internal->sorter = sorter;
907  internal->sort_by = Internal::VAL;
908  internal->sort_value_forward = ascending;
909 }
910 
911 void
913 {
914  if (sorter == NULL)
915  throw InvalidArgumentError("sorter can't be NULL");
916  internal->sorter = sorter;
917  internal->sort_by = Internal::VAL_REL;
918  internal->sort_value_forward = ascending;
919 }
920 
921 void
923 {
924  if (sorter == NULL)
925  throw Xapian::InvalidArgumentError("sorter can't be NULL");
926  internal->sorter = sorter;
927  internal->sort_by = Internal::REL_VAL;
928  internal->sort_value_forward = ascending;
929 }
930 
// Set a time limit for the match.
//
// The value is stored unchanged on the Internal object, which hands it to
// the matcher in Internal::get_mset().  No validation is done here.
// NOTE(review): the unit of time_limit is not visible in this file - confirm
// against the public header.
931 void
932 Enquire::set_time_limit(double time_limit)
933 {
934  internal->time_limit = time_limit;
935 }
936 
937 MSet
939  Xapian::doccount check_at_least, const RSet *rset,
940  const MatchDecider *mdecider) const
941 {
942  LOGCALL(API, Xapian::MSet, "Xapian::Enquire::get_mset", first | maxitems | check_at_least | rset | mdecider);
943  RETURN(internal->get_mset(first, maxitems, check_at_least, rset, mdecider));
944 }
945 
// Get the expand set for the given rset.
//
// Public wrapper: logs the call and forwards all arguments unchanged to
// Internal::get_eset(), returning its result.
//
// maxitems: passed through; Internal::get_eset() returns an empty ESet when
//           this is 0.
// rset:     relevance set; Internal::get_eset() returns an empty ESet when
//           this is empty.
// flags:    bit flags; Internal::get_eset() tests INCLUDE_QUERY_TERMS and
//           USE_EXACT_TERMFREQ.
// edecider: optional ExpandDecider (may be null).
// min_wt:   passed through to the expansion code.
946 ESet
947 Enquire::get_eset(Xapian::termcount maxitems, const RSet & rset, int flags,
948  const ExpandDecider * edecider, double min_wt) const
949 {
950  LOGCALL(API, Xapian::ESet, "Xapian::Enquire::get_eset", maxitems | rset | flags | edecider | min_wt);
951  RETURN(internal->get_eset(maxitems, rset, flags, edecider, min_wt));
952 }
953 
956 {
957  LOGCALL(API, Xapian::TermIterator, "Xapian::Enquire::get_matching_terms_begin", it);
958  RETURN(internal->get_matching_terms(it));
959 }
960 
963 {
964  LOGCALL(API, Xapian::TermIterator, "Xapian::Enquire::get_matching_terms_begin", did);
965  RETURN(internal->get_matching_terms(did));
966 }
967 
968 string
970 {
971  return "Xapian::Enquire(" + internal->get_description() + ")";
972 }
973 
974 }
Xapian::termcount get_length() const
Return the length of this query object.
Definition: query.cc:187
The Xapian namespace contains public interfaces for the Xapian library.
Definition: compactor.cc:80
Xapian::doccount size() const
Return number of items in this MSet object.
Definition: omenquire.cc:318
ExpandDecider subclass which rejects terms in a specified list.
#define RETURN(A)
Definition: debuglog.h:493
void operator=(const Enquire &other)
Assignment is allowed (and is cheap).
Definition: omenquire.cc:764
#define Assert(COND)
Definition: omassert.h:122
Xapian::doccount size() const
The number of documents in this R-Set.
Definition: omenquire.cc:92
void set_expansion_scheme(const std::string &eweightname_, double expand_k_=1.0) const
Set the weighting scheme to use for expansion.
Definition: omenquire.cc:829
std::string eweightname
The weighting scheme to use for query expansion.
void set_sort_by_value_then_relevance(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value, then by relevance for documents with the same value.
Definition: omenquire.cc:884
MSet & operator=(const MSet &o)
Copying is allowed.
Definition: omenquire.cc:178
const Xapian::Database db
The database which this enquire object uses.
void read_docs() const
Read and cache the documents so far requested.
Definition: omenquire.cc:426
TermIterator termlist_begin(Xapian::docid did) const
An iterator pointing to the start of the termlist for a given document.
Definition: omdatabase.cc:198
double get_max_possible() const
The maximum possible weight any document could achieve.
Definition: omenquire.cc:290
void set_sort_by_relevance()
Set the sorting to be by relevance only.
Definition: omenquire.cc:869
void set_docid_order(docid_order order)
Set sort order for document IDs.
Definition: omenquire.cc:856
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:123
int convert_to_percent(double weight) const
Convert a weight to a percentage.
Definition: omenquire.cc:198
This class is used to access a database, or a group of databases.
Definition: database.h:68
Xapian::Document get_doc_by_index(Xapian::doccount index) const
Get a document by index in MSet, via the cache.
Definition: omenquire.cc:350
class for performing a match
void set_sort_by_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by value only.
Definition: omenquire.cc:875
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:325
void fetch_(Xapian::doccount first, Xapian::doccount last) const
Definition: omenquire.cc:190
string get_description() const
Definition: omenquire.cc:710
const Query & get_query() const
Definition: omenquire.cc:533
#define true
Definition: header.h:8
void set_cutoff(int percent_cutoff, double weight_cutoff=0)
Set the percentage and/or weight cutoffs.
Definition: omenquire.cc:862
#define AssertRel(A, REL, B)
Definition: omassert.h:123
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
Definition: query.cc:135
Class representing a stemming algorithm.
Definition: stem.h:62
Abstract base class for match spies.
Definition: matchspy.h:49
double weight
The weight of a document or term.
Definition: types.h:122
#define usual(COND)
Definition: config.h:576
virtual std::string get_description() const
Return a string describing this object.
Definition: omdatabase.cc:548
Xapian::doccount round_estimate(T lb, T ub, T est)
Round a bounded estimate to an appropriate number of S.F.
Definition: roundestimate.h:37
Class which actually implements Xapian::ESet.
Definition: esetinternal.h:77
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
Definition: omenquire.cc:246
Xapian::docid did
Document id.
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: mset.h:52
double get_max_attained() const
The maximum weight attained by any document.
Definition: omenquire.cc:297
std::string snippet(const std::string &text, size_t length=500, const Xapian::Stem &stemmer=Xapian::Stem(), unsigned flags=SNIPPET_BACKGROUND_MODEL|SNIPPET_EXHAUSTIVE, const std::string &hi_start="<b>", const std::string &hi_end="</b>", const std::string &omit="...") const
Generate a snippet.
Definition: omenquire.cc:304
Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const
Read a previously requested document from the database.
Definition: omenquire.cc:732
void operator=(const RSet &rset)
Assignment operator.
Definition: omenquire.cc:77
#define LOGCALL_DTOR(CATEGORY, CLASS)
Definition: debuglog.h:490
Xapian::doccount get_termfreq(const string &tname) const
Definition: omenquire.cc:704
bool operator()(const string &left, const string &right) const
Definition: omenquire.cc:647
~MSet()
Destructor.
Definition: omenquire.cc:169
static const int USE_EXACT_TERMFREQ
Calculate exact term frequencies in get_eset().
Definition: enquire.h:605
A document in the database, possibly plus modifications.
Definition: document.h:43
Class representing a list of search results.
Definition: mset.h:44
bool contains(Xapian::docid did) const
Test if a given document is in the relevance set.
Definition: omenquire.cc:117
ExpandDecider subclass which rejects terms using two ExpandDeciders.
Definition: expanddecider.h:88
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Definition: debuglog.h:488
std::string get_sort_key() const
Return the sort key for the current position.
Definition: omenquire.cc:490
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:938
Convert types to std::string.
Virtual base class for expand decider functor.
Definition: expanddecider.h:37
ByQueryIndexCmp(const tmap_t &tmap_)
Definition: omenquire.cc:646
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount check_at_least, const RSet *omrset, const MatchDecider *mdecider) const
Definition: omenquire.cc:539
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
Definition: database.h:81
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
static Xapian::Stem stemmer
Definition: stemtest.cc:41
virtual Weight * clone() const =0
Clone this object.
A vector-like container of terms which can be iterated.
static const int INCLUDE_QUERY_TERMS
Terms in the query may be returned by get_eset().
Definition: enquire.h:595
TermIterator get_matching_terms(Xapian::docid did) const
Definition: omenquire.cc:659
TermIterator get_matching_terms_begin(Xapian::docid did) const
Get terms which match a given document, by document id.
Definition: omenquire.cc:962
Enquire(const Enquire &other)
Copying is allowed (and is cheap).
Definition: omenquire.cc:758
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: enquire.h:63
Xapian::Enquire::docid_order order
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
Definition: omenquire.cc:262
API for running queries.
Class for calculating ESet term weights.
Definition: expandweight.h:114
Hierarchy of classes which Xapian can throw as exceptions.
Class for iterating over a list of terms.
Definition: termiterator.h:41
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Definition: types.h:72
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
Definition: omenquire.cc:947
const tmap_t & tmap
Definition: omenquire.cc:643
Decide if a Xapian::Error exception should be ignored.
Definition: errorhandler.h:50
int convert_to_percent_internal(double wt) const
Converts a weight to a percentage weight.
Definition: omenquire.cc:332
Xapian::doccount get_uncollapsed_matches_estimated() const
Estimate of the total number of matching documents before collapsing.
Definition: omenquire.cc:276
Definition: pretty.h:45
RangeError indicates an attempt to access outside the bounds of a container.
Definition: error.h:971
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Definition: error.h:241
Xapian::doccount get_collapse_count() const
Return a count of the number of collapses done onto the current key.
Definition: omenquire.cc:480
Xapian::doccount get_firstitem() const
Rank of first item in this MSet.
Definition: omenquire.cc:239
string get_description() const
Return a string describing this object.
Definition: omenquire.cc:129
Collate statistics and calculate the term weights for the ESet.
MatchSpy implementation.
Iterator over a Xapian::MSet.
Definition: mset.h:368
Xapian::docid operator*() const
Get the numeric document id for the current position.
Definition: omenquire.cc:440
#define LOGVALUE(a, b)
Definition: debuglog.h:495
Weight * weight
The weight to use for this query.
Xapian::Weight::Internal class, holding database and term statistics.
void set_sort_by_relevance_then_value(Xapian::valueno sort_key, bool reverse)
Set the sorting to be by relevance then value.
Definition: omenquire.cc:893
Query query
The user's query.
Class to hold statistics for a given collection.
void clear_matchspies()
Remove all the matchspies.
Definition: omenquire.cc:813
void set_time_limit(double time_limit)
Set a time limit for the match.
Definition: omenquire.cc:932
An item resulting from a query.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
~RSet()
Destructor.
Definition: omenquire.cc:87
MSet()
Default constructor.
Definition: omenquire.cc:165
termcount qlen
The query length.
double get_termweight(const std::string &term) const
Get the term weight of a term.
Definition: omenquire.cc:222
Xapian::Internal::opt_intrusive_ptr< KeyMaker > sorter
Define exp10() if not provided by <cmath>
Allow rejection of terms during ESet generation.
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:500
RSet()
Default constructor.
Definition: omenquire.cc:68
void set_sort_by_relevance_then_key(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by relevance, then by keys generated from values.
Definition: omenquire.cc:922
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: enquire.h:168
std::string get_description() const
Return a string describing this object.
Definition: omenquire.cc:969
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
Definition: debuglog.h:489
TermIterator termlist_end(Xapian::docid) const
Corresponding end iterator to termlist_begin().
Definition: database.h:240
Weighting scheme API.
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
map< string, unsigned int > tmap_t
Definition: omenquire.cc:642
Xapian::Internal::intrusive_ptr< Internal > internal
Definition: eset.h:48
Base class for matcher decision functor.
Definition: enquire.h:118
This class stores a list of terms.
void set_sort_by_key_then_relevance(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by keys generated from values, then by relevance for documents with identical keys.
Definition: omenquire.cc:912
double get_weight() const
Get the weight for the current position.
Definition: omenquire.cc:460
void remove_document(Xapian::docid did)
Remove a document from the relevance set.
Definition: omenquire.cc:111
void add_document(Xapian::docid did)
Add a document to the relevance set.
Definition: omenquire.cc:104
Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const
Definition: omenquire.cc:745
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
void request_doc(const Xapian::Internal::MSetItem &item) const
Request a document from the database.
Definition: omenquire.cc:721
Xapian::ESet::Internal class.
~Enquire()
Close the Xapian::Enquire object.
Definition: omenquire.cc:787
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
Round a bounded estimate to an appropriate number of significant figures.
const Xapian::Query & get_query() const
Get the current query.
Definition: omenquire.cc:800
void add_matchspy(MatchSpy *spy)
Add a matchspy.
Definition: omenquire.cc:807
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
This class provides an interface to the information retrieval system for the purpose of searching.
Definition: enquire.h:152
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
std::string get_collapse_key() const
Return the collapse key for the current position.
Definition: omenquire.cc:470
MSetItem comparison functions.
string get_description() const
Return a string describing this object.
Definition: omenquire.cc:403
Xapian::doccount get_uncollapsed_matches_upper_bound() const
Upper bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:283
bool empty() const
Check if this query is Xapian::Query::MatchNothing.
Definition: query.h:524
unsigned valueno
The number for a value slot in a document.
Definition: types.h:108
void fetch_items(Xapian::doccount first, Xapian::doccount last) const
Fetch items specified into the document cache.
Definition: omenquire.cc:377
const Xapian::Enquire::Internal::sort_setting REL
Definition: multimatch.cc:154
This class implements the TradWeight scheme for query expansion.
Definition: expandweight.h:222
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Definition: omenquire.cc:206
Class representing a list of search results.
Definition: eset.h:43
void set_query(const Query &query_, termcount qlen_)
Definition: omenquire.cc:526
Various assertion macros.
double expand_k
The parameter required for TradWeight query expansion.
Xapian::Document get_document() const
Get the Document object for the current position.
Definition: omenquire.cc:450
#define LOGLINE(a, b)
Definition: debuglog.h:494
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Definition: omenquire.cc:819
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
Class representing a query.
Definition: query.h:46
const valueno BAD_VALUENO
Reserved value to indicate "no valueno".
Definition: types.h:125
vector< Xapian::Internal::opt_intrusive_ptr< MatchSpy > > spies
API for working with documents.
A smart pointer that optionally uses intrusive reference counting.
bool empty() const
Test if this R-Set is empty.
Definition: omenquire.cc:98
const TermIterator get_terms_end() const
End iterator for terms in the query object.
Definition: query.h:502
Class for iterating over a list of terms.
void set_collapse_key(Xapian::valueno collapse_key, Xapian::doccount collapse_max=1)
Set the collapse key to use for queries.
Definition: omenquire.cc:848
Internals of enquire system.
docid_order
Ordering of docids.
Definition: enquire.h:326
Xapian::doccount get_termfreq(const std::string &tname) const
Get the number of documents in the database indexed by a given term.
Definition: omdatabase.cc:323
A handle representing a document in a Xapian database.
Definition: document.h:61
Wrapper around standard unique_ptr template.
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags, const ExpandDecider *edecider, double min_wt) const
Definition: omenquire.cc:594
Debug logging macros.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Definition: weight.h:546
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
Definition: debuglog.h:487
This class implements the Bo1 scheme for query expansion.
Definition: expandweight.h:255
void set_sort_by_key(Xapian::KeyMaker *sorter, bool reverse)
Set the sorting to be by key generated from values only.
Definition: omenquire.cc:902
Virtual base class for key making functors.
Definition: keymaker.h:41
A relevance set (R-Set).
Definition: enquire.h:60
UnimplementedError indicates an attempt to use an unimplemented feature.
Definition: error.h:325
Internal(const Internal &)
Copy not allowed.
Xapian::doccount get_uncollapsed_matches_lower_bound() const
Lower bound on the total number of matching documents before collapsing.
Definition: omenquire.cc:269
Abstract base class for weighting schemes.
Definition: weight.h:35