xapian-core  1.4.27
simpleexpand.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007-2022 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24 
25 #include <xapian.h>
26 
27 #include <iostream>
28 #include <string>
29 
30 #include <cstdlib> // For exit().
31 #include <cstring>
32 
33 using namespace std;
34 
35 int
36 main(int argc, char **argv)
37 try {
38  // We require at least two command line arguments.
39  if (argc < 3) {
40  int rc = 1;
41  if (argv[1]) {
42  if (strcmp(argv[1], "--version") == 0) {
43  cout << "simpleexpand\n";
44  exit(0);
45  }
46  if (strcmp(argv[1], "--help") == 0) {
47  rc = 0;
48  }
49  }
50  cout << "Usage: " << argv[0] << " PATH_TO_DATABASE QUERY [-- [DOCID...]]\n";
51  exit(rc);
52  }
53 
54  // Open the database for searching.
55  Xapian::Database db(argv[1]);
56 
57  // Start an enquire session.
58  Xapian::Enquire enquire(db);
59 
60  // Combine command line arguments up to "--" with spaces between
61  // them, so that simple queries don't have to be quoted at the shell
62  // level.
63  string query_string(argv[2]);
64  argv += 3;
65  while (*argv && strcmp(*argv, "--") != 0) {
66  query_string += ' ';
67  query_string += *argv++;
68  }
69 
70  // Create an RSet with the listed docids in.
71  Xapian::RSet rset;
72  if (*argv) {
73  while (*++argv) {
74  rset.add_document(atoi(*argv));
75  }
76  }
77 
78  // Parse the query string to produce a Xapian::Query object.
80  Xapian::Stem stemmer("english");
81  qp.set_stemmer(stemmer);
82  qp.set_database(db);
84  Xapian::Query query = qp.parse_query(query_string);
85  cout << "Parsed query is: " << query.get_description() << '\n';
86 
87  // Find the top 10 results for the query.
88  enquire.set_query(query);
89  Xapian::MSet matches = enquire.get_mset(0, 10, &rset);
90 
91  // Display the results.
92  cout << matches.get_matches_estimated() << " results found:\n";
93 
94  for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
95  cout << i.get_rank() + 1 << ": " << i.get_weight() << " docid=" << *i
96  << " [" << i.get_document().get_data() << "]\n\n";
97  }
98 
99  // If no relevant docids were given, invent an RSet containing the top 5
100  // matches (or all the matches if there are less than 5).
101  if (rset.empty()) {
102  int c = 5;
103  Xapian::MSetIterator i = matches.begin();
104  while (c-- && i != matches.end()) {
105  rset.add_document(*i);
106  ++i;
107  }
108  }
109 
110  // Generate an ESet containing terms that the user might want to add to
111  // the query.
112  Xapian::ESet eset = enquire.get_eset(10, rset);
113 
114  // List the terms.
116  for (t = eset.begin(); t != eset.end(); ++t) {
117  cout << *t << ": weight = " << t.get_weight() << '\n';
118  }
119 } catch (const Xapian::Error &e) {
120  cout << e.get_description() << '\n';
121  exit(1);
122 }
This class is used to access a database, or a group of databases.
Definition: database.h:68
Class representing a stemming algorithm.
Definition: stem.h:62
Build a Xapian::Query object from a user query string.
Definition: queryparser.h:778
Class representing a list of search results.
Definition: mset.h:44
STL namespace.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Definition: omenquire.cc:938
static Xapian::Stem stemmer
Definition: stemtest.cc:41
double get_weight() const
Get the weight for the current position.
void set_stemmer(const Xapian::Stem &stemmer)
Set the stemmer.
Definition: queryparser.cc:85
ESetIterator begin() const
Return iterator pointing to the first item in this ESet.
Definition: eset.h:345
ESet get_eset(Xapian::termcount maxitems, const RSet &omrset, int flags=0, const Xapian::ExpandDecider *edecider=0, double min_wt=0.0) const
Get the expand set for the given rset.
Definition: omenquire.cc:947
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
Definition: queryparser.cc:91
Iterator over a Xapian::MSet.
Definition: mset.h:368
Public interfaces for the Xapian library.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Definition: mset.h:624
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Definition: mset.h:629
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
Definition: queryparser.cc:162
Iterator over a Xapian::ESet.
Definition: eset.h:160
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Definition: omenquire.cc:793
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
void add_document(Xapian::docid did)
Add a document to the relevance set.
Definition: omenquire.cc:104
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Definition: api_anydb.cc:63
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Definition: omenquire.cc:253
int main(int argc, char **argv)
Definition: simpleexpand.cc:36
void set_database(const Database &db)
Specify the database being searched.
Definition: queryparser.cc:142
std::string get_description() const
Return a string describing this object.
Definition: query.cc:232
This class provides an interface to the information retrieval system for the purpose of searching...
Definition: enquire.h:152
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:43
Class representing a list of query expansion terms.
Definition: eset.h:43
Class representing a query.
Definition: query.h:46
bool empty() const
Test if this R-Set is empty.
Definition: omenquire.cc:98
ESetIterator end() const
Return iterator pointing to just after the last item in this ESet.
Definition: eset.h:350
A relevance set (R-Set).
Definition: enquire.h:60