36 #define PROG_NAME "quest" 37 #define PROG_DESC "Xapian command line search tool" 40 static const char *
const sw[] = {
41 "a",
"about",
"an",
"and",
"are",
"as",
"at",
46 "i",
"in",
"is",
"it",
48 "that",
"the",
"this",
"to",
49 "was",
"what",
"when",
"where",
"which",
"who",
"why",
"will",
"with" 59 return strcmp(s, s_) < 0;
68 template<
typename T, std::
size_t N>
70 decode(
const T (&table)[N],
const char* s)
72 auto p = lower_bound(begin(table), end(table), s);
73 if (p == end(table) || strcmp(s, p->s) != 0)
158 for (
auto& i : table) {
159 size_t len = strlen(i.s);
160 if (pos < 256) cout <<
',';
161 if (pos + len >= 78) {
174 cout <<
"Usage: " PROG_NAME " [OPTIONS] 'QUERY'\n" 175 "NB: QUERY should be quoted to protect it from the shell.\n\n" 177 " -d, --db=DIRECTORY database to search (multiple databases may\n" 179 " -m, --msize=MSIZE maximum number of matches to return\n" 180 " -c, --check-at-least=HOWMANY minimum number of matches to check\n" 181 " -s, --stemmer=LANG set the stemming language, the default is\n" 182 " 'english' (pass 'none' to disable stemming)\n" 183 " -p, --prefix=PFX:TERMPFX add a prefix\n" 184 " -b, --boolean-prefix=PFX:TERMPFX add a boolean prefix\n" 185 " -f, --flags=FLAG1[,FLAG2]... specify QueryParser flags (default:\n" 186 " default). Valid flags:" 188 " -o, --default-op=OP specify QueryParser default operator\n" 189 " (default: or). Valid operators:" 191 " -w, --weight=SCHEME specify weighting scheme to use\n" 192 " (default: bm25). Valid schemes:" 194 " -F, --freqs show query term frequencies\n" 195 " -h, --help display this help and exit\n" 196 " -v, --version output version information and exit\n";
202 const char *
opts =
"d:m:c:s:p:b:f:o:w:Fhv";
203 static const struct option long_opts[] = {
224 bool have_database =
false;
229 bool flags_set =
false;
230 bool show_termfreqs =
false;
238 unsigned long v = strtoul(
optarg, &p, 10);
240 if (*p || v != msize) {
242 <<
"' passed for msize\n";
249 unsigned long v = strtoul(
optarg, &p, 10);
251 if (*p || v != check_at_least) {
253 <<
"' passed for check_at_least\n";
260 have_database =
true;
266 cerr <<
"Unknown stemming language '" <<
optarg <<
"'.\n" 267 "Available language names are: " 272 case 'b':
case 'p': {
273 const char * colon = strchr(
optarg,
':');
275 cerr << argv[0] <<
": need ':' when setting prefix\n";
279 string termprefix(colon + 1);
290 char * comma = strchr(
optarg,
',');
295 cerr <<
"Unknown flag '" <<
optarg <<
"'\n";
298 flags |= unsigned(flag);
305 cerr <<
"Unknown op '" <<
optarg <<
"'\n";
314 cerr <<
"Unknown weighting scheme '" <<
optarg <<
"'\n";
320 show_termfreqs =
true;
351 if (!correction.empty())
352 cout <<
"Did you mean: " << correction <<
"\n\n";
356 if (!have_database) {
357 cout <<
"No database specified so not running the query.\n";
414 if (show_termfreqs) {
415 cout <<
"Query term frequencies:\n";
419 const string& term = *t;
420 cout <<
" " << mset.
get_termfreq(term) <<
'\t' << term <<
'\n';
426 if (lower_bound == upper_bound) {
427 cout <<
"Exactly " << estimate <<
" matches\n";
429 cout <<
"Between " << lower_bound <<
" and " << upper_bound
430 <<
" matches, best estimate is " << estimate <<
'\n';
437 cout << *i <<
": [" << i.get_weight() <<
"]\n" << data <<
"\n";
441 cout <<
"Couldn't parse query: " << e.
get_msg() <<
'\n';
Support AND, OR, etc even if they aren't in ALLCAPS.
static const tab_entry wt_tab[]
static const char *const sw[]
Wrappers to allow GNU getopt to be used cleanly from C++ code.
Simple implementation of Stopper class - this will suit most users.
void set_default_op(Query::op default_op)
Set the default operator.
This class is used to access a database, or a group of databases.
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
int main(int argc, char **argv)
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
Class representing a stemming algorithm.
double weight
The weight of a document or term.
void set_stopper(const Stopper *stop=NULL)
Set the stopper.
std::string get_corrected_query_string() const
Get the spelling-corrected query string.
bool operator<(const char *s_) const
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
Build a Xapian::Query object from a user query string.
static int decode(const T(&table)[N], const char *s)
Decode a string to an integer.
Class representing a list of search results.
This class implements the InL2 weighting scheme.
Pick the maximum weight of any subquery.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Indicates a query string can't be parsed.
Produce a query which doesn't use positional information.
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
static Xapian::Stem stemmer
static const tab_entry default_op_tab[]
Enable automatic use of synonyms for single terms and groups of terms.
void set_stemmer(const Xapian::Stem &stemmer)
Set the stemmer.
static const tab_entry flag_tab[]
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
This class implements the BB2 weighting scheme.
Xapian::Weight subclass implementing Coordinate Matching.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Class implementing a "boolean" weighting scheme.
Pick the best N subqueries and combine with OP_OR.
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
Iterator over a Xapian::MSet.
Match only documents where all subqueries match near and in order.
Common string to integer map entry for option decoding.
Public interfaces for the Xapian library.
Enable spelling correction.
static std::string get_available_languages()
Return a list of available languages.
void add_boolean_prefix(const std::string &field, const std::string &prefix, const std::string *grouping=NULL)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
#define INDENT
The number of spaces to indent by in print_table.
Xapian::Weight subclass implementing the traditional probabilistic formula.
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence from Randomness Framework by Gianni Amati.
This class implements the PL2 weighting scheme.
This class implements the IneB2 weighting scheme.
#define required_argument
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Match like OP_OR but weighting as if a single term.
std::string get_description() const
Return a string describing this object.
This class implements the IfB2 weighting scheme.
Match only documents which all subqueries match.
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Enable automatic use of synonyms for single terms.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
void set_database(const Database &db)
Specify the database being searched.
Accumulate unstem and stoplist results.
std::string get_description() const
Return a string describing this object.
This class provides an interface to the information retrieval system for the purpose of searching.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Support AND, OR, etc and bracketed subexpressions.
Match only documents where all subqueries match near each other.
This class implements the DPH weighting scheme.
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Match documents which at least one subquery matches.
Allow queries such as 'NOT apples'.
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Enable generation of n-grams from CJK text.
static char print_table(const T &table)
Print string from a string to integer mapping table.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Class representing a query.
void add_prefix(const std::string &field, const std::string &prefix)
Add a free-text field term prefix.
std::string get_data() const
Get data stored in the document.
Xapian::Weight subclass implementing the Language Model formula.
const TermIterator get_terms_end() const
End iterator for terms in the query object.
Enable synonym operator '~'.
A handle representing a document in a Xapian database.
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Xapian::Weight subclass implementing the tf-idf weighting scheme.