// Program name; spliced into the "Usage: " line of the help text via
// string-literal concatenation.
36 #define PROG_NAME "quest"
// One-line human-readable description of the program.
37 #define PROG_DESC "Xapian command line search tool"
40 static const char *
const sw[] = {
41 "a",
"about",
"an",
"and",
"are",
"as",
"at",
46 "i",
"in",
"is",
"it",
48 "that",
"the",
"this",
"to",
49 "was",
"what",
"when",
"where",
"which",
"who",
"why",
"will",
"with"
59 return strcmp(s, s_) < 0;
68 template<
typename T, std::
size_t N>
70 decode(
const T (&table)[N],
const char* s)
72 auto p = lower_bound(begin(table), end(table), s);
73 if (p == end(table) || strcmp(s, p->s) != 0)
167 for (
auto& i : table) {
168 size_t len = strlen(i.s);
169 if (pos < 256) cout <<
',';
170 if (pos + len >= 78) {
189 while (p != string::npos) {
190 size_t space = langs.find(
' ', p);
191 size_t len = (space != string::npos) ? space - p : langs.size() - p;
192 if (pos < 256) cout <<
',';
193 if (pos + len >= 78) {
199 cout << string(langs.data() + p, len);
202 if (p != string::npos) ++p;
215 for (
auto& i : table) {
221 cout <<
"Usage: " PROG_NAME " [OPTIONS] 'QUERY'\n"
222 "NB: QUERY should be quoted to protect it from the shell.\n\n"
224 " -d, --db=DIRECTORY database to search (multiple databases may\n"
226 " -m, --msize=MSIZE maximum number of matches to return\n"
227 " -c, --check-at-least=HOWMANY minimum number of matches to check\n"
228 " -s, --stemmer=LANG set the stemming language, the default is\n"
229 " 'english' (pass 'none' to disable stemming).\n"
232 " -S, --stem-strategy=STRATEGY set the stemming strategy (default: some).\n"
235 " -p, --prefix=PFX:TERMPFX add a prefix\n"
236 " -b, --boolean-prefix=PFX:TERMPFX add a boolean prefix\n"
237 " -f, --flags=FLAG1[,FLAG2]... specify QueryParser flags (default:\n"
238 " default). Valid flags:"
240 " -o, --default-op=OP specify QueryParser default operator\n"
241 " (default: or). Valid operators:"
243 " -w, --weight=SCHEME specify weighting scheme to use\n"
244 " (default: bm25). Valid schemes:"
246 " -F, --freqs show query term frequencies\n"
247 " -h, --help display this help and exit\n"
248 " -v, --version output version information and exit\n";
254 const char *
opts =
"d:m:c:s:S:p:b:f:o:w:Fhv";
277 bool have_database =
false;
282 bool flags_set =
false;
283 bool show_termfreqs =
false;
291 unsigned long v = strtoul(
optarg, &p, 10);
293 if (*p || v != msize) {
295 <<
"' passed for msize\n";
302 unsigned long v = strtoul(
optarg, &p, 10);
304 if (*p || v != check_at_least) {
306 <<
"' passed for check_at_least\n";
313 have_database =
true;
319 cerr <<
"Unknown stemming language '" <<
optarg <<
"'.\n"
320 "Available language names are: "
325 case 'b':
case 'p': {
326 const char * colon = strchr(
optarg,
':');
328 cerr << argv[0] <<
": need ':' when setting prefix\n";
332 string termprefix(colon + 1);
343 char * comma = strchr(
optarg,
',');
348 cerr <<
"Unknown flag '" <<
optarg <<
"'\n"
349 "Available flags are:\n";
353 flags |= unsigned(
flag);
360 cerr <<
"Unknown operator '" <<
optarg <<
"'\n"
361 "Available operators are:\n";
371 cerr <<
"Unknown stem strategy '" <<
optarg <<
"'\n"
372 "Available stem strategies are:\n";
384 cerr <<
"Unknown weighting scheme '" <<
optarg <<
"'\n";
390 show_termfreqs =
true;
420 if (!correction.empty())
421 cout <<
"Did you mean: " << correction <<
"\n\n";
425 if (!have_database) {
426 cout <<
"No database specified so not running the query.\n";
483 if (show_termfreqs) {
484 cout <<
"Query term frequencies:\n";
488 const string& term = *t;
489 cout <<
" " << mset.
get_termfreq(term) <<
'\t' << term <<
'\n';
495 if (lower_bound == upper_bound) {
496 cout <<
"Exactly " << estimate <<
" matches\n";
498 cout <<
"Between " << lower_bound <<
" and " << upper_bound
499 <<
" matches, best estimate is " << estimate <<
'\n';
506 cout << *i <<
": [" << i.get_weight() <<
"]\n" << data <<
"\n";
510 cout <<
"Couldn't parse query: " << e.
get_msg() <<
'\n';
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
This class implements the BB2 weighting scheme.
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Class implementing a "boolean" weighting scheme.
Xapian::Weight subclass implementing Coordinate Matching.
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence from Randomness framework.
This class implements the DPH weighting scheme.
This class is used to access a database, or a group of databases.
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
A handle representing a document in a Xapian database.
std::string get_data() const
Get data stored in the document.
This class provides an interface to the information retrieval system for the purpose of searching.
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
All exceptions thrown by Xapian are subclasses of Xapian::Error.
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
std::string get_description() const
Return a string describing this object.
This class implements the IfB2 weighting scheme.
This class implements the InL2 weighting scheme.
This class implements the IneB2 weighting scheme.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Xapian::Weight subclass implementing the Language Model formula.
Iterator over a Xapian::MSet.
Class representing a list of search results.
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
This class implements the PL2 weighting scheme.
Indicates a query string can't be parsed.
Build a Xapian::Query object from a user query string.
void set_database(const Database &db)
Specify the database being searched.
void set_stemmer(const Xapian::Stem &stemmer)
Set the stemmer.
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
void add_boolean_prefix(const std::string &field, const std::string &prefix, const std::string *grouping=NULL)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
void set_default_op(Query::op default_op)
Set the default operator.
void set_stopper(const Stopper *stop=NULL)
Set the stopper.
std::string get_corrected_query_string() const
Get the spelling-corrected query string.
void add_prefix(const std::string &field, const std::string &prefix)
Add a free-text field term prefix.
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
@ FLAG_LOVEHATE
Support + and -.
@ FLAG_AUTO_SYNONYMS
Enable automatic use of synonyms for single terms.
@ FLAG_AUTO_MULTIWORD_SYNONYMS
Enable automatic use of synonyms for single terms and groups of terms.
@ FLAG_NGRAMS
Generate n-grams for scripts without explicit word breaks.
@ FLAG_ACCUMULATE
Accumulate unstem and stoplist results.
@ FLAG_CJK_NGRAM
Generate n-grams for scripts without explicit word breaks.
@ FLAG_DEFAULT
The default flags.
@ FLAG_BOOLEAN_ANY_CASE
Support AND, OR, etc even if they aren't in ALLCAPS.
@ FLAG_WILDCARD
Support wildcards.
@ FLAG_SYNONYM
Enable synonym operator '~'.
@ FLAG_SPELLING_CORRECTION
Enable spelling correction.
@ FLAG_PURE_NOT
Allow queries such as 'NOT apples'.
@ FLAG_NO_POSITIONS
Produce a query which doesn't use positional information.
@ FLAG_PHRASE
Support quoted phrases.
@ FLAG_PARTIAL
Enable partial matching.
@ FLAG_BOOLEAN
Support AND, OR, etc and bracketed subexpressions.
Class representing a query.
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
const TermIterator get_terms_end() const
End iterator for terms in the query object.
std::string get_description() const
Return a string describing this object.
@ OP_MAX
Pick the maximum weight of any subquery.
@ OP_NEAR
Match only documents where all subqueries match near each other.
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Simple implementation of Stopper class - this will suit most users.
Class representing a stemming algorithm.
static std::string get_available_languages()
Return a list of available languages.
Xapian::Weight subclass implementing the tf-idf weighting scheme.
Xapian::Weight subclass implementing the traditional probabilistic formula.
Wrappers to allow GNU getopt to be used cleanly from C++ code.
#define required_argument
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
double weight
The weight of a document or term.
static void list_table(const T &table)
List strings from a string to integer mapping table, one per line.
static char print_stemmers()
Print available stemmers, line wrapped.
static char print_table(const T &table)
Print strings from a string to integer mapping table.
int main(int argc, char **argv)
static const char *const sw[]
#define INDENT
The number of spaces to indent by in print_table.
static const tab_entry flag_tab[]
static const tab_entry wt_tab[]
static int decode(const T(&table)[N], const char *s)
Decode a string to an integer.
static const tab_entry stem_strategy_tab[]
static const tab_entry default_op_tab[]
static Xapian::Stem stemmer
Common string to integer map entry for option decoding.
bool operator<(const char *s_) const
static const struct option long_opts[]
Public interfaces for the Xapian library.