36 #define PROG_NAME "xapian-quest"
37 #define PROG_DESC "Xapian command line search tool"
40 static const char *
const sw[] = {
41 "a",
"about",
"an",
"and",
"are",
"as",
"at",
46 "i",
"in",
"is",
"it",
48 "that",
"the",
"this",
"to",
49 "was",
"what",
"when",
"where",
"which",
"who",
"why",
"will",
"with"
59 return strcmp(s, s_) < 0;
68 template<
typename T, std::
size_t N>
70 decode(
const T (&table)[N],
const char* s)
72 auto p = lower_bound(begin(table), end(table), s);
73 if (
p == end(table) || strcmp(s,
p->s) != 0)
137 for (
auto& i : table) {
138 size_t len = strlen(i.s);
139 if (
pos < 256) cout <<
',';
140 if (
pos + len >= 78) {
159 while (
p != string::npos) {
160 size_t space = langs.find(
' ',
p);
161 size_t len = (space != string::npos) ? space -
p : langs.size() -
p;
162 if (
pos < 256) cout <<
',';
163 if (
pos + len >= 78) {
169 cout << string_view(langs.data() +
p, len);
172 if (
p != string::npos) ++
p;
185 for (
auto& i : table) {
191 cout <<
"Usage: " PROG_NAME " [OPTIONS] 'QUERY'\n"
192 "NB: QUERY should be quoted to protect it from the shell.\n\n"
194 " -d, --db=DIRECTORY database to search (multiple databases may\n"
196 " -m, --msize=MSIZE maximum number of matches to return\n"
197 " -c, --check-at-least=HOWMANY minimum number of matches to check\n"
198 " -s, --stemmer=LANG set the stemming language, the default is\n"
199 " 'english' (pass 'none' to disable stemming).\n"
202 " -S, --stem-strategy=STRATEGY set the stemming strategy (default: some).\n"
205 " -p, --prefix=PFX:TERMPFX add a prefix\n"
206 " -b, --boolean-prefix=PFX:TERMPFX add a boolean prefix\n"
207 " -f, --flags=FLAG1[,FLAG2]... specify QueryParser flags (default:\n"
208 " default). Valid flags:"
210 " -o, --default-op=OP specify QueryParser default operator\n"
211 " (default: or). Valid operators:"
213 " -w, --weight=SCHEME specify weighting scheme to use, which\n"
214 " can include parameters, e.g.\n"
215 " --weight='bm25 1 0 0 1 0' (default: bm25).\n"
218 " -F, --freqs show query term frequencies\n"
219 " -h, --help display this help and exit\n"
220 " -v, --version output version information and exit\n";
226 const char *
opts =
"d:m:c:s:S:p:b:f:o:w:Fhv";
249 bool have_database =
false;
254 bool flags_set =
false;
255 bool show_termfreqs =
false;
256 const char* weighting_scheme =
"bm25";
263 unsigned long v = strtoul(
optarg, &
p, 10);
265 if (*
p || v != msize) {
267 <<
"' passed for msize\n";
274 unsigned long v = strtoul(
optarg, &
p, 10);
276 if (*
p || v != check_at_least) {
278 <<
"' passed for check_at_least\n";
285 have_database =
true;
291 cerr <<
"Unknown stemming language '" <<
optarg <<
"'.\n"
292 "Available language names are: "
297 case 'b':
case 'p': {
298 const char * colon = strchr(
optarg,
':');
300 cerr << argv[0] <<
": need ':' when setting prefix\n";
304 string termprefix(colon + 1);
315 char * comma = strchr(
optarg,
',');
320 cerr <<
"Unknown flag '" <<
optarg <<
"'\n"
321 "Available flags are:\n";
325 flags |= unsigned(
flag);
332 cerr <<
"Unknown operator '" <<
optarg <<
"'\n"
333 "Available operators are:\n";
343 cerr <<
"Unknown stem strategy '" <<
optarg <<
"'\n"
344 "Available stem strategies are:\n";
354 weighting_scheme =
optarg;
357 show_termfreqs =
true;
387 if (!correction.empty())
388 cout <<
"Did you mean: " << correction <<
"\n\n";
392 if (!have_database) {
393 cout <<
"No database specified so not running the query.\n";
407 if (show_termfreqs) {
408 cout <<
"Query term frequencies:\n";
412 const string&
term = *t;
419 if (lower_bound == upper_bound) {
420 cout <<
"Exactly " << estimate <<
" matches\n";
422 cout <<
"Between " << lower_bound <<
" and " << upper_bound
423 <<
" matches, best estimate is " << estimate <<
'\n';
430 cout << *i <<
": [" << i.get_weight() <<
"]\n" << data <<
"\n";
434 cout <<
"Couldn't parse query: " << e.
get_msg() <<
'\n';
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
An indexed database of documents.
void add_database(const Database &other)
Add shards from another Database.
Class representing a document.
std::string get_data() const
Get the document data.
void set_weighting_scheme(const Weight &weight)
Set the weighting scheme to use.
MSet get_mset(doccount first, doccount maxitems, doccount checkatleast=0, const RSet *rset=NULL, const MatchDecider *mdecider=NULL) const
Run the query.
void set_query(const Query &query, termcount query_length=0)
Set the query.
All exceptions thrown by Xapian are subclasses of Xapian::Error.
const std::string & get_msg() const noexcept
Message giving details of the error, intended for human consumption.
std::string get_description() const
Return a string describing this object.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Iterator over a Xapian::MSet.
Class representing a list of search results.
Xapian::doccount get_termfreq(std::string_view term) const
Get the termfreq of a term.
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Indicates a query string can't be parsed.
Build a Xapian::Query object from a user query string.
void set_database(const Database &db)
Specify the database being searched.
void set_stemmer(const Xapian::Stem &stemmer)
Set the stemmer.
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
void add_boolean_prefix(std::string_view field, std::string_view prefix, const std::string *grouping=NULL)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
void add_prefix(std::string_view field, std::string_view prefix)
Add a free-text field term prefix.
Query parse_query(std::string_view query_string, unsigned flags=FLAG_DEFAULT, std::string_view default_prefix={})
Parse a query.
void set_default_op(Query::op default_op)
Set the default operator.
void set_stopper(const Stopper *stop=NULL)
Set the stopper.
std::string get_corrected_query_string() const
Get the spelling-corrected query string.
stem_strategy
Stemming strategies, for use with set_stemming_strategy().
@ FLAG_LOVEHATE
Support + and -.
@ FLAG_AUTO_SYNONYMS
Enable automatic use of synonyms for single terms.
@ FLAG_AUTO_MULTIWORD_SYNONYMS
Enable automatic use of synonyms for single terms and groups of terms.
@ FLAG_NGRAMS
Generate n-grams for scripts without explicit word breaks.
@ FLAG_ACCUMULATE
Accumulate unstem and stoplist results.
@ FLAG_CJK_NGRAM
Generate n-grams for scripts without explicit word breaks.
@ FLAG_DEFAULT
The default flags.
@ FLAG_WILDCARD_GLOB
Enable glob-style wildcarding.
@ FLAG_NO_PROPER_NOUN_HEURISTIC
Turn off special handling of capitalised words.
@ FLAG_FUZZY
Support fuzzy matching.
@ FLAG_WORD_BREAKS
Find word breaks for text in scripts without explicit word breaks.
@ FLAG_BOOLEAN_ANY_CASE
Support AND, OR, etc even if they aren't in ALLCAPS.
@ FLAG_WILDCARD
Support wildcards.
@ FLAG_SYNONYM
Enable synonym operator '~'.
@ FLAG_SPELLING_CORRECTION
Enable spelling correction.
@ FLAG_WILDCARD_MULTI
Support extended wildcard '*'.
@ FLAG_WILDCARD_SINGLE
Support extended wildcard '?'.
@ FLAG_PURE_NOT
Allow queries such as 'NOT apples'.
@ FLAG_NO_POSITIONS
Produce a query which doesn't use positional information.
@ FLAG_PHRASE
Support quoted phrases.
@ FLAG_PARTIAL
Enable partial matching.
@ FLAG_BOOLEAN
Support AND, OR, etc and bracketed subexpressions.
Class representing a query.
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
std::string get_description() const
Return a string describing this object.
const TermIterator get_terms_end() const noexcept
End iterator for terms in the query object.
@ OP_MAX
Pick the maximum weight of any subquery.
@ OP_NEAR
Match only documents where all subqueries match near each other.
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Simple implementation of Stopper class - this will suit most users.
Class representing a stemming algorithm.
static std::string get_available_languages()
Return a list of available languages.
Abstract base class for weighting schemes.
static const Weight * create(const std::string &scheme, const Registry &reg=Registry())
Return the appropriate weighting scheme object.
Wrappers to allow GNU getopt to be used cleanly from C++ code.
#define required_argument
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
static Xapian::Stem stemmer
Various handy string-related helpers.
Common string to integer map entry for option decoding.
bool operator<(const char *s_) const
static const struct option long_opts[]
static void list_table(const T &table)
List strings from a string to integer mapping table, one per line.
static char print_stemmers()
Print available stemmers, line wrapped.
static char print_table(const T &table)
Print strings from a string to integer mapping table.
int main(int argc, char **argv)
static const char *const sw[]
#define INDENT
The number of spaces to indent by in print_table.
static const tab_entry flag_tab[]
static int decode(const T(&table)[N], const char *s)
Decode a string to an integer.
static const tab_entry stem_strategy_tab[]
static const tab_entry default_op_tab[]
Public interfaces for the Xapian library.