36 #define PROG_NAME "quest"
37 #define PROG_DESC "Xapian command line search tool"
40 static const char *
const sw[] = {
41 "a",
"about",
"an",
"and",
"are",
"as",
"at",
46 "i",
"in",
"is",
"it",
48 "that",
"the",
"this",
"to",
49 "was",
"what",
"when",
"where",
"which",
"who",
"why",
"will",
"with"
59 return strcmp(s, s_) < 0;
68 template<
typename T, std::
size_t N>
70 decode(
const T (&table)[N],
const char* s)
72 auto p = lower_bound(begin(table), end(table), s);
73 if (p == end(table) || strcmp(s, p->s) != 0)
159 for (
auto& i : table) {
160 size_t len = strlen(i.s);
161 if (pos < 256) cout <<
',';
162 if (pos + len >= 78) {
175 cout <<
"Usage: " PROG_NAME " [OPTIONS] 'QUERY'\n"
176 "NB: QUERY should be quoted to protect it from the shell.\n\n"
178 " -d, --db=DIRECTORY database to search (multiple databases may\n"
180 " -m, --msize=MSIZE maximum number of matches to return\n"
181 " -c, --check-at-least=HOWMANY minimum number of matches to check\n"
182 " -s, --stemmer=LANG set the stemming language, the default is\n"
183 " 'english' (pass 'none' to disable stemming)\n"
184 " -p, --prefix=PFX:TERMPFX add a prefix\n"
185 " -b, --boolean-prefix=PFX:TERMPFX add a boolean prefix\n"
186 " -f, --flags=FLAG1[,FLAG2]... specify QueryParser flags (default:\n"
187 " default). Valid flags:"
189 " -o, --default-op=OP specify QueryParser default operator\n"
190 " (default: or). Valid operators:"
192 " -w, --weight=SCHEME specify weighting scheme to use\n"
193 " (default: bm25). Valid schemes:"
195 " -F, --freqs show query term frequencies\n"
196 " -h, --help display this help and exit\n"
197 " -v, --version output version information and exit\n";
203 const char *
opts =
"d:m:c:s:p:b:f:o:w:Fhv";
225 bool have_database =
false;
230 bool flags_set =
false;
231 bool show_termfreqs =
false;
239 unsigned long v = strtoul(
optarg, &p, 10);
241 if (*p || v != msize) {
243 <<
"' passed for msize\n";
250 unsigned long v = strtoul(
optarg, &p, 10);
252 if (*p || v != check_at_least) {
254 <<
"' passed for check_at_least\n";
261 have_database =
true;
267 cerr <<
"Unknown stemming language '" <<
optarg <<
"'.\n"
268 "Available language names are: "
273 case 'b':
case 'p': {
274 const char * colon = strchr(
optarg,
':');
276 cerr << argv[0] <<
": need ':' when setting prefix\n";
280 string termprefix(colon + 1);
291 char * comma = strchr(
optarg,
',');
296 cerr <<
"Unknown flag '" <<
optarg <<
"'\n";
299 flags |= unsigned(
flag);
306 cerr <<
"Unknown op '" <<
optarg <<
"'\n";
315 cerr <<
"Unknown weighting scheme '" <<
optarg <<
"'\n";
321 show_termfreqs =
true;
352 if (!correction.empty())
353 cout <<
"Did you mean: " << correction <<
"\n\n";
357 if (!have_database) {
358 cout <<
"No database specified so not running the query.\n";
415 if (show_termfreqs) {
416 cout <<
"Query term frequencies:\n";
420 const string& term = *t;
421 cout <<
" " << mset.
get_termfreq(term) <<
'\t' << term <<
'\n';
427 if (lower_bound == upper_bound) {
428 cout <<
"Exactly " << estimate <<
" matches\n";
430 cout <<
"Between " << lower_bound <<
" and " << upper_bound
431 <<
" matches, best estimate is " << estimate <<
'\n';
438 cout << *i <<
": [" << i.get_weight() <<
"]\n" << data <<
"\n";
442 cout <<
"Couldn't parse query: " << e.
get_msg() <<
'\n';
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
This class implements the BB2 weighting scheme.
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Class implementing a "boolean" weighting scheme.
Xapian::Weight subclass implementing Coordinate Matching.
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence from Randomness framework.
This class implements the DPH weighting scheme.
This class is used to access a database, or a group of databases.
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
A handle representing a document in a Xapian database.
std::string get_data() const
Get data stored in the document.
This class provides an interface to the information retrieval system for the purpose of searching.
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
All exceptions thrown by Xapian are subclasses of Xapian::Error.
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
std::string get_description() const
Return a string describing this object.
This class implements the IfB2 weighting scheme.
This class implements the InL2 weighting scheme.
This class implements the IneB2 weighting scheme.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Xapian::Weight subclass implementing the Language Model formula.
Iterator over a Xapian::MSet.
Class representing a list of search results.
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
This class implements the PL2 weighting scheme.
Indicates a query string can't be parsed.
Build a Xapian::Query object from a user query string.
void set_database(const Database &db)
Specify the database being searched.
void set_stemmer(const Xapian::Stem &stemmer)
Set the stemmer.
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
void add_boolean_prefix(const std::string &field, const std::string &prefix, const std::string *grouping=NULL)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
void set_default_op(Query::op default_op)
Set the default operator.
void set_stopper(const Stopper *stop=NULL)
Set the stopper.
std::string get_corrected_query_string() const
Get the spelling-corrected query string.
void add_prefix(const std::string &field, const std::string &prefix)
Add a free-text field term prefix.
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
@ FLAG_LOVEHATE
Support + and -.
@ FLAG_AUTO_SYNONYMS
Enable automatic use of synonyms for single terms.
@ FLAG_AUTO_MULTIWORD_SYNONYMS
Enable automatic use of synonyms for single terms and groups of terms.
@ FLAG_NGRAMS
Generate n-grams for scripts without explicit word breaks.
@ FLAG_ACCUMULATE
Accumulate unstem and stoplist results.
@ FLAG_CJK_NGRAM
Generate n-grams for scripts without explicit word breaks.
@ FLAG_DEFAULT
The default flags.
@ FLAG_BOOLEAN_ANY_CASE
Support AND, OR, etc even if they aren't in ALLCAPS.
@ FLAG_WILDCARD
Support wildcards.
@ FLAG_SYNONYM
Enable synonym operator '~'.
@ FLAG_SPELLING_CORRECTION
Enable spelling correction.
@ FLAG_PURE_NOT
Allow queries such as 'NOT apples'.
@ FLAG_NO_POSITIONS
Produce a query which doesn't use positional information.
@ FLAG_PHRASE
Support quoted phrases.
@ FLAG_PARTIAL
Enable partial matching.
@ FLAG_BOOLEAN
Support AND, OR, etc and bracketed subexpressions.
Class representing a query.
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
const TermIterator get_terms_end() const
End iterator for terms in the query object.
std::string get_description() const
Return a string describing this object.
@ OP_MAX
Pick the maximum weight of any subquery.
@ OP_NEAR
Match only documents where all subqueries match near each other.
@ OP_ELITE_SET
Pick the best N subqueries and combine with OP_OR.
@ OP_AND
Match only documents which all subqueries match.
@ OP_OR
Match documents which at least one subquery matches.
@ OP_PHRASE
Match only documents where all subqueries match near and in order.
@ OP_SYNONYM
Match like OP_OR but weighting as if a single term.
Simple implementation of Stopper class - this will suit most users.
Class representing a stemming algorithm.
static std::string get_available_languages()
Return a list of available languages.
Xapian::Weight subclass implementing the tf-idf weighting scheme.
Xapian::Weight subclass implementing the traditional probabilistic formula.
Wrappers to allow GNU getopt to be used cleanly from C++ code.
#define required_argument
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
double weight
The weight of a document or term.
static char print_table(const T &table)
Print the strings from a string-to-integer mapping table.
int main(int argc, char **argv)
static const char *const sw[]
#define INDENT
The number of spaces to indent by in print_table.
static const tab_entry flag_tab[]
static const tab_entry wt_tab[]
static int decode(const T(&table)[N], const char *s)
Decode a string to an integer.
static const tab_entry default_op_tab[]
static Xapian::Stem stemmer
Common string to integer map entry for option decoding.
bool operator<(const char *s_) const
static const struct option long_opts[]
Public interfaces for the Xapian library.