36 #define PROG_NAME "quest" 37 #define PROG_DESC "Xapian command line search tool" 40 static const char *
const sw[] = {
41 "a",
"about",
"an",
"and",
"are",
"as",
"at",
46 "i",
"in",
"is",
"it",
48 "that",
"the",
"this",
"to",
49 "was",
"what",
"when",
"where",
"which",
"who",
"why",
"will",
"with" 59 return strcmp(s, s_) < 0;
68 template<
typename T, std::
size_t N>
70 decode(
const T (&table)[N],
const char* s)
72 auto p = lower_bound(begin(table), end(table), s);
73 if (p == end(table) || strcmp(s, p->s) != 0)
159 for (
auto& i : table) {
160 size_t len = strlen(i.s);
161 if (pos < 256) cout <<
',';
162 if (pos + len >= 78) {
175 cout <<
"Usage: " PROG_NAME " [OPTIONS] 'QUERY'\n" 176 "NB: QUERY should be quoted to protect it from the shell.\n\n" 178 " -d, --db=DIRECTORY database to search (multiple databases may\n" 180 " -m, --msize=MSIZE maximum number of matches to return\n" 181 " -c, --check-at-least=HOWMANY minimum number of matches to check\n" 182 " -s, --stemmer=LANG set the stemming language, the default is\n" 183 " 'english' (pass 'none' to disable stemming)\n" 184 " -p, --prefix=PFX:TERMPFX add a prefix\n" 185 " -b, --boolean-prefix=PFX:TERMPFX add a boolean prefix\n" 186 " -f, --flags=FLAG1[,FLAG2]... specify QueryParser flags (default:\n" 187 " default). Valid flags:" 189 " -o, --default-op=OP specify QueryParser default operator\n" 190 " (default: or). Valid operators:" 192 " -w, --weight=SCHEME specify weighting scheme to use\n" 193 " (default: bm25). Valid schemes:" 195 " -F, --freqs show query term frequencies\n" 196 " -h, --help display this help and exit\n" 197 " -v, --version output version information and exit\n";
203 const char *
opts =
"d:m:c:s:p:b:f:o:w:Fhv";
204 static const struct option long_opts[] = {
225 bool have_database =
false;
230 bool flags_set =
false;
231 bool show_termfreqs =
false;
239 unsigned long v = strtoul(
optarg, &p, 10);
241 if (*p || v != msize) {
243 <<
"' passed for msize\n";
250 unsigned long v = strtoul(
optarg, &p, 10);
252 if (*p || v != check_at_least) {
254 <<
"' passed for check_at_least\n";
261 have_database =
true;
267 cerr <<
"Unknown stemming language '" <<
optarg <<
"'.\n" 268 "Available language names are: " 273 case 'b':
case 'p': {
274 const char * colon = strchr(
optarg,
':');
276 cerr << argv[0] <<
": need ':' when setting prefix\n";
280 string termprefix(colon + 1);
291 char * comma = strchr(
optarg,
',');
296 cerr <<
"Unknown flag '" <<
optarg <<
"'\n";
299 flags |= unsigned(flag);
306 cerr <<
"Unknown op '" <<
optarg <<
"'\n";
315 cerr <<
"Unknown weighting scheme '" <<
optarg <<
"'\n";
321 show_termfreqs =
true;
352 if (!correction.empty())
353 cout <<
"Did you mean: " << correction <<
"\n\n";
357 if (!have_database) {
358 cout <<
"No database specified so not running the query.\n";
415 if (show_termfreqs) {
416 cout <<
"Query term frequencies:\n";
420 const string& term = *t;
421 cout <<
" " << mset.
get_termfreq(term) <<
'\t' << term <<
'\n';
427 if (lower_bound == upper_bound) {
428 cout <<
"Exactly " << estimate <<
" matches\n";
430 cout <<
"Between " << lower_bound <<
" and " << upper_bound
431 <<
" matches, best estimate is " << estimate <<
'\n';
438 cout << *i <<
": [" << i.get_weight() <<
"]\n" << data <<
"\n";
442 cout <<
"Couldn't parse query: " << e.
get_msg() <<
'\n';
Support AND, OR, etc even if they aren't in ALLCAPS.
static const tab_entry wt_tab[]
static const char *const sw[]
Wrappers to allow GNU getopt to be used cleanly from C++ code.
Simple implementation of Stopper class - this will suit most users.
void set_default_op(Query::op default_op)
Set the default operator.
This class is used to access a database, or a group of databases.
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
int main(int argc, char **argv)
const TermIterator get_terms_begin() const
Begin iterator for terms in the query object.
Class representing a stemming algorithm.
double weight
The weight of a document or term.
void set_stopper(const Stopper *stop=NULL)
Set the stopper.
std::string get_corrected_query_string() const
Get the spelling-corrected query string.
bool operator<(const char *s_) const
Xapian::doccount get_matches_lower_bound() const
Lower bound on the total number of matching documents.
const std::string & get_msg() const
Message giving details of the error, intended for human consumption.
Build a Xapian::Query object from a user query string.
static int decode(const T(&table)[N], const char *s)
Decode a string to an integer.
Class representing a list of search results.
This class implements the InL2 weighting scheme.
Pick the maximum weight of any subquery.
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, Xapian::doccount checkatleast=0, const RSet *omrset=0, const MatchDecider *mdecider=0) const
Get (a portion of) the match set for the current query.
Indicates a query string can't be parsed.
Produce a query which doesn't use positional information.
Xapian::Weight subclass implementing the PL2+ probabilistic formula.
static Xapian::Stem stemmer
static const tab_entry default_op_tab[]
Enable automatic use of synonyms for single terms and groups of terms.
void set_stemmer(const Xapian::Stem &stemmer)
Set the stemmer.
static const tab_entry flag_tab[]
Xapian::doccount get_matches_upper_bound() const
Upper bound on the total number of matching documents.
This class implements the BB2 weighting scheme.
Xapian::Weight subclass implementing Coordinate Matching.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
Class implementing a "boolean" weighting scheme.
Pick the best N subqueries and combine with OP_OR.
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
Iterator over a Xapian::MSet.
Match only documents where all subqueries match near and in order.
Common string to integer map entry for option decoding.
Public interfaces for the Xapian library.
Enable spelling correction.
static std::string get_available_languages()
Return a list of available languages.
void add_boolean_prefix(const std::string &field, const std::string &prefix, const std::string *grouping=NULL)
Add a boolean term prefix allowing the user to restrict a search with a boolean filter specified in the free text query.
MSetIterator begin() const
Return iterator pointing to the first item in this MSet.
MSetIterator end() const
Return iterator pointing to just after the last item in this MSet.
#define INDENT
The number of spaces to indent by in print_table.
Xapian::Weight subclass implementing the traditional probabilistic formula.
This class implements the DLH weighting scheme, which is a representative scheme of the Divergence from Randomness Framework by Gianni Amati.
This class implements the PL2 weighting scheme.
This class implements the IneB2 weighting scheme.
#define required_argument
Query parse_query(const std::string &query_string, unsigned flags=FLAG_DEFAULT, const std::string &default_prefix=std::string())
Parse a query.
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
void set_query(const Xapian::Query &query, Xapian::termcount qlen=0)
Set the query to run.
Match like OP_OR but weighting as if a single term.
std::string get_description() const
Return a string describing this object.
This class implements the IfB2 weighting scheme.
Match only documents which all subqueries match.
static Xapian::Query query(Xapian::Query::op op, const string &t1=string(), const string &t2=string(), const string &t3=string(), const string &t4=string(), const string &t5=string(), const string &t6=string(), const string &t7=string(), const string &t8=string(), const string &t9=string(), const string &t10=string())
Enable automatic use of synonyms for single terms.
Xapian::doccount get_matches_estimated() const
Estimate of the total number of matching documents.
void set_database(const Database &db)
Specify the database being searched.
Accumulate unstem and stoplist results.
std::string get_description() const
Return a string describing this object.
This class provides an interface to the information retrieval system for the purpose of searching.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Support AND, OR, etc and bracketed subexpressions.
Match only documents where all subqueries match near each other.
This class implements the DPH weighting scheme.
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Match documents which at least one subquery matches.
Allow queries such as 'NOT apples'.
Xapian::doccount get_termfreq(const std::string &term) const
Get the termfreq of a term.
Generate n-grams for scripts without explicit word breaks.
static char print_table(const T &table)
Print string from a string to integer mapping table.
void set_weighting_scheme(const Weight &weight_)
Set the weighting scheme to use for queries.
Class representing a query.
void add_prefix(const std::string &field, const std::string &prefix)
Add a free-text field term prefix.
std::string get_data() const
Get data stored in the document.
Xapian::Weight subclass implementing the Language Model formula.
const TermIterator get_terms_end() const
End iterator for terms in the query object.
Generate n-grams for scripts without explicit word breaks.
Enable synonym operator '~'.
A handle representing a document in a Xapian database.
Xapian::Weight subclass implementing the BM25+ probabilistic formula.
Xapian::Weight subclass implementing the BM25 probabilistic formula.
Xapian::Weight subclass implementing the tf-idf weighting scheme.