78 return (did - 1) % n_dbs;
84 return (did - 1) / n_dbs + 1;
89 Database::Database(Database&&) =
default;
92 Database::operator=(Database&&) =
default;
103 internal.push_back(newi);
119 Database::~Database()
127 LOGCALL(API,
bool,
"Database::reopen", NO_ARGS);
128 bool maybe_changed =
false;
129 vector<intrusive_ptr<Database::Internal> >::iterator i;
130 for (i =
internal.begin(); i !=
internal.end(); ++i) {
132 maybe_changed =
true;
141 vector<intrusive_ptr<Database::Internal> >::iterator i;
142 for (i =
internal.begin(); i !=
internal.end(); ++i) {
151 if (
this == &database) {
152 LOGLINE(API,
"Database added to itself");
155 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
157 internal.push_back(*i);
162 Database::postlist_begin(
const string &tname)
const
171 if (
internal.size() == 1)
174 if (
rare(
internal.empty()))
177 vector<LeafPostList *> pls;
179 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
180 for (i =
internal.begin(); i !=
internal.end(); ++i) {
181 pls.push_back((*i)->open_post_list(tname));
184 Assert(pls.begin() != pls.end());
186 vector<LeafPostList *>::iterator i;
187 for (i = pls.begin(); i != pls.end(); ++i) {
204 unsigned int multiplier =
internal.size();
205 if (
rare(multiplier == 0))
208 if (multiplier == 1) {
211 tl =
internal[0]->open_term_list(did);
217 tl =
new MultiTermList(
internal[n]->open_term_list(m), *
this, n);
223 Database::allterms_begin(
const std::string & prefix)
const
227 if (
rare(
internal.size() == 0)) {
229 }
else if (
internal.size() == 1) {
230 tl =
internal[0]->open_allterms(prefix);
238 Database::has_positions()
const
240 LOGCALL(API,
bool,
"Database::has_positions", NO_ARGS);
242 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
243 for (i =
internal.begin(); i !=
internal.end(); ++i) {
244 if ((*i)->has_positions())
RETURN(
true);
258 unsigned int multiplier =
internal.size();
259 if (
rare(multiplier == 0))
267 Database::get_doccount()
const
271 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
272 for (i =
internal.begin(); i !=
internal.end(); ++i) {
273 docs += (*i)->get_doccount();
279 Database::get_lastdocid()
const
284 unsigned int multiplier =
internal.size();
287 if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
293 Database::get_avlength()
const
299 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
300 for (i =
internal.begin(); i !=
internal.end(); ++i) {
301 docs += (*i)->get_doccount();
302 totlen += (*i)->get_total_length();
304 LOGLINE(UNKNOWN,
"get_avlength() = " << totlen <<
" / " << docs <<
305 " (from " <<
internal.size() <<
" dbs)");
307 if (docs == 0)
RETURN(0.0);
308 RETURN(totlen /
double(docs));
312 Database::get_total_length()
const
316 for (
auto&&
sub_db :
internal) {
317 total_length +=
sub_db->get_total_length();
323 Database::get_termfreq(
const string & tname)
const
326 if (tname.empty())
RETURN(get_doccount());
329 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
330 for (i =
internal.begin(); i !=
internal.end(); ++i) {
332 (*i)->get_freqs(tname, &sub_tf, NULL);
339 Database::get_collection_freq(
const string & tname)
const
342 if (tname.empty())
RETURN(get_doccount());
345 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
346 for (i =
internal.begin(); i !=
internal.end(); ++i) {
348 (*i)->get_freqs(tname, NULL, &sub_cf);
360 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
361 for (i =
internal.begin(); i !=
internal.end(); ++i) {
362 vf += (*i)->get_value_freq(slot);
370 LOGCALL(API,
string,
"Database::get_value_lower_bound", slot);
375 for (
auto&& subdb :
internal) {
376 string lb = subdb->get_value_lower_bound(slot);
379 if (full_lb.empty() || lb < full_lb)
380 full_lb = std::move(lb);
388 LOGCALL(API, std::string,
"Database::get_value_upper_bound", slot);
391 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
392 for (i =
internal.begin(); i !=
internal.end(); ++i) {
393 std::string ub = (*i)->get_value_upper_bound(slot);
401 Database::get_doclength_lower_bound()
const
408 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
409 for (i =
internal.begin(); i !=
internal.end(); ++i) {
412 if ((*i)->get_total_length() != 0) {
414 if (full_lb == 0 || lb < full_lb) full_lb = lb;
421 Database::get_doclength_upper_bound()
const
426 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
427 for (i =
internal.begin(); i !=
internal.end(); ++i) {
429 if (ub > full_ub) full_ub = ub;
435 Database::get_wdf_upper_bound(
const string & term)
const
438 if (term.empty())
RETURN(0);
441 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
442 for (i =
internal.begin(); i !=
internal.end(); ++i) {
444 if (ub > full_ub) full_ub = ub;
453 if (
internal.size() == 0)
455 if (
internal.size() != 1)
467 unsigned int multiplier =
internal.size();
468 if (
rare(multiplier == 0))
472 RETURN(
internal[n]->get_doclength(m));
481 unsigned int multiplier =
internal.size();
482 if (
rare(multiplier == 0))
486 RETURN(
internal[n]->get_unique_terms(m));
496 unsigned int multiplier =
internal.size();
497 if (
rare(multiplier == 0))
513 unsigned int multiplier =
internal.size();
514 if (
rare(multiplier == 0))
524 Database::term_exists(
const string & tname)
const
526 LOGCALL(API,
bool,
"Database::term_exists", tname);
528 RETURN(get_doccount() != 0);
530 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
531 for (i =
internal.begin(); i !=
internal.end(); ++i) {
532 if ((*i)->term_exists(tname))
RETURN(
true);
538 Database::keep_alive()
541 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
542 for (i =
internal.begin(); i !=
internal.end(); ++i) {
548 Database::get_description()
const
570 memset(vec, 0,
sizeof(vec));
571 vector<unsigned>::const_iterator i;
572 for (i = a.begin(); i != a.end(); ++i) {
575 for (i = b.begin(); i != b.end(); ++i) {
578 unsigned int total = 0;
579 for (
size_t j = 0; j <
VEC_SIZE; ++j) {
580 total += abs(vec[j]);
586 return (total + 1) / 2;
590 Database::get_spelling_suggestion(
const string &word,
591 unsigned max_edit_distance)
const
593 LOGCALL(API,
string,
"Database::get_spelling_suggestion", word | max_edit_distance);
594 if (word.size() <= 1 || max_edit_distance == 0)
return string();
596 max_edit_distance = min(max_edit_distance,
unsigned(word.size() - 1));
598 AutoPtr<TermList> merger;
599 for (
size_t i = 0; i <
internal.size(); ++i) {
600 TermList * tl =
internal[i]->open_spelling_termlist(word);
604 merger.reset(
new OrTermList(merger.release(), tl));
610 if (!merger.get())
RETURN(
string());
617 vector<unsigned> utf32_term;
620 int edist_best = max_edit_distance;
625 if (
rare(ret)) merger.reset(ret);
627 if (
rare(merger->at_end()))
break;
639 if (abs(
long(term.size()) -
long(word.size())) > edist_best * 4) {
648 if (abs(
long(utf32_term.size()) -
long(utf32_word.size()))
660 int(utf32_term.size()),
662 int(utf32_word.size()),
666 if (edist <= edist_best) {
668 for (
size_t j = 0; j <
internal.size(); ++j)
669 freq +=
internal[j]->get_spelling_frequency(term);
680 if (edist < edist_best || freq > freq_best) {
682 "\" edist " << edist <<
" freq " << freq);
689 if (freq_best < freq_exact)
695 Database::spellings_begin()
const
698 AutoPtr<TermList> merger;
699 for (
size_t i = 0; i <
internal.size(); ++i) {
700 TermList * tl =
internal[i]->open_spelling_wordlist();
713 Database::synonyms_begin(
const std::string &term)
const
716 AutoPtr<TermList> merger;
717 for (
size_t i = 0; i <
internal.size(); ++i) {
718 TermList * tl =
internal[i]->open_synonym_termlist(term);
721 merger.reset(
new OrTermList(merger.release(), tl));
731 Database::synonym_keys_begin(
const std::string &prefix)
const
734 AutoPtr<TermList> merger;
735 for (
size_t i = 0; i <
internal.size(); ++i) {
736 TermList * tl =
internal[i]->open_synonym_keylist(prefix);
739 merger.reset(
new OrTermList(merger.release(), tl));
749 Database::get_metadata(
const string & key)
const
751 LOGCALL(API,
string,
"Database::get_metadata", key);
752 if (
rare(key.empty()))
754 if (
internal.empty())
RETURN(std::string());
755 RETURN(
internal[0]->get_metadata(key));
759 Database::metadata_keys_begin(
const std::string &prefix)
const
767 Database::get_uuid()
const
769 LOGCALL(API, std::string,
"Database::get_uuid", NO_ARGS);
771 for (
size_t i = 0; i <
internal.size(); ++i) {
772 string sub_uuid =
internal[i]->get_uuid();
775 if (sub_uuid.empty())
777 if (!uuid.empty()) uuid +=
':';
784 Database::locked()
const
786 LOGCALL(API,
bool,
"Database::locked", NO_ARGS);
787 for (
const auto & subdb :
internal) {
796 Database::get_revision()
const
799 size_t n_dbs =
internal.size();
800 if (
rare(n_dbs != 1)) {
804 "exactly one subdatabase");
806 const string& s =
internal[0]->get_revision_info();
807 const char* p = s.data();
808 const char* end = p + s.size();
812 "supported for chert and glass");
838 LOGCALL_VOID(API,
"WritableDatabase::operator=", other);
851 size_t n_dbs =
internal.size();
852 if (
rare(n_dbs == 0))
854 for (
size_t i = 0; i != n_dbs; ++i)
861 LOGCALL_VOID(API,
"WritableDatabase::begin_transaction", flushed);
862 size_t n_dbs =
internal.size();
863 if (
rare(n_dbs == 0))
865 for (
size_t i = 0; i != n_dbs; ++i)
872 LOGCALL_VOID(API,
"WritableDatabase::commit_transaction", NO_ARGS);
873 size_t n_dbs =
internal.size();
874 if (
rare(n_dbs == 0))
876 for (
size_t i = 0; i != n_dbs; ++i)
883 LOGCALL_VOID(API,
"WritableDatabase::cancel_transaction", NO_ARGS);
884 size_t n_dbs =
internal.size();
885 if (
rare(n_dbs == 0))
887 for (
size_t i = 0; i != n_dbs; ++i)
896 size_t n_dbs =
internal.size();
897 if (
rare(n_dbs == 0))
904 if (
rare(did == 0)) {
905 throw Xapian::DatabaseError(
"Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
910 size_t i =
sub_db(did, n_dbs);
911 internal[i]->replace_document(
sub_docid(did, n_dbs), document);
918 LOGCALL_VOID(API,
"WritableDatabase::delete_document", did);
922 size_t n_dbs =
internal.size();
923 if (
rare(n_dbs == 0))
925 size_t i =
sub_db(did, n_dbs);
926 internal[i]->delete_document(
sub_docid(did, n_dbs));
932 LOGCALL_VOID(API,
"WritableDatabase::delete_document", unique_term);
933 if (unique_term.empty())
935 size_t n_dbs =
internal.size();
936 if (
rare(n_dbs == 0))
938 for (
size_t i = 0; i != n_dbs; ++i)
945 LOGCALL_VOID(API,
"WritableDatabase::replace_document", did | document);
948 size_t n_dbs =
internal.size();
949 if (
rare(n_dbs == 0))
951 size_t i =
sub_db(did, n_dbs);
952 internal[i]->replace_document(
sub_docid(did, n_dbs), document);
960 if (unique_term.empty())
962 size_t n_dbs =
internal.size();
963 if (
rare(n_dbs == 0))
973 if (
rare(did == 0)) {
974 throw Xapian::DatabaseError(
"Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
976 size_t i =
sub_db(did, n_dbs);
981 size_t i =
sub_db(retval, n_dbs);
982 internal[i]->replace_document(
sub_docid(retval, n_dbs), document);
988 internal[i]->delete_document(
sub_docid(did, n_dbs));
998 LOGCALL_VOID(API,
"WritableDatabase::add_spelling", word | freqinc);
999 if (
rare(
internal.empty()))
1002 internal[0]->add_spelling(word, freqinc);
1009 LOGCALL_VOID(API,
"WritableDatabase::remove_spelling", word | freqdec);
1010 size_t n_dbs =
internal.size();
1011 if (
rare(n_dbs == 0))
1013 for (
size_t i = 0; i < n_dbs; ++i) {
1014 internal[i]->remove_spelling(word, freqdec);
1020 const std::string & synonym)
const
1022 LOGCALL_VOID(API,
"WritableDatabase::add_synonym", term | synonym);
1023 if (
rare(
internal.empty()))
1026 internal[0]->add_synonym(term, synonym);
1031 const std::string & synonym)
const
1033 LOGCALL_VOID(API,
"WritableDatabase::remove_synonym", term | synonym);
1034 size_t n_dbs =
internal.size();
1035 if (
rare(n_dbs == 0))
1037 for (
size_t i = 0; i < n_dbs; ++i) {
1038 internal[i]->remove_synonym(term, synonym);
1045 LOGCALL_VOID(API,
"WritableDatabase::clear_synonyms", term);
1046 size_t n_dbs =
internal.size();
1047 if (
rare(n_dbs == 0))
1049 for (
size_t i = 0; i < n_dbs; ++i) {
1050 internal[i]->clear_synonyms(term);
1057 LOGCALL_VOID(API,
"WritableDatabase::set_metadata", key | value);
1058 if (
rare(key.empty()))
1060 if (
rare(
internal.empty()))
1062 internal[0]->set_metadata(key, value);
1069 return "WritableDatabase()";
Abstract base class for iterating all terms in a database.
Wrapper around standard unique_ptr template.
database class declarations
A termlist which ORs two termlists together, adding term frequencies.
Class for merging AllTermsList objects from subdatabases.
Class for merging ValueList objects from subdatabases.
DatabaseError indicates some sort of database related error.
Base class for databases.
This class is used to access a database, or a group of databases.
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
void operator=(const Database &other)
Assignment is allowed.
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
A handle representing a document in a Xapian database.
A smart pointer that uses intrusive reference counting.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
InvalidOperationError indicates the API was used in an invalid way.
Class for iterating over term positions.
Class for iterating over a list of terms.
Abstract base class for termlists.
virtual std::string get_termname() const =0
Return the termname at the current position.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
Class for iterating over a list of terms.
UnimplementedError indicates an attempt to use an unimplemented feature.
An iterator which returns Unicode character values from a UTF-8 encoded string.
Class for iterating over document values.
This class provides read/write access to a database.
void delete_document(Xapian::docid did)
Delete a document from the database.
void begin_transaction(bool flushed=true)
Begin a transaction.
void remove_synonym(const std::string &term, const std::string &synonym) const
Remove a synonym for a term.
void clear_synonyms(const std::string &term) const
Remove all synonyms for a term.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
void commit_transaction()
Complete the transaction currently in progress.
void remove_spelling(const std::string &word, Xapian::termcount freqdec=1) const
Remove a word from the spelling dictionary.
void cancel_transaction()
Abort the transaction currently in progress, discarding the pending modifications made to the database.
virtual ~WritableDatabase()
Destroy this handle on the database.
std::string get_description() const
Return a string describing this object.
void set_metadata(const std::string &key, const std::string &metadata)
Set the user-specified metadata associated with a given key.
WritableDatabase()
Create a WritableDatabase with no subdatabases.
void operator=(const WritableDatabase &other)
Assignment is allowed.
void commit()
Commit any pending modifications made to the database.
void add_spelling(const std::string &word, Xapian::termcount freqinc=1) const
Add a word to the spelling dictionary.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
void add_synonym(const std::string &term, const std::string &synonym) const
Add a synonym for a term.
Constants in the Xapian namespace.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
#define LOGCALL_DTOR(CATEGORY, CLASS)
int edit_distance_unsigned(const unsigned *ptr1, int len1, const unsigned *ptr2, int len2, int max_distance)
Calculate the edit distance between two sequences.
Edit distance calculation algorithm.
Hierarchy of classes which Xapian can throw as exceptions.
Class for merging AllTermsList objects from subdatabases.
Class for merging PostList objects from subdatabases.
C++ class declaration for multiple database access.
Class for merging ValueList objects from subdatabases.
The Xapian namespace contains public interfaces for the Xapian library.
int revision()
Report the revision of the library which the program is linked with.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
const int DOC_ASSUME_VALID
Assume document id is valid.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
double doclength
A normalised document length.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
static int freq_edit_lower_bound(const vector< unsigned > &a, const vector< unsigned > &b)
Define the XAPIAN_NORETURN macro.
Various assertion macros.
size_t sub_docid(Xapian::docid did, size_t n_dbs)
static void docid_zero_invalid()
static void empty_metadata_key()
size_t sub_db(Xapian::docid did, size_t n_dbs)
static void no_subdatabases()
Merge two TermList objects using an OR operation.
Pack types into strings and unpack them again.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Class for iterating over term positions.
Class for iterating over a list of document ids.
Class for iterating over a list of terms.
Unicode and UTF-8 related classes and functions.