78 return (did - 1) % n_dbs;
84 return (did - 1) / n_dbs + 1;
89 Database::Database(Database&&) =
default;
92 Database::operator=(Database&&) =
default;
103 internal.push_back(newi);
119 Database::~Database()
127 LOGCALL(API,
bool,
"Database::reopen", NO_ARGS);
128 bool maybe_changed =
false;
129 vector<intrusive_ptr<Database::Internal> >::iterator i;
130 for (i =
internal.begin(); i !=
internal.end(); ++i) {
132 maybe_changed =
true;
141 vector<intrusive_ptr<Database::Internal> >::iterator i;
142 for (i =
internal.begin(); i !=
internal.end(); ++i) {
151 if (
this == &database) {
152 LOGLINE(API,
"Database added to itself");
155 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
157 internal.push_back(*i);
162 Database::postlist_begin(
const string &tname)
const 171 if (
internal.size() == 1)
174 if (
rare(
internal.empty()))
177 vector<LeafPostList *> pls;
179 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
180 for (i =
internal.begin(); i !=
internal.end(); ++i) {
181 pls.push_back((*i)->open_post_list(tname));
184 Assert(pls.begin() != pls.end());
186 vector<LeafPostList *>::iterator i;
187 for (i = pls.begin(); i != pls.end(); ++i) {
204 unsigned int multiplier =
internal.size();
205 if (
rare(multiplier == 0))
208 if (multiplier == 1) {
211 tl =
internal[0]->open_term_list(did);
217 tl =
new MultiTermList(
internal[n]->open_term_list(m), *
this, n);
223 Database::allterms_begin(
const std::string & prefix)
const 227 if (
rare(
internal.size() == 0)) {
229 }
else if (
internal.size() == 1) {
230 tl =
internal[0]->open_allterms(prefix);
238 Database::has_positions()
const 240 LOGCALL(API,
bool,
"Database::has_positions", NO_ARGS);
242 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
243 for (i =
internal.begin(); i !=
internal.end(); ++i) {
244 if ((*i)->has_positions())
RETURN(
true);
258 unsigned int multiplier =
internal.size();
259 if (
rare(multiplier == 0))
267 Database::get_doccount()
const 271 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
272 for (i =
internal.begin(); i !=
internal.end(); ++i) {
273 docs += (*i)->get_doccount();
279 Database::get_lastdocid()
const 284 unsigned int multiplier =
internal.size();
287 if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
293 Database::get_avlength()
const 299 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
300 for (i =
internal.begin(); i !=
internal.end(); ++i) {
301 docs += (*i)->get_doccount();
302 totlen += (*i)->get_total_length();
304 LOGLINE(UNKNOWN,
"get_avlength() = " << totlen <<
" / " << docs <<
305 " (from " <<
internal.size() <<
" dbs)");
307 if (docs == 0)
RETURN(0.0);
308 RETURN(totlen /
double(docs));
312 Database::get_total_length()
const 316 for (
auto&&
sub_db :
internal) {
317 total_length +=
sub_db->get_total_length();
323 Database::get_termfreq(
const string & tname)
const 326 if (tname.empty())
RETURN(get_doccount());
329 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
330 for (i =
internal.begin(); i !=
internal.end(); ++i) {
332 (*i)->get_freqs(tname, &sub_tf, NULL);
339 Database::get_collection_freq(
const string & tname)
const 342 if (tname.empty())
RETURN(get_doccount());
345 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
346 for (i =
internal.begin(); i !=
internal.end(); ++i) {
348 (*i)->get_freqs(tname, NULL, &sub_cf);
360 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
361 for (i =
internal.begin(); i !=
internal.end(); ++i) {
362 vf += (*i)->get_value_freq(slot);
370 LOGCALL(API,
string,
"Database::get_value_lower_bound", slot);
375 for (
auto&& subdb :
internal) {
376 string lb = subdb->get_value_lower_bound(slot);
379 if (full_lb.empty() || lb < full_lb)
380 full_lb = std::move(lb);
388 LOGCALL(API, std::string,
"Database::get_value_upper_bound", slot);
391 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
392 for (i =
internal.begin(); i !=
internal.end(); ++i) {
393 std::string ub = (*i)->get_value_upper_bound(slot);
401 Database::get_doclength_lower_bound()
const 408 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
409 for (i =
internal.begin(); i !=
internal.end(); ++i) {
412 if ((*i)->get_total_length() != 0) {
414 if (full_lb == 0 || lb < full_lb) full_lb = lb;
421 Database::get_doclength_upper_bound()
const 426 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
427 for (i =
internal.begin(); i !=
internal.end(); ++i) {
429 if (ub > full_ub) full_ub = ub;
435 Database::get_wdf_upper_bound(
const string & term)
const 438 if (term.empty())
RETURN(0);
441 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
442 for (i =
internal.begin(); i !=
internal.end(); ++i) {
444 if (ub > full_ub) full_ub = ub;
453 if (
internal.size() == 0)
455 if (
internal.size() != 1)
467 unsigned int multiplier =
internal.size();
468 if (
rare(multiplier == 0))
472 RETURN(
internal[n]->get_doclength(m));
481 unsigned int multiplier =
internal.size();
482 if (
rare(multiplier == 0))
486 RETURN(
internal[n]->get_unique_terms(m));
496 unsigned int multiplier =
internal.size();
497 if (
rare(multiplier == 0))
513 unsigned int multiplier =
internal.size();
514 if (
rare(multiplier == 0))
524 Database::term_exists(
const string & tname)
const 526 LOGCALL(API,
bool,
"Database::term_exists", tname);
528 RETURN(get_doccount() != 0);
530 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
531 for (i =
internal.begin(); i !=
internal.end(); ++i) {
532 if ((*i)->term_exists(tname))
RETURN(
true);
538 Database::keep_alive()
541 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
542 for (i =
internal.begin(); i !=
internal.end(); ++i) {
548 Database::get_description()
const 570 memset(vec, 0,
sizeof(vec));
571 vector<unsigned>::const_iterator i;
572 for (i = a.begin(); i != a.end(); ++i) {
575 for (i = b.begin(); i != b.end(); ++i) {
578 unsigned int total = 0;
579 for (
size_t j = 0; j <
VEC_SIZE; ++j) {
580 total += abs(vec[j]);
586 return (total + 1) / 2;
591 #define TRIGRAM_SCORE_THRESHOLD 2 594 Database::get_spelling_suggestion(
const string &word,
595 unsigned max_edit_distance)
const 597 LOGCALL(API,
string,
"Database::get_spelling_suggestion", word | max_edit_distance);
598 if (word.size() <= 1 || max_edit_distance == 0)
return string();
600 max_edit_distance = min(max_edit_distance,
unsigned(word.size() - 1));
602 AutoPtr<TermList> merger;
603 for (
size_t i = 0; i <
internal.size(); ++i) {
604 TermList * tl =
internal[i]->open_spelling_termlist(word);
608 merger.reset(
new OrTermList(merger.release(), tl));
614 if (!merger.get())
RETURN(
string());
621 vector<unsigned> utf32_term;
625 int edist_best = max_edit_distance;
630 if (ret) merger.reset(ret);
632 if (merger->at_end())
break;
639 if (score > best) best = score;
647 if (abs(
long(term.size()) -
long(word.size())) > edist_best * 4) {
656 if (abs(
long(utf32_term.size()) -
long(utf32_word.size()))
668 int(utf32_term.size()),
670 int(utf32_word.size()),
674 if (edist <= edist_best) {
676 for (
size_t j = 0; j <
internal.size(); ++j)
677 freq +=
internal[j]->get_spelling_frequency(term);
688 if (edist < edist_best || freq > freq_best) {
690 "\" edist " << edist <<
" freq " << freq);
698 if (freq_best < freq_exact)
704 Database::spellings_begin()
const 707 AutoPtr<TermList> merger;
708 for (
size_t i = 0; i <
internal.size(); ++i) {
709 TermList * tl =
internal[i]->open_spelling_wordlist();
722 Database::synonyms_begin(
const std::string &term)
const 725 AutoPtr<TermList> merger;
726 for (
size_t i = 0; i <
internal.size(); ++i) {
727 TermList * tl =
internal[i]->open_synonym_termlist(term);
730 merger.reset(
new OrTermList(merger.release(), tl));
740 Database::synonym_keys_begin(
const std::string &prefix)
const 743 AutoPtr<TermList> merger;
744 for (
size_t i = 0; i <
internal.size(); ++i) {
745 TermList * tl =
internal[i]->open_synonym_keylist(prefix);
748 merger.reset(
new OrTermList(merger.release(), tl));
758 Database::get_metadata(
const string & key)
const 760 LOGCALL(API,
string,
"Database::get_metadata", key);
761 if (
rare(key.empty()))
763 if (
internal.empty())
RETURN(std::string());
764 RETURN(
internal[0]->get_metadata(key));
768 Database::metadata_keys_begin(
const std::string &prefix)
const 776 Database::get_uuid()
const 778 LOGCALL(API, std::string,
"Database::get_uuid", NO_ARGS);
780 for (
size_t i = 0; i <
internal.size(); ++i) {
781 string sub_uuid =
internal[i]->get_uuid();
784 if (sub_uuid.empty())
786 if (!uuid.empty()) uuid +=
':';
793 Database::locked()
const 795 LOGCALL(API,
bool,
"Database::locked", NO_ARGS);
796 for (
const auto & subdb :
internal) {
805 Database::get_revision()
const 808 size_t n_dbs =
internal.size();
809 if (
rare(n_dbs != 1)) {
813 "exactly one subdatabase");
815 const string& s =
internal[0]->get_revision_info();
816 const char* p = s.data();
817 const char* end = p + s.size();
821 "supported for chert and glass");
847 LOGCALL_VOID(API,
"WritableDatabase::operator=", other);
860 size_t n_dbs =
internal.size();
861 if (
rare(n_dbs == 0))
863 for (
size_t i = 0; i != n_dbs; ++i)
870 LOGCALL_VOID(API,
"WritableDatabase::begin_transaction", flushed);
871 size_t n_dbs =
internal.size();
872 if (
rare(n_dbs == 0))
874 for (
size_t i = 0; i != n_dbs; ++i)
881 LOGCALL_VOID(API,
"WritableDatabase::commit_transaction", NO_ARGS);
882 size_t n_dbs =
internal.size();
883 if (
rare(n_dbs == 0))
885 for (
size_t i = 0; i != n_dbs; ++i)
892 LOGCALL_VOID(API,
"WritableDatabase::cancel_transaction", NO_ARGS);
893 size_t n_dbs =
internal.size();
894 if (
rare(n_dbs == 0))
896 for (
size_t i = 0; i != n_dbs; ++i)
905 size_t n_dbs =
internal.size();
906 if (
rare(n_dbs == 0))
913 if (
rare(did == 0)) {
914 throw Xapian::DatabaseError(
"Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
919 size_t i =
sub_db(did, n_dbs);
920 internal[i]->replace_document(
sub_docid(did, n_dbs), document);
927 LOGCALL_VOID(API,
"WritableDatabase::delete_document", did);
931 size_t n_dbs =
internal.size();
932 if (
rare(n_dbs == 0))
934 size_t i =
sub_db(did, n_dbs);
935 internal[i]->delete_document(
sub_docid(did, n_dbs));
941 LOGCALL_VOID(API,
"WritableDatabase::delete_document", unique_term);
942 if (unique_term.empty())
944 size_t n_dbs =
internal.size();
945 if (
rare(n_dbs == 0))
947 for (
size_t i = 0; i != n_dbs; ++i)
954 LOGCALL_VOID(API,
"WritableDatabase::replace_document", did | document);
957 size_t n_dbs =
internal.size();
958 if (
rare(n_dbs == 0))
960 size_t i =
sub_db(did, n_dbs);
961 internal[i]->replace_document(
sub_docid(did, n_dbs), document);
969 if (unique_term.empty())
971 size_t n_dbs =
internal.size();
972 if (
rare(n_dbs == 0))
982 if (
rare(did == 0)) {
983 throw Xapian::DatabaseError(
"Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
985 size_t i =
sub_db(did, n_dbs);
990 size_t i =
sub_db(retval, n_dbs);
991 internal[i]->replace_document(
sub_docid(retval, n_dbs), document);
997 internal[i]->delete_document(
sub_docid(did, n_dbs));
1007 LOGCALL_VOID(API,
"WritableDatabase::add_spelling", word | freqinc);
1008 if (
rare(
internal.empty()))
1011 internal[0]->add_spelling(word, freqinc);
1018 LOGCALL_VOID(API,
"WritableDatabase::remove_spelling", word | freqdec);
1019 size_t n_dbs =
internal.size();
1020 if (
rare(n_dbs == 0))
1022 for (
size_t i = 0; i < n_dbs; ++i) {
1023 internal[i]->remove_spelling(word, freqdec);
1029 const std::string & synonym)
const 1031 LOGCALL_VOID(API,
"WritableDatabase::add_synonym", term | synonym);
1032 if (
rare(
internal.empty()))
1035 internal[0]->add_synonym(term, synonym);
1040 const std::string & synonym)
const 1042 LOGCALL_VOID(API,
"WritableDatabase::remove_synonym", term | synonym);
1043 size_t n_dbs =
internal.size();
1044 if (
rare(n_dbs == 0))
1046 for (
size_t i = 0; i < n_dbs; ++i) {
1047 internal[i]->remove_synonym(term, synonym);
1054 LOGCALL_VOID(API,
"WritableDatabase::clear_synonyms", term);
1055 size_t n_dbs =
internal.size();
1056 if (
rare(n_dbs == 0))
1058 for (
size_t i = 0; i < n_dbs; ++i) {
1059 internal[i]->clear_synonyms(term);
1066 LOGCALL_VOID(API,
"WritableDatabase::set_metadata", key | value);
1067 if (
rare(key.empty()))
1069 if (
rare(
internal.empty()))
1071 internal[0]->set_metadata(key, value);
1078 return "WritableDatabase()";
static void no_subdatabases()
Unicode and UTF-8 related classes and functions.
The Xapian namespace contains public interfaces for the Xapian library.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Define the XAPIAN_NORETURN macro.
size_t sub_db(Xapian::docid did, size_t n_dbs)
void cancel_transaction()
Abort the transaction currently in progress, discarding the pending modifications made to the database.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
This class is used to access a database, or a group of databases.
void remove_spelling(const std::string &word, Xapian::termcount freqdec=1) const
Remove a word from the spelling dictionary.
InvalidOperationError indicates the API was used in an invalid way.
Merge two TermList objects using an OR operation.
Base class for databases.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
#define TRIGRAM_SCORE_THRESHOLD
Constants in the Xapian namespace.
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
size_t sub_docid(Xapian::docid did, size_t n_dbs)
#define LOGCALL_DTOR(CATEGORY, CLASS)
void begin_transaction(bool flushed=true)
Begin a transaction.
Class for iterating over document values.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Abstract base class for termlists.
WritableDatabase()
Create a WritableDatabase with no subdatabases.
int revision()
Report the revision of the library which the program is linked with.
std::string get_description() const
Return a string describing this object.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
void set_metadata(const std::string &key, const std::string &metadata)
Set the user-specified metadata associated with a given key.
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
const int DOC_ASSUME_VALID
Assume document id is valid.
Class for iterating over term positions.
Class for merging AllTermsList objects from subdatabases.
Hierarchy of classes which Xapian can throw as exceptions.
Class for iterating over a list of terms.
Class for merging AllTermsList objects from subdatabases.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
Class for iterating over a list of terms.
void operator=(const WritableDatabase &other)
Assignment is allowed.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
void remove_synonym(const std::string &term, const std::string &synonym) const
Remove a synonym for a term.
virtual ~WritableDatabase()
Destroy this handle on the database.
Class for merging ValueList objects from subdatabases.
This class provides read/write access to a database.
Class for merging ValueList objects from subdatabases.
static int freq_edit_lower_bound(const vector< unsigned > &a, const vector< unsigned > &b)
double doclength
A normalised document length.
Edit distance calculation algorithm.
void delete_document(Xapian::docid did)
Delete a document from the database.
void commit()
Commit any pending modifications made to the database.
Abstract base class for iterating all terms in a database.
Class for iterating over term positions.
virtual std::string get_termname() const =0
Return the termname at the current position.
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
A termlist which ORs two termlists together, adding term frequencies.
void clear_synonyms(const std::string &term) const
Remove all synonyms for a term.
An iterator which returns Unicode character values from a UTF-8 encoded string.
C++ class declaration for multiple database access.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
static void empty_metadata_key()
Class for merging PostList objects from subdatabases.
Pack types into strings and unpack them again.
unsigned valueno
The number for a value slot in a document.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
void commit_transaction()
Complete the transaction currently in progress.
void operator=(const Database &other)
Assignment is allowed.
Class for iterating over a list of document ids.
Various assertion macros.
void add_synonym(const std::string &term, const std::string &synonym) const
Add a synonym for a term.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
DatabaseError indicates some sort of database related error.
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
int edit_distance_unsigned(const unsigned *ptr1, int len1, const unsigned *ptr2, int len2, int max_distance)
Calculate the edit distance between two sequences.
A smart pointer that uses intrusive reference counting.
Class for iterating over a list of terms.
static void docid_zero_invalid()
A handle representing a document in a Xapian database.
Wrapper around standard unique_ptr template.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
UnimplementedError indicates an attempt to use an unimplemented feature.
void add_spelling(const std::string &word, Xapian::termcount freqinc=1) const
Add a word to the spelling dictionary.
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.