78 return (did - 1) % n_dbs;
84 return (did - 1) / n_dbs + 1;
89 Database::Database(Database&&) =
default;
92 Database::operator=(Database&&) =
default;
103 internal.push_back(newi);
119 Database::~Database()
127 LOGCALL(API,
bool,
"Database::reopen", NO_ARGS);
128 bool maybe_changed =
false;
129 vector<intrusive_ptr<Database::Internal> >::iterator i;
130 for (i =
internal.begin(); i !=
internal.end(); ++i) {
132 maybe_changed =
true;
141 vector<intrusive_ptr<Database::Internal> >::iterator i;
142 for (i =
internal.begin(); i !=
internal.end(); ++i) {
151 if (
this == &database) {
152 LOGLINE(API,
"Database added to itself");
155 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
157 internal.push_back(*i);
162 Database::postlist_begin(
const string &tname)
const
171 if (
internal.size() == 1)
174 if (
rare(
internal.empty()))
177 vector<LeafPostList *> pls;
179 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
180 for (i =
internal.begin(); i !=
internal.end(); ++i) {
181 pls.push_back((*i)->open_post_list(tname));
184 Assert(pls.begin() != pls.end());
186 vector<LeafPostList *>::iterator i;
187 for (i = pls.begin(); i != pls.end(); ++i) {
204 unsigned int multiplier =
internal.size();
205 if (
rare(multiplier == 0))
208 if (multiplier == 1) {
211 tl =
internal[0]->open_term_list(did);
217 tl =
new MultiTermList(
internal[n]->open_term_list(m), *
this, n);
223 Database::allterms_begin(
const std::string & prefix)
const
227 if (
rare(
internal.size() == 0)) {
229 }
else if (
internal.size() == 1) {
230 tl =
internal[0]->open_allterms(prefix);
238 Database::has_positions()
const
240 LOGCALL(API,
bool,
"Database::has_positions", NO_ARGS);
242 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
243 for (i =
internal.begin(); i !=
internal.end(); ++i) {
244 if ((*i)->has_positions())
RETURN(
true);
258 unsigned int multiplier =
internal.size();
259 if (
rare(multiplier == 0))
267 Database::get_doccount()
const
271 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
272 for (i =
internal.begin(); i !=
internal.end(); ++i) {
273 docs += (*i)->get_doccount();
279 Database::get_lastdocid()
const
284 unsigned int multiplier =
internal.size();
287 if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
293 Database::get_avlength()
const
299 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
300 for (i =
internal.begin(); i !=
internal.end(); ++i) {
301 docs += (*i)->get_doccount();
302 totlen += (*i)->get_total_length();
304 LOGLINE(UNKNOWN,
"get_avlength() = " << totlen <<
" / " << docs <<
305 " (from " <<
internal.size() <<
" dbs)");
307 if (docs == 0)
RETURN(0.0);
308 RETURN(totlen /
double(docs));
312 Database::get_total_length()
const
316 for (
auto&&
sub_db :
internal) {
317 total_length +=
sub_db->get_total_length();
323 Database::get_termfreq(
const string & tname)
const
326 if (tname.empty())
RETURN(get_doccount());
329 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
330 for (i =
internal.begin(); i !=
internal.end(); ++i) {
332 (*i)->get_freqs(tname, &sub_tf, NULL);
339 Database::get_collection_freq(
const string & tname)
const
342 if (tname.empty())
RETURN(get_doccount());
345 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
346 for (i =
internal.begin(); i !=
internal.end(); ++i) {
348 (*i)->get_freqs(tname, NULL, &sub_cf);
360 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
361 for (i =
internal.begin(); i !=
internal.end(); ++i) {
362 vf += (*i)->get_value_freq(slot);
370 LOGCALL(API,
string,
"Database::get_value_lower_bound", slot);
375 for (
auto&& subdb :
internal) {
376 string lb = subdb->get_value_lower_bound(slot);
379 if (full_lb.empty() || lb < full_lb)
380 full_lb = std::move(lb);
388 LOGCALL(API, std::string,
"Database::get_value_upper_bound", slot);
391 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
392 for (i =
internal.begin(); i !=
internal.end(); ++i) {
393 std::string ub = (*i)->get_value_upper_bound(slot);
401 Database::get_doclength_lower_bound()
const
408 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
409 for (i =
internal.begin(); i !=
internal.end(); ++i) {
412 if ((*i)->get_total_length() != 0) {
414 if (full_lb == 0 || lb < full_lb) full_lb = lb;
421 Database::get_doclength_upper_bound()
const
426 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
427 for (i =
internal.begin(); i !=
internal.end(); ++i) {
429 if (ub > full_ub) full_ub = ub;
435 Database::get_wdf_upper_bound(
const string & term)
const
438 if (term.empty())
RETURN(0);
441 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
442 for (i =
internal.begin(); i !=
internal.end(); ++i) {
444 if (ub > full_ub) full_ub = ub;
453 if (
internal.size() == 0)
455 if (
internal.size() != 1)
467 unsigned int multiplier =
internal.size();
468 if (
rare(multiplier == 0))
472 RETURN(
internal[n]->get_doclength(m));
481 unsigned int multiplier =
internal.size();
482 if (
rare(multiplier == 0))
486 RETURN(
internal[n]->get_unique_terms(m));
496 unsigned int multiplier =
internal.size();
497 if (
rare(multiplier == 0))
513 unsigned int multiplier =
internal.size();
514 if (
rare(multiplier == 0))
524 Database::term_exists(
const string & tname)
const
526 LOGCALL(API,
bool,
"Database::term_exists", tname);
528 RETURN(get_doccount() != 0);
530 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
531 for (i =
internal.begin(); i !=
internal.end(); ++i) {
532 if ((*i)->term_exists(tname))
RETURN(
true);
538 Database::keep_alive()
541 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
542 for (i =
internal.begin(); i !=
internal.end(); ++i) {
548 Database::get_description()
const
570 memset(vec, 0,
sizeof(vec));
571 vector<unsigned>::const_iterator i;
572 for (i = a.begin(); i != a.end(); ++i) {
575 for (i = b.begin(); i != b.end(); ++i) {
578 unsigned int total = 0;
579 for (
size_t j = 0; j <
VEC_SIZE; ++j) {
580 total += abs(vec[j]);
586 return (total + 1) / 2;
591 #define TRIGRAM_SCORE_THRESHOLD 2
594 Database::get_spelling_suggestion(
const string &word,
595 unsigned max_edit_distance)
const
597 LOGCALL(API,
string,
"Database::get_spelling_suggestion", word | max_edit_distance);
598 if (word.size() <= 1 || max_edit_distance == 0)
return string();
600 max_edit_distance = min(max_edit_distance,
unsigned(word.size() - 1));
602 AutoPtr<TermList> merger;
603 for (
size_t i = 0; i <
internal.size(); ++i) {
604 TermList * tl =
internal[i]->open_spelling_termlist(word);
608 merger.reset(
new OrTermList(merger.release(), tl));
614 if (!merger.get())
RETURN(
string());
621 vector<unsigned> utf32_term;
625 int edist_best = max_edit_distance;
630 if (ret) merger.reset(ret);
632 if (merger->at_end())
break;
639 if (score > best) best = score;
647 if (abs(
long(term.size()) -
long(word.size())) > edist_best * 4) {
656 if (abs(
long(utf32_term.size()) -
long(utf32_word.size()))
668 int(utf32_term.size()),
670 int(utf32_word.size()),
674 if (edist <= edist_best) {
676 for (
size_t j = 0; j <
internal.size(); ++j)
677 freq +=
internal[j]->get_spelling_frequency(term);
688 if (edist < edist_best || freq > freq_best) {
690 "\" edist " << edist <<
" freq " << freq);
698 if (freq_best < freq_exact)
704 Database::spellings_begin()
const
707 AutoPtr<TermList> merger;
708 for (
size_t i = 0; i <
internal.size(); ++i) {
709 TermList * tl =
internal[i]->open_spelling_wordlist();
722 Database::synonyms_begin(
const std::string &term)
const
725 AutoPtr<TermList> merger;
726 for (
size_t i = 0; i <
internal.size(); ++i) {
727 TermList * tl =
internal[i]->open_synonym_termlist(term);
730 merger.reset(
new OrTermList(merger.release(), tl));
740 Database::synonym_keys_begin(
const std::string &prefix)
const
743 AutoPtr<TermList> merger;
744 for (
size_t i = 0; i <
internal.size(); ++i) {
745 TermList * tl =
internal[i]->open_synonym_keylist(prefix);
748 merger.reset(
new OrTermList(merger.release(), tl));
758 Database::get_metadata(
const string & key)
const
760 LOGCALL(API,
string,
"Database::get_metadata", key);
761 if (
rare(key.empty()))
763 if (
internal.empty())
RETURN(std::string());
764 RETURN(
internal[0]->get_metadata(key));
768 Database::metadata_keys_begin(
const std::string &prefix)
const
776 Database::get_uuid()
const
778 LOGCALL(API, std::string,
"Database::get_uuid", NO_ARGS);
780 for (
size_t i = 0; i <
internal.size(); ++i) {
781 string sub_uuid =
internal[i]->get_uuid();
784 if (sub_uuid.empty())
786 if (!uuid.empty()) uuid +=
':';
793 Database::locked()
const
795 LOGCALL(API,
bool,
"Database::locked", NO_ARGS);
796 for (
const auto & subdb :
internal) {
805 Database::get_revision()
const
808 size_t n_dbs =
internal.size();
809 if (
rare(n_dbs != 1)) {
813 "exactly one subdatabase");
815 const string& s =
internal[0]->get_revision_info();
816 const char* p = s.data();
817 const char* end = p + s.size();
821 "supported for chert and glass");
847 LOGCALL_VOID(API,
"WritableDatabase::operator=", other);
860 size_t n_dbs =
internal.size();
861 if (
rare(n_dbs == 0))
863 for (
size_t i = 0; i != n_dbs; ++i)
870 LOGCALL_VOID(API,
"WritableDatabase::begin_transaction", flushed);
871 size_t n_dbs =
internal.size();
872 if (
rare(n_dbs == 0))
874 for (
size_t i = 0; i != n_dbs; ++i)
881 LOGCALL_VOID(API,
"WritableDatabase::commit_transaction", NO_ARGS);
882 size_t n_dbs =
internal.size();
883 if (
rare(n_dbs == 0))
885 for (
size_t i = 0; i != n_dbs; ++i)
892 LOGCALL_VOID(API,
"WritableDatabase::cancel_transaction", NO_ARGS);
893 size_t n_dbs =
internal.size();
894 if (
rare(n_dbs == 0))
896 for (
size_t i = 0; i != n_dbs; ++i)
905 size_t n_dbs =
internal.size();
906 if (
rare(n_dbs == 0))
913 if (
rare(did == 0)) {
914 throw Xapian::DatabaseError(
"Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
919 size_t i =
sub_db(did, n_dbs);
920 internal[i]->replace_document(
sub_docid(did, n_dbs), document);
927 LOGCALL_VOID(API,
"WritableDatabase::delete_document", did);
931 size_t n_dbs =
internal.size();
932 if (
rare(n_dbs == 0))
934 size_t i =
sub_db(did, n_dbs);
935 internal[i]->delete_document(
sub_docid(did, n_dbs));
941 LOGCALL_VOID(API,
"WritableDatabase::delete_document", unique_term);
942 if (unique_term.empty())
944 size_t n_dbs =
internal.size();
945 if (
rare(n_dbs == 0))
947 for (
size_t i = 0; i != n_dbs; ++i)
954 LOGCALL_VOID(API,
"WritableDatabase::replace_document", did | document);
957 size_t n_dbs =
internal.size();
958 if (
rare(n_dbs == 0))
960 size_t i =
sub_db(did, n_dbs);
961 internal[i]->replace_document(
sub_docid(did, n_dbs), document);
969 if (unique_term.empty())
971 size_t n_dbs =
internal.size();
972 if (
rare(n_dbs == 0))
982 if (
rare(did == 0)) {
983 throw Xapian::DatabaseError(
"Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
985 size_t i =
sub_db(did, n_dbs);
990 size_t i =
sub_db(retval, n_dbs);
991 internal[i]->replace_document(
sub_docid(retval, n_dbs), document);
997 internal[i]->delete_document(
sub_docid(did, n_dbs));
1007 LOGCALL_VOID(API,
"WritableDatabase::add_spelling", word | freqinc);
1008 if (
rare(
internal.empty()))
1011 internal[0]->add_spelling(word, freqinc);
1018 LOGCALL_VOID(API,
"WritableDatabase::remove_spelling", word | freqdec);
1019 size_t n_dbs =
internal.size();
1020 if (
rare(n_dbs == 0))
1022 for (
size_t i = 0; i < n_dbs; ++i) {
1023 internal[i]->remove_spelling(word, freqdec);
1029 const std::string & synonym)
const
1031 LOGCALL_VOID(API,
"WritableDatabase::add_synonym", term | synonym);
1032 if (
rare(
internal.empty()))
1035 internal[0]->add_synonym(term, synonym);
1040 const std::string & synonym)
const
1042 LOGCALL_VOID(API,
"WritableDatabase::remove_synonym", term | synonym);
1043 size_t n_dbs =
internal.size();
1044 if (
rare(n_dbs == 0))
1046 for (
size_t i = 0; i < n_dbs; ++i) {
1047 internal[i]->remove_synonym(term, synonym);
1054 LOGCALL_VOID(API,
"WritableDatabase::clear_synonyms", term);
1055 size_t n_dbs =
internal.size();
1056 if (
rare(n_dbs == 0))
1058 for (
size_t i = 0; i < n_dbs; ++i) {
1059 internal[i]->clear_synonyms(term);
1066 LOGCALL_VOID(API,
"WritableDatabase::set_metadata", key | value);
1067 if (
rare(key.empty()))
1069 if (
rare(
internal.empty()))
1071 internal[0]->set_metadata(key, value);
1078 return "WritableDatabase()";
Abstract base class for iterating all terms in a database.
Wrapper around standard unique_ptr template.
database class declarations
A termlist which ORs two termlists together, adding term frequencies.
Class for merging AllTermsList objects from subdatabases.
Class for merging ValueList objects from subdatabases.
DatabaseError indicates some sort of database related error.
Base class for databases.
This class is used to access a database, or a group of databases.
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
PostingIterator postlist_end(const std::string &) const
Corresponding end iterator to postlist_begin().
void operator=(const Database &other)
Assignment is allowed.
std::vector< Xapian::Internal::intrusive_ptr< Internal > > internal
Xapian::docid get_lastdocid() const
Get the highest document id which has been used in the database.
A handle representing a document in a Xapian database.
A smart pointer that uses intrusive reference counting.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
InvalidOperationError indicates the API was used in an invalid way.
Class for iterating over term positions.
Class for iterating over a list of terms.
Abstract base class for termlists.
virtual std::string get_termname() const =0
Return the termname at the current position.
virtual Internal * next()=0
Advance the current position to the next term in the termlist.
Class for iterating over a list of terms.
UnimplementedError indicates an attempt to use an unimplemented feature.
An iterator which returns Unicode character values from a UTF-8 encoded string.
Class for iterating over document values.
This class provides read/write access to a database.
void delete_document(Xapian::docid did)
Delete a document from the database.
void begin_transaction(bool flushed=true)
Begin a transaction.
void remove_synonym(const std::string &term, const std::string &synonym) const
Remove a synonym for a term.
void clear_synonyms(const std::string &term) const
Remove all synonyms for a term.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
void commit_transaction()
Complete the transaction currently in progress.
void remove_spelling(const std::string &word, Xapian::termcount freqdec=1) const
Remove a word from the spelling dictionary.
void cancel_transaction()
Abort the transaction currently in progress, discarding the pending modifications made to the database.
virtual ~WritableDatabase()
Destroy this handle on the database.
std::string get_description() const
Return a string describing this object.
void set_metadata(const std::string &key, const std::string &metadata)
Set the user-specified metadata associated with a given key.
WritableDatabase()
Create a WritableDatabase with no subdatabases.
void operator=(const WritableDatabase &other)
Assignment is allowed.
void commit()
Commit any pending modifications made to the database.
void add_spelling(const std::string &word, Xapian::termcount freqinc=1) const
Add a word to the spelling dictionary.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
void add_synonym(const std::string &term, const std::string &synonym) const
Add a synonym for a term.
Constants in the Xapian namespace.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_CTOR(CATEGORY, CLASS, PARAMS)
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
#define LOGCALL_DTOR(CATEGORY, CLASS)
int edit_distance_unsigned(const unsigned *ptr1, int len1, const unsigned *ptr2, int len2, int max_distance)
Calculate the edit distance between two sequences.
Edit distance calculation algorithm.
Hierarchy of classes which Xapian can throw as exceptions.
Class for merging AllTermsList objects from subdatabases.
Class for merging PostList objects from subdatabases.
C++ class declaration for multiple database access.
Class for merging ValueList objects from subdatabases.
The Xapian namespace contains public interfaces for the Xapian library.
int revision()
Report the revision of the library which the program is linked with.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
const int DOC_ASSUME_VALID
Assume document id is valid.
XAPIAN_REVISION_TYPE rev
Revision number of a database.
double doclength
A normalised document length.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
static int freq_edit_lower_bound(const vector< unsigned > &a, const vector< unsigned > &b)
Define the XAPIAN_NORETURN macro.
Various assertion macros.
size_t sub_docid(Xapian::docid did, size_t n_dbs)
static void docid_zero_invalid()
#define TRIGRAM_SCORE_THRESHOLD
static void empty_metadata_key()
size_t sub_db(Xapian::docid did, size_t n_dbs)
static void no_subdatabases()
Merge two TermList objects using an OR operation.
Pack types into strings and unpack them again.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
Class for iterating over term positions.
Class for iterating over a list of document ids.
Class for iterating over a list of terms.
Unicode and UTF-8 related classes and functions.