46 #include "../byte_length_strings.h"
47 #include "../prefix_compressed_strings.h"
58 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xc0';
64 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xd0';
70 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xd8';
76 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xe0';
105 const char *
p = key.data();
106 const char * end =
p + key.length();
116 key.assign(
"\0\xd8", 2);
125 const char * d = key.data();
126 const char * e = d + key.size();
145 tag.erase(0, d - tag.data());
148 size_t tmp = d - key.data();
167 if (a->
key > b->
key)
return true;
168 if (a->
key != b->
key)
return false;
175 const string & lbound,
const string & ubound)
183 if (lbound != ubound) value += ubound;
189 GlassTable * out, vector<Xapian::docid>::const_iterator offset,
190 vector<const GlassTable*>::const_iterator b,
191 vector<const GlassTable*>::const_iterator e)
193 priority_queue<PostlistCursor *, vector<PostlistCursor *>,
PostlistCursorGt> pq;
194 for ( ; b != e; ++b, ++offset) {
208 while (!pq.empty()) {
210 const string& key = cur->
key;
213 if (key != last_key) {
215 if (
tags.size() > 1 && compactor) {
216 Assert(!last_key.empty());
220 const string & resolved_tag =
224 if (!resolved_tag.empty())
225 out->
add(last_key, resolved_tag);
227 Assert(!last_key.empty());
244 if (
tags.size() > 1 && compactor) {
245 Assert(!last_key.empty());
246 const string & resolved_tag =
250 if (!resolved_tag.empty())
251 out->
add(last_key, resolved_tag);
253 Assert(!last_key.empty());
262 string lbound, ubound;
264 while (!pq.empty()) {
266 const string& key = cur->
key;
268 if (key != last_key) {
279 const string & tag = cur->
tag;
281 const char *
pos = tag.data();
282 const char * end =
pos + tag.size();
294 size_t len = end -
pos;
306 if (l < lbound) lbound = l;
307 if (u > ubound) ubound = u;
324 while (!pq.empty()) {
326 const string & key = cur->
key;
339 vector<pair<Xapian::docid, string>>
tags;
347 if (cur == NULL || cur->
key != last_key) {
353 string tag =
tags[0].second;
354 tag[0] = (
tags.size() == 1) ?
'1' :
'0';
356 out->
add(last_key, first_tag);
360 const char *
p = last_key.data();
361 const char * end =
p + last_key.size();
366 auto i =
tags.begin();
367 while (++i !=
tags.end()) {
369 tag[0] = (i + 1 ==
tags.end()) ?
'1' :
'0';
374 if (cur == NULL)
break;
407 vector<const GlassTable*>::const_iterator b,
408 vector<const GlassTable*>::const_iterator e)
410 priority_queue<MergeCursor *, vector<MergeCursor *>,
CursorGt> pq;
411 for ( ; b != e; ++b) {
418 while (!pq.empty()) {
423 if (pq.empty() || pq.top()->current_key > key) {
426 bool compressed = cur->
read_tag(
true);
442 vector<PrefixCompressedStringItor *>,
447 vector<MergeCursor *> vec;
448 vec.reserve(pq.size());
454 if (pq.empty() || pq.top()->current_key != key)
break;
461 while (!pqtag.empty()) {
465 if (word != lastword) {
477 vector<MergeCursor *>::const_iterator i;
478 for (i = vec.begin(); i != vec.end(); ++i) {
503 if (pq.empty() || pq.top()->current_key != key)
break;
516 vector<const GlassTable*>::const_iterator b,
517 vector<const GlassTable*>::const_iterator e)
519 priority_queue<MergeCursor *, vector<MergeCursor *>,
CursorGt> pq;
520 for ( ; b != e; ++b) {
527 while (!pq.empty()) {
532 if (pq.empty() || pq.top()->current_key > key) {
535 bool compressed = cur->
read_tag(
true);
551 vector<ByteLengthPrefixedStringItor *>,
553 vector<MergeCursor *> vec;
559 if (pq.empty() || pq.top()->current_key != key)
break;
564 string_view lastword;
565 while (!pqtag.empty()) {
568 string_view word = **it;
569 if (word != lastword) {
582 vector<MergeCursor *>::const_iterator i;
583 for (i = vec.begin(); i != vec.end(); ++i) {
599 vector<const GlassTable *> tmp,
600 vector<Xapian::docid> off)
603 while (tmp.size() > 3) {
604 vector<const GlassTable *> tmpout;
605 tmpout.reserve(tmp.size() / 2);
606 vector<Xapian::docid> newoff;
607 newoff.resize(tmp.size() / 2);
608 for (
unsigned int i = 0, j; i < tmp.size(); i = j) {
610 if (j == tmp.size() - 1) ++j;
612 string dest = tmpdir;
626 root_info.
init(65536, 0);
631 tmp.begin() + i, tmp.begin() + j);
633 for (
unsigned int k = i; k < j; ++k) {
634 unlink(tmp[k]->get_path().c_str());
639 tmpout.push_back(tmptab);
641 tmptab->
commit(1, &root_info);
650 for (
size_t k = 0; k < tmp.size(); ++k) {
651 unlink(tmp[k]->get_path().c_str());
674 const char * d = current_key.data();
675 const char * e = d + current_key.size();
706 const vector<Xapian::docid> & offset)
708 priority_queue<PositionCursor *, vector<PositionCursor *>,
PositionCursorGt> pq;
709 for (
size_t i = 0; i < inputs.size(); ++i) {
719 while (!pq.empty()) {
733 const vector<Xapian::docid> & offset)
735 for (
size_t i = 0; i < inputs.size(); ++i) {
739 if (in->
empty())
continue;
752 string msg =
"Bad key in ";
753 msg += inputs[i]->get_path();
762 key.append(d, e - d);
767 bool compressed = cur.
read_tag(
true);
779 const char * destdir,
781 const vector<const Xapian::Database::Internal*>& sources,
782 const vector<Xapian::docid> & offset,
797 static const table_list tables[] = {
806 const table_list* tables_end = std::end(tables);
818 for (
size_t i = 0; i != sources.size(); ++i) {
820 if (db->has_uncommitted_changes()) {
822 "Can't compact from a WritableDatabase with uncommitted "
823 "changes - either call commit() first, or create a new "
824 "Database object from the filename on disk";
831 (block_size & (block_size - 1)) != 0) {
844 unique_ptr<GlassVersion> version_file_out;
858 version_file_out->create(block_size);
859 for (
size_t i = 0; i != sources.size(); ++i) {
861 version_file_out->merge_stats(db->version_file);
864 string fl_serialised;
868 fl.
pack(fl_serialised);
871 vector<GlassTable *> tabs;
872 tabs.reserve(tables_end - tables);
874 for (
const table_list * t = tables; t < tables_end; ++t) {
891 bool output_will_exist = !t->lazy;
895 bool bad_stat =
false;
899 bool single_file_in =
false;
903 vector<const GlassTable*> inputs;
904 inputs.reserve(sources.size());
905 size_t inputs_present = 0;
906 for (
auto src : sources) {
911 table = &(db->postlist_table);
914 table = &(db->docdata_table);
917 table = &(db->termlist_table);
920 table = &(db->position_table);
923 table = &(db->spelling_table);
926 table = &(db->synonym_table);
933 if (db->single_file()) {
934 if (t->lazy && table->
empty()) {
939 single_file_in =
true;
940 output_will_exist =
true;
946 in_size += db_size / 1024;
947 output_will_exist =
true;
949 }
else if (errno != ENOENT) {
952 output_will_exist =
true;
956 inputs.push_back(table);
961 if (inputs_present != 0) {
963 string m =
str(inputs_present);
965 m +=
str(sources.size());
966 m +=
" inputs present, so suppressing output";
971 output_will_exist =
false;
974 if (!output_will_exist) {
976 compactor->
set_status(t->name,
"doesn't exist");
982 out =
new GlassTable(t->name, fd, version_file_out->get_offset(),
985 out =
new GlassTable(t->name, dest,
false, t->lazy);
988 RootInfo * root_info = version_file_out->root_to_set(t->type);
991 out->
open(FLAGS, version_file_out->get_root(t->type), version_file_out->get_revision());
1000 if (multipass && inputs.size() > 3) {
1005 inputs.begin(), inputs.end());
1027 out->
commit(1, root_info);
1030 if (single_file) fl_serialised = root_info->
get_free_list();
1033 if (!bad_stat && !single_file_in) {
1042 auto old_prev_size = max(prev_size,
1044 prev_size = db_size;
1046 db_size -= old_prev_size;
1048 out_size = db_size / 1024;
1050 bad_stat = (errno != ENOENT);
1055 compactor->
set_status(t->name,
"Done (couldn't stat all the DB files)");
1056 }
else if (single_file_in) {
1058 compactor->
set_status(t->name,
"Done (table sizes unknown for single file DB input)");
1061 if (out_size == in_size) {
1062 status =
"Size unchanged (";
1065 if (out_size < in_size) {
1066 delta = in_size - out_size;
1067 status =
"Reduced by ";
1069 delta = out_size - in_size;
1070 status =
"INCREASED by ";
1073 status +=
str(100 * delta / in_size);
1076 status +=
str(delta);
1078 status +=
str(in_size);
1081 status +=
str(out_size);
1091 if (single_file && prev_size < block_size) {
1092 #ifdef HAVE_FTRUNCATE
1093 if (ftruncate(fd, block_size) < 0) {
1097 const off_t off = block_size - 1;
1098 if (lseek(fd, off, SEEK_SET) != off || write(fd,
"", 1) != 1) {
1105 if (lseek(fd, version_file_out->get_offset(), SEEK_SET) < 0) {
1109 version_file_out->set_last_docid(last_docid);
1110 string tmpfile = version_file_out->write(1, FLAGS);
1111 for (
unsigned j = 0; j != tabs.size(); ++j) {
1115 version_file_out->sync(tmpfile, 1, FLAGS);
1116 for (
unsigned j = 0; j != tabs.size(); ++j) {
1120 if (!single_file) lock.
release();
void release()
Release the lock.
reason lock(bool exclusive, bool wait, std::string &explanation)
Attempt to obtain the lock.
void throw_databaselockerror(FlintLock::reason why, const std::string &db_dir, const std::string &explanation) const
Throw Xapian::DatabaseLockError.
bool operator()(const PositionCursor *a, const PositionCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
PositionCursor(const GlassTable *in, Xapian::docid offset_)
const string & get_tag() const
bool operator()(const PostlistCursor *a, const PostlistCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
PostlistCursor(const GlassTable *in, Xapian::docid offset_)
A cursor pointing to a position in a Btree table, for reading several entries in order,...
string current_key
Current key pointed to by cursor.
bool after_end() const
Determine whether cursor is off the end of table.
bool read_tag(bool keep_compressed=false)
Read the tag from the table and store it in current_tag.
bool next()
Advance to the next key.
void rewind()
Position cursor on the dummy empty key.
string current_tag
Current tag pointed to by cursor.
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree s...
static void compact(Xapian::Compactor *compactor, const char *destdir, int fd, const std::vector< const Xapian::Database::Internal * > &sources, const std::vector< Xapian::docid > &offset, unsigned block_size, Xapian::Compactor::compaction_level compaction, unsigned flags, Xapian::docid last_docid)
void set_first_unused_block(uint4 base)
void pack(std::string &buf)
Class managing a Btree table in a Glass database.
void create_and_open(int flags_, const RootInfo &root_info)
Create a new empty btree structure on disk and open it at the initial revision.
void commit(glass_revision_number_t revision, RootInfo *root_info)
Commit any outstanding changes to the table.
void flush_db()
Flush any outstanding changes to the DB file of the table.
bool empty() const
Return true if there are no entries in the table.
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
bool is_modified() const
Determine whether the object contains uncommitted modifications.
void set_full_compaction(bool parity)
void add(std::string_view key, std::string_view tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
The GlassVersion class manages the revision files.
const std::string & get_free_list() const
void init(unsigned blocksize_, uint4 compress_min_)
unsigned get_blocksize() const
void set_free_list(const std::string &s)
void append(const std::string &word)
Compact a database, or merge and compact several.
virtual void set_status(const std::string &table, const std::string &status)
Update progress.
compaction_level
Compaction level.
@ STANDARD
Don't split items unnecessarily.
virtual std::string resolve_duplicate_metadata(const std::string &key, size_t num_tags, const std::string tags[])
Resolve multiple user metadata entries with the same key.
DatabaseCorruptError indicates database corruption was detected.
DatabaseCreateError indicates a failure to create a database.
DatabaseError indicates some sort of database related error.
InvalidOperationError indicates the API was used in an invalid way.
RangeError indicates an attempt to access outside the bounds of a container.
Compact a database, or merge and compact several.
#define UNSIGNED_OVERFLOW_OK(X)
Constants in the Xapian namespace.
Hierarchy of classes which Xapian can throw as exceptions.
Utility functions for testing files.
std::make_unsigned_t< off_t > file_size_type
Unsigned return type of file_size() function.
file_size_type file_size(const char *path)
Returns the size of a file.
Flint-compatible database locking.
Interface to Btree cursors.
C++ class definition for glass database.
static bool is_user_metadata_key(const string &key)
Definitions, types, etc for use inside glass.
#define GLASS_MIN_BLOCKSIZE
Minimum B-tree block size.
#define GLASS_DEFAULT_BLOCKSIZE
Default B-tree block size.
#define GLASS_TABLE_EXTENSION
Glass table extension.
#define GLASS_MAX_BLOCKSIZE
Maximum B-tree block size.
static void multimerge_postlists(Xapian::Compactor *compactor, GlassTable *out, const char *tmpdir, vector< const GlassTable * > tmp, vector< Xapian::docid > off)
static string encode_valuestats(Xapian::doccount freq, const string &lbound, const string &ubound)
static void merge_docid_keyed(GlassTable *out, const vector< const GlassTable * > &inputs, const vector< Xapian::docid > &offset)
static void merge_synonyms(GlassTable *out, vector< const GlassTable * >::const_iterator b, vector< const GlassTable * >::const_iterator e)
static bool is_doclenchunk_key(const string &key)
static void merge_spellings(GlassTable *out, vector< const GlassTable * >::const_iterator b, vector< const GlassTable * >::const_iterator e)
static bool is_valuestats_key(const string &key)
static void merge_positions(GlassTable *out, const vector< const GlassTable * > &inputs, const vector< Xapian::docid > &offset)
static void merge_postlists(Xapian::Compactor *compactor, GlassTable *out, vector< Xapian::docid >::const_iterator offset, vector< const GlassTable * >::const_iterator b, vector< const GlassTable * >::const_iterator e)
static bool is_valuechunk_key(const string &key)
string str(int value)
Convert int to std::string.
Database open(std::string_view host, unsigned int port, unsigned timeout=10000, unsigned connect_timeout=10000)
Construct a Database object for read-only access to a remote database accessed via a TCP connection.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
const int DB_NO_SYNC
Don't attempt to ensure changes have hit disk.
const int DBCOMPACT_MULTIPASS
If merging more than 3 databases, merge the postlists in multiple passes.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
const int DBCOMPACT_SINGLE_FILE
Produce a single-file database.
const int DB_DANGEROUS
Update the database in-place.
#define AssertRel(A, REL, B)
Pack types into strings and unpack them again.
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
void pack_uint_last(std::string &s, U value)
Append an encoded unsigned integer to a string as the last item.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
void pack_string_preserving_sort(std::string &s, std::string_view value, bool last=false)
Append an encoded std::string to a string, preserving the sort order.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
void pack_string(std::string &s, std::string_view value)
Append an encoded std::string to a string.
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
std::string pack_glass_postlist_key(std::string_view term)
void pack_uint_preserving_sort(std::string &s, U value)
Append an encoded unsigned integer to a string, preserving the sort order.
bool operator()(const GlassCursor *a, const GlassCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
MergeCursor(const GlassTable *in)