46 #include "../byte_length_strings.h" 47 #include "../prefix_compressed_strings.h" 58 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xc0';
64 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xd0';
70 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xd8';
76 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xe0';
105 const char * p = key.data();
106 const char * end = p + key.length();
116 key.assign(
"\0\xd8", 2);
125 const char * d = key.data();
126 const char * e = d + key.size();
145 tag.erase(0, d - tag.data());
148 size_t tmp = d - key.data();
167 if (a->
key > b->
key)
return true;
168 if (a->
key != b->
key)
return false;
175 const string & lbound,
const string & ubound)
183 if (lbound != ubound) value += ubound;
189 GlassTable * out, vector<Xapian::docid>::const_iterator offset,
190 vector<GlassTable*>::const_iterator b,
191 vector<GlassTable*>::const_iterator e)
193 priority_queue<PostlistCursor *, vector<PostlistCursor *>,
PostlistCursorGt> pq;
194 for ( ; b != e; ++b, ++offset) {
208 while (!pq.empty()) {
210 const string& key = cur->
key;
213 if (key != last_key) {
215 if (tags.size() > 1 && compactor) {
216 Assert(!last_key.empty());
220 const string & resolved_tag =
224 if (!resolved_tag.empty())
225 out->
add(last_key, resolved_tag);
227 Assert(!last_key.empty());
228 out->
add(last_key, tags[0]);
234 tags.push_back(cur->
tag);
244 if (tags.size() > 1 && compactor) {
245 Assert(!last_key.empty());
246 const string & resolved_tag =
250 if (!resolved_tag.empty())
251 out->
add(last_key, resolved_tag);
253 Assert(!last_key.empty());
254 out->
add(last_key, tags[0]);
262 string lbound, ubound;
264 while (!pq.empty()) {
266 const string& key = cur->
key;
268 if (key != last_key) {
279 const string & tag = cur->
tag;
281 const char * pos = tag.data();
282 const char * end = pos + tag.size();
294 size_t len = end - pos;
306 if (l < lbound) lbound = l;
307 if (u > ubound) ubound = u;
324 while (!pq.empty()) {
326 const string & key = cur->
key;
339 vector<pair<Xapian::docid, string>>
tags;
347 if (cur == NULL || cur->
key != last_key) {
353 string tag = tags[0].second;
354 tag[0] = (tags.size() == 1) ?
'1' :
'0';
356 out->
add(last_key, first_tag);
360 const char * p = last_key.data();
361 const char * end = p + last_key.size();
366 auto i = tags.begin();
367 while (++i != tags.end()) {
369 tag[0] = (i + 1 == tags.end()) ?
'1' :
'0';
374 if (cur == NULL)
break;
380 tags.push_back(make_pair(cur->
firstdid, cur->
tag));
391 find_entry(
string());
407 vector<GlassTable*>::const_iterator b,
408 vector<GlassTable*>::const_iterator e)
410 priority_queue<MergeCursor *, vector<MergeCursor *>,
CursorGt> pq;
411 for ( ; b != e; ++b) {
418 while (!pq.empty()) {
423 if (pq.empty() || pq.top()->current_key > key) {
426 bool compressed = cur->
read_tag(
true);
442 vector<PrefixCompressedStringItor *>,
447 vector<MergeCursor *> vec;
448 vec.reserve(pq.size());
452 pqtag.push(
new PrefixCompressedStringItor(cur->
current_tag));
454 if (pq.empty() || pq.top()->current_key != key)
break;
461 while (!pqtag.empty()) {
462 PrefixCompressedStringItor * it = pqtag.top();
465 if (word != lastword) {
477 vector<MergeCursor *>::const_iterator i;
478 for (i = vec.begin(); i != vec.end(); ++i) {
503 if (pq.empty() || pq.top()->current_key != key)
break;
516 vector<GlassTable*>::const_iterator b,
517 vector<GlassTable*>::const_iterator e)
519 priority_queue<MergeCursor *, vector<MergeCursor *>,
CursorGt> pq;
520 for ( ; b != e; ++b) {
527 while (!pq.empty()) {
532 if (pq.empty() || pq.top()->current_key > key) {
535 bool compressed = cur->
read_tag(
true);
551 vector<ByteLengthPrefixedStringItor *>,
553 vector<MergeCursor *> vec;
557 pqtag.push(
new ByteLengthPrefixedStringItor(cur->
current_tag));
559 if (pq.empty() || pq.top()->current_key != key)
break;
565 while (!pqtag.empty()) {
566 ByteLengthPrefixedStringItor * it = pqtag.top();
568 if (**it != lastword) {
581 vector<MergeCursor *>::const_iterator i;
582 for (i = vec.begin(); i != vec.end(); ++i) {
598 vector<GlassTable *> tmp,
599 vector<Xapian::docid> off)
602 while (tmp.size() > 3) {
603 vector<GlassTable *> tmpout;
604 tmpout.reserve(tmp.size() / 2);
605 vector<Xapian::docid> newoff;
606 newoff.resize(tmp.size() / 2);
607 for (
unsigned int i = 0, j; i < tmp.size(); i = j) {
609 if (j == tmp.size() - 1) ++j;
611 string dest = tmpdir;
625 root_info.
init(65536, 0);
630 tmp.begin() + i, tmp.begin() + j);
632 for (
unsigned int k = i; k < j; ++k) {
633 unlink(tmp[k]->get_path().c_str());
638 tmpout.push_back(tmptab);
640 tmptab->
commit(1, &root_info);
649 for (
size_t k = 0; k < tmp.size(); ++k) {
650 unlink(tmp[k]->get_path().c_str());
666 find_entry(
string());
673 const char * d = current_key.data();
674 const char * e = d + current_key.size();
705 const vector<Xapian::docid> & offset)
707 priority_queue<PositionCursor *, vector<PositionCursor *>,
PositionCursorGt> pq;
708 for (
size_t i = 0; i < inputs.size(); ++i) {
718 while (!pq.empty()) {
732 const vector<Xapian::docid> & offset)
734 for (
size_t i = 0; i < inputs.size(); ++i) {
738 if (in->
empty())
continue;
751 string msg =
"Bad key in ";
752 msg += inputs[i]->get_path();
761 key.append(d, e - d);
766 bool compressed = cur.
read_tag(
true);
778 const char * destdir,
780 const vector<Xapian::Database::Internal*> & sources,
781 const vector<Xapian::docid> & offset,
796 static const table_list tables[] = {
805 const table_list * tables_end = tables +
806 (
sizeof(tables) /
sizeof(tables[0]));
818 for (
size_t i = 0; i != sources.size(); ++i) {
820 if (db->has_uncommitted_changes()) {
822 "Can't compact from a WritableDatabase with uncommitted " 823 "changes - either call commit() first, or create a new " 824 "Database object from the filename on disk";
829 if (block_size < 2048 || block_size > 65536 ||
830 (block_size & (block_size - 1)) != 0) {
843 AutoPtr<GlassVersion> version_file_out;
857 version_file_out->create(block_size);
858 for (
size_t i = 0; i != sources.size(); ++i) {
863 string fl_serialised;
867 fl.
pack(fl_serialised);
870 vector<GlassTable *> tabs;
871 tabs.reserve(tables_end - tables);
872 off_t prev_size = block_size;
873 for (
const table_list * t = tables; t < tables_end; ++t) {
890 bool output_will_exist = !t->lazy;
894 bool bad_stat =
false;
898 bool single_file_in =
false;
902 vector<GlassTable*> inputs;
903 inputs.reserve(sources.size());
904 size_t inputs_present = 0;
905 for (
auto src : sources) {
933 if (t->lazy && table->
empty()) {
938 single_file_in =
true;
939 output_will_exist =
true;
945 in_size += db_size / 1024;
946 output_will_exist =
true;
948 }
else if (errno != ENOENT) {
951 output_will_exist =
true;
955 inputs.push_back(table);
960 if (inputs_present != 0) {
962 string m =
str(inputs_present);
964 m +=
str(sources.size());
965 m +=
" inputs present, so suppressing output";
970 output_will_exist =
false;
973 if (!output_will_exist) {
975 compactor->
set_status(t->name,
"doesn't exist");
981 out =
new GlassTable(t->name, fd, version_file_out->get_offset(),
984 out =
new GlassTable(t->name, dest,
false, t->lazy);
987 RootInfo * root_info = version_file_out->root_to_set(t->type);
990 out->
open(FLAGS, version_file_out->get_root(t->type), version_file_out->get_revision());
1000 if (multipass && inputs.size() > 3) {
1005 inputs.begin(), inputs.end());
1027 out->
commit(1, root_info);
1030 if (single_file) fl_serialised = root_info->
get_free_list();
1033 if (!bad_stat && !single_file_in) {
1042 off_t old_prev_size = max(prev_size, off_t(block_size));
1043 prev_size = db_size;
1044 db_size = max(db_size, off_t(block_size));
1045 db_size -= old_prev_size;
1047 out_size = db_size / 1024;
1049 bad_stat = (errno != ENOENT);
1054 compactor->
set_status(t->name,
"Done (couldn't stat all the DB files)");
1055 }
else if (single_file_in) {
1057 compactor->
set_status(t->name,
"Done (table sizes unknown for single file DB input)");
1060 if (out_size == in_size) {
1061 status =
"Size unchanged (";
1064 if (out_size < in_size) {
1065 delta = in_size - out_size;
1066 status =
"Reduced by ";
1068 delta = out_size - in_size;
1069 status =
"INCREASED by ";
1072 status +=
str(100 * delta / in_size);
1075 status +=
str(delta);
1077 status +=
str(in_size);
1080 status +=
str(out_size);
1090 if (single_file && prev_size < off_t(block_size)) {
1091 #ifdef HAVE_FTRUNCATE 1092 if (ftruncate(fd, block_size) < 0) {
1096 const off_t off = block_size - 1;
1097 if (lseek(fd, off, SEEK_SET) != off || write(fd,
"", 1) != 1) {
1104 if (lseek(fd, version_file_out->get_offset(), SEEK_SET) < 0) {
1108 version_file_out->set_last_docid(last_docid);
1109 string tmpfile = version_file_out->write(1, FLAGS);
1110 for (
unsigned j = 0; j != tabs.size(); ++j) {
1114 version_file_out->sync(tmpfile, 1, FLAGS);
1115 for (
unsigned j = 0; j != tabs.size(); ++j) {
1119 if (!single_file) lock.
release();
GlassVersion version_file
The file describing the Glass database.
void throw_databaselockerror(FlintLock::reason why, const std::string &db_dir, const std::string &explanation) const
Throw Xapian::DatabaseLockError.
void release()
Release the lock.
MergeCursor(GlassTable *in)
GlassPositionListTable position_table
Table storing position lists.
void create_and_open(int flags_, const RootInfo &root_info)
Create a new empty btree structure on disk and open it at the initial revision.
static void merge_positions(GlassTable *out, const vector< GlassTable *> &inputs, const vector< Xapian::docid > &offset)
Allow oversize items to save more space (not recommended if you ever plan to update the compacted database).
#define AssertRel(A, REL, B)
InvalidOperationError indicates the API was used in an invalid way.
bool empty() const
Return true if there are no entries in the table.
Class managing a Btree table in a Glass database.
GlassSynonymTable synonym_table
Table storing synonym data.
static void merge_synonyms(GlassTable *out, vector< GlassTable *>::const_iterator b, vector< GlassTable *>::const_iterator e)
Constants in the Xapian namespace.
The GlassVersion class manages the revision files.
static void compact(Xapian::Compactor *compactor, const char *destdir, int fd, const std::vector< Xapian::Database::Internal *> &sources, const std::vector< Xapian::docid > &offset, size_t block_size, Xapian::Compactor::compaction_level compaction, unsigned flags, Xapian::docid last_docid)
WritableDatabase open()
Construct a WritableDatabase object for a new, empty InMemory database.
Don't split items unnecessarily.
Compact a database, or merge and compact several.
bool next()
Advance to the next key.
Definitions, types, etc for use inside glass.
Flint-compatible database locking.
void add(const std::string &key, const std::string &tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
static void merge_postlists(Xapian::Compactor *compactor, GlassTable *out, vector< Xapian::docid >::const_iterator offset, vector< GlassTable *>::const_iterator b, vector< GlassTable *>::const_iterator e)
bool after_end() const
Determine whether cursor is off the end of table.
Utility functions for testing files.
GlassDocDataTable docdata_table
Table storing document data.
#define GLASS_TABLE_EXTENSION
Glass table extension.
bool read_tag(bool keep_compressed=false)
Read the tag from the table and store it in current_tag.
virtual void set_status(const std::string &table, const std::string &status)
Update progress.
Hierarchy of classes which Xapian can throw as exceptions.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
void flush_db()
Flush any outstanding changes to the DB file of the table.
RangeError indicates an attempt to access outside the bounds of a container.
void pack_uint_last(std::string &s, U value)
Append an encoded unsigned integer to a string as the last item.
static void multimerge_postlists(Xapian::Compactor *compactor, GlassTable *out, const char *tmpdir, vector< GlassTable *> tmp, vector< Xapian::docid > off)
string current_key
Current key pointed to by cursor.
GlassPostListTable postlist_table
Table storing posting lists.
PostlistCursor(GlassTable *in, Xapian::docid offset_)
bool operator()(const PositionCursor *a, const PositionCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
virtual std::string resolve_duplicate_metadata(const std::string &key, size_t num_tags, const std::string tags[])
Resolve multiple user metadata entries with the same key.
bool is_modified() const
Determine whether the object contains uncommitted modifications.
DatabaseCreateError indicates a failure to create a database.
void commit(glass_revision_number_t revision, RootInfo *root_info)
Commit any outstanding changes to the table.
string current_tag
Current tag pointed to by cursor.
Compact a database, or merge and compact several.
static void merge_spellings(GlassTable *out, vector< GlassTable *>::const_iterator b, vector< GlassTable *>::const_iterator e)
static bool is_valuestats_key(const string &key)
string str(int value)
Convert int to std::string.
GlassTermListTable termlist_table
Table storing term lists.
#define GLASS_DEFAULT_BLOCKSIZE
Default B-tree block size.
C++ class definition for glass database.
static void merge_docid_keyed(GlassTable *out, const vector< GlassTable *> &inputs, const vector< Xapian::docid > &offset)
static bool is_user_metadata_key(const string &key)
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
static string encode_valuestats(Xapian::doccount freq, const string &lbound, const string &ubound)
const int DB_DANGEROUS
Update the database in-place.
void set_max_item_size(size_t block_capacity)
Set the maximum item size given the block capacity.
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree storage scheme.
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
const int DB_NO_SYNC
Don't attempt to ensure changes have hit disk.
DatabaseCorruptError indicates database corruption was detected.
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
void init(unsigned blocksize_, uint4 compress_min_)
unsigned get_blocksize() const
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
void pack_string(std::string &s, const std::string &value)
Append an encoded std::string to a string.
Interface to Btree cursors.
std::string pack_glass_postlist_key(const std::string &term)
compaction_level
Compaction level.
Pack types into strings and unpack them again.
unsigned valueno
The number for a value slot in a document.
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
PositionCursor(GlassTable *in, Xapian::docid offset_)
const int DBCOMPACT_MULTIPASS
If merging more than 3 databases, merge the postlists in multiple passes.
reason lock(bool exclusive, bool wait, std::string &explanation)
Attempt to obtain the lock.
off_t file_size(const char *path)
Returns the size of a file.
void pack(std::string &buf)
void append(const std::string &word)
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
DatabaseError indicates some sort of database related error.
static bool is_valuechunk_key(const string &key)
const int DBCOMPACT_SINGLE_FILE
Produce a single-file database.
void set_first_unused_block(uint4 base)
const std::string & get_free_list() const
void set_free_list(const std::string &s)
bool operator()(const PostlistCursor *a, const PostlistCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
GlassSpellingTable spelling_table
Table storing spelling correction data.
Wrapper around standard unique_ptr template.
static bool is_doclenchunk_key(const string &key)
void set_full_compaction(bool parity)
void pack_string_preserving_sort(std::string &s, const std::string &value, bool last=false)
Append an encoded std::string to a string, preserving the sort order.
void pack_uint_preserving_sort(std::string &s, U value)
Append an encoded unsigned integer to a string, preserving the sort order.
bool operator()(const GlassCursor *a, const GlassCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
const string & get_tag() const