47 #include "../byte_length_strings.h" 48 #include "../prefix_compressed_strings.h" 59 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xc0';
65 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xd0';
71 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xd8';
77 return key.size() > 1 && key[0] ==
'\0' && key[1] ==
'\xe0';
106 const char * p = key.data();
107 const char * end = p + key.length();
117 key.assign(
"\0\xd8", 2);
126 const char * d = key.data();
127 const char * e = d + key.size();
146 tag.erase(0, d - tag.data());
149 size_t tmp = d - key.data();
168 if (a->
key > b->
key)
return true;
169 if (a->
key != b->
key)
return false;
176 const string & lbound,
const string & ubound)
184 if (lbound != ubound) value += ubound;
190 GlassTable * out, vector<Xapian::docid>::const_iterator offset,
191 vector<GlassTable*>::const_iterator b,
192 vector<GlassTable*>::const_iterator e)
194 priority_queue<PostlistCursor *, vector<PostlistCursor *>,
PostlistCursorGt> pq;
195 for ( ; b != e; ++b, ++offset) {
209 while (!pq.empty()) {
211 const string& key = cur->
key;
214 if (key != last_key) {
216 if (tags.size() > 1 && compactor) {
217 Assert(!last_key.empty());
221 const string & resolved_tag =
225 if (!resolved_tag.empty())
226 out->
add(last_key, resolved_tag);
228 Assert(!last_key.empty());
229 out->
add(last_key, tags[0]);
235 tags.push_back(cur->
tag);
245 if (tags.size() > 1 && compactor) {
246 Assert(!last_key.empty());
247 const string & resolved_tag =
251 if (!resolved_tag.empty())
252 out->
add(last_key, resolved_tag);
254 Assert(!last_key.empty());
255 out->
add(last_key, tags[0]);
263 string lbound, ubound;
265 while (!pq.empty()) {
267 const string& key = cur->
key;
269 if (key != last_key) {
280 const string & tag = cur->
tag;
282 const char * pos = tag.data();
283 const char * end = pos + tag.size();
295 size_t len = end - pos;
307 if (l < lbound) lbound = l;
308 if (u > ubound) ubound = u;
325 while (!pq.empty()) {
327 const string & key = cur->
key;
340 vector<pair<Xapian::docid, string>>
tags;
348 if (cur == NULL || cur->
key != last_key) {
354 string tag = tags[0].second;
355 tag[0] = (tags.size() == 1) ?
'1' :
'0';
357 out->
add(last_key, first_tag);
361 const char * p = last_key.data();
362 const char * end = p + last_key.size();
367 auto i = tags.begin();
368 while (++i != tags.end()) {
370 tag[0] = (i + 1 == tags.end()) ?
'1' :
'0';
375 if (cur == NULL)
break;
381 tags.push_back(make_pair(cur->
firstdid, cur->
tag));
392 find_entry(
string());
408 vector<GlassTable*>::const_iterator b,
409 vector<GlassTable*>::const_iterator e)
411 priority_queue<MergeCursor *, vector<MergeCursor *>,
CursorGt> pq;
412 for ( ; b != e; ++b) {
419 while (!pq.empty()) {
424 if (pq.empty() || pq.top()->current_key > key) {
427 bool compressed = cur->
read_tag(
true);
443 vector<PrefixCompressedStringItor *>,
448 vector<MergeCursor *> vec;
449 vec.reserve(pq.size());
453 pqtag.push(
new PrefixCompressedStringItor(cur->
current_tag));
455 if (pq.empty() || pq.top()->current_key != key)
break;
462 while (!pqtag.empty()) {
463 PrefixCompressedStringItor * it = pqtag.top();
466 if (word != lastword) {
478 vector<MergeCursor *>::const_iterator i;
479 for (i = vec.begin(); i != vec.end(); ++i) {
504 if (pq.empty() || pq.top()->current_key != key)
break;
517 vector<GlassTable*>::const_iterator b,
518 vector<GlassTable*>::const_iterator e)
520 priority_queue<MergeCursor *, vector<MergeCursor *>,
CursorGt> pq;
521 for ( ; b != e; ++b) {
528 while (!pq.empty()) {
533 if (pq.empty() || pq.top()->current_key > key) {
536 bool compressed = cur->
read_tag(
true);
552 vector<ByteLengthPrefixedStringItor *>,
554 vector<MergeCursor *> vec;
558 pqtag.push(
new ByteLengthPrefixedStringItor(cur->
current_tag));
560 if (pq.empty() || pq.top()->current_key != key)
break;
566 while (!pqtag.empty()) {
567 ByteLengthPrefixedStringItor * it = pqtag.top();
569 if (**it != lastword) {
582 vector<MergeCursor *>::const_iterator i;
583 for (i = vec.begin(); i != vec.end(); ++i) {
599 vector<GlassTable *> tmp,
600 vector<Xapian::docid> off)
603 while (tmp.size() > 3) {
604 vector<GlassTable *> tmpout;
605 tmpout.reserve(tmp.size() / 2);
606 vector<Xapian::docid> newoff;
607 newoff.resize(tmp.size() / 2);
608 for (
unsigned int i = 0, j; i < tmp.size(); i = j) {
610 if (j == tmp.size() - 1) ++j;
612 string dest = tmpdir;
614 sprintf(buf,
"/tmp%u_%u.", c, i / 2);
624 root_info.
init(65536, 0);
629 tmp.begin() + i, tmp.begin() + j);
631 for (
unsigned int k = i; k < j; ++k) {
632 unlink(tmp[k]->get_path().c_str());
637 tmpout.push_back(tmptab);
639 tmptab->
commit(1, &root_info);
648 for (
size_t k = 0; k < tmp.size(); ++k) {
649 unlink(tmp[k]->get_path().c_str());
665 find_entry(
string());
672 const char * d = current_key.data();
673 const char * e = d + current_key.size();
704 const vector<Xapian::docid> & offset)
706 priority_queue<PositionCursor *, vector<PositionCursor *>,
PositionCursorGt> pq;
707 for (
size_t i = 0; i < inputs.size(); ++i) {
717 while (!pq.empty()) {
731 const vector<Xapian::docid> & offset)
733 for (
size_t i = 0; i < inputs.size(); ++i) {
737 if (in->
empty())
continue;
750 string msg =
"Bad key in ";
751 msg += inputs[i]->get_path();
760 key.append(d, e - d);
765 bool compressed = cur.
read_tag(
true);
777 const char * destdir,
779 const vector<Xapian::Database::Internal*> & sources,
780 const vector<Xapian::docid> & offset,
795 static const table_list tables[] = {
804 const table_list * tables_end = tables +
805 (
sizeof(tables) /
sizeof(tables[0]));
817 for (
size_t i = 0; i != sources.size(); ++i) {
819 if (db->has_uncommitted_changes()) {
821 "Can't compact from a WritableDatabase with uncommitted " 822 "changes - either call commit() first, or create a new " 823 "Database object from the filename on disk";
828 if (block_size < 2048 || block_size > 65536 ||
829 (block_size & (block_size - 1)) != 0) {
842 AutoPtr<GlassVersion> version_file_out;
856 version_file_out->create(block_size);
857 for (
size_t i = 0; i != sources.size(); ++i) {
862 string fl_serialised;
866 fl.
pack(fl_serialised);
869 vector<GlassTable *> tabs;
870 tabs.reserve(tables_end - tables);
871 off_t prev_size = block_size;
872 for (
const table_list * t = tables; t < tables_end; ++t) {
889 bool output_will_exist = !t->lazy;
893 bool bad_stat =
false;
897 bool single_file_in =
false;
901 vector<GlassTable*> inputs;
902 inputs.reserve(sources.size());
903 size_t inputs_present = 0;
904 for (
auto src : sources) {
932 if (t->lazy && table->
empty()) {
937 single_file_in =
true;
938 output_will_exist =
true;
944 in_size += db_size / 1024;
945 output_will_exist =
true;
947 }
else if (errno != ENOENT) {
950 output_will_exist =
true;
954 inputs.push_back(table);
959 if (inputs_present != 0) {
961 string m =
str(inputs_present);
963 m +=
str(sources.size());
964 m +=
" inputs present, so suppressing output";
969 output_will_exist =
false;
972 if (!output_will_exist) {
974 compactor->
set_status(t->name,
"doesn't exist");
980 out =
new GlassTable(t->name, fd, version_file_out->get_offset(),
983 out =
new GlassTable(t->name, dest,
false, t->lazy);
986 RootInfo * root_info = version_file_out->root_to_set(t->type);
989 out->
open(FLAGS, version_file_out->get_root(t->type), version_file_out->get_revision());
999 if (multipass && inputs.size() > 3) {
1004 inputs.begin(), inputs.end());
1026 out->
commit(1, root_info);
1029 if (single_file) fl_serialised = root_info->
get_free_list();
1032 if (!bad_stat && !single_file_in) {
1041 off_t old_prev_size = max(prev_size, off_t(block_size));
1042 prev_size = db_size;
1043 db_size = max(db_size, off_t(block_size));
1044 db_size -= old_prev_size;
1046 out_size = db_size / 1024;
1048 bad_stat = (errno != ENOENT);
1053 compactor->
set_status(t->name,
"Done (couldn't stat all the DB files)");
1054 }
else if (single_file_in) {
1056 compactor->
set_status(t->name,
"Done (table sizes unknown for single file DB input)");
1059 if (out_size == in_size) {
1060 status =
"Size unchanged (";
1063 if (out_size < in_size) {
1064 delta = in_size - out_size;
1065 status =
"Reduced by ";
1067 delta = out_size - in_size;
1068 status =
"INCREASED by ";
1071 status +=
str(100 * delta / in_size);
1074 status +=
str(delta);
1076 status +=
str(in_size);
1079 status +=
str(out_size);
1089 if (single_file && prev_size < off_t(block_size)) {
1090 #ifdef HAVE_FTRUNCATE 1091 if (ftruncate(fd, block_size) < 0) {
1095 const off_t off = block_size - 1;
1096 if (lseek(fd, off, SEEK_SET) != off || write(fd,
"", 1) != 1) {
1103 if (lseek(fd, version_file_out->get_offset(), SEEK_SET) < 0) {
1107 version_file_out->set_last_docid(last_docid);
1108 string tmpfile = version_file_out->write(1, FLAGS);
1109 for (
unsigned j = 0; j != tabs.size(); ++j) {
1113 version_file_out->sync(tmpfile, 1, FLAGS);
1114 for (
unsigned j = 0; j != tabs.size(); ++j) {
1118 if (!single_file) lock.
release();
GlassVersion version_file
The file describing the Glass database.
void throw_databaselockerror(FlintLock::reason why, const std::string &db_dir, const std::string &explanation) const
Throw Xapian::DatabaseLockError.
void release()
Release the lock.
MergeCursor(GlassTable *in)
GlassPositionListTable position_table
Table storing position lists.
void create_and_open(int flags_, const RootInfo &root_info)
Create a new empty btree structure on disk and open it at the initial revision.
static void merge_positions(GlassTable *out, const vector< GlassTable *> &inputs, const vector< Xapian::docid > &offset)
Allow oversize items to save more space (not recommended if you ever plan to update the compacted database).
#define AssertRel(A, REL, B)
InvalidOperationError indicates the API was used in an invalid way.
bool empty() const
Return true if there are no entries in the table.
Class managing a Btree table in a Glass database.
GlassSynonymTable synonym_table
Table storing synonym data.
static void merge_synonyms(GlassTable *out, vector< GlassTable *>::const_iterator b, vector< GlassTable *>::const_iterator e)
Constants in the Xapian namespace.
The GlassVersion class manages the revision files.
static void compact(Xapian::Compactor *compactor, const char *destdir, int fd, const std::vector< Xapian::Database::Internal *> &sources, const std::vector< Xapian::docid > &offset, size_t block_size, Xapian::Compactor::compaction_level compaction, unsigned flags, Xapian::docid last_docid)
WritableDatabase open()
Construct a WritableDatabase object for a new, empty InMemory database.
Don't split items unnecessarily.
Compact a database, or merge and compact several.
bool next()
Advance to the next key.
Definitions, types, etc for use inside glass.
Flint-compatible database locking.
void add(const std::string &key, const std::string &tag, bool already_compressed=false)
Add a key/tag pair to the table, replacing any existing pair with the same key.
static void merge_postlists(Xapian::Compactor *compactor, GlassTable *out, vector< Xapian::docid >::const_iterator offset, vector< GlassTable *>::const_iterator b, vector< GlassTable *>::const_iterator e)
bool after_end() const
Determine whether cursor is off the end of table.
Utility functions for testing files.
GlassDocDataTable docdata_table
Table storing document data.
#define GLASS_TABLE_EXTENSION
Glass table extension.
bool read_tag(bool keep_compressed=false)
Read the tag from the table and store it in current_tag.
virtual void set_status(const std::string &table, const std::string &status)
Update progress.
Hierarchy of classes which Xapian can throw as exceptions.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
void flush_db()
Flush any outstanding changes to the DB file of the table.
RangeError indicates an attempt to access outside the bounds of a container.
void pack_uint_last(std::string &s, U value)
Append an encoded unsigned integer to a string as the last item.
static void multimerge_postlists(Xapian::Compactor *compactor, GlassTable *out, const char *tmpdir, vector< GlassTable *> tmp, vector< Xapian::docid > off)
string current_key
Current key pointed to by cursor.
GlassPostListTable postlist_table
Table storing posting lists.
PostlistCursor(GlassTable *in, Xapian::docid offset_)
bool operator()(const PositionCursor *a, const PositionCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
virtual std::string resolve_duplicate_metadata(const std::string &key, size_t num_tags, const std::string tags[])
Resolve multiple user metadata entries with the same key.
bool is_modified() const
Determine whether the object contains uncommitted modifications.
DatabaseCreateError indicates a failure to create a database.
void commit(glass_revision_number_t revision, RootInfo *root_info)
Commit any outstanding changes to the table.
string current_tag
Current tag pointed to by cursor.
Compact a database, or merge and compact several.
static void merge_spellings(GlassTable *out, vector< GlassTable *>::const_iterator b, vector< GlassTable *>::const_iterator e)
static bool is_valuestats_key(const string &key)
string str(int value)
Convert int to std::string.
GlassTermListTable termlist_table
Table storing term lists.
#define GLASS_DEFAULT_BLOCKSIZE
Default B-tree block size.
C++ class definition for glass database.
static void merge_docid_keyed(GlassTable *out, const vector< GlassTable *> &inputs, const vector< Xapian::docid > &offset)
static bool is_user_metadata_key(const string &key)
bool unpack_string_preserving_sort(const char **p, const char *end, std::string &result)
Decode a "sort preserved" std::string from a string.
bool unpack_uint_preserving_sort(const char **p, const char *end, U *result)
Decode a "sort preserved" unsigned integer from a string.
static string encode_valuestats(Xapian::doccount freq, const string &lbound, const string &ubound)
const int DB_DANGEROUS
Update the database in-place.
void set_max_item_size(size_t block_capacity)
Set the maximum item size given the block capacity.
A backend designed for efficient indexing and retrieval, using compressed posting lists and a btree storage scheme.
A cursor pointing to a position in a Btree table, for reading several entries in order, or finding approximate matches.
const int DB_NO_SYNC
Don't attempt to ensure changes have hit disk.
DatabaseCorruptError indicates database corruption was detected.
void open(int flags_, const RootInfo &root_info, glass_revision_number_t rev)
Open the btree.
void pack_uint(std::string &s, U value)
Append an encoded unsigned integer to a string.
void init(unsigned blocksize_, uint4 compress_min_)
unsigned get_blocksize() const
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
void pack_string(std::string &s, const std::string &value)
Append an encoded std::string to a string.
Interface to Btree cursors.
std::string pack_glass_postlist_key(const std::string &term)
compaction_level
Compaction level.
Pack types into strings and unpack them again.
unsigned valueno
The number for a value slot in a document.
bool unpack_uint_last(const char **p, const char *end, U *result)
Decode an unsigned integer as the last item in a string.
bool unpack_uint(const char **p, const char *end, U *result)
Decode an unsigned integer from a string.
PositionCursor(GlassTable *in, Xapian::docid offset_)
const int DBCOMPACT_MULTIPASS
If merging more than 3 databases, merge the postlists in multiple passes.
reason lock(bool exclusive, bool wait, std::string &explanation)
Attempt to obtain the lock.
off_t file_size(const char *path)
Returns the size of a file.
void pack(std::string &buf)
void append(const std::string &word)
bool unpack_string(const char **p, const char *end, std::string &result)
Decode a std::string from a string.
bool find_entry(const string &key)
Position the cursor on the highest entry with key <= key.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
DatabaseError indicates some sort of database related error.
static bool is_valuechunk_key(const string &key)
const int DBCOMPACT_SINGLE_FILE
Produce a single-file database.
void set_first_unused_block(uint4 base)
const std::string & get_free_list() const
void set_free_list(const std::string &s)
bool operator()(const PostlistCursor *a, const PostlistCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
GlassSpellingTable spelling_table
Table storing spelling correction data.
Wrapper around standard unique_ptr template.
static bool is_doclenchunk_key(const string &key)
void set_full_compaction(bool parity)
void pack_string_preserving_sort(std::string &s, const std::string &value, bool last=false)
Append an encoded std::string to a string, preserving the sort order.
void pack_uint_preserving_sort(std::string &s, U value)
Append an encoded unsigned integer to a string, preserving the sort order.
bool operator()(const GlassCursor *a, const GlassCursor *b) const
Return true if and only if a's key is strictly greater than b's key.
const string & get_tag() const