28 #include <string_view>
35 #include <sys/types.h>
51 #ifdef XAPIAN_HAS_GLASS_BACKEND
56 #ifdef XAPIAN_HAS_HONEY_BACKEND
70 const vector<pair<Xapian::docid, Xapian::docid>>&
used_ranges;
78 return used_ranges[a].first < used_ranges[b].first;
84 Compactor::~Compactor() { }
87 Compactor::set_status(
const string & table,
const string & status)
94 Compactor::resolve_duplicate_metadata(
const string & key,
95 size_t num_tags,
const std::string
tags[])
107 const string &dbpath2,
int backend2)
111 string msg =
"All databases must be the same type ('";
126 Database::compact_(
const string_view* output_ptr,
int fd,
unsigned flags,
130 LOGCALL_VOID(API,
"Database::compact_", output_ptr | fd | flags | block_size | compactor);
134 enum { STUB_NO, STUB_FILE, STUB_DIR } compact_to_stub = STUB_NO;
138 destdir = *output_ptr;
142 compact_to_stub = STUB_FILE;
145 compact_to_stub = STUB_DIR;
153 auto n_shards =
internal->size();
157 vector<Xapian::docid> offset;
158 vector<pair<Xapian::docid, Xapian::docid>> used_ranges;
159 vector<const Xapian::Database::Internal*> internals;
160 offset.reserve(n_shards);
161 used_ranges.reserve(n_shards);
162 internals.reserve(n_shards);
166 for (
auto&& db : multi_db->shards) {
170 internals.push_back(
internal.get());
174 for (
auto&& shard : internals) {
176 int type = shard->get_backend_info(&
srcdir);
179 if (!compact_to_stub && !destdir.empty() &&
srcdir == destdir) {
181 "any source database, unless it is a "
208 shard->get_used_docid_range(first, last);
210 if (renumber && first) {
219 #ifdef XAPIAN_ASSERTIONS
220 PostList* pl = shard->open_post_list({});
238 offset.push_back(tot_off);
241 else if (last_docid < shard->get_lastdocid())
242 last_docid = shard->get_lastdocid();
243 used_ranges.push_back(make_pair(first, last));
247 last_docid = tot_off;
249 if (!renumber && n_shards > 1) {
253 vector<Xapian::doccount> order;
254 order.reserve(n_shards);
262 vector<const Xapian::Database::Internal*> internals_;
263 internals_.reserve(n_shards);
264 vector<pair<Xapian::docid, Xapian::docid>> used_ranges_;
265 used_ranges_.reserve(n_shards);
271 internals_.push_back(internals[n]);
272 used_ranges_.push_back(used_ranges[n]);
274 const pair<Xapian::docid, Xapian::docid>
p = used_ranges[n];
276 if (
p.first == 0 &&
p.second == 0)
279 if (
p.first <= last_end) {
281 string msg =
"when merging databases, --no-renumber is only currently supported if the databases have disjoint ranges of used document ids: ";
282 internals_[j - 1]->get_backend_info(&tmp);
284 msg +=
" has range ";
285 msg +=
str(last_start);
287 msg +=
str(last_end);
289 internals_[j]->get_backend_info(&tmp);
291 msg +=
" has range ";
294 msg +=
str(
p.second);
297 last_start =
p.first;
301 swap(internals, internals_);
302 swap(used_ranges, used_ranges_);
306 if (compact_to_stub) {
308 if (compact_to_stub == STUB_DIR) {
309 stub_file +=
"/XAPIANDB";
314 size_t sfx = destdir.size();
315 time_t
now = time(NULL);
319 if (mkdir(destdir.c_str(), 0755) == 0)
321 if (errno != EEXIST) {
322 string msg = destdir;
323 msg +=
": mkdir failed";
329 if (mkdir(destdir.c_str(), 0755) < 0) {
333 int mkdir_errno = errno;
334 if (mkdir_errno != EEXIST || !
dir_exists(destdir)) {
335 string msg = destdir;
336 msg +=
": cannot create directory";
342 #if defined XAPIAN_HAS_GLASS_BACKEND || defined XAPIAN_HAS_HONEY_BACKEND
350 auto output_backend = flags & Xapian::DB_BACKEND_MASK_;
352 switch (output_backend) {
355 #ifdef XAPIAN_HAS_GLASS_BACKEND
359 block_size, compaction, flags,
364 block_size, compaction, flags,
377 #ifdef XAPIAN_HAS_HONEY_BACKEND
400 "compacted to itself or "
404 switch (output_backend) {
407 #ifdef XAPIAN_HAS_HONEY_BACKEND
432 "compacted to itself");
436 if (compact_to_stub) {
437 string new_stub_file = destdir;
438 new_stub_file +=
"/new_stub.tmp";
440 ofstream new_stub(new_stub_file.c_str());
441 size_t slash = destdir.find_last_of(
DIR_SEPS);
442 new_stub <<
"auto " << destdir.substr(slash + 1) <<
'\n';
445 string msg =
"Cannot rename '";
446 msg += new_stub_file;
const char * backend_name(int code)
const vector< pair< Xapian::docid, Xapian::docid > > & used_ranges
bool operator()(Xapian::doccount a, Xapian::doccount b) const
CmpByFirstUsed(const vector< pair< Xapian::docid, Xapian::docid >> &ur)
static void compact(Xapian::Compactor *compactor, const char *destdir, int fd, const std::vector< const Xapian::Database::Internal * > &sources, const std::vector< Xapian::docid > &offset, unsigned block_size, Xapian::Compactor::compaction_level compaction, unsigned flags, Xapian::docid last_docid)
static void compact(Xapian::Compactor *compactor, const char *destdir, int fd, int source_backend, const std::vector< const Xapian::Database::Internal * > &sources, const std::vector< Xapian::docid > &offset, Xapian::Compactor::compaction_level compaction, unsigned flags, Xapian::docid last_docid)
Sharded database backend.
void push_back(Xapian::Database::Internal *shard)
Compact a database, or merge and compact several.
compaction_level
Compaction level.
@ FULL
Split items whenever it saves space (the default).
@ FULLER
Allow oversize items to save more space (not recommended if you ever plan to update the compacted database).
@ STANDARD
Don't split items unnecessarily.
DatabaseError indicates some sort of database related error.
Virtual base class for Database internals.
virtual int get_backend_info(std::string *path) const =0
Get backend information about this database.
Indicates an attempt to use a feature which is unavailable.
Abstract base class for postlists.
virtual PostList * skip_to(Xapian::docid did, double w_min)=0
Skip forward to the specified docid.
virtual PostList * next(double w_min)=0
Advance the current position to the next document in the postlist.
virtual Xapian::docid get_docid() const =0
Return the current docid.
virtual bool at_end() const =0
Return true if the current position is past the last entry in this list.
InvalidArgumentError indicates an invalid parameter value was passed to the API.
InvalidOperationError indicates the API was used in an invalid way.
UnimplementedError indicates an attempt to use an unimplemented feature.
static void backend_mismatch(const Xapian::Database::Internal *db, int backend1, const string &dbpath2, int backend2)
Compact a database, or merge and compact several.
#define UNSIGNED_OVERFLOW_OK(X)
Constants in the Xapian namespace.
An indexed database of documents.
Virtual base class for Database internals.
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Hierarchy of classes which Xapian can throw as exceptions.
Utility functions for testing files.
bool dir_exists(const char *path)
Test if a directory exists.
bool file_exists(const char *path)
Test if a file exists.
File and path manipulation routines.
C++ class definition for glass database.
Database using honey backend.
bool io_tmp_rename(const std::string &tmp_file, const std::string &real_file)
Rename a temporary file to its final position.
Wrappers for low-level POSIX I/O routines.
Sharded database backend.
void sort(_RandomAccessIterator first, _RandomAccessIterator last, _Compare comp)
double now()
Return the current time.
string str(int value)
Convert int to std::string.
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
const int DB_BACKEND_HONEY
Use the honey backend.
const int DB_BACKEND_GLASS
Use the glass backend.
const int DBCOMPACT_NO_RENUMBER
Use the same document ids in the output as in the input(s).
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
const int DBCOMPACT_SINGLE_FILE
Produce a single-file database.
Various assertion macros.
#define AssertRel(A, REL, B)
Abstract base class for postlists.
include <fcntl.h>, but working around broken platforms.
include <sys/stat.h> with portability enhancements
<unistd.h>, but with compat.
Convert types to std::string.
Various handy string-related helpers.