00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <config.h>
00024
00025 #include <xapian/compactor.h>
00026
00027 #include "safeerrno.h"
00028
00029 #include <algorithm>
00030 #include <fstream>
00031
00032 #include <cstdio>
00033 #include <cstdlib>
00034 #include <cstring>
00035 #include <ctime>
00036 #include "safesysstat.h"
00037 #include <sys/types.h>
00038
00039 #include "safeunistd.h"
00040 #include "safefcntl.h"
00041
00042 #include "noreturn.h"
00043 #include "omassert.h"
00044 #include "fileutils.h"
00045 #ifdef __WIN32__
00046 # include "msvc_posix_wrapper.h"
00047 #endif
00048 #include "stringutils.h"
00049 #include "str.h"
00050 #include "utils.h"
00051
00052 #include "backends/brass/brass_compact.h"
00053 #include "backends/brass/brass_version.h"
00054 #include "backends/chert/chert_compact.h"
00055 #include "backends/chert/chert_version.h"
00056 #include "backends/flint/flint_compact.h"
00057 #include "backends/flint/flint_version.h"
00058
00059 #include <xapian.h>
00060
00061 using namespace std;
00062
00063 class CmpByFirstUsed {
00064 const vector<pair<Xapian::docid, Xapian::docid> > & used_ranges;
00065
00066 public:
00067 CmpByFirstUsed(const vector<pair<Xapian::docid, Xapian::docid> > & ur)
00068 : used_ranges(ur) { }
00069
00070 bool operator()(size_t a, size_t b) {
00071 return used_ranges[a].first < used_ranges[b].first;
00072 }
00073 };
00074
/** Human readable backend names used when building error messages.
 *
 *  Indexed by the backend enumeration declared in Compactor::Internal
 *  (UNKNOWN, BRASS, CHERT, FLINT), so slot 0 (UNKNOWN) has no name.
 *  Both the strings and the table itself are read-only.
 */
static const char * const backend_names[] = {
    NULL,
    "brass",
    "chert",
    "flint"
};

/** How the destination relates to a stub database.
 *
 *  STUB_NO   - the destination is not a stub (must stay 0: the value is
 *              tested for truth).
 *  STUB_FILE - the destination path is a regular file.
 *  STUB_DIR  - the destination is a directory containing a regular file
 *              named "XAPIANDB".
 */
enum { STUB_NO, STUB_FILE, STUB_DIR };
00083
00084 namespace Xapian {
00085
00086 class Compactor::Internal : public Xapian::Internal::RefCntBase {
00087 friend class Compactor;
00088
00089 string destdir;
00090 bool renumber;
00091 bool multipass;
00092 int compact_to_stub;
00093 size_t block_size;
00094 compaction_level compaction;
00095
00096 Xapian::docid tot_off;
00097 Xapian::docid last_docid;
00098
00099 enum { UNKNOWN, BRASS, CHERT, FLINT } backend;
00100
00101 struct stat sb;
00102
00103 string first_source;
00104
00105 vector<string> sources;
00106 vector<Xapian::docid> offset;
00107 vector<pair<Xapian::docid, Xapian::docid> > used_ranges;
00108 public:
00109 Internal()
00110 : renumber(true), multipass(false),
00111 block_size(8192), compaction(FULL), tot_off(0),
00112 last_docid(0), backend(UNKNOWN)
00113 {
00114 }
00115
00116 void set_destdir(const string & destdir_);
00117
00118 void add_source(const string & srcdir);
00119
00120 void compact(Xapian::Compactor & compactor);
00121 };
00122
00123 Compactor::Compactor() : internal(new Compactor::Internal()) { }
00124
00125 Compactor::~Compactor() { }
00126
00127 void
00128 Compactor::set_block_size(size_t block_size)
00129 {
00130 internal->block_size = block_size;
00131 }
00132
00133 void
00134 Compactor::set_renumber(bool renumber)
00135 {
00136 internal->renumber = renumber;
00137 }
00138
00139 void
00140 Compactor::set_multipass(bool multipass)
00141 {
00142 internal->multipass = multipass;
00143 }
00144
00145 void
00146 Compactor::set_compaction_level(compaction_level compaction)
00147 {
00148 internal->compaction = compaction;
00149 }
00150
00151 void
00152 Compactor::set_destdir(const string & destdir)
00153 {
00154 internal->set_destdir(destdir);
00155 }
00156
00157 void
00158 Compactor::add_source(const string & srcdir)
00159 {
00160 internal->add_source(srcdir);
00161 }
00162
00163 void
00164 Compactor::compact()
00165 {
00166 internal->compact(*this);
00167 }
00168
00169 void
00170 Compactor::set_status(const string & table, const string & status)
00171 {
00172 (void)table;
00173 (void)status;
00174 }
00175
00176 string
00177 Compactor::resolve_duplicate_metadata(const string & key,
00178 size_t num_tags, const std::string tags[])
00179 {
00180 (void)key;
00181 (void)num_tags;
00182 return tags[0];
00183 }
00184
00185 }
00186
00187 XAPIAN_NORETURN(
00188 static void
00189 backend_mismatch(const string &dbpath1, int backend1,
00190 const string &dbpath2, int backend2)
00191 );
00192 static void
00193 backend_mismatch(const string &dbpath1, int backend1,
00194 const string &dbpath2, int backend2)
00195 {
00196 string msg = "All databases must be the same type ('";
00197 msg += dbpath1;
00198 msg += "' is ";
00199 msg += backend_names[backend1];
00200 msg += ", but '";
00201 msg += dbpath2;
00202 msg += "' is ";
00203 msg += backend_names[backend2];
00204 msg += ')';
00205 throw Xapian::InvalidArgumentError(msg);
00206 }
00207
00208 namespace Xapian {
00209
00210 void
00211 Compactor::Internal::set_destdir(const string & destdir_) {
00212 destdir = destdir_;
00213 compact_to_stub = STUB_NO;
00214 if (stat(destdir, &sb) == 0 && S_ISREG(sb.st_mode)) {
00215
00216 compact_to_stub = STUB_FILE;
00217 } else if (stat(destdir + "/XAPIANDB", &sb) == 0 && S_ISREG(sb.st_mode)) {
00218
00219 compact_to_stub = STUB_DIR;
00220 }
00221 }
00222
00223 void
00224 Compactor::Internal::add_source(const string & srcdir)
00225 {
00226
00227
00228 if (!compact_to_stub && srcdir == destdir) {
00229 throw Xapian::InvalidArgumentError("destination may not be the same as any source directory, unless it is a stub database");
00230 }
00231
00232 if (stat(srcdir, &sb) == 0) {
00233 bool is_stub = false;
00234 string file = srcdir;
00235 if (S_ISREG(sb.st_mode)) {
00236
00237 is_stub = true;
00238 } else if (S_ISDIR(sb.st_mode)) {
00239 file += "/XAPIANDB";
00240 if (stat(file.c_str(), &sb) == 0 && S_ISREG(sb.st_mode)) {
00241
00242 is_stub = true;
00243 }
00244 }
00245 if (is_stub) {
00246 ifstream stub(file.c_str());
00247 string line;
00248 unsigned int line_no = 0;
00249 while (getline(stub, line)) {
00250 ++line_no;
00251 if (line.empty() || line[0] == '#')
00252 continue;
00253 string::size_type space = line.find(' ');
00254 if (space == string::npos) space = line.size();
00255
00256 string type(line, 0, space);
00257 line.erase(0, space + 1);
00258
00259 if (type == "auto" || type == "chert" || type == "flint" ||
00260 type == "brass") {
00261 resolve_relative_path(line, file);
00262 add_source(line);
00263 continue;
00264 }
00265
00266 if (type == "remote" || type == "inmemory") {
00267 string msg = "Can't compact stub entry of type '";
00268 msg += type;
00269 msg += '\'';
00270 throw Xapian::InvalidOperationError(msg);
00271 }
00272
00273 throw Xapian::DatabaseError("Bad line in stub file");
00274 }
00275 return;
00276 }
00277 }
00278
00279 if (stat(string(srcdir) + "/iamflint", &sb) == 0) {
00280 if (backend == UNKNOWN) {
00281 backend = FLINT;
00282 } else if (backend != FLINT) {
00283 backend_mismatch(first_source, backend, srcdir, FLINT);
00284 }
00285 } else if (stat(string(srcdir) + "/iamchert", &sb) == 0) {
00286 if (backend == UNKNOWN) {
00287 backend = CHERT;
00288 } else if (backend != CHERT) {
00289 backend_mismatch(first_source, backend, srcdir, CHERT);
00290 }
00291 } else if (stat(string(srcdir) + "/iambrass", &sb) == 0) {
00292 if (backend == UNKNOWN) {
00293 backend = BRASS;
00294 } else if (backend != BRASS) {
00295 backend_mismatch(first_source, backend, srcdir, BRASS);
00296 }
00297 } else {
00298 string msg = srcdir;
00299 msg += ": not a flint, chert or brass database";
00300 throw Xapian::InvalidArgumentError(msg);
00301 }
00302
00303 if (first_source.empty())
00304 first_source = srcdir;
00305
00306 Xapian::Database db(srcdir);
00307 Xapian::docid first = 0, last = 0;
00308
00309
00310
00311 Xapian::doccount num_docs = db.get_doccount();
00312 if (num_docs != 0) {
00313 Xapian::PostingIterator it = db.postlist_begin(string());
00314
00315
00316 Assert(it != db.postlist_end(string()));
00317 first = *it;
00318
00319 if (renumber && first) {
00320
00321
00322
00323
00324
00325 tot_off -= (first - 1);
00326 }
00327
00328
00329
00330
00331 last = db.get_lastdocid();
00332 Xapian::docid last_lbound = first + num_docs - 1;
00333 while (last_lbound < last) {
00334 Xapian::docid mid;
00335 mid = last_lbound + (last - last_lbound + 1) / 2;
00336 it.skip_to(mid);
00337 if (it == db.postlist_end(string())) {
00338 last = mid - 1;
00339 it = db.postlist_begin(string());
00340 continue;
00341 }
00342 last_lbound = *it;
00343 }
00344 }
00345 offset.push_back(tot_off);
00346 if (renumber)
00347 tot_off += last;
00348 else if (last_docid < db.get_lastdocid())
00349 last_docid = db.get_lastdocid();
00350 used_ranges.push_back(make_pair(first, last));
00351
00352 sources.push_back(string(srcdir) + '/');
00353 }
00354
00355 void
00356 Compactor::Internal::compact(Xapian::Compactor & compactor)
00357 {
00358 if (renumber)
00359 last_docid = tot_off;
00360
00361 if (!renumber && sources.size() > 1) {
00362
00363
00364
00365
00366 vector<size_t> order;
00367 order.reserve(sources.size());
00368 for (size_t i = 0; i < sources.size(); ++i)
00369 order.push_back(i);
00370
00371 sort(order.begin(), order.end(), CmpByFirstUsed(used_ranges));
00372
00373
00374
00375 vector<string> sources_(sources.size());
00376 vector<pair<Xapian::docid, Xapian::docid> > used_ranges_;
00377 used_ranges_.reserve(sources.size());
00378
00379 Xapian::docid last_start = 0, last_end = 0;
00380 for (size_t j = 0; j != order.size(); ++j) {
00381 size_t n = order[j];
00382
00383 swap(sources_[j], sources[n]);
00384 used_ranges_.push_back(used_ranges[n]);
00385
00386 const pair<Xapian::docid, Xapian::docid> p = used_ranges[n];
00387
00388 if (p.first == 0 && p.second == 0)
00389 continue;
00390
00391 if (p.first <= last_end) {
00392 string msg = "when merging databases, --no-renumber is only currently supported if the databases have disjoint ranges of used document ids: ";
00393 msg += sources[order[j - 1]];
00394 msg += " has range ";
00395 msg += str(last_start);
00396 msg += '-';
00397 msg += str(last_end);
00398 msg += ", ";
00399 msg += sources[n];
00400 msg += " has range ";
00401 msg += str(p.first);
00402 msg += '-';
00403 msg += str(p.second);
00404 throw Xapian::InvalidOperationError(msg);
00405 }
00406 last_start = p.first;
00407 last_end = p.second;
00408 }
00409
00410 swap(sources, sources_);
00411 swap(used_ranges, used_ranges_);
00412 }
00413
00414 string stub_file;
00415 if (compact_to_stub) {
00416 stub_file = destdir;
00417 if (compact_to_stub == STUB_DIR) {
00418 stub_file += "/XAPIANDB";
00419 destdir += '/';
00420 } else {
00421 destdir += '_';
00422 }
00423 size_t sfx = destdir.size();
00424 time_t now = time(NULL);
00425 while (true) {
00426 destdir.resize(sfx);
00427 destdir += str(now++);
00428 if (mkdir(destdir, 0755) == 0)
00429 break;
00430 if (errno != EEXIST) {
00431 string msg = destdir;
00432 msg += ": mkdir failed";
00433 throw Xapian::DatabaseError(msg, errno);
00434 }
00435 }
00436 } else {
00437
00438 if (mkdir(destdir, 0755) < 0) {
00439
00440
00441
00442 if (errno == EEXIST) {
00443 if (stat(destdir, &sb) == 0 && S_ISDIR(sb.st_mode))
00444 errno = 0;
00445 else
00446 errno = EEXIST;
00447 }
00448 if (errno) {
00449 string msg = destdir;
00450 msg += ": cannot create directory";
00451 throw Xapian::DatabaseError(msg, errno);
00452 }
00453 }
00454 }
00455
00456 if (backend == CHERT) {
00457 #ifdef XAPIAN_HAS_CHERT_BACKEND
00458 compact_chert(compactor, destdir.c_str(), sources, offset, block_size,
00459 compaction, multipass, last_docid);
00460 #else
00461 throw Xapian::FeatureUnavailableError("Chert backend disabled at build time");
00462 #endif
00463 } else if (backend == BRASS) {
00464 #ifdef XAPIAN_HAS_BRASS_BACKEND
00465 compact_brass(compactor, destdir.c_str(), sources, offset, block_size,
00466 compaction, multipass, last_docid);
00467 #else
00468 throw Xapian::FeatureUnavailableError("Brass backend disabled at build time");
00469 #endif
00470 } else {
00471 #ifdef XAPIAN_HAS_FLINT_BACKEND
00472 compact_flint(compactor, destdir.c_str(), sources, offset, block_size,
00473 compaction, multipass, last_docid);
00474 #else
00475 throw Xapian::FeatureUnavailableError("Flint backend disabled at build time");
00476 #endif
00477 }
00478
00479
00480
00481
00482
00483 if (backend == CHERT) {
00484 #ifdef XAPIAN_HAS_CHERT_BACKEND
00485 ChertVersion(destdir).create();
00486 #else
00487
00488 exit(1);
00489 #endif
00490 } else if (backend == BRASS) {
00491 #ifdef XAPIAN_HAS_BRASS_BACKEND
00492 BrassVersion(destdir).create();
00493 #else
00494
00495 exit(1);
00496 #endif
00497 } else {
00498 #ifdef XAPIAN_HAS_FLINT_BACKEND
00499 FlintVersion(destdir).create();
00500 #else
00501
00502 exit(1);
00503 #endif
00504 }
00505
00506 if (compact_to_stub) {
00507 string new_stub_file = destdir;
00508 new_stub_file += "/new_stub.tmp";
00509 {
00510 ofstream new_stub(new_stub_file.c_str());
00511 #ifndef __WIN32__
00512 size_t slash = destdir.find_last_of('/');
00513 #else
00514 size_t slash = destdir.find_last_of("/\\");
00515 #endif
00516 new_stub << "auto " << destdir.substr(slash + 1) << '\n';
00517 }
00518 #ifndef __WIN32__
00519 if (rename(new_stub_file.c_str(), stub_file.c_str()) < 0) {
00520 #else
00521 if (msvc_posix_rename(new_stub_file.c_str(), stub_file.c_str()) < 0) {
00522 #endif
00523
00524 string msg = "Cannot rename '";
00525 msg += new_stub_file;
00526 msg += "' to '";
00527 msg += stub_file;
00528 msg += '\'';
00529 throw Xapian::DatabaseError(msg, errno);
00530 }
00531 }
00532 }
00533
00534 }
00535