xapian-core  2.0.0
xapian-compact.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2003-2026 Olly Betts
5  * Copyright (C) 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include <xapian.h>
25 
26 #include <cstdlib>
27 #include <cstring>
28 #include <iostream>
29 
30 #include "gnu_getopt.h"
31 
33 
34 using namespace std;
35 
// Program name and one-line description.  These remain preprocessor macros
// because they are pasted into adjacent string literals at their points of use.
#define PROG_NAME "xapian-compact"
#define PROG_DESC "Compact a database, or merge and compact several"

// Codes returned by getopt for long options which have no single-letter
// equivalent.
enum {
    OPT_HELP = 1,
    OPT_VERSION = 2,
    OPT_NO_RENUMBER = 3
};
42 
43 static void show_usage() {
44  cout << "Usage: " PROG_NAME " [OPTIONS] SOURCE_DATABASE... DESTINATION_DATABASE\n\n"
45 "Options:\n"
46 " -b, --blocksize=B Set the blocksize in bytes (e.g. 4096) or K (e.g. 4K)\n"
47 " (must be between 2K and 64K and a power of 2, default 8K)\n"
48 " -B, --backend=B Set the output backend. Supported values are 'glass'\n"
49 " and 'honey'. By default input's backend is used. At\n"
50 " present only glass to honey conversion is implemented -\n"
51 " otherwise the backend must be the same.\n"
52 " -n, --no-full Disable full compaction\n"
53 " -F, --fuller No effect for glass since Xapian 1.4.31\n"
54 " -m, --multipass If merging more than 3 databases, merge the postlists in\n"
55 " multiple passes (which is generally faster but requires\n"
56 " more disk space for temporary files)\n"
57 " --no-renumber Preserve the numbering of document ids (useful if you have\n"
58 " external references to them, or have set them to match\n"
59 " unique ids from an external source). Currently this\n"
60 " option is only supported when merging databases if they\n"
61 " have disjoint ranges of used document ids\n"
62 " -s, --single-file Produce a single file database\n"
63 " --help display this help and exit\n"
64 " --version output version information and exit\n";
65 }
66 
68  bool quiet;
69 
70  public:
71  MyCompactor() : quiet(false) { }
72 
73  void set_quiet(bool quiet_) { quiet = quiet_; }
74 
75  void set_status(const string& table, const string& status) override;
76 
77  string
78  resolve_duplicate_metadata(const string & key,
79  size_t n,
80  const string tags[]) override;
81 };
82 
83 void
84 MyCompactor::set_status(const string & table, const string & status)
85 {
86  if (quiet)
87  return;
88  if (!status.empty())
89  cout << '\r' << table << ": " << status << '\n';
90  else
91  cout << table << " ..." << flush;
92 }
93 
94 string
96  size_t n,
97  const string tags[])
98 {
99  (void)key;
100  while (--n) {
101  if (tags[0] != tags[n]) {
102  cerr << "Warning: duplicate user metadata key with different tag "
103  "value - picking value from first source database with a "
104  "non-empty value\n";
105  break;
106  }
107  }
108  return tags[0];
109 }
110 
111 int
112 main(int argc, char **argv)
113 {
114  const char * opts = "b:B:nFmqs";
115  static const struct option long_opts[] = {
116  {"fuller", no_argument, 0, 'F'},
117  {"no-full", no_argument, 0, 'n'},
118  {"multipass", no_argument, 0, 'm'},
119  {"blocksize", required_argument, 0, 'b'},
120  {"backend", required_argument, 0, 'B'},
121  {"no-renumber", no_argument, 0, OPT_NO_RENUMBER},
122  {"single-file", no_argument, 0, 's'},
123  {"quiet", no_argument, 0, 'q'},
124  {"help", no_argument, 0, OPT_HELP},
125  {"version", no_argument, 0, OPT_VERSION},
126  {NULL, 0, 0, 0}
127  };
128 
129  MyCompactor compactor;
131  unsigned backend = 0;
132  unsigned flags = 0;
133  unsigned block_size = 0;
134 
135  int c;
136  while ((c = gnu_getopt_long(argc, argv, opts, long_opts, 0)) != -1) {
137  switch (c) {
138  case 'b': {
139  char *p;
140  unsigned long value = strtoul(optarg, &p, 10);
141  if (value <= GLASS_MAX_BLOCKSIZE / 1024 &&
142  (*p == 'K' || *p == 'k')) {
143  ++p;
144  value *= 1024;
145  }
146  if (*p ||
147  value < GLASS_MIN_BLOCKSIZE ||
148  value > GLASS_MAX_BLOCKSIZE ||
149  (value & (value - 1)) != 0) {
150  cerr << PROG_NAME": Bad value '" << optarg << "' passed "
151  "for blocksize, must be a power of 2 between "
152  << (GLASS_MIN_BLOCKSIZE / 1024) << "K and "
153  << (GLASS_MAX_BLOCKSIZE / 1024) << "K\n";
154  exit(1);
155  }
156  block_size = unsigned(value);
157  break;
158  }
159  case 'B':
160  if (strcmp(optarg, "honey") == 0) {
161  backend = Xapian::DB_BACKEND_HONEY;
162  } else if (strcmp(optarg, "glass") == 0) {
163  backend = Xapian::DB_BACKEND_GLASS;
164  } else {
165  cerr << PROG_NAME": Bad value '" << optarg
166  << "' passed for backend - must be 'glass' or "
167  "'honey'\n";
168  exit(1);
169  }
170  break;
171  case 'n':
172  level = compactor.STANDARD;
173  break;
174  case 'F':
175  level = compactor.FULLER;
176  break;
177  case 'm':
179  break;
180  case OPT_NO_RENUMBER:
182  break;
183  case 's':
185  break;
186  case 'q':
187  compactor.set_quiet(true);
188  break;
189  case OPT_HELP:
190  cout << PROG_NAME " - " PROG_DESC "\n\n";
191  show_usage();
192  exit(0);
193  case OPT_VERSION:
194  cout << PROG_NAME " - " PACKAGE_STRING "\n";
195  exit(0);
196  default:
197  show_usage();
198  exit(1);
199  }
200  }
201 
202  if (argc - optind < 2) {
203  show_usage();
204  exit(1);
205  }
206 
207  // Path to the database to create.
208  string destdir = argv[argc - 1];
209 
210  flags |= backend | level;
211 
212  try {
213  Xapian::Database src;
214  for (int i = optind; i < argc - 1; ++i) {
215  src.add_database(Xapian::Database(argv[i]));
216  }
217  src.compact(destdir, flags, block_size, compactor);
218  } catch (const Xapian::Error &error) {
219  cerr << argv[0] << ": " << error.get_description() << '\n';
220  exit(1);
221  } catch (const char * msg) {
222  cerr << argv[0] << ": " << msg << '\n';
223  exit(1);
224  }
225 }
void set_status(const string &table, const string &status) override
Update progress.
void set_quiet(bool quiet_)
string resolve_duplicate_metadata(const string &key, size_t n, const string tags[]) override
Resolve multiple user metadata entries with the same key.
Compact a database, or merge and compact several.
Definition: compactor.h:39
compaction_level
Compaction level.
Definition: compactor.h:42
@ FULL
Split items whenever it saves space (the default).
Definition: compactor.h:46
@ FULLER
Allow oversize items to save more space (not recommended if you ever plan to update the compacted database).
Definition: compactor.h:52
@ STANDARD
Don't split items unnecessarily.
Definition: compactor.h:44
An indexed database of documents.
Definition: database.h:75
void add_database(const Database &other)
Add shards from another Database.
Definition: database.h:109
void compact(std::string_view output, unsigned flags=0, int block_size=0)
Produce a compact version of this database.
Definition: database.h:738
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:41
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
#define PACKAGE_STRING
Definition: config.h:361
PositionList * p
int optind
Definition: getopt.cc:93
char * optarg
Definition: getopt.cc:78
Definitions, types, etc for use inside glass.
#define GLASS_MIN_BLOCKSIZE
Minimum B-tree block size.
Definition: glass_defs.h:33
#define GLASS_MAX_BLOCKSIZE
Maximum B-tree block size.
Definition: glass_defs.h:36
Wrappers to allow GNU getopt to be used cleanly from C++ code.
#define no_argument
Definition: gnu_getopt.h:78
#define required_argument
Definition: gnu_getopt.h:79
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
Definition: gnu_getopt.h:96
#define false
Definition: header.h:9
const int DBCOMPACT_MULTIPASS
If merging more than 3 databases, merge the postlists in multiple passes.
Definition: constants.h:257
const int DB_BACKEND_HONEY
Use the honey backend.
Definition: constants.h:197
const int DB_BACKEND_GLASS
Use the glass backend.
Definition: constants.h:157
const int DBCOMPACT_NO_RENUMBER
Use the same document ids in the output as in the input(s).
Definition: constants.h:251
const int DBCOMPACT_SINGLE_FILE
Produce a single-file database.
Definition: constants.h:263
static void show_usage()
#define OPT_VERSION
int main(int argc, char **argv)
#define PROG_NAME
#define OPT_NO_RENUMBER
#define PROG_DESC
#define OPT_HELP
static bool tags
static const char * opts
static const struct option long_opts[]
Public interfaces for the Xapian library.