xapian-core  1.4.26
xapian-compact.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2003,2004,2005,2006,2007,2008,2009,2010,2015 Olly Betts
5  * Copyright (C) 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include <xapian.h>
26 
27 #include <cstdlib>
28 #include <iostream>
29 
30 #include "gnu_getopt.h"
31 
32 using namespace std;
33 
// Program name: used in the usage text, the --help and --version banners and
// the blocksize error message.
34 #define PROG_NAME "xapian-compact"
// One-line description printed after the program name by --help.
35 #define PROG_DESC "Compact a database, or merge and compact several"
36 
// Codes returned by gnu_getopt_long() for long options which have no
// single-letter form.  The values 1-3 cannot clash with any of the short
// option letters handled in main().
37 #define OPT_HELP 1
38 #define OPT_VERSION 2
39 #define OPT_NO_RENUMBER 3
40 
41 static void show_usage() {
42  cout << "Usage: " PROG_NAME " [OPTIONS] SOURCE_DATABASE... DESTINATION_DATABASE\n\n"
43 "Options:\n"
44 " -b, --blocksize=B Set the blocksize in bytes (e.g. 4096) or K (e.g. 4K)\n"
45 " (must be between 2K and 64K and a power of 2, default 8K)\n"
46 " -n, --no-full Disable full compaction\n"
47 " -F, --fuller Enable fuller compaction (not recommended if you plan to\n"
48 " update the compacted database)\n"
49 " -m, --multipass If merging more than 3 databases, merge the postlists in\n"
50 " multiple passes (which is generally faster but requires\n"
51 " more disk space for temporary files)\n"
52 " --no-renumber Preserve the numbering of document ids (useful if you have\n"
53 " external references to them, or have set them to match\n"
54 " unique ids from an external source). Currently this\n"
55 " option is only supported when merging databases if they\n"
56 " have disjoint ranges of used document ids\n"
57 " -s, --single-file Produce a single file database (not supported for chert)\n"
58 " --help display this help and exit\n"
59 " --version output version information and exit\n";
60 }
61 
63  bool quiet;
64 
65  public:
66  MyCompactor() : quiet(false) { }
67 
68  void set_quiet(bool quiet_) { quiet = quiet_; }
69 
70  void set_status(const string& table, const string& status) override;
71 
72  string
73  resolve_duplicate_metadata(const string & key,
74  size_t n,
75  const string tags[]) override;
76 };
77 
78 void
79 MyCompactor::set_status(const string & table, const string & status)
80 {
81  if (quiet)
82  return;
83  if (!status.empty())
84  cout << '\r' << table << ": " << status << '\n';
85  else
86  cout << table << " ..." << flush;
87 }
88 
89 string
91  size_t n,
92  const string tags[])
93 {
94  (void)key;
95  while (--n) {
96  if (tags[0] != tags[n]) {
97  cerr << "Warning: duplicate user metadata key with different tag "
98  "value - picking value from first source database with a "
99  "non-empty value\n";
100  break;
101  }
102  }
103  return tags[0];
104 }
105 
106 int
107 main(int argc, char **argv)
108 {
109  const char * opts = "b:nFmqs";
110  static const struct option long_opts[] = {
111  {"fuller", no_argument, 0, 'F'},
112  {"no-full", no_argument, 0, 'n'},
113  {"multipass", no_argument, 0, 'm'},
114  {"blocksize", required_argument, 0, 'b'},
115  {"no-renumber", no_argument, 0, OPT_NO_RENUMBER},
116  {"single-file", no_argument, 0, 's'},
117  {"quiet", no_argument, 0, 'q'},
118  {"help", no_argument, 0, OPT_HELP},
119  {"version", no_argument, 0, OPT_VERSION},
120  {NULL, 0, 0, 0}
121  };
122 
123  MyCompactor compactor;
125  unsigned flags = 0;
126  size_t block_size = 0;
127 
128  int c;
129  while ((c = gnu_getopt_long(argc, argv, opts, long_opts, 0)) != -1) {
130  switch (c) {
131  case 'b': {
132  char *p;
133  block_size = strtoul(optarg, &p, 10);
134  if (block_size <= 64 && (*p == 'K' || *p == 'k')) {
135  ++p;
136  block_size *= 1024;
137  }
138  if (*p || block_size < 2048 || block_size > 65536 ||
139  (block_size & (block_size - 1)) != 0) {
140  cerr << PROG_NAME": Bad value '" << optarg
141  << "' passed for blocksize, must be a power of 2 "
142  "between 2K and 64K\n";
143  exit(1);
144  }
145  break;
146  }
147  case 'n':
148  level = compactor.STANDARD;
149  break;
150  case 'F':
151  level = compactor.FULLER;
152  break;
153  case 'm':
155  break;
156  case OPT_NO_RENUMBER:
158  break;
159  case 's':
161  break;
162  case 'q':
163  compactor.set_quiet(true);
164  break;
165  case OPT_HELP:
166  cout << PROG_NAME " - " PROG_DESC "\n\n";
167  show_usage();
168  exit(0);
169  case OPT_VERSION:
170  cout << PROG_NAME " - " PACKAGE_STRING "\n";
171  exit(0);
172  default:
173  show_usage();
174  exit(1);
175  }
176  }
177 
178  if (argc - optind < 2) {
179  show_usage();
180  exit(1);
181  }
182 
183  // Path to the database to create.
184  string destdir = argv[argc - 1];
185 
186  try {
187  Xapian::Database src;
188  for (int i = optind; i < argc - 1; ++i) {
189  src.add_database(Xapian::Database(argv[i]));
190  }
191  src.compact(destdir, level | flags, block_size, compactor);
192  } catch (const Xapian::Error &error) {
193  cerr << argv[0] << ": " << error.get_description() << '\n';
194  exit(1);
195  } catch (const char * msg) {
196  cerr << argv[0] << ": " << msg << '\n';
197  exit(1);
198  }
199 }
Wrappers to allow GNU getopt to be used cleanly from C++ code.
int optind
Definition: getopt.cc:94
This class is used to access a database, or a group of databases.
Definition: database.h:68
int gnu_getopt_long(int argc_, char *const *argv_, const char *shortopts_, const struct option *longopts_, int *optind_)
Definition: gnu_getopt.h:97
Allow oversize items to save more space (not recommended if you ever plan to update the compacted database).
Definition: compactor.h:55
void set_quiet(bool quiet_)
int main(int argc, char **argv)
static const char * opts
Don't split items unnecessarily.
Definition: compactor.h:50
static bool tags
STL namespace.
#define false
Definition: header.h:9
const int DBCOMPACT_NO_RENUMBER
Use the same document ids in the output as in the input(s).
Definition: constants.h:256
#define no_argument
Definition: gnu_getopt.h:79
#define OPT_VERSION
Public interfaces for the Xapian library.
#define OPT_HELP
Compact a database, or merge and compact several.
Definition: compactor.h:42
char * optarg
Definition: getopt.cc:79
string resolve_duplicate_metadata(const string &key, size_t n, const string tags[]) override
Resolve multiple user metadata entries with the same key.
#define OPT_NO_RENUMBER
#define required_argument
Definition: gnu_getopt.h:80
Split items whenever it saves space (the default).
Definition: compactor.h:52
void compact(const std::string &output, unsigned flags=0, int block_size=0)
Produce a compact version of this database.
Definition: database.h:627
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
Definition: omdatabase.cc:148
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
static void show_usage()
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:43
compaction_level
Compaction level.
Definition: compactor.h:48
#define PACKAGE_STRING
Definition: config.h:337
const int DBCOMPACT_MULTIPASS
If merging more than 3 databases, merge the postlists in multiple passes.
Definition: constants.h:262
#define PROG_DESC
const int DBCOMPACT_SINGLE_FILE
Produce a single-file database.
Definition: constants.h:268
#define PROG_NAME
void set_status(const string &table, const string &status) override
Update progress.