xapian-core  2.0.0
backendmanager_multi.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2011,2012,2013,2015,2017,2018,2019,2020 Olly Betts
5  * Copyright (C) 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include "backendmanager_multi.h"
25 
26 #include "errno_to_string.h"
27 #include "filetests.h"
28 #include "index_utils.h"
29 #include "str.h"
30 
31 #include "safeunistd.h"
32 
33 #include <cerrno>
34 #include <cstdio> // For rename().
35 #include <cstring>
36 
37 #ifdef __WIN32__
38 # include <stdlib.h>
39 # include <winerror.h>
40 #endif
41 
42 using namespace std;
43 
44 static std::string
45 build_dbtype(const vector<BackendManager*>& sub_managers)
46 {
47  string dbtype = "multi";
48  if (sub_managers.size() == 2 &&
49  sub_managers[0]->get_dbtype() == sub_managers[1]->get_dbtype()) {
50  dbtype += "_" + sub_managers[0]->get_dbtype();
51  } else {
52  for (auto sub_manager : sub_managers) {
53  dbtype += "_" + sub_manager->get_dbtype();
54  }
55  }
56  return dbtype;
57 }
58 
// Construct a manager for a multi database made up of the given sub-managers.
//
// datadir_ is forwarded to the BackendManager base class together with the
// type string produced by build_dbtype(sub_managers_).
//
// cachedir is set to ".multi" followed by the dbtype of each sub-manager,
// except that exactly two sub-managers with the same dbtype contribute that
// dbtype only once (the same collapsing rule build_dbtype() applies, but
// without the "_" separators).
59 BackendManagerMulti::BackendManagerMulti(const std::string& datadir_,
60  const vector<BackendManager*>& sub_managers_)
61  : BackendManager(datadir_, build_dbtype(sub_managers_)),
62  sub_managers(sub_managers_)
63 {
64  cachedir = ".multi";
65  if (sub_managers.size() == 2 &&
66  sub_managers[0]->get_dbtype() == sub_managers[1]->get_dbtype()) {
67  cachedir += sub_managers[0]->get_dbtype();
68  } else {
69  for (auto sub_manager : sub_managers) {
70  cachedir += sub_manager->get_dbtype();
71  }
72  }
73  // Ensure the directory we store cached test databases in exists.
// NOTE(review): the statement this comment describes is missing from this
// listing (the embedded numbering skips a line here); presumably a call to
// create_dir_if_needed(cachedir) - confirm against the original source.
75 }
76 
// Number of shard databases that the database-building code below creates
// (it loops over sub_managers[0] .. sub_managers[NUMBER_OF_SUB_DBS - 1]).
77 #define NUMBER_OF_SUB_DBS 2
78 
// Build (or reuse) a multi database under cachedir and return its path.
//
// NOTE(review): the line carrying the function name and first parameter is
// missing from this listing (the embedded numbering skips it).  Since
// do_get_database_path() calls createdb_multi(string(), files) and the body
// reads a string `name`, this is presumably
// BackendManagerMulti::createdb_multi(const string& name, ...) - confirm.
//
// name:  if non-empty it is used as the database name, and any existing
//        file at the resulting path is unlinked so the database is rebuilt.
//        If empty, the name is "db" followed by "__<file>" for each entry of
//        files, and an already existing file at that path is returned as-is.
// files: data files handed to FileIndexer for indexing into the shards.
//
// Returns cachedir + '/' + <database name>; the file at that path is a text
// file with one line per shard.
//
// Throws a std::string message if the old file can't be unlinked, the
// temporary file can't be created, a sub-manager reports a dbtype other than
// "glass" or "remoteprog_glass", or the final rename fails.
79 string
81  const vector<string>& files)
82 {
83  string dbname;
84  if (!name.empty()) {
85  dbname = name;
86  } else {
87  dbname = "db";
88  for (const string& file : files) {
89  dbname += "__";
90  dbname += file;
91  }
92  }
93 
94  string db_path = cachedir;
95  db_path += '/';
96  db_path += dbname;
97 
98  if (!name.empty()) {
// An explicitly named database is always rebuilt: remove any previous file
// (a missing file, ENOENT, is not an error).
99 #ifdef __WIN32__
100 retry_unlink:
101 #endif
102  if (unlink(db_path.c_str()) < 0 && errno != ENOENT) {
103 #ifdef __WIN32__
104  if (errno == EACCES && _doserrno == ERROR_SHARING_VIOLATION) {
105  /* This happens when running multiremoteprog tests under
106  * Wine with a cross-build from Linux to mingw64 x86-64.
107  *
108  * FIXME: Work out what is going on...
109  */
110  sleep(1);
111  goto retry_unlink;
112  }
113 #endif
114  string msg = "Couldn't unlink file '";
115  msg += db_path;
116  msg += "' (";
117  errno_to_string(errno, msg);
118  msg += ')';
119  throw msg;
120  }
121  } else {
122  // Use cached database if there is one.
123  if (file_exists(db_path)) return db_path;
124  }
125 
// The shard list is written to "<db_path>.tmp" and only renamed to db_path
// once the shards have been created and indexed (see the rename below).
126  string tmpfile = db_path + ".tmp";
127  ofstream out(tmpfile.c_str());
128  if (!out.is_open()) {
129  string msg = "Couldn't create file '";
130  msg += tmpfile;
131  msg += "' (";
132  errno_to_string(errno, msg);
133  msg += ')';
134  throw msg;
135  }
136 
137  // Open NUMBER_OF_SUB_DBS databases and index files to them alternately so
138  // a multi-db combining them contains the documents in the expected order.
// NOTE(review): `dbs` is used below (add_database(), index_to(), close())
// but its declaration is missing from this listing (the embedded numbering
// skips a line here); presumably `Xapian::WritableDatabase dbs;` - confirm.
140 
// Each shard lives at "<db_path>___<n>"; dbbase holds the common prefix and
// dbbase_len lets it be truncated back after each shard number is appended.
141  string dbbase = db_path;
142  dbbase += "___";
143  size_t dbbase_len = dbbase.size();
144 
// NOTE(review): sub_managers[n] is indexed for n < NUMBER_OF_SUB_DBS without
// checking sub_managers.size(); presumably callers always supply at least
// that many sub-managers - confirm.
145  for (size_t n = 0; n < NUMBER_OF_SUB_DBS; ++n) {
146  const string& subtype = sub_managers[n]->get_dbtype();
147  int flags = Xapian::DB_CREATE_OR_OVERWRITE;
148  if (subtype == "glass") {
// Local glass shard: create it directly and record "<subtype> <dbname>___<n>".
149  flags |= Xapian::DB_BACKEND_GLASS;
150  dbbase += str(n);
151  dbs.add_database(Xapian::WritableDatabase(dbbase, flags));
152  out << subtype << ' ' << dbname << "___" << n << '\n';
153  } else if (subtype == "remoteprog_glass") {
// Remote shard: create the glass database on disk and close it again, then
// ask the sub-manager for the arguments describing it.
154  flags |= Xapian::DB_BACKEND_GLASS;
155  dbbase += str(n);
156  Xapian::WritableDatabase remote_db(dbbase, flags);
157  remote_db.close();
158  string args = sub_managers[n]->get_writable_database_args(dbbase,
159  300000);
160 
161  dbs.add_database(
// NOTE(review): the argument of this add_database() call is missing from this
// listing (the embedded numbering skips a line here); presumably
// `sub_managers[n]->get_remote_writable_database(args));` - confirm.
163 
164  out << "remote :" << BackendManager::get_xapian_progsrv_command()
165  << " " << args << '\n';
166  } else {
167  string msg = "Unknown multidb subtype: ";
168  msg += subtype;
169  throw msg;
170  }
// Drop the shard number appended above so the next iteration starts from the
// common "<db_path>___" prefix again.
171  dbbase.resize(dbbase_len);
172  }
173 
174  out.close();
175 
176  FileIndexer(get_datadir(), files).index_to(dbs);
177  dbs.close();
178 
// Move the finished shard list into place under its final name.
179 #ifdef __WIN32__
180 retry_rename:
181 #endif
182  if (rename(tmpfile.c_str(), db_path.c_str()) < 0) {
183 #ifdef __WIN32__
184  if (errno == EACCES) {
185  if (_doserrno == ERROR_SHARING_VIOLATION) {
186  // Sometimes we hit this failure case. It happens with various
187  // testcases, and with both mingw and MSVC. We've seen it on
188  // both appveyor and GHA.
189  //
190  // Debugging shows the destination file doesn't exist (and it
191  // shouldn't). The _doserrno code is ERROR_SHARING_VIOLATION
192  // which suggests that tmpfile is still open, but a sleep+retry
193  // makes it work. Perhaps some AV is kicking in and opening
194  // newly created files to inspect them or something?
195  //
196  // FIXME: It would be good to get to the bottom of this!
197  sleep(1);
198  goto retry_rename;
199  }
200  }
201 #endif
202  string msg = "rename failed (";
203  errno_to_string(errno, msg);
204  msg += ')';
205  throw msg;
206  }
207 
// Remember the path of the database just built.
208  last_wdb_path = db_path;
209  return db_path;
210 }
211 
212 string
213 BackendManagerMulti::do_get_database_path(const vector<string> & files)
214 {
215  return createdb_multi(string(), files);
216 }
217 
// Get a writable multi database called `name`, indexing `file` if it is
// non-empty (an empty `file` leaves the list of files to index empty).
//
// NOTE(review): the return-type line and the final statement of this function
// are missing from this listing (the embedded numbering skips them).
// Presumably it returns a Xapian::WritableDatabase opened on the path built
// by createdb_multi(name, files) - confirm against the original source.
219 BackendManagerMulti::get_writable_database(const string& name, const string& file)
220 {
221  vector<string> files;
222  if (!file.empty()) files.push_back(file);
224 }
225 
// Returns the path "<cachedir>/<name>".
//
// NOTE(review): the line carrying this function's name and parameter list is
// missing from this listing.  Presumably this is
// BackendManagerMulti::get_writable_database_path(const std::string& name)
// - confirm against the original source.
226 string
228 {
229  return cachedir + "/" + name;
230 }
231 
// Build a Xapian::Database combining one database per sub-manager, opening
// the remote ones with the given timeout.
//
// A sub-manager counts as remote when its dbtype contains "remote"; those are
// opened via get_remote_database(files, timeout, port_ptr), the others via
// get_database(files).  port_ptr is passed unchanged to every remote
// sub-manager.
//
// NOTE(review): the return-type line is missing from this listing; the
// function returns the local `db`, a Xapian::Database.
233 BackendManagerMulti::get_remote_database(const std::vector<std::string>& files,
234  unsigned int timeout,
235  int* port_ptr)
236 {
237  Xapian::Database db;
// Counts the remote shards so the all-local case can be detected below.
238  size_t remotes = 0;
239  for (auto sub_manager : sub_managers) {
240  if (sub_manager->get_dbtype().find("remote") == string::npos) {
241  db.add_database(sub_manager->get_database(files));
242  continue;
243  }
244 
245  ++remotes;
246  // If there are multiple remote shards, we'll set *port_ptr to the port
247  // used by the last one opened.
248  db.add_database(sub_manager->get_remote_database(files, timeout,
249  port_ptr));
250  }
251 
252  if (remotes == 0) {
253  // It's useful to support mixed local/remote multi databases with a
254  // custom timeout so we can test timeout and keepalive handling for
255  // this case, but this method shouldn't be called on an all-local
256  // multi database.
257  const char* m = "BackendManager::get_remote_database() called for "
258  "multi with no remote shards";
// NOTE(review): the statement that uses `m` is missing from this listing (the
// embedded numbering skips a line here); presumably
// `throw Xapian::InvalidOperationError(m);` - confirm.
260  }
261  return db;
262 }
263 
// Returns the path "<cachedir>/<name>".
//
// NOTE(review): the line carrying this function's name and parameter list is
// missing from this listing.  Presumably this is
// BackendManagerMulti::get_compaction_output_path(const std::string& name)
// - confirm against the original source.
264 string
266 {
267  return cachedir + "/" + name;
268 }
269 
// NOTE(review): both the signature line and the body statement of this
// string-returning function are missing from this listing (the embedded
// numbering skips them).  Presumably this is
// BackendManagerMulti::get_generated_database_path(const std::string& name)
// - confirm against the original source before relying on it.
270 string
272 {
274 }
275 
// NOTE(review): only the braces of this definition survive in this listing;
// the return type, signature and body lines are all missing.  Presumably this
// is BackendManagerMulti::get_writable_database_again() - confirm against the
// original source.
278 {
280 }
281 
// Returns last_wdb_path, i.e. the path stored when a multi database was last
// built.
//
// NOTE(review): the line carrying this function's name is missing from this
// listing.  Presumably this is
// BackendManagerMulti::get_writable_database_path_again() - confirm against
// the original source.
282 string
284 {
285  return last_wdb_path;
286 }
#define NUMBER_OF_SUB_DBS
static std::string build_dbtype(const vector< BackendManager * > &sub_managers)
BackendManager subclass for multi databases.
char name[9]
Definition: dbcheck.cc:57
Xapian::WritableDatabase get_writable_database_again()
Create a WritableDatabase object for the last opened WritableDatabase.
Xapian::WritableDatabase get_writable_database(const std::string &name, const std::string &file)
Create a Multi Xapian::WritableDatabase object indexing a single file.
std::string last_wdb_path
The path of the last writable database used.
std::string get_writable_database_path(const std::string &name)
Get the path of Xapian::WritableDatabase instance.
std::string get_compaction_output_path(const std::string &name)
Get a path to compact a database to.
std::vector< BackendManager * > sub_managers
std::string createdb_multi(const std::string &name, const std::vector< std::string > &files)
std::string do_get_database_path(const std::vector< std::string > &files)
Get the path of the Xapian::Database instance.
std::string get_generated_database_path(const std::string &name)
Get the path to use for generating a database, if supported.
std::string get_writable_database_path_again()
Get the path of the last opened WritableDatabase.
Xapian::Database get_remote_database(const std::vector< std::string > &files, unsigned int timeout, int *port_ptr)
Get a remote database instance with the specified timeout.
BackendManagerMulti(const BackendManagerMulti &)
Don't allow copying.
static const char * get_xapian_progsrv_command()
Get the command line required to run xapian-progsrv.
bool create_dir_if_needed(const std::string &dirname)
Create the directory dirname if needed.
virtual Xapian::WritableDatabase get_remote_writable_database(std::string args)
Get a remote Xapian::WritableDatabase instance with specified args.
const std::string & get_datadir() const
Get the directory to store data in.
void index_to(Xapian::WritableDatabase &db)
Definition: index_utils.cc:52
An indexed database of documents.
Definition: database.h:75
void close()
Close the database.
Definition: database.cc:99
size_t size() const
Return number of shards in this Database object.
Definition: database.cc:105
void add_database(const Database &other)
Add shards from another Database.
Definition: database.h:109
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:271
This class provides read/write access to a database.
Definition: database.h:964
void add_database(const WritableDatabase &other)
Add shards from another WritableDatabase.
Definition: database.h:990
void errno_to_string(int e, string &s)
Convert errno value to std::string, thread-safe if possible.
Utility functions for testing files.
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:40
utility functions for indexing testcase data
void sleep(double t)
Sleep until the time represented by this object.
Definition: realtime.h:127
string str(int value)
Convert int to std::string.
Definition: str.cc:91
const int DB_BACKEND_GLASS
Use the glass backend.
Definition: constants.h:157
const int DB_CREATE_OR_OVERWRITE
Create database if it doesn't already exist, or overwrite if it does.
Definition: constants.h:37
<unistd.h>, but with compat.
Convert types to std::string.
Definition: header.h:215