xapian-core  1.4.26
backendmanager_multi.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007,2008,2009,2011,2012,2013,2015,2017,2018,2019,2020 Olly Betts
5  * Copyright (C) 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "backendmanager_multi.h"
25 
26 #include "errno_to_string.h"
27 #include "filetests.h"
28 #include "index_utils.h"
29 #include "str.h"
30 
31 #include "safeunistd.h"
32 
33 #include <cerrno>
34 #include <cstdio> // For rename().
35 #include <cstring>
36 
37 #ifdef __WIN32__
38 # include <stdlib.h>
39 # include <winerror.h>
40 #endif
41 
42 using namespace std;
43 
44 static std::string
45 build_dbtype(const vector<BackendManager*>& sub_managers)
46 {
47  string dbtype = "multi";
48  if (sub_managers.size() == 2 &&
49  sub_managers[0]->get_dbtype() == sub_managers[1]->get_dbtype()) {
50  dbtype += "_" + sub_managers[0]->get_dbtype();
51  } else {
52  for (auto sub_manager : sub_managers) {
53  dbtype += "_" + sub_manager->get_dbtype();
54  }
55  }
56  return dbtype;
57 }
58 
59 BackendManagerMulti::BackendManagerMulti(const std::string& datadir_,
60  const vector<BackendManager*>& sub_managers_)
61  : BackendManager(datadir_, build_dbtype(sub_managers_)),
62  sub_managers(sub_managers_)
63 {
64  cachedir = ".multi";
65  if (sub_managers.size() == 2 &&
66  sub_managers[0]->get_dbtype() == sub_managers[1]->get_dbtype()) {
67  cachedir += sub_managers[0]->get_dbtype();
68  } else {
69  for (auto sub_manager : sub_managers) {
70  cachedir += sub_manager->get_dbtype();
71  }
72  }
73  // Ensure the directory we store cached test databases in exists.
75 }
76 
77 #define NUMBER_OF_SUB_DBS 2
78 
79 string
81  const vector<string>& files)
82 {
83  string dbname;
84  if (!name.empty()) {
85  dbname = name;
86  } else {
87  dbname = "db";
88  for (const string& file : files) {
89  dbname += "__";
90  dbname += file;
91  }
92  }
93 
94  string db_path = cachedir;
95  db_path += '/';
96  db_path += dbname;
97 
98  if (!name.empty()) {
99 #ifdef __WIN32__
100 retry_unlink:
101 #endif
102  if (unlink(db_path.c_str()) < 0 && errno != ENOENT) {
103 #ifdef __WIN32__
104  if (errno == EACCES && _doserrno == ERROR_SHARING_VIOLATION) {
105  /* This happens when running multiremoteprog tests under
106  * Wine with a cross-build from Linux to mingw64 x86-64.
107  *
108  * FIXME: Work out what is going on...
109  */
110  sleep(1);
111  goto retry_unlink;
112  }
113 #endif
114  string msg = "Couldn't unlink file '";
115  msg += db_path;
116  msg += "' (";
117  errno_to_string(errno, msg);
118  msg += ')';
119  throw msg;
120  }
121  } else {
122  // Use cached database if there is one.
123  if (file_exists(db_path)) return db_path;
124  }
125 
126  string tmpfile = db_path + ".tmp";
127  ofstream out(tmpfile.c_str());
128  if (!out.is_open()) {
129  string msg = "Couldn't create file '";
130  msg += tmpfile;
131  msg += "' (";
132  errno_to_string(errno, msg);
133  msg += ')';
134  throw msg;
135  }
136 
137  // Open NUMBER_OF_SUB_DBS databases and index files to them alternately so
138  // a multi-db combining them contains the documents in the expected order.
140 
141  string dbbase = db_path;
142  dbbase += "___";
143  size_t dbbase_len = dbbase.size();
144 
145  for (size_t n = 0; n < NUMBER_OF_SUB_DBS; ++n) {
146  const string& subtype = sub_managers[n]->get_dbtype();
147  int flags = Xapian::DB_CREATE_OR_OVERWRITE;
148  if (subtype == "glass") {
149  flags |= Xapian::DB_BACKEND_GLASS;
150  dbbase += str(n);
151  dbs.add_database(Xapian::WritableDatabase(dbbase, flags));
152  out << subtype << ' ' << dbname << "___" << n << '\n';
153  } else if (subtype == "chert") {
154  flags |= Xapian::DB_BACKEND_CHERT;
155  dbbase += str(n);
156  dbs.add_database(Xapian::WritableDatabase(dbbase, flags));
157  out << subtype << ' ' << dbname << "___" << n << '\n';
158  } else if (subtype == "remoteprog_glass") {
159  flags |= Xapian::DB_BACKEND_GLASS;
160  dbbase += str(n);
161  Xapian::WritableDatabase remote_db(dbbase, flags);
162  remote_db.close();
163  string args = sub_managers[n]->get_writable_database_args(dbbase,
164  300000);
165 
166  dbs.add_database(
168 
169  out << "remote :" << BackendManager::get_xapian_progsrv_command()
170  << " " << args << '\n';
171  } else {
172  string msg = "Unknown multidb subtype: ";
173  msg += subtype;
174  throw msg;
175  }
176  dbbase.resize(dbbase_len);
177  }
178 
179  out.close();
180 
181  FileIndexer(get_datadir(), files).index_to(dbs);
182  dbs.close();
183 
184 #ifdef __WIN32__
185 retry_rename:
186 #endif
187  if (rename(tmpfile.c_str(), db_path.c_str()) < 0) {
188 #ifdef __WIN32__
189  if (errno == EACCES) {
190  if (_doserrno == ERROR_SHARING_VIOLATION) {
191  // Sometimes we hit this failure case. It happens with various
192  // testcases, and with both mingw and MSVC. We've seen it on
193  // both appveyor and GHA.
194  //
195  // Debugging shows the destination file doesn't exist (and it
196  // shouldn't). The _doserrno code is ERROR_SHARING_VIOLATION
197  // which suggests that tmpfile is still open, but a sleep+retry
198  // makes it work. Perhaps some AV is kicking in and opening
199  // newly created files to inspect them or something?
200  //
201  // FIXME: It would be good to get to the bottom of this!
202  sleep(1);
203  goto retry_rename;
204  }
205  }
206 #endif
207  string msg = "rename failed (";
208  errno_to_string(errno, msg);
209  msg += ')';
210  throw msg;
211  }
212 
213  last_wdb_path = db_path;
214  return db_path;
215 }
216 
217 string
218 BackendManagerMulti::do_get_database_path(const vector<string> & files)
219 {
220  return createdb_multi(string(), files);
221 }
222 
224 BackendManagerMulti::get_writable_database(const string& name, const string& file)
225 {
226  vector<string> files;
227  if (!file.empty()) files.push_back(file);
228  return Xapian::WritableDatabase(createdb_multi(name, files));
229 }
230 
231 string
233 {
234  return cachedir + "/" + name;
235 }
236 
238 BackendManagerMulti::get_remote_database(const std::vector<std::string>& files,
239  unsigned int timeout)
240 {
241  Xapian::Database db;
242  size_t remotes = 0;
243  for (auto sub_manager : sub_managers) {
244  if (sub_manager->get_dbtype().find("remote") == string::npos) {
245  db.add_database(sub_manager->get_database(files));
246  continue;
247  }
248 
249  ++remotes;
250  db.add_database(sub_manager->get_remote_database(files, timeout));
251  }
252 
253  if (remotes == 0) {
254  // It's useful to support mixed local/remote multi databases with a
255  // custom timeout so we can test timeout and keepalive handling for
256  // this case, but this method shouldn't be called on an all-local
257  // multi database.
258  const char* m = "BackendManager::get_remote_database() called for "
259  "multi with no remote shards";
261  }
262  return db;
263 }
264 
265 string
267 {
268  return cachedir + "/" + name;
269 }
270 
271 string
273 {
275 }
276 
279 {
281 }
282 
283 string
285 {
286  return last_wdb_path;
287 }
static const char * get_xapian_progsrv_command()
Get the command line required to run xapian-progsrv.
const std::string & get_datadir() const
Get the directory to store data in.
static std::string build_dbtype(const vector< BackendManager *> &sub_managers)
This class is used to access a database, or a group of databases.
Definition: database.h:68
unsigned timeout
A timeout value in milliseconds.
Definition: types.h:100
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
Convert errno value to std::string, thread-safe if possible.
void sleep(double t)
Sleep until the time represented by this object.
Definition: realtime.h:127
STL namespace.
Convert types to std::string.
Xapian::WritableDatabase get_writable_database_again()
Create a WritableDatabase object for the last opened WritableDatabase.
Utility functions for testing files.
std::string last_wdb_path
The path of the last writable database used.
BackendManager subclass for multi databases.
std::string get_writable_database_path(const std::string &name)
Get the path of Xapian::WritableDatabase instance.
std::string get_generated_database_path(const std::string &name)
Get the path to use for generating a database, if supported.
const int DB_BACKEND_GLASS
Use the glass backend.
Definition: constants.h:158
bool create_dir_if_needed(const std::string &dirname)
Create the directory dirname if needed.
std::string createdb_multi(const std::string &name, const std::vector< std::string > &files)
This class provides read/write access to a database.
Definition: database.h:789
void errno_to_string(int e, string &s)
virtual Xapian::WritableDatabase get_remote_writable_database(std::string args)
Get a remote Xapian::WritableDatabase instance with specified args.
std::string get_compaction_output_path(const std::string &name)
Get a path to compact a database to.
std::string get_writable_database_path_again()
Get the path of the last opened WritableDatabase.
string str(int value)
Convert int to std::string.
Definition: str.cc:90
std::vector< BackendManager * > sub_managers
void add_database(const Database &database)
Add an existing database (or group of databases) to those accessed by this object.
Definition: omdatabase.cc:148
size_t size() const
Return number of shards in this Database object.
Definition: database.h:93
Xapian::Database get_remote_database(const std::vector< std::string > &files, unsigned int timeout)
Get a remote database instance with the specified timeout.
Xapian::WritableDatabase get_writable_database(const std::string &name, const std::string &file)
Create a Multi Xapian::WritableDatabase object indexing a single file.
std::string do_get_database_path(const std::vector< std::string > &files)
Get the path of the Xapian::Database instance.
char name[9]
Definition: dbcheck.cc:55
void add_database(const WritableDatabase &other)
Add shards from another WritableDatabase.
Definition: database.h:895
void index_to(Xapian::WritableDatabase &db)
Definition: index_utils.cc:52
<unistd.h>, but with compat.
Definition: header.h:151
BackendManagerMulti(const BackendManagerMulti &)
Don't allow copying.
utility functions for indexing testcase data
const int DB_CREATE_OR_OVERWRITE
Create database if it doesn't already exist, or overwrite if it does.
Definition: constants.h:38
bool file_exists(const char *path)
Test if a file exists.
Definition: filetests.h:39
#define NUMBER_OF_SUB_DBS
virtual void close()
Close the database.
Definition: omdatabase.cc:138
const int DB_BACKEND_CHERT
Use the chert backend.
Definition: constants.h:170