xapian-core  2.0.0
flint_lock.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see
18  * <https://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "flint_lock.h"
24 
25 #ifndef __WIN32__
26 #include <cerrno>
27 
28 #include "safefcntl.h"
29 #include <unistd.h>
30 #include <cstdlib>
31 #include <sys/types.h>
32 #include "safesyssocket.h"
33 #include <sys/wait.h>
34 #include <signal.h>
35 #include <cstring>
36 #endif
37 
38 #include "closefrom.h"
39 #include "errno_to_string.h"
40 #include "omassert.h"
41 
42 #ifdef __CYGWIN__
43 # include <cygwin/version.h>
44 # include <sys/cygwin.h>
45 #endif
46 
47 #ifdef FLINTLOCK_USE_FLOCK
48 # include <sys/file.h>
49 #endif
50 
51 #include "xapian/error.h"
52 
53 using namespace std;
54 
55 #ifndef F_OFD_SETLK
56 # ifdef __linux__
57 // Apparently defining _GNU_SOURCE should get us F_OFD_SETLK, etc, but that
58 // doesn't actually seem to work, so hard-code the known values.
59 # define F_OFD_GETLK 36
60 # define F_OFD_SETLK 37
61 # define F_OFD_SETLKW 38
62 # endif
63 #endif
64 
65 [[noreturn]]
66 static void
68 {
69  throw Xapian::FeatureUnavailableError("Can't test lock without trying to "
70  "take it");
71 }
72 
73 bool
75 {
76  // A database which doesn't support update can't be locked for update.
77  if (filename.empty()) return false;
78 
79 #if defined __CYGWIN__ || defined __WIN32__
80  if (hFile != INVALID_HANDLE_VALUE) return true;
81  // Doesn't seem to be possible to check if the lock is held without briefly
82  // taking the lock.
84 #elif defined FLINTLOCK_USE_FLOCK
85  if (fd != -1) return true;
86  // Doesn't seem to be possible to check if the lock is held without briefly
87  // taking the lock.
89 #else
90  if (fd != -1) return true;
91  int lockfd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0666);
92  if (lockfd < 0) {
93  // Couldn't open lockfile.
94  reason why = ((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
95  throw_databaselockerror(why, filename, "Testing lock");
96  }
97 
98  struct flock fl;
99  fl.l_type = F_WRLCK;
100  fl.l_whence = SEEK_SET;
101  fl.l_start = 0;
102  fl.l_len = 1;
103  fl.l_pid = 0;
104  while (fcntl(lockfd, F_GETLK, &fl) == -1) {
105  if (errno != EINTR) {
106  // Translate known errno values into a reason code.
107  int e = errno;
108  close(lockfd);
109  if (e == ENOSYS) {
110  // F_GETLK always failed with ENOSYS on older GNU Hurd libc
111  // versions: https://bugs.debian.org/190367
113  }
114  reason why = (e == ENOLCK ? UNSUPPORTED : UNKNOWN);
115  throw_databaselockerror(why, filename, "Testing lock");
116  }
117  }
118  close(lockfd);
119  return fl.l_type != F_UNLCK;
120 #endif
121 }
122 
124 FlintLock::lock(bool exclusive, bool wait, string & explanation) {
125  // Currently we only support exclusive locks.
126  (void)exclusive;
127  Assert(exclusive);
128 #if defined __CYGWIN__ || defined __WIN32__
129  Assert(hFile == INVALID_HANDLE_VALUE);
130 #ifdef __CYGWIN__
131  char fnm[MAX_PATH];
132 #if CYGWIN_VERSION_API_MAJOR == 0 && CYGWIN_VERSION_API_MINOR < 181
133  cygwin_conv_to_win32_path(filename.c_str(), fnm);
134 #else
135  if (cygwin_conv_path(CCP_POSIX_TO_WIN_A|CCP_RELATIVE, filename.c_str(),
136  fnm, MAX_PATH) < 0) {
137  explanation.assign("cygwin_conv_path failed: ");
138  errno_to_string(errno, explanation);
139  return UNKNOWN;
140  }
141 #endif
142 #else
143  const char *fnm = filename.c_str();
144 #endif
145 retry:
146  // FIXME: Use LockFileEx() for locking, which would allow proper blocking
147  // and also byte-range locking for when we implement MVCC. But is there a
148  // way to interwork with the CreateFile()-based locking while doing so?
149  hFile = CreateFile(fnm, GENERIC_WRITE, FILE_SHARE_READ,
150  NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
151  if (hFile != INVALID_HANDLE_VALUE) return SUCCESS;
152  if (GetLastError() == ERROR_ALREADY_EXISTS) {
153  if (wait) {
154  Sleep(1000);
155  goto retry;
156  }
157  return INUSE;
158  }
159  explanation = string();
160  return UNKNOWN;
161 #elif defined FLINTLOCK_USE_FLOCK
162  // This is much simpler than using fcntl() due to saner semantics around
163  // releasing locks when closing other descriptors on the same file (at
164  // least on platforms where flock() isn't just a compatibility wrapper
165  // around fcntl()). We can't simply switch to this without breaking
166  // locking compatibility with previous releases, though it might be useful
167  // for porting to platforms without fcntl() locking.
168  //
169  // Also, flock() is problematic over NFS at least on Linux - it's been
170  // supported since Linux 2.6.12 but it's actually emulated by taking out an
171  // fcntl() byte-range lock on the entire file, which means that a process
172  // on the NFS server can get a (genuine) flock() lock on the same file a
173  // process on an NFS client has locked by flock() emulated as an fcntl()
174  // lock.
175  Assert(fd == -1);
176  int lockfd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0666);
177  if (lockfd < 0) {
178  // Couldn't open lockfile.
179  explanation.assign("Couldn't open lockfile: ");
180  errno_to_string(errno, explanation);
181  return ((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
182  }
183 
184  int op = LOCK_EX;
185  if (!wait) op |= LOCK_NB;
186  while (flock(lockfd, op) == -1) {
187  if (errno != EINTR) {
188  // Lock failed - translate known errno values into a reason code.
189  close(lockfd);
190  switch (errno) {
191  case EWOULDBLOCK:
192  return INUSE;
193  case ENOLCK:
194  return UNSUPPORTED; // FIXME: what do we get for NFS?
195  default:
196  return UNKNOWN;
197  }
198  }
199  }
200 
201  fd = lockfd;
202  return SUCCESS;
203 #else
204  Assert(fd == -1);
205  // Set the close-on-exec flag. If we don't have OFD locks then our child
206  // will clear it after we fork() but before the child exec()s so that
207  // there's no window where another thread in the parent process could
208  // fork()+exec() and end up with these fds still open.
209  int lockfd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0666);
210  if (lockfd < 0) {
211  // Couldn't open lockfile.
212  explanation.assign("Couldn't open lockfile: ");
213  errno_to_string(errno, explanation);
214  return ((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
215  }
216 
217 #ifdef F_OFD_SETLK
218  // F_OFD_SETLK has exactly the semantics we want, so use it if it's
219  // available. Support was added in Linux 3.15 and was standardised in
220  // POSIX issue 8 so hopefully will be available for all POSIX platforms
221  // with time. So far it seems they've also been implemented for macOS.
222 
223  // Use a static flag so we don't repeatedly try F_OFD_SETLK when
224  // the kernel in use doesn't support it. This should be safe in a
225  // threaded context - at worst multiple threads might end up trying
226  // F_OFD_SETLK and then setting f_ofd_setlk_fails to true.
227  static bool f_ofd_setlk_fails = false;
228  if (!f_ofd_setlk_fails) {
229  struct flock fl;
230  fl.l_type = F_WRLCK;
231  fl.l_whence = SEEK_SET;
232  fl.l_start = 0;
233  fl.l_len = 1;
234  fl.l_pid = 0;
235  while (fcntl(lockfd, wait ? F_OFD_SETLKW : F_OFD_SETLK, &fl) == -1) {
236  if (errno != EINTR) {
237  if (errno == EINVAL) {
238  // F_OFD_SETLK not supported by this kernel.
239  goto no_ofd_support;
240  }
241  // Lock failed - translate known errno values into a reason
242  // code.
243  int e = errno;
244  close(lockfd);
245  switch (e) {
246  case EACCES: case EAGAIN:
247  return INUSE;
248  case ENOLCK:
249  return UNSUPPORTED;
250  default:
251  return UNKNOWN;
252  }
253  }
254  }
255  fd = lockfd;
256  pid = 0;
257  return SUCCESS;
258 no_ofd_support:
259  f_ofd_setlk_fails = true;
260  }
261 #endif
262 
263  int fds[2];
264  if (socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, PF_UNSPEC, fds) < 0) {
265  // Couldn't create socketpair.
266  explanation.assign("Couldn't create socketpair: ");
267  errno_to_string(errno, explanation);
268  reason why = ((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
269  (void)close(lockfd);
270  return why;
271  }
272  // "The two sockets are indistinguishable" so we can just swap the fds
273  // if we want, and being able to assume fds[1] != 2 is useful in the child
274  // code below.
275  if (rare(fds[1] == 2)) swap(fds[0], fds[1]);
276 
277  pid_t child = fork();
278 
279  if (child == 0) {
280  // Child process.
281 
282  // Close the other socket which ensures we have at least one free fd.
283  // That means that we don't expect failure due to not having a free
284  // file descriptor, but we still check for it as it might be possible
285  // e.g. if the process open fd limit was reduced by another thread
286  // between socketpair() and fork().
287  close(fds[0]);
288  int parentfd = fds[1];
289 
290  // Closing *ANY* file descriptor open on the lock file will release the
291  // lock. Therefore before we attempt to take the lock, any other fds
292  // which could be open on the lock file must have been closed.
293  //
294  // We also need to arrange that fds 0 and 1 are the socket back to our
295  // parent (so that exec-ing /bin/cat does what we want) and both need
296  // to have their close-on-exec flag cleared (we can achieve all this
297  // part with two dup2() calls with a little care). This means that
298  // we need lockfd >= 2, but we actually arrange that lockfd == 2 since
299  // then we can call closefrom(3) to close any other open fds in a
300  // single call.
301  //
302  // If we need to use dup() to clear the close-on-exec flag on lockfd,
303  // that must also have been done (because otherwise we can't close
304  // the original, and since it has close-on-exec set, that means we
305  // can't call exec()).
306 
307  bool lockfd_cloexec_cleared = false;
308  int dup_parent_to_first = parentfd == 0 ? 1 : 0;
309  if (rare(lockfd < 2)) {
310  int oldlockfd = lockfd;
311  // This dup2() will clear the close-on-exec flag for the new lockfd.
312  // Note that we ensured above that parentfd != 2.
313  lockfd = dup2(lockfd, 2);
314  if (rare(lockfd < 0)) goto report_dup_failure;
315  lockfd_cloexec_cleared = true;
316  // Ensure we reuse an already open fd as we just used up our spare.
317  dup_parent_to_first = oldlockfd;
318  }
319 
320  // Connect our stdin and stdout to our parent via the socket. With
321  // a little care here we ensure that both dup2() calls actually
322  // duplicate the fd and so the close-on-exec flag should be clear for
323  // both fds 0 and 1.
324  if (rare(dup2(parentfd, dup_parent_to_first) < 0)) {
325 report_dup_failure:
326  _exit((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
327  }
328  close(parentfd);
329  if (rare(dup2(dup_parent_to_first, dup_parent_to_first ^ 1) < 0))
330  goto report_dup_failure;
331 
332  // Ensure lockfd is fd 2, and clear close-on-exec if necessary.
333  if (lockfd != 2) {
334  // This dup2() will clear the close-on-exec flag for the new lockfd.
335  lockfd = dup2(lockfd, 2);
336  if (rare(lockfd < 0)) goto report_dup_failure;
337  } else if (!lockfd_cloexec_cleared && O_CLOEXEC != 0) {
338 #if defined F_SETFD && defined FD_CLOEXEC
339  (void)fcntl(lockfd, F_SETFD, 0);
340 #else
341  // We use dup2() twice to clear the close-on-exec flag but keep
342  // lockfd == 2.
343  if (rare(dup2(lockfd, 3) < 0 || dup2(3, lockfd) < 0))
344  goto report_dup_failure;
345 #endif
346  }
347 
348  closefrom(3);
349 
350  {
351  struct flock fl;
352  fl.l_type = F_WRLCK;
353  fl.l_whence = SEEK_SET;
354  fl.l_start = 0;
355  fl.l_len = 1;
356  while (fcntl(lockfd, wait ? F_SETLKW : F_SETLK, &fl) == -1) {
357  if (errno != EINTR) {
358  // Lock failed - translate known errno values into a reason
359  // code.
360  if (errno == EACCES || errno == EAGAIN) {
361  _exit(INUSE);
362  } else if (errno == ENOLCK) {
363  _exit(UNSUPPORTED);
364  } else {
365  _exit(UNKNOWN);
366  }
367  break;
368  }
369  }
370  }
371 
372  {
373  // Tell the parent if we got the lock by writing a byte.
374  while (write(1, "", 1) < 0) {
375  // EINTR means a signal interrupted us, so retry.
376  //
377  // Otherwise we can't tell our parent that we got the lock so
378  // we just exit and our parent will think that the locking
379  // attempt failed for "UNKNOWN" reasons.
380  if (errno != EINTR) _exit(UNKNOWN);
381  }
382  }
383 
384  // Make sure we don't block unmount of partition holding the current
385  // directory.
386  if (chdir("/") < 0) {
387  // We can't usefully do anything in response to an error, so just
388  // ignore it - the worst harm it can do is make it impossible to
389  // unmount a partition.
390  //
391  // We need the if statement because glibc's _FORTIFY_SOURCE mode
392  // gives a warning even if we cast the result to void.
393  }
394 
395  // FIXME: use special statically linked helper instead of cat.
396  execl("/bin/cat", "/bin/cat", static_cast<void*>(NULL));
397  // Emulate cat ourselves (we try to avoid this to reduce VM overhead).
398  char ch;
399  while (read(0, &ch, 1) != 0) {
400  /* Do nothing */
401  }
402  _exit(0);
403  }
404 
405  close(lockfd);
406  close(fds[1]);
407 
408  if (child == -1) {
409  // Couldn't fork.
410  explanation.assign("Couldn't fork: ");
411  errno_to_string(errno, explanation);
412  close(fds[0]);
413  return UNKNOWN;
414  }
415 
416  // Parent process.
417  while (true) {
418  char ch;
419  ssize_t n = read(fds[0], &ch, 1);
420  if (n == 1) {
421  // Got the lock.
422  fd = fds[0];
423  pid = child;
424  return SUCCESS;
425  }
426  if (n == 0) {
427  // EOF means the lock failed. The child's exit status should be a
428  // reason code.
429  break;
430  }
431  if (errno != EINTR) {
432  // Treat unexpected errors from read() as failure to get the lock.
433  explanation.assign("Error reading from child process: ");
434  errno_to_string(errno, explanation);
435  break;
436  }
437  }
438 
439  close(fds[0]);
440 
441  int status;
442  while (waitpid(child, &status, 0) < 0) {
443  if (errno != EINTR) return UNKNOWN;
444  }
445 
446  reason why = UNKNOWN;
447  if (WIFEXITED(status)) {
448  int exit_status = WEXITSTATUS(status);
449  if (usual(exit_status > 0 && exit_status <= UNKNOWN))
450  why = static_cast<reason>(exit_status);
451  }
452 
453  return why;
454 #endif
455 }
456 
457 void
459 #if defined __CYGWIN__ || defined __WIN32__
460  if (hFile == INVALID_HANDLE_VALUE) return;
461  CloseHandle(hFile);
462  hFile = INVALID_HANDLE_VALUE;
463 #elif defined FLINTLOCK_USE_FLOCK
464  if (fd < 0) return;
465  close(fd);
466  fd = -1;
467 #else
468  if (fd < 0) return;
469  close(fd);
470  fd = -1;
471 #ifdef F_OFD_SETLK
472  if (pid == 0) return;
473 #endif
474  // Kill the child process which is holding the lock. Use SIGKILL since
475  // that can't be caught or ignored (we used to use SIGHUP, but if the
476  // application has set that to SIG_IGN, the child process inherits that
477  // setting, which sometimes results in the child process not exiting -
478  // noted on Linux).
479  //
480  // The only likely error from kill is ESRCH (pid doesn't exist). The other
481  // possibilities (according to the Linux man page) are EINVAL (invalid
482  // signal) and EPERM (don't have permission to SIGKILL the process) but in
483  // none of the cases does calling waitpid do us any good!
484  if (kill(pid, SIGKILL) == 0) {
485  int status;
486  while (waitpid(pid, &status, 0) < 0) {
487  if (errno != EINTR) break;
488  }
489  }
490 #endif
491 }
492 
493 void
495  const string & db_dir,
496  const string & explanation) const
497 {
498  string msg("Unable to get write lock on ");
499  msg += db_dir;
500  if (why == FlintLock::INUSE) {
501  msg += ": already locked";
502  } else if (why == FlintLock::UNSUPPORTED) {
503  msg += ": locking probably not supported by this FS";
504  } else if (why == FlintLock::FDLIMIT) {
505  msg += ": too many open files";
506  } else if (why == FlintLock::UNKNOWN) {
507  if (!explanation.empty())
508  msg += ": " + explanation;
509  }
510  throw Xapian::DatabaseLockError(msg);
511 }
void release()
Release the lock.
Definition: flint_lock.cc:458
reason lock(bool exclusive, bool wait, std::string &explanation)
Attempt to obtain the lock.
Definition: flint_lock.cc:124
bool test() const
Test if the lock is held.
Definition: flint_lock.cc:74
@ UNSUPPORTED
Definition: flint_lock.h:51
void throw_databaselockerror(FlintLock::reason why, const std::string &db_dir, const std::string &explanation) const
Throw Xapian::DatabaseLockError.
Definition: flint_lock.cc:494
DatabaseLockError indicates failure to lock a database.
Definition: error.h:481
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:707
Implementation of closefrom() function.
#define usual(COND)
Definition: config.h:608
#define rare(COND)
Definition: config.h:607
void errno_to_string(int e, string &s)
Convert errno value to std::string, thread-safe if possible.
Hierarchy of classes which Xapian can throw as exceptions.
int close(FD &fd)
Definition: fd.h:63
static void throw_cannot_test_lock()
Definition: flint_lock.cc:67
Flint-compatible database locking.
void closefrom(int fd)
Definition: closefrom.cc:91
Database open(std::string_view host, unsigned int port, unsigned timeout=10000, unsigned connect_timeout=10000)
Construct a Database object for read-only access to a remote database accessed via a TCP connection.
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
include <fcntl.h>, but working around broken platforms.
#define O_CLOEXEC
Definition: safefcntl.h:89
include <sys/socket.h> with portability workarounds.
#define SOCK_CLOEXEC