xapian-core  1.4.27
flint_lock.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19  * USA
20  */
21 
22 #include <config.h>
23 
24 #include "flint_lock.h"
25 
26 #ifndef __WIN32__
27 #include <cerrno>
28 
29 #include "safefcntl.h"
30 #include <unistd.h>
31 #include <cstdlib>
32 #include <sys/types.h>
33 #include "safesyssocket.h"
34 #include <sys/wait.h>
35 #include <signal.h>
36 #include <cstring>
37 #endif
38 
39 #include "closefrom.h"
40 #include "errno_to_string.h"
41 #include "omassert.h"
42 
43 #ifdef __CYGWIN__
44 # include <cygwin/version.h>
45 # include <sys/cygwin.h>
46 #endif
47 
48 #ifdef FLINTLOCK_USE_FLOCK
49 # include <sys/file.h>
50 #endif
51 
52 #include "xapian/error.h"
53 
54 using namespace std;
55 
56 #ifndef F_OFD_SETLK
57 # ifdef __linux__
58 // Apparently defining _GNU_SOURCE should get us F_OFD_SETLK, etc, but that
59 // doesn't actually seem to work, so hard-code the known values.
60 # define F_OFD_GETLK 36
61 # define F_OFD_SETLK 37
62 # define F_OFD_SETLKW 38
63 # endif
64 #endif
65 
66 XAPIAN_NORETURN(static void throw_cannot_test_lock());
67 static void
69 {
70  throw Xapian::FeatureUnavailableError("Can't test lock without trying to "
71  "take it");
72 }
73 
74 bool
76 {
77  // A database which doesn't support update can't be locked for update.
78  if (filename.empty()) return false;
79 
80 #if defined __CYGWIN__ || defined __WIN32__
81  if (hFile != INVALID_HANDLE_VALUE) return true;
82  // Doesn't seem to be possible to check if the lock is held without briefly
83  // taking the lock.
85 #elif defined FLINTLOCK_USE_FLOCK
86  if (fd != -1) return true;
87  // Doesn't seem to be possible to check if the lock is held without briefly
88  // taking the lock.
90 #else
91  if (fd != -1) return true;
92  int lockfd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0666);
93  if (lockfd < 0) {
94  // Couldn't open lockfile.
95  reason why = ((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
96  throw_databaselockerror(why, filename, "Testing lock");
97  }
98 
99  struct flock fl;
100  fl.l_type = F_WRLCK;
101  fl.l_whence = SEEK_SET;
102  fl.l_start = 0;
103  fl.l_len = 1;
104  fl.l_pid = 0;
105  while (fcntl(lockfd, F_GETLK, &fl) == -1) {
106  if (errno != EINTR) {
107  // Translate known errno values into a reason code.
108  int e = errno;
109  close(lockfd);
110  if (e == ENOSYS) {
111  // F_GETLK always failed with ENOSYS on older GNU Hurd libc
112  // versions: https://bugs.debian.org/190367
114  }
115  reason why = (e == ENOLCK ? UNSUPPORTED : UNKNOWN);
116  throw_databaselockerror(why, filename, "Testing lock");
117  }
118  }
119  close(lockfd);
120  return fl.l_type != F_UNLCK;
121 #endif
122 }
123 
125 FlintLock::lock(bool exclusive, bool wait, string & explanation) {
126  // Currently we only support exclusive locks.
127  (void)exclusive;
128  Assert(exclusive);
129 #if defined __CYGWIN__ || defined __WIN32__
130  Assert(hFile == INVALID_HANDLE_VALUE);
131 #ifdef __CYGWIN__
132  char fnm[MAX_PATH];
133 #if CYGWIN_VERSION_API_MAJOR == 0 && CYGWIN_VERSION_API_MINOR < 181
134  cygwin_conv_to_win32_path(filename.c_str(), fnm);
135 #else
136  if (cygwin_conv_path(CCP_POSIX_TO_WIN_A|CCP_RELATIVE, filename.c_str(),
137  fnm, MAX_PATH) < 0) {
138  explanation.assign("cygwin_conv_path failed: ");
139  errno_to_string(errno, explanation);
140  return UNKNOWN;
141  }
142 #endif
143 #else
144  const char *fnm = filename.c_str();
145 #endif
146 retry:
147  // FIXME: Use LockFileEx() for locking, which would allow proper blocking
148  // and also byte-range locking for when we implement MVCC. But is there a
149  // way to interwork with the CreateFile()-based locking while doing so?
150  hFile = CreateFile(fnm, GENERIC_WRITE, FILE_SHARE_READ,
151  NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
152  if (hFile != INVALID_HANDLE_VALUE) return SUCCESS;
153  if (GetLastError() == ERROR_ALREADY_EXISTS) {
154  if (wait) {
155  Sleep(1000);
156  goto retry;
157  }
158  return INUSE;
159  }
160  explanation = string();
161  return UNKNOWN;
162 #elif defined FLINTLOCK_USE_FLOCK
163  // This is much simpler than using fcntl() due to saner semantics around
164  // releasing locks when closing other descriptors on the same file (at
165  // least on platforms where flock() isn't just a compatibility wrapper
166  // around fcntl()). We can't simply switch to this without breaking
167  // locking compatibility with previous releases, though it might be useful
168  // for porting to platforms without fcntl() locking.
169  //
170  // Also, flock() is problematic over NFS at least on Linux - it's been
171  // supported since Linux 2.6.12 but it's actually emulated by taking out an
172  // fcntl() byte-range lock on the entire file, which means that a process
173  // on the NFS server can get a (genuine) flock() lock on the same file a
174  // process on an NFS client has locked by flock() emulated as an fcntl()
175  // lock.
176  Assert(fd == -1);
177  int lockfd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0666);
178  if (lockfd < 0) {
179  // Couldn't open lockfile.
180  explanation.assign("Couldn't open lockfile: ");
181  errno_to_string(errno, explanation);
182  return ((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
183  }
184 
185  int op = LOCK_EX;
186  if (!wait) op |= LOCK_NB;
187  while (flock(lockfd, op) == -1) {
188  if (errno != EINTR) {
189  // Lock failed - translate known errno values into a reason code.
190  close(lockfd);
191  switch (errno) {
192  case EWOULDBLOCK:
193  return INUSE;
194  case ENOLCK:
195  return UNSUPPORTED; // FIXME: what do we get for NFS?
196  default:
197  return UNKNOWN;
198  }
199  }
200  }
201 
202  fd = lockfd;
203  return SUCCESS;
204 #else
205  Assert(fd == -1);
206  // Set the close-on-exec flag. If we don't have OFD locks then our child
207  // will clear it after we fork() but before the child exec()s so that
208  // there's no window where another thread in the parent process could
209  // fork()+exec() and end up with these fds still open.
210  int lockfd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0666);
211  if (lockfd < 0) {
212  // Couldn't open lockfile.
213  explanation.assign("Couldn't open lockfile: ");
214  errno_to_string(errno, explanation);
215  return ((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
216  }
217 
218 #ifdef F_OFD_SETLK
219  // F_OFD_SETLK has exactly the semantics we want, so use it if it's
220  // available. Support was added in Linux 3.15, and it was accepted
221  // for POSIX issue 8 on 2022-12-15:
222  // https://austingroupbugs.net/view.php?id=768
223 
224  // Use a static flag so we don't repeatedly try F_OFD_SETLK when
225  // the kernel in use doesn't support it. This should be safe in a
226  // threaded context - at worst multiple threads might end up trying
227  // F_OFD_SETLK and then setting f_ofd_setlk_fails to true.
228  static bool f_ofd_setlk_fails = false;
229  if (!f_ofd_setlk_fails) {
230  struct flock fl;
231  fl.l_type = F_WRLCK;
232  fl.l_whence = SEEK_SET;
233  fl.l_start = 0;
234  fl.l_len = 1;
235  fl.l_pid = 0;
236  while (fcntl(lockfd, wait ? F_OFD_SETLKW : F_OFD_SETLK, &fl) == -1) {
237  if (errno != EINTR) {
238  if (errno == EINVAL) {
239  // F_OFD_SETLK not supported by this kernel.
240  goto no_ofd_support;
241  }
242  // Lock failed - translate known errno values into a reason
243  // code.
244  int e = errno;
245  close(lockfd);
246  switch (e) {
247  case EACCES: case EAGAIN:
248  return INUSE;
249  case ENOLCK:
250  return UNSUPPORTED;
251  default:
252  return UNKNOWN;
253  }
254  }
255  }
256  fd = lockfd;
257  pid = 0;
258  return SUCCESS;
259 no_ofd_support:
260  f_ofd_setlk_fails = true;
261  }
262 #endif
263 
264  int fds[2];
265  if (socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, PF_UNSPEC, fds) < 0) {
266  // Couldn't create socketpair.
267  explanation.assign("Couldn't create socketpair: ");
268  errno_to_string(errno, explanation);
269  reason why = ((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
270  (void)close(lockfd);
271  return why;
272  }
273  // "The two sockets are indistinguishable" so we can just swap the fds
274  // if we want, and being able to assume fds[1] != 2 is useful in the child
275  // code below.
276  if (rare(fds[1] == 2)) swap(fds[0], fds[1]);
277 
278  pid_t child = fork();
279 
280  if (child == 0) {
281  // Child process.
282 
283  // Close the other socket which ensures we have at least one free fd.
284  // That means that we don't expect failure due to not having a free
285  // file descriptor, but we still check for it as it might be possible
286  // e.g. if the process open fd limit was reduced by another thread
287  // between socketpair() and fork().
288  close(fds[0]);
289  int parentfd = fds[1];
290 
291  // Closing *ANY* file descriptor open on the lock file will release the
292  // lock. Therefore before we attempt to take the lock, any other fds
293  // which could be open on the lock file must have been closed.
294  //
295  // We also need to arrange that fds 0 and 1 are the socket back to our
296  // parent (so that exec-ing /bin/cat does what we want) and both need
297  // to have their close-on-exec flag cleared (we can achieve all this
298  // part with two dup2() calls with a little care). This means that
299  // we need lockfd >= 2, but we actually arrange that lockfd == 2 since
300  // then we can call closefrom(3) to close any other open fds in a
301  // single call.
302  //
303  // If we need to use dup() to clear the close-on-exec flag on lockfd,
304  // that must also have been done (because otherwise we can't close
305  // the original, and since it has close-on-exec set, that means we
306  // can't call exec()).
307 
308  bool lockfd_cloexec_cleared = false;
309  int dup_parent_to_first = parentfd == 0 ? 1 : 0;
310  if (rare(lockfd < 2)) {
311  int oldlockfd = lockfd;
312  // This dup2() will clear the close-on-exec flag for the new lockfd.
313  // Note that we ensured above that parentfd != 2.
314  lockfd = dup2(lockfd, 2);
315  if (rare(lockfd < 0)) goto report_dup_failure;
316  lockfd_cloexec_cleared = true;
317  // Ensure we reuse an already open fd as we just used up our spare.
318  dup_parent_to_first = oldlockfd;
319  }
320 
321  // Connect our stdin and stdout to our parent via the socket. With
322  // a little care here we ensure that both dup2() calls actually
323  // duplicate the fd and so the close-on-exec flag should be clear for
324  // both fds 0 and 1.
325  if (rare(dup2(parentfd, dup_parent_to_first) < 0)) {
326 report_dup_failure:
327  _exit((errno == EMFILE || errno == ENFILE) ? FDLIMIT : UNKNOWN);
328  }
329  close(parentfd);
330  if (rare(dup2(dup_parent_to_first, dup_parent_to_first ^ 1) < 0))
331  goto report_dup_failure;
332 
333  // Ensure lockfd is fd 2, and clear close-on-exec if necessary.
334  if (lockfd != 2) {
335  // This dup2() will clear the close-on-exec flag for the new lockfd.
336  lockfd = dup2(lockfd, 2);
337  if (rare(lockfd < 0)) goto report_dup_failure;
338  } else if (!lockfd_cloexec_cleared && O_CLOEXEC != 0) {
339 #if defined F_SETFD && defined FD_CLOEXEC
340  (void)fcntl(lockfd, F_SETFD, 0);
341 #else
342  // We use dup2() twice to clear the close-on-exec flag but keep
343  // lockfd == 2.
344  if (rare(dup2(lockfd, 3) < 0 || dup2(3, lockfd) < 0))
345  goto report_dup_failure;
346 #endif
347  }
348 
349  closefrom(3);
350 
351  {
352  struct flock fl;
353  fl.l_type = F_WRLCK;
354  fl.l_whence = SEEK_SET;
355  fl.l_start = 0;
356  fl.l_len = 1;
357  while (fcntl(lockfd, wait ? F_SETLKW : F_SETLK, &fl) == -1) {
358  if (errno != EINTR) {
359  // Lock failed - translate known errno values into a reason
360  // code.
361  if (errno == EACCES || errno == EAGAIN) {
362  _exit(INUSE);
363  } else if (errno == ENOLCK) {
364  _exit(UNSUPPORTED);
365  } else {
366  _exit(UNKNOWN);
367  }
368  break;
369  }
370  }
371  }
372 
373  {
374  // Tell the parent if we got the lock by writing a byte.
375  while (write(1, "", 1) < 0) {
376  // EINTR means a signal interrupted us, so retry.
377  //
378  // Otherwise we can't tell our parent that we got the lock so
379  // we just exit and our parent will think that the locking
380  // attempt failed for "UNKNOWN" reasons.
381  if (errno != EINTR) _exit(UNKNOWN);
382  }
383  }
384 
385  // Make sure we don't block unmount of partition holding the current
386  // directory.
387  if (chdir("/") < 0) {
388  // We can't usefully do anything in response to an error, so just
389  // ignore it - the worst harm it can do is make it impossible to
390  // unmount a partition.
391  //
392  // We need the if statement because glibc's _FORTIFY_SOURCE mode
393  // gives a warning even if we cast the result to void.
394  }
395 
396  // FIXME: use special statically linked helper instead of cat.
397  execl("/bin/cat", "/bin/cat", static_cast<void*>(NULL));
398  // Emulate cat ourselves (we try to avoid this to reduce VM overhead).
399  char ch;
400  while (read(0, &ch, 1) != 0) {
401  /* Do nothing */
402  }
403  _exit(0);
404  }
405 
406  close(lockfd);
407  close(fds[1]);
408 
409  if (child == -1) {
410  // Couldn't fork.
411  explanation.assign("Couldn't fork: ");
412  errno_to_string(errno, explanation);
413  close(fds[0]);
414  return UNKNOWN;
415  }
416 
417  // Parent process.
418  while (true) {
419  char ch;
420  ssize_t n = read(fds[0], &ch, 1);
421  if (n == 1) {
422  // Got the lock.
423  fd = fds[0];
424  pid = child;
425  return SUCCESS;
426  }
427  if (n == 0) {
428  // EOF means the lock failed. The child's exit status should be a
429  // reason code.
430  break;
431  }
432  if (errno != EINTR) {
433  // Treat unexpected errors from read() as failure to get the lock.
434  explanation.assign("Error reading from child process: ");
435  errno_to_string(errno, explanation);
436  break;
437  }
438  }
439 
440  close(fds[0]);
441 
442  int status;
443  while (waitpid(child, &status, 0) < 0) {
444  if (errno != EINTR) return UNKNOWN;
445  }
446 
447  reason why = UNKNOWN;
448  if (WIFEXITED(status)) {
449  int exit_status = WEXITSTATUS(status);
450  if (usual(exit_status > 0 && exit_status <= UNKNOWN))
451  why = static_cast<reason>(exit_status);
452  }
453 
454  return why;
455 #endif
456 }
457 
458 void
460 #if defined __CYGWIN__ || defined __WIN32__
461  if (hFile == INVALID_HANDLE_VALUE) return;
462  CloseHandle(hFile);
463  hFile = INVALID_HANDLE_VALUE;
464 #elif defined FLINTLOCK_USE_FLOCK
465  if (fd < 0) return;
466  close(fd);
467  fd = -1;
468 #else
469  if (fd < 0) return;
470  close(fd);
471  fd = -1;
472 #ifdef F_OFD_SETLK
473  if (pid == 0) return;
474 #endif
475  // Kill the child process which is holding the lock. Use SIGKILL since
476  // that can't be caught or ignored (we used to use SIGHUP, but if the
477  // application has set that to SIG_IGN, the child process inherits that
478  // setting, which sometimes results in the child process not exiting -
479  // noted on Linux).
480  //
481  // The only likely error from kill is ESRCH (pid doesn't exist). The other
482  // possibilities (according to the Linux man page) are EINVAL (invalid
483  // signal) and EPERM (don't have permission to SIGKILL the process) but in
484  // none of the cases does calling waitpid do us any good!
485  if (kill(pid, SIGKILL) == 0) {
486  int status;
487  while (waitpid(pid, &status, 0) < 0) {
488  if (errno != EINTR) break;
489  }
490  }
491 #endif
492 }
493 
494 void
496  const string & db_dir,
497  const string & explanation) const
498 {
499  string msg("Unable to get write lock on ");
500  msg += db_dir;
501  if (why == FlintLock::INUSE) {
502  msg += ": already locked";
503  } else if (why == FlintLock::UNSUPPORTED) {
504  msg += ": locking probably not supported by this FS";
505  } else if (why == FlintLock::FDLIMIT) {
506  msg += ": too many open files";
507  } else if (why == FlintLock::UNKNOWN) {
508  if (!explanation.empty())
509  msg += ": " + explanation;
510  }
511  throw Xapian::DatabaseLockError(msg);
512 }
int close(FD &fd)
Definition: fd.h:63
void throw_databaselockerror(FlintLock::reason why, const std::string &db_dir, const std::string &explanation) const
Throw Xapian::DatabaseLockError.
Definition: flint_lock.cc:495
#define Assert(COND)
Definition: omassert.h:122
void release()
Release the lock.
Definition: flint_lock.cc:459
void closefrom(int fd)
Definition: closefrom.cc:89
#define usual(COND)
Definition: config.h:576
include <sys/socket.h> with portability workarounds.
Convert errno value to std::string, thread-safe if possible.
Implementation of closefrom() function.
WritableDatabase open()
Construct a WritableDatabase object for a new, empty InMemory database.
Definition: dbfactory.h:104
STL namespace.
Flint-compatible database locking.
#define O_CLOEXEC
Definition: safefcntl.h:90
bool test() const
Test if the lock is held.
Definition: flint_lock.cc:75
#define rare(COND)
Definition: config.h:575
#define SOCK_CLOEXEC
Definition: safesyssocket.h:83
Hierarchy of classes which Xapian can throw as exceptions.
DatabaseLockError indicates failure to lock a database.
Definition: error.h:493
void errno_to_string(int e, string &s)
Indicates an attempt to use a feature which is unavailable.
Definition: error.h:719
static void throw_cannot_test_lock()
Definition: flint_lock.cc:68
reason lock(bool exclusive, bool wait, std::string &explanation)
Attempt to obtain the lock.
Definition: flint_lock.cc:125
Various assertion macros.
include <fcntl.h>, but working around broken platforms.