xapian-core  1.4.30
io_utils.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2004-2025 Olly Betts
5  * Copyright (C) 2010 Richard Boulton
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <config.h>
23 
24 #include "io_utils.h"
25 #include "posixy_wrapper.h"
26 
27 #include "safeunistd.h"
28 
29 #include <cerrno>
30 #include <cstring>
31 #include <string>
32 
33 #include <xapian/error.h>
34 
35 #include "noreturn.h"
36 #include "omassert.h"
37 #include "str.h"
38 
// Getting pread() and pwrite() prototyped on every platform by including the
// right headers with the right feature macros defined, without breaking some
// other platform in the process, is a real can of worms.  Instead, configure
// probes for which prototypes (if any) are needed and supplies them through
// PREAD_PROTOTYPE and PWRITE_PROTOTYPE.
#if defined(HAVE_PREAD) && defined(PREAD_PROTOTYPE)
PREAD_PROTOTYPE
#endif
#if defined(HAVE_PWRITE) && defined(PWRITE_PROTOTYPE)
PWRITE_PROTOTYPE
#endif
50 
51 bool
52 io_unlink(const std::string & filename)
53 {
54  if (posixy_unlink(filename.c_str()) == 0) {
55  return true;
56  }
57  if (errno != ENOENT) {
58  throw Xapian::DatabaseError(filename + ": delete failed", errno);
59  }
60  return false;
61 }
62 
// Lowest file descriptor number we are willing to use for a writable handle.
//
// Descriptors below MIN_WRITE_FD are avoided because other code in the same
// process might write to stdout or stderr, and if one of our database files
// were open on such a descriptor that output would corrupt the database.
const int MIN_WRITE_FD = 3;
69 
70 static int
72 {
73  int badfd = fd;
74 #ifdef F_DUPFD_CLOEXEC
75  // dup to the first unused fd >= MIN_WRITE_FD.
76  fd = fcntl(badfd, F_DUPFD_CLOEXEC, MIN_WRITE_FD);
77  // F_DUPFD_CLOEXEC may not be supported.
78  if (fd < 0 && errno == EINVAL)
79 #endif
80 #ifdef F_DUPFD
81  {
82  fd = fcntl(badfd, F_DUPFD, MIN_WRITE_FD);
83 # ifdef FD_CLOEXEC
84  if (fd >= 0)
85  (void)fcntl(fd, F_SETFD, FD_CLOEXEC);
86 # endif
87  }
88  int save_errno = errno;
89  (void)close(badfd);
90  errno = save_errno;
91 #else
92  {
93  char toclose[MIN_WRITE_FD];
94  memset(toclose, 0, sizeof(toclose));
95  fd = badfd;
96  do {
97  toclose[fd] = 1;
98  fd = dup(fd);
99  } while (fd >= 0 && fd < MIN_WRITE_FD);
100  int save_errno = errno;
101  for (badfd = 0; badfd != MIN_WRITE_FD; ++badfd)
102  if (toclose[badfd])
103  close(badfd);
104  if (fd < 0) {
105  errno = save_errno;
106  } else {
107 # ifdef FD_CLOEXEC
108  (void)fcntl(fd, F_SETFD, FD_CLOEXEC);
109 # endif
110  }
111  }
112 #endif
113  Assert(fd >= MIN_WRITE_FD || fd < 0);
114  return fd;
115 }
116 
117 static inline int
119 {
120  if (usual(fd >= MIN_WRITE_FD || fd < 0)) return fd;
121  return move_to_higher_fd_(fd);
122 }
123 
124 int
125 io_open_block_wr(const char* filename, bool anew)
126 {
127  // Use auto because on AIX O_CLOEXEC may be a 64-bit integer constant.
128  auto flags = O_RDWR | O_BINARY | O_CLOEXEC;
129  if (anew) flags |= O_CREAT | O_TRUNC;
130  int fd = ::open(filename, flags, 0666);
131  return move_to_higher_fd(fd);
132 }
133 
134 size_t
135 io_read(int fd, char * p, size_t n, size_t min)
136 {
137  size_t total = 0;
138  while (n) {
139  ssize_t c = read(fd, p, n);
140  if (c <= 0) {
141  if (c == 0) {
142  if (total >= min) break;
143  throw Xapian::DatabaseCorruptError("Couldn't read enough (EOF)");
144  }
145  if (errno == EINTR) continue;
146  throw Xapian::DatabaseError("Error reading from file", errno);
147  }
148  p += c;
149  total += c;
150  n -= c;
151  }
152  return total;
153 }
154 
156 void
157 io_write(int fd, const char * p, size_t n)
158 {
159  while (n) {
160  ssize_t c = write(fd, p, n);
161  if (c < 0) {
162  if (errno == EINTR) continue;
163  throw Xapian::DatabaseError("Error writing to file", errno);
164  }
165  p += c;
166  n -= c;
167  }
168 }
169 
170 XAPIAN_NORETURN(
171  static void throw_block_error(const char * s, off_t b, int e = 0));
172 static void
173 throw_block_error(const char * s, off_t b, int e)
174 {
175  std::string m = s;
176  m += str(b);
177  throw Xapian::DatabaseError(m, e);
178 }
179 
#ifdef HAVE_POSIX_FADVISE
/** Advise the OS that block b (n bytes, at offset o + b * n) will be needed.
 *
 *  @param fd  Descriptor of the file.
 *  @param n   Block size in bytes.
 *  @param b   Block number.
 *  @param o   Byte offset of block 0 within the file.
 *
 *  @return true if posix_fadvise() reported success, false otherwise.
 */
bool
io_readahead_block(int fd, size_t n, off_t b, off_t o)
{
    // Turn the block number into an absolute byte offset.
    o += b * n;
    // No error is raised on failure: the working assumption is that a failure
    // is likely to recur for further calls on the same fd, so we just report
    // it to the caller.
    const int rc = posix_fadvise(fd, o, n, POSIX_FADV_WILLNEED);
    return rc == 0;
}
#endif
190 
191 void
192 io_read_block(int fd, char * p, size_t n, off_t b, off_t o)
193 {
194  o += b * n;
195  // Prefer pread if available since it's typically implemented as a
196  // separate syscall, and that eliminates the overhead of an extra syscall
197  // per block read.
198 #ifdef HAVE_PREAD
199  while (true) {
200  ssize_t c = pread(fd, p, n, o);
201  // We should get a full read most of the time, so streamline that case.
202  if (usual(c == ssize_t(n)))
203  return;
204  // -1 is error, 0 is EOF
205  if (c <= 0) {
206  if (c == 0)
207  throw_block_error("EOF reading block ", b);
208  // We get EINTR if the syscall was interrupted by a signal.
209  // In this case we should retry the read.
210  if (errno == EINTR) continue;
211  throw_block_error("Error reading block ", b, errno);
212  }
213  p += c;
214  n -= c;
215  o += c;
216  }
217 #else
218  if (rare(lseek(fd, o, SEEK_SET) < 0))
219  throw_block_error("Error seeking to block ", b, errno);
220  while (true) {
221  ssize_t c = read(fd, p, n);
222  // We should get a full read most of the time, so streamline that case.
223  if (usual(c == ssize_t(n)))
224  return;
225  if (c <= 0) {
226  if (c == 0)
227  throw_block_error("EOF reading block ", b);
228  // We get EINTR if the syscall was interrupted by a signal.
229  // In this case we should retry the read.
230  if (errno == EINTR) continue;
231  throw_block_error("Error reading block ", b, errno);
232  }
233  p += c;
234  n -= c;
235  }
236 #endif
237 }
238 
239 void
240 io_write_block(int fd, const char * p, size_t n, off_t b, off_t o)
241 {
242  o += b * n;
243  // Prefer pwrite if available since it's typically implemented as a
244  // separate syscall, and that eliminates the overhead of an extra syscall
245  // per block write.
246 #ifdef HAVE_PWRITE
247  while (true) {
248  ssize_t c = pwrite(fd, p, n, o);
249  // We should get a full write most of the time, so streamline that case.
250  if (usual(c == ssize_t(n)))
251  return;
252  if (c < 0) {
253  // We get EINTR if the syscall was interrupted by a signal.
254  // In this case we should retry the write.
255  if (errno == EINTR) continue;
256  throw_block_error("Error writing block ", b, errno);
257  }
258  p += c;
259  n -= c;
260  o += c;
261  }
262 #else
263  if (rare(lseek(fd, o, SEEK_SET) < 0))
264  throw_block_error("Error seeking to block ", b, errno);
265  while (true) {
266  ssize_t c = write(fd, p, n);
267  // We should get a full write most of the time, so streamline that case.
268  if (usual(c == ssize_t(n)))
269  return;
270  if (c < 0) {
271  // We get EINTR if the syscall was interrupted by a signal.
272  // In this case we should retry the write.
273  if (errno == EINTR) continue;
274  throw_block_error("Error writing block ", b, errno);
275  }
276  p += c;
277  n -= c;
278  }
279 #endif
280 }
281 
282 bool
283 io_tmp_rename(const std::string & tmp_file, const std::string & real_file)
284 {
285 #ifdef EXDEV
286  // We retry on EXDEV a few times as some older Linux kernels are buggy and
287  // fail with EXDEV when the two files are on the same device (as they
288  // always ought to be when this function is used). Don't retry forever in
289  // case someone calls this with files on different devices.
290  //
291  // We're not sure exactly which kernels are buggy in this way, but there's
292  // discussion here: https://www.spinics.net/lists/linux-nfs/msg17306.html
293  //
294  // Reported at: https://trac.xapian.org/ticket/698
295  int retries = 5;
296 retry:
297 #endif
298  if (posixy_rename(tmp_file.c_str(), real_file.c_str()) < 0) {
299 #ifdef EXDEV
300  if (errno == EXDEV && --retries > 0) goto retry;
301 #endif
302  // With NFS, rename() failing may just mean that the server crashed
303  // after successfully renaming, but before reporting this, and then
304  // the retried operation fails. So we need to check if the source
305  // file still exists, which we do by calling unlink(), since we want
306  // to remove the temporary file anyway.
307  int saved_errno = errno;
308  if (unlink(tmp_file.c_str()) == 0 || errno != ENOENT) {
309  errno = saved_errno;
310  return false;
311  }
312  }
313  return true;
314 }
DatabaseCorruptError indicates database corruption was detected.
Definition: error.h:409
DatabaseError indicates some sort of database related error.
Definition: error.h:367
#define usual(COND)
Definition: config.h:576
#define rare(COND)
Definition: config.h:575
Hierarchy of classes which Xapian can throw as exceptions.
int close(FD &fd)
Definition: fd.h:63
void io_read_block(int fd, char *p, size_t n, off_t b, off_t o)
Read block b size n bytes into buffer p from file descriptor fd, offset o.
Definition: io_utils.cc:192
int io_open_block_wr(const char *filename, bool anew)
Open a block-based file for writing.
Definition: io_utils.cc:125
bool io_unlink(const std::string &filename)
Delete a file.
Definition: io_utils.cc:52
void io_write(int fd, const char *p, size_t n)
Write n bytes from block pointed to by p to file descriptor fd.
Definition: io_utils.cc:157
const int MIN_WRITE_FD
Definition: io_utils.cc:68
size_t io_read(int fd, char *p, size_t n, size_t min)
Read n bytes (or until EOF) into block pointed to by p from file descriptor fd.
Definition: io_utils.cc:135
static int move_to_higher_fd_(int fd)
Definition: io_utils.cc:71
bool io_tmp_rename(const std::string &tmp_file, const std::string &real_file)
Rename a temporary file to its final position.
Definition: io_utils.cc:283
void io_write_block(int fd, const char *p, size_t n, off_t b, off_t o)
Write block b size n bytes from buffer p to file descriptor fd, offset o.
Definition: io_utils.cc:240
static void throw_block_error(const char *s, off_t b, int e=0)
Definition: io_utils.cc:173
static int move_to_higher_fd(int fd)
Definition: io_utils.cc:118
Wrappers for low-level POSIX I/O routines.
bool io_readahead_block(int, size_t, off_t, off_t=0)
Readahead block b size n bytes from file descriptor fd.
Definition: io_utils.h:133
WritableDatabase open()
Construct a WritableDatabase object for a new, empty InMemory database.
Definition: dbfactory.h:104
string str(int value)
Convert int to std::string.
Definition: str.cc:90
Define the XAPIAN_NORETURN macro.
Various assertion macros.
#define Assert(COND)
Definition: omassert.h:122
Provides wrappers with POSIXy semantics.
#define posixy_rename(F, T)
#define posixy_unlink(F)
#define O_BINARY
Definition: safefcntl.h:81
#define O_CLOEXEC
Definition: safefcntl.h:90
<unistd.h>, but with compat.
Convert types to std::string.