summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fs/sync.c 135
-rw-r--r-- include/linux/fs.h 3
2 files changed, 74 insertions, 64 deletions
diff --git a/fs/sync.c b/fs/sync.c
index b54e0541ad89..01e82170545a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -234,58 +234,10 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
234 return do_fsync(fd, 1); 234 return do_fsync(fd, 1);
235} 235}
236 236
237/* 237int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
238 * sys_sync_file_range() permits finely controlled syncing over a segment of 238 unsigned int flags)
239 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
240 * zero then sys_sync_file_range() will operate from offset out to EOF.
241 *
242 * The flag bits are:
243 *
244 * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
245 * before performing the write.
246 *
247 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
248 * range which are not presently under writeback. Note that this may block for
249 * significant periods due to exhaustion of disk request structures.
250 *
251 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
252 * after performing the write.
253 *
254 * Useful combinations of the flag bits are:
255 *
256 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
257 * in the range which were dirty on entry to sys_sync_file_range() are placed
258 * under writeout. This is a start-write-for-data-integrity operation.
259 *
260 * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
261 * are not presently under writeout. This is an asynchronous flush-to-disk
262 * operation. Not suitable for data integrity operations.
263 *
264 * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
265 * completion of writeout of all pages in the range. This will be used after an
266 * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
267 * for that operation to complete and to return the result.
268 *
269 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
270 * a traditional sync() operation. This is a write-for-data-integrity operation
271 * which will ensure that all pages in the range which were dirty on entry to
272 * sys_sync_file_range() are committed to disk.
273 *
274 *
275 * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
276 * I/O errors or ENOSPC conditions and will return those to the caller, after
277 * clearing the EIO and ENOSPC flags in the address_space.
278 *
279 * It should be noted that none of these operations write out the file's
280 * metadata. So unless the application is strictly performing overwrites of
281 * already-instantiated disk blocks, there are no guarantees here that the data
282 * will be available after a crash.
283 */
284int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
285 unsigned int flags)
286{ 239{
287 int ret; 240 int ret;
288 struct fd f;
289 struct address_space *mapping; 241 struct address_space *mapping;
290 loff_t endbyte; /* inclusive */ 242 loff_t endbyte; /* inclusive */
291 umode_t i_mode; 243 umode_t i_mode;
@@ -325,41 +277,96 @@ int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
325 else 277 else
326 endbyte--; /* inclusive */ 278 endbyte--; /* inclusive */
327 279
328 ret = -EBADF; 280 i_mode = file_inode(file)->i_mode;
329 f = fdget(fd);
330 if (!f.file)
331 goto out;
332
333 i_mode = file_inode(f.file)->i_mode;
334 ret = -ESPIPE; 281 ret = -ESPIPE;
335 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && 282 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
336 !S_ISLNK(i_mode)) 283 !S_ISLNK(i_mode))
337 goto out_put; 284 goto out;
338 285
339 mapping = f.file->f_mapping; 286 mapping = file->f_mapping;
340 ret = 0; 287 ret = 0;
341 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { 288 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
342 ret = file_fdatawait_range(f.file, offset, endbyte); 289 ret = file_fdatawait_range(file, offset, endbyte);
343 if (ret < 0) 290 if (ret < 0)
344 goto out_put; 291 goto out;
345 } 292 }
346 293
347 if (flags & SYNC_FILE_RANGE_WRITE) { 294 if (flags & SYNC_FILE_RANGE_WRITE) {
348 ret = __filemap_fdatawrite_range(mapping, offset, endbyte, 295 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
349 WB_SYNC_NONE); 296 WB_SYNC_NONE);
350 if (ret < 0) 297 if (ret < 0)
351 goto out_put; 298 goto out;
352 } 299 }
353 300
354 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) 301 if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
355 ret = file_fdatawait_range(f.file, offset, endbyte); 302 ret = file_fdatawait_range(file, offset, endbyte);
356 303
357out_put:
358 fdput(f);
359out: 304out:
360 return ret; 305 return ret;
361} 306}
362 307
308/*
309 * sys_sync_file_range() permits finely controlled syncing over a segment of
310 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
311 * zero then sys_sync_file_range() will operate from offset out to EOF.
312 *
313 * The flag bits are:
314 *
315 * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
316 * before performing the write.
317 *
318 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
319 * range which are not presently under writeback. Note that this may block for
320 * significant periods due to exhaustion of disk request structures.
321 *
322 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
323 * after performing the write.
324 *
325 * Useful combinations of the flag bits are:
326 *
327 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
328 * in the range which were dirty on entry to sys_sync_file_range() are placed
329 * under writeout. This is a start-write-for-data-integrity operation.
330 *
331 * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
332 * are not presently under writeout. This is an asynchronous flush-to-disk
333 * operation. Not suitable for data integrity operations.
334 *
335 * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
336 * completion of writeout of all pages in the range. This will be used after an
337 * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
338 * for that operation to complete and to return the result.
339 *
340 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
341 * a traditional sync() operation. This is a write-for-data-integrity operation
342 * which will ensure that all pages in the range which were dirty on entry to
343 * sys_sync_file_range() are committed to disk.
344 *
345 *
346 * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
347 * I/O errors or ENOSPC conditions and will return those to the caller, after
348 * clearing the EIO and ENOSPC flags in the address_space.
349 *
350 * It should be noted that none of these operations write out the file's
351 * metadata. So unless the application is strictly performing overwrites of
352 * already-instantiated disk blocks, there are no guarantees here that the data
353 * will be available after a crash.
354 */
355int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
356 unsigned int flags)
357{
358 int ret;
359 struct fd f;
360
361 ret = -EBADF;
362 f = fdget(fd);
363 if (f.file)
364 ret = sync_file_range(f.file, offset, nbytes, flags);
365
366 fdput(f);
367 return ret;
368}
369
363SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, 370SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
364 unsigned int, flags) 371 unsigned int, flags)
365{ 372{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..2f66e247ecba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2785,6 +2785,9 @@ extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
2785 int datasync); 2785 int datasync);
2786extern int vfs_fsync(struct file *file, int datasync); 2786extern int vfs_fsync(struct file *file, int datasync);
2787 2787
2788extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
2789 unsigned int flags);
2790
2788/* 2791/*
2789 * Sync the bytes written if this was a synchronous write. Expect ki_pos 2792 * Sync the bytes written if this was a synchronous write. Expect ki_pos
2790 * to already be updated for the write, and will return either the amount 2793 * to already be updated for the write, and will return either the amount