diff options
author | Pavel Emelyanov <xemul@openvz.org> | 2013-10-10 09:12:05 -0400 |
---|---|---|
committer | Miklos Szeredi <mszeredi@suse.cz> | 2014-04-02 09:38:50 -0400 |
commit | ea8cd33390fafc1eca06a26e6a9c7bf1d386526f (patch) | |
tree | eb0b3ecdf4760b3b96bc2d0d179e4c59c33d7ade /fs/fuse | |
parent | fe38d7df230b022e72014ef7aa799a4f2acfecf3 (diff) |
fuse: Fix O_DIRECT operations vs cached writeback misorder
The problem is:
1. write cached data to a file
2. read directly from the same file (via another fd)
The 2nd operation may read stale data, i.e. the data that was in the file
before the 1st op. The problem is in how fuse manages writeback.
When a direct op occurs, the core kernel code calls filemap_write_and_wait
to flush all the cached ops in flight. But fuse acks the writeback right
after the ->writepages callback exits w/o waiting for the real write to
happen. Thus the subsequent direct op proceeds while the real writeback
is still in flight. This is a problem for backends that reorder operations.
Fix this by making the fuse direct IO callback explicitly wait on the
in-flight writeback to finish.
Signed-off-by: Maxim Patlasov <MPatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Diffstat (limited to 'fs/fuse')
-rw-r--r-- | fs/fuse/cuse.c | 5 | ||||
-rw-r--r-- | fs/fuse/file.c | 32 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 13 |
3 files changed, 41 insertions, 9 deletions
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b96a49b37d66..23e363f38302 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c | |||
@@ -95,7 +95,7 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, | |||
95 | struct iovec iov = { .iov_base = buf, .iov_len = count }; | 95 | struct iovec iov = { .iov_base = buf, .iov_len = count }; |
96 | struct fuse_io_priv io = { .async = 0, .file = file }; | 96 | struct fuse_io_priv io = { .async = 0, .file = file }; |
97 | 97 | ||
98 | return fuse_direct_io(&io, &iov, 1, count, &pos, 0); | 98 | return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE); |
99 | } | 99 | } |
100 | 100 | ||
101 | static ssize_t cuse_write(struct file *file, const char __user *buf, | 101 | static ssize_t cuse_write(struct file *file, const char __user *buf, |
@@ -109,7 +109,8 @@ static ssize_t cuse_write(struct file *file, const char __user *buf, | |||
109 | * No locking or generic_write_checks(), the server is | 109 | * No locking or generic_write_checks(), the server is |
110 | * responsible for locking and sanity checks. | 110 | * responsible for locking and sanity checks. |
111 | */ | 111 | */ |
112 | return fuse_direct_io(&io, &iov, 1, count, &pos, 1); | 112 | return fuse_direct_io(&io, &iov, 1, count, &pos, |
113 | FUSE_DIO_WRITE | FUSE_DIO_CUSE); | ||
113 | } | 114 | } |
114 | 115 | ||
115 | static int cuse_open(struct inode *inode, struct file *file) | 116 | static int cuse_open(struct inode *inode, struct file *file) |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d93f2a1aa7de..276433021561 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -358,12 +358,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) | |||
358 | } | 358 | } |
359 | 359 | ||
360 | /* | 360 | /* |
361 | * Check if page is under writeback | 361 | * Check if any page in a range is under writeback |
362 | * | 362 | * |
363 | * This is currently done by walking the list of writepage requests | 363 | * This is currently done by walking the list of writepage requests |
364 | * for the inode, which can be pretty inefficient. | 364 | * for the inode, which can be pretty inefficient. |
365 | */ | 365 | */ |
366 | static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | 366 | static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, |
367 | pgoff_t idx_to) | ||
367 | { | 368 | { |
368 | struct fuse_conn *fc = get_fuse_conn(inode); | 369 | struct fuse_conn *fc = get_fuse_conn(inode); |
369 | struct fuse_inode *fi = get_fuse_inode(inode); | 370 | struct fuse_inode *fi = get_fuse_inode(inode); |
@@ -376,8 +377,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | |||
376 | 377 | ||
377 | BUG_ON(req->inode != inode); | 378 | BUG_ON(req->inode != inode); |
378 | curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; | 379 | curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; |
379 | if (curr_index <= index && | 380 | if (idx_from < curr_index + req->num_pages && |
380 | index < curr_index + req->num_pages) { | 381 | curr_index <= idx_to) { |
381 | found = true; | 382 | found = true; |
382 | break; | 383 | break; |
383 | } | 384 | } |
@@ -387,6 +388,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | |||
387 | return found; | 388 | return found; |
388 | } | 389 | } |
389 | 390 | ||
391 | static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | ||
392 | { | ||
393 | return fuse_range_is_writeback(inode, index, index); | ||
394 | } | ||
395 | |||
390 | /* | 396 | /* |
391 | * Wait for page writeback to be completed. | 397 | * Wait for page writeback to be completed. |
392 | * | 398 | * |
@@ -1364,13 +1370,18 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p) | |||
1364 | 1370 | ||
1365 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | 1371 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, |
1366 | unsigned long nr_segs, size_t count, loff_t *ppos, | 1372 | unsigned long nr_segs, size_t count, loff_t *ppos, |
1367 | int write) | 1373 | int flags) |
1368 | { | 1374 | { |
1375 | int write = flags & FUSE_DIO_WRITE; | ||
1376 | int cuse = flags & FUSE_DIO_CUSE; | ||
1369 | struct file *file = io->file; | 1377 | struct file *file = io->file; |
1378 | struct inode *inode = file->f_mapping->host; | ||
1370 | struct fuse_file *ff = file->private_data; | 1379 | struct fuse_file *ff = file->private_data; |
1371 | struct fuse_conn *fc = ff->fc; | 1380 | struct fuse_conn *fc = ff->fc; |
1372 | size_t nmax = write ? fc->max_write : fc->max_read; | 1381 | size_t nmax = write ? fc->max_write : fc->max_read; |
1373 | loff_t pos = *ppos; | 1382 | loff_t pos = *ppos; |
1383 | pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; | ||
1384 | pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; | ||
1374 | ssize_t res = 0; | 1385 | ssize_t res = 0; |
1375 | struct fuse_req *req; | 1386 | struct fuse_req *req; |
1376 | struct iov_iter ii; | 1387 | struct iov_iter ii; |
@@ -1384,6 +1395,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | |||
1384 | if (IS_ERR(req)) | 1395 | if (IS_ERR(req)) |
1385 | return PTR_ERR(req); | 1396 | return PTR_ERR(req); |
1386 | 1397 | ||
1398 | if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { | ||
1399 | if (!write) | ||
1400 | mutex_lock(&inode->i_mutex); | ||
1401 | fuse_sync_writes(inode); | ||
1402 | if (!write) | ||
1403 | mutex_unlock(&inode->i_mutex); | ||
1404 | } | ||
1405 | |||
1387 | while (count) { | 1406 | while (count) { |
1388 | size_t nres; | 1407 | size_t nres; |
1389 | fl_owner_t owner = current->files; | 1408 | fl_owner_t owner = current->files; |
@@ -1472,7 +1491,8 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io, | |||
1472 | 1491 | ||
1473 | res = generic_write_checks(file, ppos, &count, 0); | 1492 | res = generic_write_checks(file, ppos, &count, 0); |
1474 | if (!res) | 1493 | if (!res) |
1475 | res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); | 1494 | res = fuse_direct_io(io, iov, nr_segs, count, ppos, |
1495 | FUSE_DIO_WRITE); | ||
1476 | 1496 | ||
1477 | fuse_invalidate_attr(inode); | 1497 | fuse_invalidate_attr(inode); |
1478 | 1498 | ||
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1e6ad6d43051..a257ed8ebee6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -868,9 +868,20 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, | |||
868 | 868 | ||
869 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | 869 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
870 | bool isdir); | 870 | bool isdir); |
871 | |||
872 | /** | ||
873 | * fuse_direct_io() flags | ||
874 | */ | ||
875 | |||
876 | /** If set, it is WRITE; otherwise - READ */ | ||
877 | #define FUSE_DIO_WRITE (1 << 0) | ||
878 | |||
879 | /** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */ | ||
880 | #define FUSE_DIO_CUSE (1 << 1) | ||
881 | |||
871 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | 882 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, |
872 | unsigned long nr_segs, size_t count, loff_t *ppos, | 883 | unsigned long nr_segs, size_t count, loff_t *ppos, |
873 | int write); | 884 | int flags); |
874 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | 885 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, |
875 | unsigned int flags); | 886 | unsigned int flags); |
876 | long fuse_ioctl_common(struct file *file, unsigned int cmd, | 887 | long fuse_ioctl_common(struct file *file, unsigned int cmd, |