about | summary | refs | log | tree | commit | diff | stats
path: root/fs/fuse
diff options
context:
space:
mode:
author Pavel Emelyanov <xemul@openvz.org> 2013-10-10 09:12:05 -0400
committer Miklos Szeredi <mszeredi@suse.cz> 2014-04-02 09:38:50 -0400
commit ea8cd33390fafc1eca06a26e6a9c7bf1d386526f (patch)
tree eb0b3ecdf4760b3b96bc2d0d179e4c59c33d7ade /fs/fuse
parent fe38d7df230b022e72014ef7aa799a4f2acfecf3 (diff)
fuse: Fix O_DIRECT operations vs cached writeback misorder
The problem is:

1. write cached data to a file
2. read directly from the same file (via another fd)

The 2nd operation may read stale data, i.e. the one that was in a file before the 1st op. Problem is in how fuse manages writeback.

When direct op occurs the core kernel code calls filemap_write_and_wait to flush all the cached ops in flight. But fuse acks the writeback right after the ->writepages callback exits w/o waiting for the real write to happen. Thus the subsequent direct op proceeds while the real writeback is still in flight. This is a problem for backends that reorder operations.

Fix this by making the fuse direct IO callback explicitly wait on the in-flight writeback to finish.

Signed-off-by: Maxim Patlasov <MPatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Diffstat (limited to 'fs/fuse')
-rw-r--r-- fs/fuse/cuse.c 5
-rw-r--r-- fs/fuse/file.c 32
-rw-r--r-- fs/fuse/fuse_i.h 13
3 files changed, 41 insertions, 9 deletions
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index b96a49b37d66..23e363f38302 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -95,7 +95,7 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
95 struct iovec iov = { .iov_base = buf, .iov_len = count }; 95 struct iovec iov = { .iov_base = buf, .iov_len = count };
96 struct fuse_io_priv io = { .async = 0, .file = file }; 96 struct fuse_io_priv io = { .async = 0, .file = file };
97 97
98 return fuse_direct_io(&io, &iov, 1, count, &pos, 0); 98 return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE);
99} 99}
100 100
101static ssize_t cuse_write(struct file *file, const char __user *buf, 101static ssize_t cuse_write(struct file *file, const char __user *buf,
@@ -109,7 +109,8 @@ static ssize_t cuse_write(struct file *file, const char __user *buf,
109 * No locking or generic_write_checks(), the server is 109 * No locking or generic_write_checks(), the server is
110 * responsible for locking and sanity checks. 110 * responsible for locking and sanity checks.
111 */ 111 */
112 return fuse_direct_io(&io, &iov, 1, count, &pos, 1); 112 return fuse_direct_io(&io, &iov, 1, count, &pos,
113 FUSE_DIO_WRITE | FUSE_DIO_CUSE);
113} 114}
114 115
115static int cuse_open(struct inode *inode, struct file *file) 116static int cuse_open(struct inode *inode, struct file *file)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d93f2a1aa7de..276433021561 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -358,12 +358,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
358} 358}
359 359
360/* 360/*
361 * Check if page is under writeback 361 * Check if any page in a range is under writeback
362 * 362 *
363 * This is currently done by walking the list of writepage requests 363 * This is currently done by walking the list of writepage requests
364 * for the inode, which can be pretty inefficient. 364 * for the inode, which can be pretty inefficient.
365 */ 365 */
366static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) 366static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
367 pgoff_t idx_to)
367{ 368{
368 struct fuse_conn *fc = get_fuse_conn(inode); 369 struct fuse_conn *fc = get_fuse_conn(inode);
369 struct fuse_inode *fi = get_fuse_inode(inode); 370 struct fuse_inode *fi = get_fuse_inode(inode);
@@ -376,8 +377,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
376 377
377 BUG_ON(req->inode != inode); 378 BUG_ON(req->inode != inode);
378 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; 379 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
379 if (curr_index <= index && 380 if (idx_from < curr_index + req->num_pages &&
380 index < curr_index + req->num_pages) { 381 curr_index <= idx_to) {
381 found = true; 382 found = true;
382 break; 383 break;
383 } 384 }
@@ -387,6 +388,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
387 return found; 388 return found;
388} 389}
389 390
391static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
392{
393 return fuse_range_is_writeback(inode, index, index);
394}
395
390/* 396/*
391 * Wait for page writeback to be completed. 397 * Wait for page writeback to be completed.
392 * 398 *
@@ -1364,13 +1370,18 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1364 1370
1365ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, 1371ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
1366 unsigned long nr_segs, size_t count, loff_t *ppos, 1372 unsigned long nr_segs, size_t count, loff_t *ppos,
1367 int write) 1373 int flags)
1368{ 1374{
1375 int write = flags & FUSE_DIO_WRITE;
1376 int cuse = flags & FUSE_DIO_CUSE;
1369 struct file *file = io->file; 1377 struct file *file = io->file;
1378 struct inode *inode = file->f_mapping->host;
1370 struct fuse_file *ff = file->private_data; 1379 struct fuse_file *ff = file->private_data;
1371 struct fuse_conn *fc = ff->fc; 1380 struct fuse_conn *fc = ff->fc;
1372 size_t nmax = write ? fc->max_write : fc->max_read; 1381 size_t nmax = write ? fc->max_write : fc->max_read;
1373 loff_t pos = *ppos; 1382 loff_t pos = *ppos;
1383 pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
1384 pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
1374 ssize_t res = 0; 1385 ssize_t res = 0;
1375 struct fuse_req *req; 1386 struct fuse_req *req;
1376 struct iov_iter ii; 1387 struct iov_iter ii;
@@ -1384,6 +1395,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
1384 if (IS_ERR(req)) 1395 if (IS_ERR(req))
1385 return PTR_ERR(req); 1396 return PTR_ERR(req);
1386 1397
1398 if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
1399 if (!write)
1400 mutex_lock(&inode->i_mutex);
1401 fuse_sync_writes(inode);
1402 if (!write)
1403 mutex_unlock(&inode->i_mutex);
1404 }
1405
1387 while (count) { 1406 while (count) {
1388 size_t nres; 1407 size_t nres;
1389 fl_owner_t owner = current->files; 1408 fl_owner_t owner = current->files;
@@ -1472,7 +1491,8 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
1472 1491
1473 res = generic_write_checks(file, ppos, &count, 0); 1492 res = generic_write_checks(file, ppos, &count, 0);
1474 if (!res) 1493 if (!res)
1475 res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); 1494 res = fuse_direct_io(io, iov, nr_segs, count, ppos,
1495 FUSE_DIO_WRITE);
1476 1496
1477 fuse_invalidate_attr(inode); 1497 fuse_invalidate_attr(inode);
1478 1498
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1e6ad6d43051..a257ed8ebee6 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -868,9 +868,20 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
868 868
869int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 869int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
870 bool isdir); 870 bool isdir);
871
872/**
873 * fuse_direct_io() flags
874 */
875
876/** If set, it is WRITE; otherwise - READ */
877#define FUSE_DIO_WRITE (1 << 0)
878
879/** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */
880#define FUSE_DIO_CUSE (1 << 1)
881
871ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, 882ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
872 unsigned long nr_segs, size_t count, loff_t *ppos, 883 unsigned long nr_segs, size_t count, loff_t *ppos,
873 int write); 884 int flags);
874long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 885long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
875 unsigned int flags); 886 unsigned int flags);
876long fuse_ioctl_common(struct file *file, unsigned int cmd, 887long fuse_ioctl_common(struct file *file, unsigned int cmd,