diff options
author | Christoph Hellwig <hch@lst.de> | 2009-10-27 06:05:28 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2009-12-10 09:02:50 -0500 |
commit | 6b2f3d1f769be5779b479c37800229d9a4809fc3 (patch) | |
tree | 046ef6736ec6c25ab1c68741ba715d13645af336 /fs | |
parent | 59bc055211b8d266ab6089158058bf8268e02006 (diff) |
vfs: Implement proper O_SYNC semantics
While Linux provided an O_SYNC flag basically since day 1, it took until
Linux 2.4.0-test12pre2 to actually get it implemented for filesystems.
Since that day we have had generic_osync_around with only minor changes and the
great "For now, when the user asks for O_SYNC, we'll actually give
O_DSYNC" comment. This patch intends to actually give us real O_SYNC
semantics in addition to the O_DSYNC semantics. After Jan's O_SYNC
patches, which are required before this patch, it's actually surprisingly
simple: we just need to figure out when to set the datasync flag to
vfs_fsync_range and when not.
This patch renames the existing O_SYNC flag to O_DSYNC while keeping its
numerical value to keep binary compatibility, and adds a new real O_SYNC
flag. To guarantee backwards compatibility it is defined as expanding to
both the O_DSYNC and the new additional binary flag (__O_SYNC) to make
sure we are backwards-compatible when compiled against the new headers.
This also means that all places that don't care about the differences can
just check O_DSYNC and get the right behaviour for O_SYNC, too - only
places that actually care need to check __O_SYNC in addition. Drivers and
network filesystems have been updated in a fail safe way to always do the
full sync magic if O_DSYNC is set. The few places setting O_SYNC for
lower layers are kept that way for now to stay failsafe.
We enforce that O_DSYNC is set when __O_SYNC is set early in the open path
to make sure we always get these sane options.
Note that parisc really screwed up their headers as they already define a
O_DSYNC that has always been a no-op. We try to repair it by using it for
the new O_DSYNC and redefining O_SYNC to send both the traditional
O_SYNC numerical value _and_ the O_DSYNC one.
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Grant Grundler <grundler@parisc-linux.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andreas Dilger <adilger@sun.com>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/write.c | 5 | ||||
-rw-r--r-- | fs/btrfs/file.c | 4 | ||||
-rw-r--r-- | fs/cifs/dir.c | 3 | ||||
-rw-r--r-- | fs/cifs/file.c | 6 | ||||
-rw-r--r-- | fs/namei.c | 9 | ||||
-rw-r--r-- | fs/nfs/file.c | 4 | ||||
-rw-r--r-- | fs/nfs/write.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 2 | ||||
-rw-r--r-- | fs/sync.c | 5 | ||||
-rw-r--r-- | fs/ubifs/file.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_lrw.c | 2 |
11 files changed, 29 insertions, 15 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c index c63a3c8beb73..6be1bc31616a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -692,8 +692,9 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
692 | } | 692 | } |
693 | 693 | ||
694 | /* return error values for O_SYNC and IS_SYNC() */ | 694 | /* return error values for O_SYNC and IS_SYNC() */ |
695 | if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) { | 695 | if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_DSYNC) { |
696 | ret = afs_fsync(iocb->ki_filp, dentry, 1); | 696 | ret = afs_fsync(iocb->ki_filp, dentry, |
697 | (iocb->ki_filp->f_flags & __O_SYNC) ? 0 : 1); | ||
697 | if (ret < 0) | 698 | if (ret < 0) |
698 | result = ret; | 699 | result = ret; |
699 | } | 700 | } |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 06550affbd27..77f759302e12 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -909,7 +909,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
909 | unsigned long last_index; | 909 | unsigned long last_index; |
910 | int will_write; | 910 | int will_write; |
911 | 911 | ||
912 | will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || | 912 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
913 | (file->f_flags & O_DIRECT)); | 913 | (file->f_flags & O_DIRECT)); |
914 | 914 | ||
915 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | 915 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, |
@@ -1076,7 +1076,7 @@ out_nolock: | |||
1076 | if (err) | 1076 | if (err) |
1077 | num_written = err; | 1077 | num_written = err; |
1078 | 1078 | ||
1079 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | 1079 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1080 | trans = btrfs_start_transaction(root, 1); | 1080 | trans = btrfs_start_transaction(root, 1); |
1081 | ret = btrfs_log_dentry_safe(trans, root, | 1081 | ret = btrfs_log_dentry_safe(trans, root, |
1082 | file->f_dentry); | 1082 | file->f_dentry); |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 1f42f772865a..6ccf7262d1b7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -214,7 +214,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode, | |||
214 | posix_flags |= SMB_O_EXCL; | 214 | posix_flags |= SMB_O_EXCL; |
215 | if (oflags & O_TRUNC) | 215 | if (oflags & O_TRUNC) |
216 | posix_flags |= SMB_O_TRUNC; | 216 | posix_flags |= SMB_O_TRUNC; |
217 | if (oflags & O_SYNC) | 217 | /* be safe and imply O_SYNC for O_DSYNC */ |
218 | if (oflags & O_DSYNC) | ||
218 | posix_flags |= SMB_O_SYNC; | 219 | posix_flags |= SMB_O_SYNC; |
219 | if (oflags & O_DIRECTORY) | 220 | if (oflags & O_DIRECTORY) |
220 | posix_flags |= SMB_O_DIRECTORY; | 221 | posix_flags |= SMB_O_DIRECTORY; |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 429337eb7afe..057e1dae12ab 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -76,8 +76,10 @@ static inline fmode_t cifs_posix_convert_flags(unsigned int flags) | |||
76 | reopening a file. They had their effect on the original open */ | 76 | reopening a file. They had their effect on the original open */ |
77 | if (flags & O_APPEND) | 77 | if (flags & O_APPEND) |
78 | posix_flags |= (fmode_t)O_APPEND; | 78 | posix_flags |= (fmode_t)O_APPEND; |
79 | if (flags & O_SYNC) | 79 | if (flags & O_DSYNC) |
80 | posix_flags |= (fmode_t)O_SYNC; | 80 | posix_flags |= (fmode_t)O_DSYNC; |
81 | if (flags & __O_SYNC) | ||
82 | posix_flags |= (fmode_t)__O_SYNC; | ||
81 | if (flags & O_DIRECTORY) | 83 | if (flags & O_DIRECTORY) |
82 | posix_flags |= (fmode_t)O_DIRECTORY; | 84 | posix_flags |= (fmode_t)O_DIRECTORY; |
83 | if (flags & O_NOFOLLOW) | 85 | if (flags & O_NOFOLLOW) |
diff --git a/fs/namei.c b/fs/namei.c index d11f404667e9..b83d38f614ff 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1678,6 +1678,15 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1678 | int will_write; | 1678 | int will_write; |
1679 | int flag = open_to_namei_flags(open_flag); | 1679 | int flag = open_to_namei_flags(open_flag); |
1680 | 1680 | ||
1681 | /* | ||
1682 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only | ||
1683 | * check for O_DSYNC if the need any syncing at all we enforce it's | ||
1684 | * always set instead of having to deal with possibly weird behaviour | ||
1685 | * for malicious applications setting only __O_SYNC. | ||
1686 | */ | ||
1687 | if (open_flag & __O_SYNC) | ||
1688 | open_flag |= O_DSYNC; | ||
1689 | |||
1681 | if (!acc_mode) | 1690 | if (!acc_mode) |
1682 | acc_mode = MAY_OPEN | ACC_MODE(flag); | 1691 | acc_mode = MAY_OPEN | ACC_MODE(flag); |
1683 | 1692 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f5fdd39e037a..6b891328f332 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -581,7 +581,7 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) | |||
581 | { | 581 | { |
582 | struct nfs_open_context *ctx; | 582 | struct nfs_open_context *ctx; |
583 | 583 | ||
584 | if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) | 584 | if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC)) |
585 | return 1; | 585 | return 1; |
586 | ctx = nfs_file_open_context(filp); | 586 | ctx = nfs_file_open_context(filp); |
587 | if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) | 587 | if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) |
@@ -622,7 +622,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
622 | 622 | ||
623 | nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); | 623 | nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); |
624 | result = generic_file_aio_write(iocb, iov, nr_segs, pos); | 624 | result = generic_file_aio_write(iocb, iov, nr_segs, pos); |
625 | /* Return error values for O_SYNC and IS_SYNC() */ | 625 | /* Return error values for O_DSYNC and IS_SYNC() */ |
626 | if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { | 626 | if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { |
627 | int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); | 627 | int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); |
628 | if (err < 0) | 628 | if (err < 0) |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c84b5cc1a943..b1ce2ea9b93b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -774,7 +774,7 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
774 | */ | 774 | */ |
775 | if (nfs_write_pageuptodate(page, inode) && | 775 | if (nfs_write_pageuptodate(page, inode) && |
776 | inode->i_flock == NULL && | 776 | inode->i_flock == NULL && |
777 | !(file->f_flags & O_SYNC)) { | 777 | !(file->f_flags & O_DSYNC)) { |
778 | count = max(count + offset, nfs_page_length(page)); | 778 | count = max(count + offset, nfs_page_length(page)); |
779 | offset = 0; | 779 | offset = 0; |
780 | } | 780 | } |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index de059f490586..3d30a1c974a8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2006,7 +2006,7 @@ out_dio: | |||
2006 | /* buffered aio wouldn't have proper lock coverage today */ | 2006 | /* buffered aio wouldn't have proper lock coverage today */ |
2007 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 2007 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
2008 | 2008 | ||
2009 | if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { | 2009 | if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) { |
2010 | ret = filemap_fdatawrite_range(file->f_mapping, pos, | 2010 | ret = filemap_fdatawrite_range(file->f_mapping, pos, |
2011 | pos + count - 1); | 2011 | pos + count - 1); |
2012 | if (ret < 0) | 2012 | if (ret < 0) |
@@ -295,10 +295,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) | |||
295 | */ | 295 | */ |
296 | int generic_write_sync(struct file *file, loff_t pos, loff_t count) | 296 | int generic_write_sync(struct file *file, loff_t pos, loff_t count) |
297 | { | 297 | { |
298 | if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host)) | 298 | if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) |
299 | return 0; | 299 | return 0; |
300 | return vfs_fsync_range(file, file->f_path.dentry, pos, | 300 | return vfs_fsync_range(file, file->f_path.dentry, pos, |
301 | pos + count - 1, 1); | 301 | pos + count - 1, |
302 | (file->f_flags & __O_SYNC) ? 0 : 1); | ||
302 | } | 303 | } |
303 | EXPORT_SYMBOL(generic_write_sync); | 304 | EXPORT_SYMBOL(generic_write_sync); |
304 | 305 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 1009adc8d602..eaa3d480bc20 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -1401,7 +1401,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1401 | if (ret < 0) | 1401 | if (ret < 0) |
1402 | return ret; | 1402 | return ret; |
1403 | 1403 | ||
1404 | if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { | 1404 | if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_DSYNC)) { |
1405 | err = ubifs_sync_wbufs_by_inode(c, inode); | 1405 | err = ubifs_sync_wbufs_by_inode(c, inode); |
1406 | if (err) | 1406 | if (err) |
1407 | return err; | 1407 | return err; |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 072050f8d346..339c52b1a434 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -811,7 +811,7 @@ write_retry: | |||
811 | XFS_STATS_ADD(xs_write_bytes, ret); | 811 | XFS_STATS_ADD(xs_write_bytes, ret); |
812 | 812 | ||
813 | /* Handle various SYNC-type writes */ | 813 | /* Handle various SYNC-type writes */ |
814 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | 814 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
815 | loff_t end = pos + ret - 1; | 815 | loff_t end = pos + ret - 1; |
816 | int error2; | 816 | int error2; |
817 | 817 | ||