aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@lst.de> 2009-10-27 06:05:28 -0400
committer: Jan Kara <jack@suse.cz> 2009-12-10 09:02:50 -0500
commit: 6b2f3d1f769be5779b479c37800229d9a4809fc3 (patch)
tree: 046ef6736ec6c25ab1c68741ba715d13645af336 /fs
parent: 59bc055211b8d266ab6089158058bf8268e02006 (diff)
vfs: Implement proper O_SYNC semantics
While Linux provided an O_SYNC flag basically since day 1, it took until Linux 2.4.0-test12pre2 to actually get it implemented for filesystems; since that day we had generic_osync_around with only minor changes and the great "For now, when the user asks for O_SYNC, we'll actually give O_DSYNC" comment. This patch intends to actually give us real O_SYNC semantics in addition to the O_DSYNC semantics. After Jan's O_SYNC patches, which are required before this patch, it's actually surprisingly simple: we just need to figure out when to set the datasync flag to vfs_fsync_range and when not. This patch renames the existing O_SYNC flag to O_DSYNC while keeping its numerical value to keep binary compatibility, and adds a new real O_SYNC flag. To guarantee backwards compatibility it is defined as expanding to both the O_DSYNC and the new additional binary flag (__O_SYNC) to make sure we are backwards-compatible when compiled against the new headers. This also means that all places that don't care about the differences can just check O_DSYNC and get the right behaviour for O_SYNC, too - only places that actually care need to check __O_SYNC in addition. Drivers and network filesystems have been updated in a fail safe way to always do the full sync magic if O_DSYNC is set. The few places setting O_SYNC for lower layers are kept that way for now to stay failsafe. We enforce that O_DSYNC is set when __O_SYNC is set early in the open path to make sure we always get these sane options. Note that parisc really screwed up their headers as they already define an O_DSYNC that has always been a no-op. We try to repair it by using it for the new O_DSYNC and redefining O_SYNC to send both the traditional O_SYNC numerical value _and_ the O_DSYNC one. Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Grant Grundler <grundler@parisc-linux.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andreas Dilger <adilger@sun.com> Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Ulrich Drepper <drepper@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs')
-rw-r--r-- fs/afs/write.c 5
-rw-r--r-- fs/btrfs/file.c 4
-rw-r--r-- fs/cifs/dir.c 3
-rw-r--r-- fs/cifs/file.c 6
-rw-r--r-- fs/namei.c 9
-rw-r--r-- fs/nfs/file.c 4
-rw-r--r-- fs/nfs/write.c 2
-rw-r--r-- fs/ocfs2/file.c 2
-rw-r--r-- fs/sync.c 5
-rw-r--r-- fs/ubifs/file.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.c 2
11 files changed, 29 insertions, 15 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c63a3c8beb73..6be1bc31616a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -692,8 +692,9 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
692 } 692 }
693 693
694 /* return error values for O_SYNC and IS_SYNC() */ 694 /* return error values for O_SYNC and IS_SYNC() */
695 if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) { 695 if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_DSYNC) {
696 ret = afs_fsync(iocb->ki_filp, dentry, 1); 696 ret = afs_fsync(iocb->ki_filp, dentry,
697 (iocb->ki_filp->f_flags & __O_SYNC) ? 0 : 1);
697 if (ret < 0) 698 if (ret < 0)
698 result = ret; 699 result = ret;
699 } 700 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 06550affbd27..77f759302e12 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -909,7 +909,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
909 unsigned long last_index; 909 unsigned long last_index;
910 int will_write; 910 int will_write;
911 911
912 will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || 912 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
913 (file->f_flags & O_DIRECT)); 913 (file->f_flags & O_DIRECT));
914 914
915 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, 915 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
@@ -1076,7 +1076,7 @@ out_nolock:
1076 if (err) 1076 if (err)
1077 num_written = err; 1077 num_written = err;
1078 1078
1079 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 1079 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1080 trans = btrfs_start_transaction(root, 1); 1080 trans = btrfs_start_transaction(root, 1);
1081 ret = btrfs_log_dentry_safe(trans, root, 1081 ret = btrfs_log_dentry_safe(trans, root,
1082 file->f_dentry); 1082 file->f_dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1f42f772865a..6ccf7262d1b7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -214,7 +214,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
214 posix_flags |= SMB_O_EXCL; 214 posix_flags |= SMB_O_EXCL;
215 if (oflags & O_TRUNC) 215 if (oflags & O_TRUNC)
216 posix_flags |= SMB_O_TRUNC; 216 posix_flags |= SMB_O_TRUNC;
217 if (oflags & O_SYNC) 217 /* be safe and imply O_SYNC for O_DSYNC */
218 if (oflags & O_DSYNC)
218 posix_flags |= SMB_O_SYNC; 219 posix_flags |= SMB_O_SYNC;
219 if (oflags & O_DIRECTORY) 220 if (oflags & O_DIRECTORY)
220 posix_flags |= SMB_O_DIRECTORY; 221 posix_flags |= SMB_O_DIRECTORY;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 429337eb7afe..057e1dae12ab 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -76,8 +76,10 @@ static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
76 reopening a file. They had their effect on the original open */ 76 reopening a file. They had their effect on the original open */
77 if (flags & O_APPEND) 77 if (flags & O_APPEND)
78 posix_flags |= (fmode_t)O_APPEND; 78 posix_flags |= (fmode_t)O_APPEND;
79 if (flags & O_SYNC) 79 if (flags & O_DSYNC)
80 posix_flags |= (fmode_t)O_SYNC; 80 posix_flags |= (fmode_t)O_DSYNC;
81 if (flags & __O_SYNC)
82 posix_flags |= (fmode_t)__O_SYNC;
81 if (flags & O_DIRECTORY) 83 if (flags & O_DIRECTORY)
82 posix_flags |= (fmode_t)O_DIRECTORY; 84 posix_flags |= (fmode_t)O_DIRECTORY;
83 if (flags & O_NOFOLLOW) 85 if (flags & O_NOFOLLOW)
diff --git a/fs/namei.c b/fs/namei.c
index d11f404667e9..b83d38f614ff 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1678,6 +1678,15 @@ struct file *do_filp_open(int dfd, const char *pathname,
1678 int will_write; 1678 int will_write;
1679 int flag = open_to_namei_flags(open_flag); 1679 int flag = open_to_namei_flags(open_flag);
1680 1680
1681 /*
1682 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1683 * check for O_DSYNC if they need any syncing at all we enforce it's
1684 * always set instead of having to deal with possibly weird behaviour
1685 * for malicious applications setting only __O_SYNC.
1686 */
1687 if (open_flag & __O_SYNC)
1688 open_flag |= O_DSYNC;
1689
1681 if (!acc_mode) 1690 if (!acc_mode)
1682 acc_mode = MAY_OPEN | ACC_MODE(flag); 1691 acc_mode = MAY_OPEN | ACC_MODE(flag);
1683 1692
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f5fdd39e037a..6b891328f332 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -581,7 +581,7 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
581{ 581{
582 struct nfs_open_context *ctx; 582 struct nfs_open_context *ctx;
583 583
584 if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) 584 if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
585 return 1; 585 return 1;
586 ctx = nfs_file_open_context(filp); 586 ctx = nfs_file_open_context(filp);
587 if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) 587 if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
@@ -622,7 +622,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
622 622
623 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); 623 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
624 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 624 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
625 /* Return error values for O_SYNC and IS_SYNC() */ 625 /* Return error values for O_DSYNC and IS_SYNC() */
626 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { 626 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
627 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); 627 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
628 if (err < 0) 628 if (err < 0)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c84b5cc1a943..b1ce2ea9b93b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -774,7 +774,7 @@ int nfs_updatepage(struct file *file, struct page *page,
774 */ 774 */
775 if (nfs_write_pageuptodate(page, inode) && 775 if (nfs_write_pageuptodate(page, inode) &&
776 inode->i_flock == NULL && 776 inode->i_flock == NULL &&
777 !(file->f_flags & O_SYNC)) { 777 !(file->f_flags & O_DSYNC)) {
778 count = max(count + offset, nfs_page_length(page)); 778 count = max(count + offset, nfs_page_length(page));
779 offset = 0; 779 offset = 0;
780 } 780 }
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index de059f490586..3d30a1c974a8 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2006,7 +2006,7 @@ out_dio:
2006 /* buffered aio wouldn't have proper lock coverage today */ 2006 /* buffered aio wouldn't have proper lock coverage today */
2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2008 2008
2009 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { 2009 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) {
2010 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2010 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2011 pos + count - 1); 2011 pos + count - 1);
2012 if (ret < 0) 2012 if (ret < 0)
diff --git a/fs/sync.c b/fs/sync.c
index d104591b066b..b75ca68dc081 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -295,10 +295,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
295 */ 295 */
296int generic_write_sync(struct file *file, loff_t pos, loff_t count) 296int generic_write_sync(struct file *file, loff_t pos, loff_t count)
297{ 297{
298 if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host)) 298 if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
299 return 0; 299 return 0;
300 return vfs_fsync_range(file, file->f_path.dentry, pos, 300 return vfs_fsync_range(file, file->f_path.dentry, pos,
301 pos + count - 1, 1); 301 pos + count - 1,
302 (file->f_flags & __O_SYNC) ? 0 : 1);
302} 303}
303EXPORT_SYMBOL(generic_write_sync); 304EXPORT_SYMBOL(generic_write_sync);
304 305
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 1009adc8d602..eaa3d480bc20 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1401,7 +1401,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
1401 if (ret < 0) 1401 if (ret < 0)
1402 return ret; 1402 return ret;
1403 1403
1404 if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { 1404 if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_DSYNC)) {
1405 err = ubifs_sync_wbufs_by_inode(c, inode); 1405 err = ubifs_sync_wbufs_by_inode(c, inode);
1406 if (err) 1406 if (err)
1407 return err; 1407 return err;
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 072050f8d346..339c52b1a434 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -811,7 +811,7 @@ write_retry:
811 XFS_STATS_ADD(xs_write_bytes, ret); 811 XFS_STATS_ADD(xs_write_bytes, ret);
812 812
813 /* Handle various SYNC-type writes */ 813 /* Handle various SYNC-type writes */
814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 814 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
815 loff_t end = pos + ret - 1; 815 loff_t end = pos + ret - 1;
816 int error2; 816 int error2;
817 817