aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-09-14 17:36:47 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-09-14 17:36:47 -0400
commit 4142e0d1def2c0176c27fd2e810243045a62eb6d (patch)
tree a21f76fafcd7609419a3ce610d8b9360748ccd76 /fs
parent 33f1de69312432baecb997a570b7d77c4d02d1ed (diff)
parent 2daea67e966dc0c42067ebea015ddac6834cef88 (diff)
Merge branch 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
* 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  fsync: wait for data writeout completion before calling ->fsync
  vfs: Remove generic_osync_inode() and sync_page_range{_nolock}()
  fat: Opencode sync_page_range_nolock()
  pohmelfs: Use new syncing helper
  xfs: Convert sync_page_range() to simple filemap_write_and_wait_range()
  ocfs2: Update syncing after splicing to match generic version
  ntfs: Use new syncing helpers and update comments
  ext4: Remove syncing logic from ext4_file_write
  ext3: Remove syncing logic from ext3_file_write
  ext2: Update comment about generic_osync_inode
  vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode
  vfs: Rename generic_file_aio_write_nolock
  ocfs2: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
  pohmelfs: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
  vfs: Remove syncing from generic_file_direct_write() and generic_file_buffered_write()
  vfs: Export __generic_file_aio_write() and add some comments
  vfs: Introduce filemap_fdatawait_range
Diffstat (limited to 'fs')
-rw-r--r--  fs/block_dev.c              29
-rw-r--r--  fs/ext2/inode.c              2
-rw-r--r--  fs/ext3/file.c              61
-rw-r--r--  fs/ext4/file.c              53
-rw-r--r--  fs/fat/file.c               22
-rw-r--r--  fs/fat/misc.c                4
-rw-r--r--  fs/fs-writeback.c           54
-rw-r--r--  fs/ntfs/file.c              16
-rw-r--r--  fs/ntfs/mft.c               13
-rw-r--r--  fs/ocfs2/file.c             49
-rw-r--r--  fs/splice.c                 22
-rw-r--r--  fs/sync.c                   56
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c   3
13 files changed, 137 insertions, 247 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 94dfda24c06e..3581a4e53942 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1405,6 +1405,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1405} 1405}
1406 1406
1407/* 1407/*
1408 * Write data to the block device. Only intended for the block device itself
1409 * and the raw driver which basically is a fake block device.
1410 *
1411 * Does not take i_mutex for the write and thus is not for general purpose
1412 * use.
1413 */
1414ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1415 unsigned long nr_segs, loff_t pos)
1416{
1417 struct file *file = iocb->ki_filp;
1418 ssize_t ret;
1419
1420 BUG_ON(iocb->ki_pos != pos);
1421
1422 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1423 if (ret > 0 || ret == -EIOCBQUEUED) {
1424 ssize_t err;
1425
1426 err = generic_write_sync(file, pos, ret);
1427 if (err < 0 && ret > 0)
1428 ret = err;
1429 }
1430 return ret;
1431}
1432EXPORT_SYMBOL_GPL(blkdev_aio_write);
1433
1434/*
1408 * Try to release a page associated with block device when the system 1435 * Try to release a page associated with block device when the system
1409 * is under memory pressure. 1436 * is under memory pressure.
1410 */ 1437 */
@@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = {
1436 .read = do_sync_read, 1463 .read = do_sync_read,
1437 .write = do_sync_write, 1464 .write = do_sync_write,
1438 .aio_read = generic_file_aio_read, 1465 .aio_read = generic_file_aio_read,
1439 .aio_write = generic_file_aio_write_nolock, 1466 .aio_write = blkdev_aio_write,
1440 .mmap = generic_file_mmap, 1467 .mmap = generic_file_mmap,
1441 .fsync = block_fsync, 1468 .fsync = block_fsync,
1442 .unlocked_ioctl = block_ioctl, 1469 .unlocked_ioctl = block_ioctl,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e27130341d4f..1c1638f873a4 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode,
482 unlock_buffer(bh); 482 unlock_buffer(bh);
483 mark_buffer_dirty_inode(bh, inode); 483 mark_buffer_dirty_inode(bh, inode);
484 /* We used to sync bh here if IS_SYNC(inode). 484 /* We used to sync bh here if IS_SYNC(inode).
485 * But we now rely upon generic_osync_inode() 485 * But we now rely upon generic_write_sync()
486 * and b_inode_buffers. But not for directories. 486 * and b_inode_buffers. But not for directories.
487 */ 487 */
488 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) 488 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 299253214789..388bbdfa0b4e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
51 return 0; 51 return 0;
52} 52}
53 53
54static ssize_t
55ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
56 unsigned long nr_segs, loff_t pos)
57{
58 struct file *file = iocb->ki_filp;
59 struct inode *inode = file->f_path.dentry->d_inode;
60 ssize_t ret;
61 int err;
62
63 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
64
65 /*
66 * Skip flushing if there was an error, or if nothing was written.
67 */
68 if (ret <= 0)
69 return ret;
70
71 /*
72 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
73 * journalling then we need to make sure that we force the transaction
74 * to disk to keep all metadata uptodate synchronously.
75 */
76 if (file->f_flags & O_SYNC) {
77 /*
78 * If we are non-data-journaled, then the dirty data has
79 * already been flushed to backing store by generic_osync_inode,
80 * and the inode has been flushed too if there have been any
81 * modifications other than mere timestamp updates.
82 *
83 * Open question --- do we care about flushing timestamps too
84 * if the inode is IS_SYNC?
85 */
86 if (!ext3_should_journal_data(inode))
87 return ret;
88
89 goto force_commit;
90 }
91
92 /*
93 * So we know that there has been no forced data flush. If the inode
94 * is marked IS_SYNC, we need to force one ourselves.
95 */
96 if (!IS_SYNC(inode))
97 return ret;
98
99 /*
100 * Open question #2 --- should we force data to disk here too? If we
101 * don't, the only impact is that data=writeback filesystems won't
102 * flush data to disk automatically on IS_SYNC, only metadata (but
103 * historically, that is what ext2 has done.)
104 */
105
106force_commit:
107 err = ext3_force_commit(inode->i_sb);
108 if (err)
109 return err;
110 return ret;
111}
112
113const struct file_operations ext3_file_operations = { 54const struct file_operations ext3_file_operations = {
114 .llseek = generic_file_llseek, 55 .llseek = generic_file_llseek,
115 .read = do_sync_read, 56 .read = do_sync_read,
116 .write = do_sync_write, 57 .write = do_sync_write,
117 .aio_read = generic_file_aio_read, 58 .aio_read = generic_file_aio_read,
118 .aio_write = ext3_file_write, 59 .aio_write = generic_file_aio_write,
119 .unlocked_ioctl = ext3_ioctl, 60 .unlocked_ioctl = ext3_ioctl,
120#ifdef CONFIG_COMPAT 61#ifdef CONFIG_COMPAT
121 .compat_ioctl = ext3_compat_ioctl, 62 .compat_ioctl = ext3_compat_ioctl,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 27f3c5354c0e..5ca3eca70a1e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -58,10 +58,7 @@ static ssize_t
58ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 58ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
59 unsigned long nr_segs, loff_t pos) 59 unsigned long nr_segs, loff_t pos)
60{ 60{
61 struct file *file = iocb->ki_filp; 61 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
62 struct inode *inode = file->f_path.dentry->d_inode;
63 ssize_t ret;
64 int err;
65 62
66 /* 63 /*
67 * If we have encountered a bitmap-format file, the size limit 64 * If we have encountered a bitmap-format file, the size limit
@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
81 } 78 }
82 } 79 }
83 80
84 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 81 return generic_file_aio_write(iocb, iov, nr_segs, pos);
85 /*
86 * Skip flushing if there was an error, or if nothing was written.
87 */
88 if (ret <= 0)
89 return ret;
90
91 /*
92 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
93 * journalling then we need to make sure that we force the transaction
94 * to disk to keep all metadata uptodate synchronously.
95 */
96 if (file->f_flags & O_SYNC) {
97 /*
98 * If we are non-data-journaled, then the dirty data has
99 * already been flushed to backing store by generic_osync_inode,
100 * and the inode has been flushed too if there have been any
101 * modifications other than mere timestamp updates.
102 *
103 * Open question --- do we care about flushing timestamps too
104 * if the inode is IS_SYNC?
105 */
106 if (!ext4_should_journal_data(inode))
107 return ret;
108
109 goto force_commit;
110 }
111
112 /*
113 * So we know that there has been no forced data flush. If the inode
114 * is marked IS_SYNC, we need to force one ourselves.
115 */
116 if (!IS_SYNC(inode))
117 return ret;
118
119 /*
120 * Open question #2 --- should we force data to disk here too? If we
121 * don't, the only impact is that data=writeback filesystems won't
122 * flush data to disk automatically on IS_SYNC, only metadata (but
123 * historically, that is what ext2 has done.)
124 */
125
126force_commit:
127 err = ext4_force_commit(inode->i_sb);
128 if (err)
129 return err;
130 return ret;
131} 82}
132 83
133static struct vm_operations_struct ext4_file_vm_ops = { 84static struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/fat/file.c b/fs/fat/file.c
index f042b965c95c..e8c159de236b 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
176 176
177 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 177 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
178 mark_inode_dirty(inode); 178 mark_inode_dirty(inode);
179 if (IS_SYNC(inode)) 179 if (IS_SYNC(inode)) {
180 err = sync_page_range_nolock(inode, mapping, start, count); 180 int err2;
181
182 /*
183 * Opencode syncing since we don't have a file open to use
184 * standard fsync path.
185 */
186 err = filemap_fdatawrite_range(mapping, start,
187 start + count - 1);
188 err2 = sync_mapping_buffers(mapping);
189 if (!err)
190 err = err2;
191 err2 = write_inode_now(inode, 1);
192 if (!err)
193 err = err2;
194 if (!err) {
195 err = filemap_fdatawait_range(mapping, start,
196 start + count - 1);
197 }
198 }
181out: 199out:
182 return err; 200 return err;
183} 201}
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index a6c20473dfd7..4e35be873e09 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
119 MSDOS_I(inode)->i_start = new_dclus; 119 MSDOS_I(inode)->i_start = new_dclus;
120 MSDOS_I(inode)->i_logstart = new_dclus; 120 MSDOS_I(inode)->i_logstart = new_dclus;
121 /* 121 /*
122 * Since generic_osync_inode() synchronize later if 122 * Since generic_write_sync() synchronizes regular files later,
123 * this is not directory, we don't here. 123 * we sync here only directories.
124 */ 124 */
125 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) { 125 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) {
126 ret = fat_sync_inode(inode); 126 ret = fat_sync_inode(inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index da86ef58e427..628235cf44b5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1242,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
1242 return ret; 1242 return ret;
1243} 1243}
1244EXPORT_SYMBOL(sync_inode); 1244EXPORT_SYMBOL(sync_inode);
1245
1246/**
1247 * generic_osync_inode - flush all dirty data for a given inode to disk
1248 * @inode: inode to write
1249 * @mapping: the address_space that should be flushed
1250 * @what: what to write and wait upon
1251 *
1252 * This can be called by file_write functions for files which have the
1253 * O_SYNC flag set, to flush dirty writes to disk.
1254 *
1255 * @what is a bitmask, specifying which part of the inode's data should be
1256 * written and waited upon.
1257 *
1258 * OSYNC_DATA: i_mapping's dirty data
1259 * OSYNC_METADATA: the buffers at i_mapping->private_list
1260 * OSYNC_INODE: the inode itself
1261 */
1262
1263int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
1264{
1265 int err = 0;
1266 int need_write_inode_now = 0;
1267 int err2;
1268
1269 if (what & OSYNC_DATA)
1270 err = filemap_fdatawrite(mapping);
1271 if (what & (OSYNC_METADATA|OSYNC_DATA)) {
1272 err2 = sync_mapping_buffers(mapping);
1273 if (!err)
1274 err = err2;
1275 }
1276 if (what & OSYNC_DATA) {
1277 err2 = filemap_fdatawait(mapping);
1278 if (!err)
1279 err = err2;
1280 }
1281
1282 spin_lock(&inode_lock);
1283 if ((inode->i_state & I_DIRTY) &&
1284 ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
1285 need_write_inode_now = 1;
1286 spin_unlock(&inode_lock);
1287
1288 if (need_write_inode_now) {
1289 err2 = write_inode_now(inode, 1);
1290 if (!err)
1291 err = err2;
1292 }
1293 else
1294 inode_sync_wait(inode);
1295
1296 return err;
1297}
1298EXPORT_SYMBOL(generic_osync_inode);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3140a4429af1..4350d4993b18 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2076,14 +2076,6 @@ err_out:
2076 *ppos = pos; 2076 *ppos = pos;
2077 if (cached_page) 2077 if (cached_page)
2078 page_cache_release(cached_page); 2078 page_cache_release(cached_page);
2079 /* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
2080 if (likely(!status)) {
2081 if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
2082 if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
2083 status = generic_osync_inode(vi, mapping,
2084 OSYNC_METADATA|OSYNC_DATA);
2085 }
2086 }
2087 pagevec_lru_add_file(&lru_pvec); 2079 pagevec_lru_add_file(&lru_pvec);
2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2080 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
2089 written ? "written" : "status", (unsigned long)written, 2081 written ? "written" : "status", (unsigned long)written,
@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2145 mutex_lock(&inode->i_mutex); 2137 mutex_lock(&inode->i_mutex);
2146 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); 2138 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2147 mutex_unlock(&inode->i_mutex); 2139 mutex_unlock(&inode->i_mutex);
2148 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2140 if (ret > 0) {
2149 int err = sync_page_range(inode, mapping, pos, ret); 2141 int err = generic_write_sync(file, pos, ret);
2150 if (err < 0) 2142 if (err < 0)
2151 ret = err; 2143 ret = err;
2152 } 2144 }
@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
2173 if (ret == -EIOCBQUEUED) 2165 if (ret == -EIOCBQUEUED)
2174 ret = wait_on_sync_kiocb(&kiocb); 2166 ret = wait_on_sync_kiocb(&kiocb);
2175 mutex_unlock(&inode->i_mutex); 2167 mutex_unlock(&inode->i_mutex);
2176 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2168 if (ret > 0) {
2177 int err = sync_page_range(inode, mapping, *ppos - ret, ret); 2169 int err = generic_write_sync(file, *ppos - ret, ret);
2178 if (err < 0) 2170 if (err < 0)
2179 ret = err; 2171 ret = err;
2180 } 2172 }
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 23bf68453d7d..1caa0ef0b2bb 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -384,13 +384,12 @@ unm_err_out:
384 * it is dirty in the inode meta data rather than the data page cache of the 384 * it is dirty in the inode meta data rather than the data page cache of the
385 * inode, and thus there are no data pages that need writing out. Therefore, a 385 * inode, and thus there are no data pages that need writing out. Therefore, a
386 * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the 386 * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
387 * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to 387 * other hand, is not sufficient, because ->write_inode needs to be called even
388 * ensure ->write_inode is called from generic_osync_inode() and this needs to 388 * in case of fdatasync. This needs to happen or the file data would not
389 * happen or the file data would not necessarily hit the device synchronously, 389 * necessarily hit the device synchronously, even though the vfs inode has the
390 * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC 390 * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
391 * simply "feels" better than just I_DIRTY_SYNC, since the file data has not 391 * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
392 * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own 392 * which is not what I_DIRTY_SYNC on its own would suggest.
393 * would suggest.
394 */ 393 */
395void __mark_mft_record_dirty(ntfs_inode *ni) 394void __mark_mft_record_dirty(ntfs_inode *ni)
396{ 395{
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index aa501d3f93f1..221c5e98957b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1871,8 +1871,7 @@ relock:
1871 goto out_dio; 1871 goto out_dio;
1872 } 1872 }
1873 } else { 1873 } else {
1874 written = generic_file_aio_write_nolock(iocb, iov, nr_segs, 1874 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
1875 *ppos);
1876 } 1875 }
1877 1876
1878out_dio: 1877out_dio:
@@ -1880,18 +1879,21 @@ out_dio:
1880 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 1879 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
1881 1880
1882 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { 1881 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
1883 /* 1882 ret = filemap_fdatawrite_range(file->f_mapping, pos,
1884 * The generic write paths have handled getting data 1883 pos + count - 1);
1885 * to disk, but since we don't make use of the dirty 1884 if (ret < 0)
1886 * inode list, a manual journal commit is necessary 1885 written = ret;
1887 * here. 1886
1888 */ 1887 if (!ret && (old_size != i_size_read(inode) ||
1889 if (old_size != i_size_read(inode) || 1888 old_clusters != OCFS2_I(inode)->ip_clusters)) {
1890 old_clusters != OCFS2_I(inode)->ip_clusters) {
1891 ret = jbd2_journal_force_commit(osb->journal->j_journal); 1889 ret = jbd2_journal_force_commit(osb->journal->j_journal);
1892 if (ret < 0) 1890 if (ret < 0)
1893 written = ret; 1891 written = ret;
1894 } 1892 }
1893
1894 if (!ret)
1895 ret = filemap_fdatawait_range(file->f_mapping, pos,
1896 pos + count - 1);
1895 } 1897 }
1896 1898
1897 /* 1899 /*
@@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1991 1993
1992 if (ret > 0) { 1994 if (ret > 0) {
1993 unsigned long nr_pages; 1995 unsigned long nr_pages;
1996 int err;
1994 1997
1995 *ppos += ret;
1996 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1998 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1997 1999
1998 /* 2000 err = generic_write_sync(out, *ppos, ret);
1999 * If file or inode is SYNC and we actually wrote some data, 2001 if (err)
2000 * sync it. 2002 ret = err;
2001 */ 2003 else
2002 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 2004 *ppos += ret;
2003 int err;
2004
2005 mutex_lock(&inode->i_mutex);
2006 err = ocfs2_rw_lock(inode, 1);
2007 if (err < 0) {
2008 mlog_errno(err);
2009 } else {
2010 err = generic_osync_inode(inode, mapping,
2011 OSYNC_METADATA|OSYNC_DATA);
2012 ocfs2_rw_unlock(inode, 1);
2013 }
2014 mutex_unlock(&inode->i_mutex);
2015 2005
2016 if (err)
2017 ret = err;
2018 }
2019 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 2006 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
2020 } 2007 }
2021 2008
diff --git a/fs/splice.c b/fs/splice.c
index 73766d24f97b..819023733f8e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -976,25 +976,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
976 976
977 if (ret > 0) { 977 if (ret > 0) {
978 unsigned long nr_pages; 978 unsigned long nr_pages;
979 int err;
979 980
980 *ppos += ret;
981 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 981 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
982 982
983 /* 983 err = generic_write_sync(out, *ppos, ret);
984 * If file or inode is SYNC and we actually wrote some data, 984 if (err)
985 * sync it. 985 ret = err;
986 */ 986 else
987 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 987 *ppos += ret;
988 int err;
989
990 mutex_lock(&inode->i_mutex);
991 err = generic_osync_inode(inode, mapping,
992 OSYNC_METADATA|OSYNC_DATA);
993 mutex_unlock(&inode->i_mutex);
994
995 if (err)
996 ret = err;
997 }
998 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 988 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
999 } 989 }
1000 990
diff --git a/fs/sync.c b/fs/sync.c
index 103cc7fdd3df..192340930bb4 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -178,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
178} 178}
179 179
180/** 180/**
181 * vfs_fsync - perform a fsync or fdatasync on a file 181 * vfs_fsync_range - helper to sync a range of data & metadata to disk
182 * @file: file to sync 182 * @file: file to sync
183 * @dentry: dentry of @file 183 * @dentry: dentry of @file
184 * @data: only perform a fdatasync operation 184 * @start: offset in bytes of the beginning of data range to sync
185 * @end: offset in bytes of the end of data range (inclusive)
186 * @datasync: perform only datasync
185 * 187 *
186 * Write back data and metadata for @file to disk. If @datasync is 188 * Write back data in range @start..@end and metadata for @file to disk. If
187 * set only metadata needed to access modified file data is written. 189 * @datasync is set only metadata needed to access modified file data is
190 * written.
188 * 191 *
189 * In case this function is called from nfsd @file may be %NULL and 192 * In case this function is called from nfsd @file may be %NULL and
190 * only @dentry is set. This can only happen when the filesystem 193 * only @dentry is set. This can only happen when the filesystem
191 * implements the export_operations API. 194 * implements the export_operations API.
192 */ 195 */
193int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) 196int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
197 loff_t end, int datasync)
194{ 198{
195 const struct file_operations *fop; 199 const struct file_operations *fop;
196 struct address_space *mapping; 200 struct address_space *mapping;
@@ -214,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
214 goto out; 218 goto out;
215 } 219 }
216 220
217 ret = filemap_fdatawrite(mapping); 221 ret = filemap_write_and_wait_range(mapping, start, end);
218 222
219 /* 223 /*
220 * We need to protect against concurrent writers, which could cause 224 * We need to protect against concurrent writers, which could cause
@@ -225,12 +229,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
225 if (!ret) 229 if (!ret)
226 ret = err; 230 ret = err;
227 mutex_unlock(&mapping->host->i_mutex); 231 mutex_unlock(&mapping->host->i_mutex);
228 err = filemap_fdatawait(mapping); 232
229 if (!ret)
230 ret = err;
231out: 233out:
232 return ret; 234 return ret;
233} 235}
236EXPORT_SYMBOL(vfs_fsync_range);
237
238/**
239 * vfs_fsync - perform a fsync or fdatasync on a file
240 * @file: file to sync
241 * @dentry: dentry of @file
242 * @datasync: only perform a fdatasync operation
243 *
244 * Write back data and metadata for @file to disk. If @datasync is
245 * set only metadata needed to access modified file data is written.
246 *
247 * In case this function is called from nfsd @file may be %NULL and
248 * only @dentry is set. This can only happen when the filesystem
249 * implements the export_operations API.
250 */
251int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
252{
253 return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
254}
234EXPORT_SYMBOL(vfs_fsync); 255EXPORT_SYMBOL(vfs_fsync);
235 256
236static int do_fsync(unsigned int fd, int datasync) 257static int do_fsync(unsigned int fd, int datasync)
@@ -256,6 +277,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
256 return do_fsync(fd, 1); 277 return do_fsync(fd, 1);
257} 278}
258 279
280/**
281 * generic_write_sync - perform syncing after a write if file / inode is sync
282 * @file: file to which the write happened
283 * @pos: offset where the write started
284 * @count: length of the write
285 *
286 * This is just a simple wrapper about our general syncing function.
287 */
288int generic_write_sync(struct file *file, loff_t pos, loff_t count)
289{
290 if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host))
291 return 0;
292 return vfs_fsync_range(file, file->f_path.dentry, pos,
293 pos + count - 1, 1);
294}
295EXPORT_SYMBOL(generic_write_sync);
296
259/* 297/*
260 * sys_sync_file_range() permits finely controlled syncing over a segment of 298 * sys_sync_file_range() permits finely controlled syncing over a segment of
261 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is 299 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7078974a6eee..fde63a3c4ecc 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -817,7 +817,8 @@ write_retry:
817 xfs_iunlock(xip, iolock); 817 xfs_iunlock(xip, iolock);
818 if (need_i_mutex) 818 if (need_i_mutex)
819 mutex_unlock(&inode->i_mutex); 819 mutex_unlock(&inode->i_mutex);
820 error2 = sync_page_range(inode, mapping, pos, ret); 820 error2 = filemap_write_and_wait_range(mapping, pos,
821 pos + ret - 1);
821 if (!error) 822 if (!error)
822 error = error2; 823 error = error2;
823 if (need_i_mutex) 824 if (need_i_mutex)