aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 17:36:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 17:36:47 -0400
commit4142e0d1def2c0176c27fd2e810243045a62eb6d (patch)
treea21f76fafcd7609419a3ce610d8b9360748ccd76
parent33f1de69312432baecb997a570b7d77c4d02d1ed (diff)
parent2daea67e966dc0c42067ebea015ddac6834cef88 (diff)
Merge branch 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
* 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6: fsync: wait for data writeout completion before calling ->fsync vfs: Remove generic_osync_inode() and sync_page_range{_nolock}() fat: Opencode sync_page_range_nolock() pohmelfs: Use new syncing helper xfs: Convert sync_page_range() to simple filemap_write_and_wait_range() ocfs2: Update syncing after splicing to match generic version ntfs: Use new syncing helpers and update comments ext4: Remove syncing logic from ext4_file_write ext3: Remove syncing logic from ext3_file_write ext2: Update comment about generic_osync_inode vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode vfs: Rename generic_file_aio_write_nolock ocfs2: Use __generic_file_aio_write instead of generic_file_aio_write_nolock pohmelfs: Use __generic_file_aio_write instead of generic_file_aio_write_nolock vfs: Remove syncing from generic_file_direct_write() and generic_file_buffered_write() vfs: Export __generic_file_aio_write() and add some comments vfs: Introduce filemap_fdatawait_range
-rw-r--r--drivers/char/raw.c2
-rw-r--r--drivers/staging/pohmelfs/inode.c6
-rw-r--r--fs/block_dev.c29
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/file.c61
-rw-r--r--fs/ext4/file.c53
-rw-r--r--fs/fat/file.c22
-rw-r--r--fs/fat/misc.c4
-rw-r--r--fs/fs-writeback.c54
-rw-r--r--fs/ntfs/file.c16
-rw-r--r--fs/ntfs/mft.c13
-rw-r--r--fs/ocfs2/file.c49
-rw-r--r--fs/splice.c22
-rw-r--r--fs/sync.c56
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c3
-rw-r--r--include/linux/fs.h18
-rw-r--r--include/linux/writeback.h4
-rw-r--r--mm/filemap.c170
18 files changed, 204 insertions, 380 deletions
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 05f9d18b9361..40268db02e22 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -246,7 +246,7 @@ static const struct file_operations raw_fops = {
246 .read = do_sync_read, 246 .read = do_sync_read,
247 .aio_read = generic_file_aio_read, 247 .aio_read = generic_file_aio_read,
248 .write = do_sync_write, 248 .write = do_sync_write,
249 .aio_write = generic_file_aio_write_nolock, 249 .aio_write = blkdev_aio_write,
250 .open = raw_open, 250 .open = raw_open,
251 .release= raw_release, 251 .release= raw_release,
252 .ioctl = raw_ioctl, 252 .ioctl = raw_ioctl,
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index e63c9bea6c54..d004a9ddddbc 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -921,16 +921,16 @@ ssize_t pohmelfs_write(struct file *file, const char __user *buf,
921 if (ret) 921 if (ret)
922 goto err_out_unlock; 922 goto err_out_unlock;
923 923
924 ret = generic_file_aio_write_nolock(&kiocb, &iov, 1, pos); 924 ret = __generic_file_aio_write(&kiocb, &iov, 1, &kiocb.ki_pos);
925 *ppos = kiocb.ki_pos; 925 *ppos = kiocb.ki_pos;
926 926
927 mutex_unlock(&inode->i_mutex); 927 mutex_unlock(&inode->i_mutex);
928 WARN_ON(ret < 0); 928 WARN_ON(ret < 0);
929 929
930 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 930 if (ret > 0) {
931 ssize_t err; 931 ssize_t err;
932 932
933 err = sync_page_range(inode, mapping, pos, ret); 933 err = generic_write_sync(file, pos, ret);
934 if (err < 0) 934 if (err < 0)
935 ret = err; 935 ret = err;
936 WARN_ON(ret < 0); 936 WARN_ON(ret < 0);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 94dfda24c06e..3581a4e53942 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1405,6 +1405,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1405} 1405}
1406 1406
1407/* 1407/*
1408 * Write data to the block device. Only intended for the block device itself
1409 * and the raw driver which basically is a fake block device.
1410 *
1411 * Does not take i_mutex for the write and thus is not for general purpose
1412 * use.
1413 */
1414ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1415 unsigned long nr_segs, loff_t pos)
1416{
1417 struct file *file = iocb->ki_filp;
1418 ssize_t ret;
1419
1420 BUG_ON(iocb->ki_pos != pos);
1421
1422 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1423 if (ret > 0 || ret == -EIOCBQUEUED) {
1424 ssize_t err;
1425
1426 err = generic_write_sync(file, pos, ret);
1427 if (err < 0 && ret > 0)
1428 ret = err;
1429 }
1430 return ret;
1431}
1432EXPORT_SYMBOL_GPL(blkdev_aio_write);
1433
1434/*
1408 * Try to release a page associated with block device when the system 1435 * Try to release a page associated with block device when the system
1409 * is under memory pressure. 1436 * is under memory pressure.
1410 */ 1437 */
@@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = {
1436 .read = do_sync_read, 1463 .read = do_sync_read,
1437 .write = do_sync_write, 1464 .write = do_sync_write,
1438 .aio_read = generic_file_aio_read, 1465 .aio_read = generic_file_aio_read,
1439 .aio_write = generic_file_aio_write_nolock, 1466 .aio_write = blkdev_aio_write,
1440 .mmap = generic_file_mmap, 1467 .mmap = generic_file_mmap,
1441 .fsync = block_fsync, 1468 .fsync = block_fsync,
1442 .unlocked_ioctl = block_ioctl, 1469 .unlocked_ioctl = block_ioctl,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e27130341d4f..1c1638f873a4 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode,
482 unlock_buffer(bh); 482 unlock_buffer(bh);
483 mark_buffer_dirty_inode(bh, inode); 483 mark_buffer_dirty_inode(bh, inode);
484 /* We used to sync bh here if IS_SYNC(inode). 484 /* We used to sync bh here if IS_SYNC(inode).
485 * But we now rely upon generic_osync_inode() 485 * But we now rely upon generic_write_sync()
486 * and b_inode_buffers. But not for directories. 486 * and b_inode_buffers. But not for directories.
487 */ 487 */
488 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) 488 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 299253214789..388bbdfa0b4e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
51 return 0; 51 return 0;
52} 52}
53 53
54static ssize_t
55ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
56 unsigned long nr_segs, loff_t pos)
57{
58 struct file *file = iocb->ki_filp;
59 struct inode *inode = file->f_path.dentry->d_inode;
60 ssize_t ret;
61 int err;
62
63 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
64
65 /*
66 * Skip flushing if there was an error, or if nothing was written.
67 */
68 if (ret <= 0)
69 return ret;
70
71 /*
72 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
73 * journalling then we need to make sure that we force the transaction
74 * to disk to keep all metadata uptodate synchronously.
75 */
76 if (file->f_flags & O_SYNC) {
77 /*
78 * If we are non-data-journaled, then the dirty data has
79 * already been flushed to backing store by generic_osync_inode,
80 * and the inode has been flushed too if there have been any
81 * modifications other than mere timestamp updates.
82 *
83 * Open question --- do we care about flushing timestamps too
84 * if the inode is IS_SYNC?
85 */
86 if (!ext3_should_journal_data(inode))
87 return ret;
88
89 goto force_commit;
90 }
91
92 /*
93 * So we know that there has been no forced data flush. If the inode
94 * is marked IS_SYNC, we need to force one ourselves.
95 */
96 if (!IS_SYNC(inode))
97 return ret;
98
99 /*
100 * Open question #2 --- should we force data to disk here too? If we
101 * don't, the only impact is that data=writeback filesystems won't
102 * flush data to disk automatically on IS_SYNC, only metadata (but
103 * historically, that is what ext2 has done.)
104 */
105
106force_commit:
107 err = ext3_force_commit(inode->i_sb);
108 if (err)
109 return err;
110 return ret;
111}
112
113const struct file_operations ext3_file_operations = { 54const struct file_operations ext3_file_operations = {
114 .llseek = generic_file_llseek, 55 .llseek = generic_file_llseek,
115 .read = do_sync_read, 56 .read = do_sync_read,
116 .write = do_sync_write, 57 .write = do_sync_write,
117 .aio_read = generic_file_aio_read, 58 .aio_read = generic_file_aio_read,
118 .aio_write = ext3_file_write, 59 .aio_write = generic_file_aio_write,
119 .unlocked_ioctl = ext3_ioctl, 60 .unlocked_ioctl = ext3_ioctl,
120#ifdef CONFIG_COMPAT 61#ifdef CONFIG_COMPAT
121 .compat_ioctl = ext3_compat_ioctl, 62 .compat_ioctl = ext3_compat_ioctl,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 27f3c5354c0e..5ca3eca70a1e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -58,10 +58,7 @@ static ssize_t
58ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 58ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
59 unsigned long nr_segs, loff_t pos) 59 unsigned long nr_segs, loff_t pos)
60{ 60{
61 struct file *file = iocb->ki_filp; 61 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
62 struct inode *inode = file->f_path.dentry->d_inode;
63 ssize_t ret;
64 int err;
65 62
66 /* 63 /*
67 * If we have encountered a bitmap-format file, the size limit 64 * If we have encountered a bitmap-format file, the size limit
@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
81 } 78 }
82 } 79 }
83 80
84 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 81 return generic_file_aio_write(iocb, iov, nr_segs, pos);
85 /*
86 * Skip flushing if there was an error, or if nothing was written.
87 */
88 if (ret <= 0)
89 return ret;
90
91 /*
92 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
93 * journalling then we need to make sure that we force the transaction
94 * to disk to keep all metadata uptodate synchronously.
95 */
96 if (file->f_flags & O_SYNC) {
97 /*
98 * If we are non-data-journaled, then the dirty data has
99 * already been flushed to backing store by generic_osync_inode,
100 * and the inode has been flushed too if there have been any
101 * modifications other than mere timestamp updates.
102 *
103 * Open question --- do we care about flushing timestamps too
104 * if the inode is IS_SYNC?
105 */
106 if (!ext4_should_journal_data(inode))
107 return ret;
108
109 goto force_commit;
110 }
111
112 /*
113 * So we know that there has been no forced data flush. If the inode
114 * is marked IS_SYNC, we need to force one ourselves.
115 */
116 if (!IS_SYNC(inode))
117 return ret;
118
119 /*
120 * Open question #2 --- should we force data to disk here too? If we
121 * don't, the only impact is that data=writeback filesystems won't
122 * flush data to disk automatically on IS_SYNC, only metadata (but
123 * historically, that is what ext2 has done.)
124 */
125
126force_commit:
127 err = ext4_force_commit(inode->i_sb);
128 if (err)
129 return err;
130 return ret;
131} 82}
132 83
133static struct vm_operations_struct ext4_file_vm_ops = { 84static struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/fat/file.c b/fs/fat/file.c
index f042b965c95c..e8c159de236b 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
176 176
177 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 177 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
178 mark_inode_dirty(inode); 178 mark_inode_dirty(inode);
179 if (IS_SYNC(inode)) 179 if (IS_SYNC(inode)) {
180 err = sync_page_range_nolock(inode, mapping, start, count); 180 int err2;
181
182 /*
183 * Opencode syncing since we don't have a file open to use
184 * standard fsync path.
185 */
186 err = filemap_fdatawrite_range(mapping, start,
187 start + count - 1);
188 err2 = sync_mapping_buffers(mapping);
189 if (!err)
190 err = err2;
191 err2 = write_inode_now(inode, 1);
192 if (!err)
193 err = err2;
194 if (!err) {
195 err = filemap_fdatawait_range(mapping, start,
196 start + count - 1);
197 }
198 }
181out: 199out:
182 return err; 200 return err;
183} 201}
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index a6c20473dfd7..4e35be873e09 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
119 MSDOS_I(inode)->i_start = new_dclus; 119 MSDOS_I(inode)->i_start = new_dclus;
120 MSDOS_I(inode)->i_logstart = new_dclus; 120 MSDOS_I(inode)->i_logstart = new_dclus;
121 /* 121 /*
122 * Since generic_osync_inode() synchronize later if 122 * Since generic_write_sync() synchronizes regular files later,
123 * this is not directory, we don't here. 123 * we sync here only directories.
124 */ 124 */
125 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) { 125 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) {
126 ret = fat_sync_inode(inode); 126 ret = fat_sync_inode(inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index da86ef58e427..628235cf44b5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1242,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
1242 return ret; 1242 return ret;
1243} 1243}
1244EXPORT_SYMBOL(sync_inode); 1244EXPORT_SYMBOL(sync_inode);
1245
1246/**
1247 * generic_osync_inode - flush all dirty data for a given inode to disk
1248 * @inode: inode to write
1249 * @mapping: the address_space that should be flushed
1250 * @what: what to write and wait upon
1251 *
1252 * This can be called by file_write functions for files which have the
1253 * O_SYNC flag set, to flush dirty writes to disk.
1254 *
1255 * @what is a bitmask, specifying which part of the inode's data should be
1256 * written and waited upon.
1257 *
1258 * OSYNC_DATA: i_mapping's dirty data
1259 * OSYNC_METADATA: the buffers at i_mapping->private_list
1260 * OSYNC_INODE: the inode itself
1261 */
1262
1263int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
1264{
1265 int err = 0;
1266 int need_write_inode_now = 0;
1267 int err2;
1268
1269 if (what & OSYNC_DATA)
1270 err = filemap_fdatawrite(mapping);
1271 if (what & (OSYNC_METADATA|OSYNC_DATA)) {
1272 err2 = sync_mapping_buffers(mapping);
1273 if (!err)
1274 err = err2;
1275 }
1276 if (what & OSYNC_DATA) {
1277 err2 = filemap_fdatawait(mapping);
1278 if (!err)
1279 err = err2;
1280 }
1281
1282 spin_lock(&inode_lock);
1283 if ((inode->i_state & I_DIRTY) &&
1284 ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
1285 need_write_inode_now = 1;
1286 spin_unlock(&inode_lock);
1287
1288 if (need_write_inode_now) {
1289 err2 = write_inode_now(inode, 1);
1290 if (!err)
1291 err = err2;
1292 }
1293 else
1294 inode_sync_wait(inode);
1295
1296 return err;
1297}
1298EXPORT_SYMBOL(generic_osync_inode);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3140a4429af1..4350d4993b18 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2076,14 +2076,6 @@ err_out:
2076 *ppos = pos; 2076 *ppos = pos;
2077 if (cached_page) 2077 if (cached_page)
2078 page_cache_release(cached_page); 2078 page_cache_release(cached_page);
2079 /* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
2080 if (likely(!status)) {
2081 if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
2082 if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
2083 status = generic_osync_inode(vi, mapping,
2084 OSYNC_METADATA|OSYNC_DATA);
2085 }
2086 }
2087 pagevec_lru_add_file(&lru_pvec); 2079 pagevec_lru_add_file(&lru_pvec);
2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2080 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
2089 written ? "written" : "status", (unsigned long)written, 2081 written ? "written" : "status", (unsigned long)written,
@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2145 mutex_lock(&inode->i_mutex); 2137 mutex_lock(&inode->i_mutex);
2146 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); 2138 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2147 mutex_unlock(&inode->i_mutex); 2139 mutex_unlock(&inode->i_mutex);
2148 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2140 if (ret > 0) {
2149 int err = sync_page_range(inode, mapping, pos, ret); 2141 int err = generic_write_sync(file, pos, ret);
2150 if (err < 0) 2142 if (err < 0)
2151 ret = err; 2143 ret = err;
2152 } 2144 }
@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
2173 if (ret == -EIOCBQUEUED) 2165 if (ret == -EIOCBQUEUED)
2174 ret = wait_on_sync_kiocb(&kiocb); 2166 ret = wait_on_sync_kiocb(&kiocb);
2175 mutex_unlock(&inode->i_mutex); 2167 mutex_unlock(&inode->i_mutex);
2176 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2168 if (ret > 0) {
2177 int err = sync_page_range(inode, mapping, *ppos - ret, ret); 2169 int err = generic_write_sync(file, *ppos - ret, ret);
2178 if (err < 0) 2170 if (err < 0)
2179 ret = err; 2171 ret = err;
2180 } 2172 }
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 23bf68453d7d..1caa0ef0b2bb 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -384,13 +384,12 @@ unm_err_out:
384 * it is dirty in the inode meta data rather than the data page cache of the 384 * it is dirty in the inode meta data rather than the data page cache of the
385 * inode, and thus there are no data pages that need writing out. Therefore, a 385 * inode, and thus there are no data pages that need writing out. Therefore, a
386 * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the 386 * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
387 * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to 387 * other hand, is not sufficient, because ->write_inode needs to be called even
388 * ensure ->write_inode is called from generic_osync_inode() and this needs to 388 * in case of fdatasync. This needs to happen or the file data would not
389 * happen or the file data would not necessarily hit the device synchronously, 389 * necessarily hit the device synchronously, even though the vfs inode has the
390 * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC 390 * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
391 * simply "feels" better than just I_DIRTY_SYNC, since the file data has not 391 * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
392 * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own 392 * which is not what I_DIRTY_SYNC on its own would suggest.
393 * would suggest.
394 */ 393 */
395void __mark_mft_record_dirty(ntfs_inode *ni) 394void __mark_mft_record_dirty(ntfs_inode *ni)
396{ 395{
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index aa501d3f93f1..221c5e98957b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1871,8 +1871,7 @@ relock:
1871 goto out_dio; 1871 goto out_dio;
1872 } 1872 }
1873 } else { 1873 } else {
1874 written = generic_file_aio_write_nolock(iocb, iov, nr_segs, 1874 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
1875 *ppos);
1876 } 1875 }
1877 1876
1878out_dio: 1877out_dio:
@@ -1880,18 +1879,21 @@ out_dio:
1880 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 1879 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
1881 1880
1882 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { 1881 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
1883 /* 1882 ret = filemap_fdatawrite_range(file->f_mapping, pos,
1884 * The generic write paths have handled getting data 1883 pos + count - 1);
1885 * to disk, but since we don't make use of the dirty 1884 if (ret < 0)
1886 * inode list, a manual journal commit is necessary 1885 written = ret;
1887 * here. 1886
1888 */ 1887 if (!ret && (old_size != i_size_read(inode) ||
1889 if (old_size != i_size_read(inode) || 1888 old_clusters != OCFS2_I(inode)->ip_clusters)) {
1890 old_clusters != OCFS2_I(inode)->ip_clusters) {
1891 ret = jbd2_journal_force_commit(osb->journal->j_journal); 1889 ret = jbd2_journal_force_commit(osb->journal->j_journal);
1892 if (ret < 0) 1890 if (ret < 0)
1893 written = ret; 1891 written = ret;
1894 } 1892 }
1893
1894 if (!ret)
1895 ret = filemap_fdatawait_range(file->f_mapping, pos,
1896 pos + count - 1);
1895 } 1897 }
1896 1898
1897 /* 1899 /*
@@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1991 1993
1992 if (ret > 0) { 1994 if (ret > 0) {
1993 unsigned long nr_pages; 1995 unsigned long nr_pages;
1996 int err;
1994 1997
1995 *ppos += ret;
1996 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1998 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1997 1999
1998 /* 2000 err = generic_write_sync(out, *ppos, ret);
1999 * If file or inode is SYNC and we actually wrote some data, 2001 if (err)
2000 * sync it. 2002 ret = err;
2001 */ 2003 else
2002 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 2004 *ppos += ret;
2003 int err;
2004
2005 mutex_lock(&inode->i_mutex);
2006 err = ocfs2_rw_lock(inode, 1);
2007 if (err < 0) {
2008 mlog_errno(err);
2009 } else {
2010 err = generic_osync_inode(inode, mapping,
2011 OSYNC_METADATA|OSYNC_DATA);
2012 ocfs2_rw_unlock(inode, 1);
2013 }
2014 mutex_unlock(&inode->i_mutex);
2015 2005
2016 if (err)
2017 ret = err;
2018 }
2019 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 2006 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
2020 } 2007 }
2021 2008
diff --git a/fs/splice.c b/fs/splice.c
index 73766d24f97b..819023733f8e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -976,25 +976,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
976 976
977 if (ret > 0) { 977 if (ret > 0) {
978 unsigned long nr_pages; 978 unsigned long nr_pages;
979 int err;
979 980
980 *ppos += ret;
981 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 981 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
982 982
983 /* 983 err = generic_write_sync(out, *ppos, ret);
984 * If file or inode is SYNC and we actually wrote some data, 984 if (err)
985 * sync it. 985 ret = err;
986 */ 986 else
987 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 987 *ppos += ret;
988 int err;
989
990 mutex_lock(&inode->i_mutex);
991 err = generic_osync_inode(inode, mapping,
992 OSYNC_METADATA|OSYNC_DATA);
993 mutex_unlock(&inode->i_mutex);
994
995 if (err)
996 ret = err;
997 }
998 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 988 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
999 } 989 }
1000 990
diff --git a/fs/sync.c b/fs/sync.c
index 103cc7fdd3df..192340930bb4 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -178,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
178} 178}
179 179
180/** 180/**
181 * vfs_fsync - perform a fsync or fdatasync on a file 181 * vfs_fsync_range - helper to sync a range of data & metadata to disk
182 * @file: file to sync 182 * @file: file to sync
183 * @dentry: dentry of @file 183 * @dentry: dentry of @file
184 * @data: only perform a fdatasync operation 184 * @start: offset in bytes of the beginning of data range to sync
185 * @end: offset in bytes of the end of data range (inclusive)
186 * @datasync: perform only datasync
185 * 187 *
186 * Write back data and metadata for @file to disk. If @datasync is 188 * Write back data in range @start..@end and metadata for @file to disk. If
187 * set only metadata needed to access modified file data is written. 189 * @datasync is set only metadata needed to access modified file data is
190 * written.
188 * 191 *
189 * In case this function is called from nfsd @file may be %NULL and 192 * In case this function is called from nfsd @file may be %NULL and
190 * only @dentry is set. This can only happen when the filesystem 193 * only @dentry is set. This can only happen when the filesystem
191 * implements the export_operations API. 194 * implements the export_operations API.
192 */ 195 */
193int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) 196int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
197 loff_t end, int datasync)
194{ 198{
195 const struct file_operations *fop; 199 const struct file_operations *fop;
196 struct address_space *mapping; 200 struct address_space *mapping;
@@ -214,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
214 goto out; 218 goto out;
215 } 219 }
216 220
217 ret = filemap_fdatawrite(mapping); 221 ret = filemap_write_and_wait_range(mapping, start, end);
218 222
219 /* 223 /*
220 * We need to protect against concurrent writers, which could cause 224 * We need to protect against concurrent writers, which could cause
@@ -225,12 +229,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
225 if (!ret) 229 if (!ret)
226 ret = err; 230 ret = err;
227 mutex_unlock(&mapping->host->i_mutex); 231 mutex_unlock(&mapping->host->i_mutex);
228 err = filemap_fdatawait(mapping); 232
229 if (!ret)
230 ret = err;
231out: 233out:
232 return ret; 234 return ret;
233} 235}
236EXPORT_SYMBOL(vfs_fsync_range);
237
238/**
239 * vfs_fsync - perform a fsync or fdatasync on a file
240 * @file: file to sync
241 * @dentry: dentry of @file
242 * @datasync: only perform a fdatasync operation
243 *
244 * Write back data and metadata for @file to disk. If @datasync is
245 * set only metadata needed to access modified file data is written.
246 *
247 * In case this function is called from nfsd @file may be %NULL and
248 * only @dentry is set. This can only happen when the filesystem
249 * implements the export_operations API.
250 */
251int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
252{
253 return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
254}
234EXPORT_SYMBOL(vfs_fsync); 255EXPORT_SYMBOL(vfs_fsync);
235 256
236static int do_fsync(unsigned int fd, int datasync) 257static int do_fsync(unsigned int fd, int datasync)
@@ -256,6 +277,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
256 return do_fsync(fd, 1); 277 return do_fsync(fd, 1);
257} 278}
258 279
280/**
281 * generic_write_sync - perform syncing after a write if file / inode is sync
282 * @file: file to which the write happened
283 * @pos: offset where the write started
284 * @count: length of the write
285 *
286 * This is just a simple wrapper about our general syncing function.
287 */
288int generic_write_sync(struct file *file, loff_t pos, loff_t count)
289{
290 if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host))
291 return 0;
292 return vfs_fsync_range(file, file->f_path.dentry, pos,
293 pos + count - 1, 1);
294}
295EXPORT_SYMBOL(generic_write_sync);
296
259/* 297/*
260 * sys_sync_file_range() permits finely controlled syncing over a segment of 298 * sys_sync_file_range() permits finely controlled syncing over a segment of
261 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is 299 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7078974a6eee..fde63a3c4ecc 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -817,7 +817,8 @@ write_retry:
817 xfs_iunlock(xip, iolock); 817 xfs_iunlock(xip, iolock);
818 if (need_i_mutex) 818 if (need_i_mutex)
819 mutex_unlock(&inode->i_mutex); 819 mutex_unlock(&inode->i_mutex);
820 error2 = sync_page_range(inode, mapping, pos, ret); 820 error2 = filemap_write_and_wait_range(mapping, pos,
821 pos + ret - 1);
821 if (!error) 822 if (!error)
822 error = error2; 823 error = error2;
823 if (need_i_mutex) 824 if (need_i_mutex)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a79f48373e7e..37f53216998a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1455,11 +1455,6 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
1455#define DT_SOCK 12 1455#define DT_SOCK 12
1456#define DT_WHT 14 1456#define DT_WHT 14
1457 1457
1458#define OSYNC_METADATA (1<<0)
1459#define OSYNC_DATA (1<<1)
1460#define OSYNC_INODE (1<<2)
1461int generic_osync_inode(struct inode *, struct address_space *, int);
1462
1463/* 1458/*
1464 * This is the "filldir" function type, used by readdir() to let 1459 * This is the "filldir" function type, used by readdir() to let
1465 * the kernel specify what kind of dirent layout it wants to have. 1460 * the kernel specify what kind of dirent layout it wants to have.
@@ -2086,6 +2081,8 @@ extern int write_inode_now(struct inode *, int);
2086extern int filemap_fdatawrite(struct address_space *); 2081extern int filemap_fdatawrite(struct address_space *);
2087extern int filemap_flush(struct address_space *); 2082extern int filemap_flush(struct address_space *);
2088extern int filemap_fdatawait(struct address_space *); 2083extern int filemap_fdatawait(struct address_space *);
2084extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
2085 loff_t lend);
2089extern int filemap_write_and_wait(struct address_space *mapping); 2086extern int filemap_write_and_wait(struct address_space *mapping);
2090extern int filemap_write_and_wait_range(struct address_space *mapping, 2087extern int filemap_write_and_wait_range(struct address_space *mapping,
2091 loff_t lstart, loff_t lend); 2088 loff_t lstart, loff_t lend);
@@ -2096,7 +2093,10 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
2096extern int filemap_fdatawrite_range(struct address_space *mapping, 2093extern int filemap_fdatawrite_range(struct address_space *mapping,
2097 loff_t start, loff_t end); 2094 loff_t start, loff_t end);
2098 2095
2096extern int vfs_fsync_range(struct file *file, struct dentry *dentry,
2097 loff_t start, loff_t end, int datasync);
2099extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); 2098extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
2099extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
2100extern void sync_supers(void); 2100extern void sync_supers(void);
2101extern void emergency_sync(void); 2101extern void emergency_sync(void);
2102extern void emergency_remount(void); 2102extern void emergency_remount(void);
@@ -2202,9 +2202,9 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2202extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); 2202extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
2203int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2203int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
2204extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2204extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2205extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long,
2206 loff_t *);
2205extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2207extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2206extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
2207 unsigned long, loff_t);
2208extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, 2208extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
2209 unsigned long *, loff_t, loff_t *, size_t, size_t); 2209 unsigned long *, loff_t, loff_t *, size_t, size_t);
2210extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, 2210extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
@@ -2214,6 +2214,10 @@ extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t l
2214extern int generic_segment_checks(const struct iovec *iov, 2214extern int generic_segment_checks(const struct iovec *iov,
2215 unsigned long *nr_segs, size_t *count, int access_flags); 2215 unsigned long *nr_segs, size_t *count, int access_flags);
2216 2216
2217/* fs/block_dev.c */
2218extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
2219 unsigned long nr_segs, loff_t pos);
2220
2217/* fs/splice.c */ 2221/* fs/splice.c */
2218extern ssize_t generic_file_splice_read(struct file *, loff_t *, 2222extern ssize_t generic_file_splice_read(struct file *, loff_t *,
2219 struct pipe_inode_info *, size_t, unsigned int); 2223 struct pipe_inode_info *, size_t, unsigned int);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 78b1e4684cc9..d347632f1861 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -150,10 +150,6 @@ int write_cache_pages(struct address_space *mapping,
150 struct writeback_control *wbc, writepage_t writepage, 150 struct writeback_control *wbc, writepage_t writepage,
151 void *data); 151 void *data);
152int do_writepages(struct address_space *mapping, struct writeback_control *wbc); 152int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
153int sync_page_range(struct inode *inode, struct address_space *mapping,
154 loff_t pos, loff_t count);
155int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
156 loff_t pos, loff_t count);
157void set_page_dirty_balance(struct page *page, int page_mkwrite); 153void set_page_dirty_balance(struct page *page, int page_mkwrite);
158void writeback_set_ratelimit(void); 154void writeback_set_ratelimit(void);
159 155
diff --git a/mm/filemap.c b/mm/filemap.c
index ccea3b665c12..dd51c68e2b86 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -39,11 +39,10 @@
39/* 39/*
40 * FIXME: remove all knowledge of the buffer layer from the core VM 40 * FIXME: remove all knowledge of the buffer layer from the core VM
41 */ 41 */
42#include <linux/buffer_head.h> /* for generic_osync_inode */ 42#include <linux/buffer_head.h> /* for try_to_free_buffers */
43 43
44#include <asm/mman.h> 44#include <asm/mman.h>
45 45
46
47/* 46/*
48 * Shared mappings implemented 30.11.1994. It's not fully working yet, 47 * Shared mappings implemented 30.11.1994. It's not fully working yet,
49 * though. 48 * though.
@@ -307,68 +306,24 @@ int wait_on_page_writeback_range(struct address_space *mapping,
307} 306}
308 307
309/** 308/**
310 * sync_page_range - write and wait on all pages in the passed range 309 * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range
311 * @inode: target inode 310 * @mapping: address space structure to wait for
312 * @mapping: target address_space 311 * @start: offset in bytes where the range starts
313 * @pos: beginning offset in pages to write 312 * @end: offset in bytes where the range ends (inclusive)
314 * @count: number of bytes to write
315 *
316 * Write and wait upon all the pages in the passed range. This is a "data
317 * integrity" operation. It waits upon in-flight writeout before starting and
318 * waiting upon new writeout. If there was an IO error, return it.
319 * 313 *
320 * We need to re-take i_mutex during the generic_osync_inode list walk because 314 * Walk the list of under-writeback pages of the given address space
321 * it is otherwise livelockable. 315 * in the given range and wait for all of them.
322 */
323int sync_page_range(struct inode *inode, struct address_space *mapping,
324 loff_t pos, loff_t count)
325{
326 pgoff_t start = pos >> PAGE_CACHE_SHIFT;
327 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
328 int ret;
329
330 if (!mapping_cap_writeback_dirty(mapping) || !count)
331 return 0;
332 ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
333 if (ret == 0) {
334 mutex_lock(&inode->i_mutex);
335 ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
336 mutex_unlock(&inode->i_mutex);
337 }
338 if (ret == 0)
339 ret = wait_on_page_writeback_range(mapping, start, end);
340 return ret;
341}
342EXPORT_SYMBOL(sync_page_range);
343
344/**
345 * sync_page_range_nolock - write & wait on all pages in the passed range without locking
346 * @inode: target inode
347 * @mapping: target address_space
348 * @pos: beginning offset in pages to write
349 * @count: number of bytes to write
350 * 316 *
351 * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea 317 * This is just a simple wrapper so that callers don't have to convert offsets
352 * as it forces O_SYNC writers to different parts of the same file 318 * to page indexes themselves
353 * to be serialised right until io completion.
354 */ 319 */
355int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, 320int filemap_fdatawait_range(struct address_space *mapping, loff_t start,
356 loff_t pos, loff_t count) 321 loff_t end)
357{ 322{
358 pgoff_t start = pos >> PAGE_CACHE_SHIFT; 323 return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT,
359 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; 324 end >> PAGE_CACHE_SHIFT);
360 int ret;
361
362 if (!mapping_cap_writeback_dirty(mapping) || !count)
363 return 0;
364 ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
365 if (ret == 0)
366 ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
367 if (ret == 0)
368 ret = wait_on_page_writeback_range(mapping, start, end);
369 return ret;
370} 325}
371EXPORT_SYMBOL(sync_page_range_nolock); 326EXPORT_SYMBOL(filemap_fdatawait_range);
372 327
373/** 328/**
374 * filemap_fdatawait - wait for all under-writeback pages to complete 329 * filemap_fdatawait - wait for all under-writeback pages to complete
@@ -2167,20 +2122,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
2167 } 2122 }
2168 *ppos = end; 2123 *ppos = end;
2169 } 2124 }
2170
2171 /*
2172 * Sync the fs metadata but not the minor inode changes and
2173 * of course not the data as we did direct DMA for the IO.
2174 * i_mutex is held, which protects generic_osync_inode() from
2175 * livelocking. AIO O_DIRECT ops attempt to sync metadata here.
2176 */
2177out: 2125out:
2178 if ((written >= 0 || written == -EIOCBQUEUED) &&
2179 ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2180 int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);
2181 if (err < 0)
2182 written = err;
2183 }
2184 return written; 2126 return written;
2185} 2127}
2186EXPORT_SYMBOL(generic_file_direct_write); 2128EXPORT_SYMBOL(generic_file_direct_write);
@@ -2312,8 +2254,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2312{ 2254{
2313 struct file *file = iocb->ki_filp; 2255 struct file *file = iocb->ki_filp;
2314 struct address_space *mapping = file->f_mapping; 2256 struct address_space *mapping = file->f_mapping;
2315 const struct address_space_operations *a_ops = mapping->a_ops;
2316 struct inode *inode = mapping->host;
2317 ssize_t status; 2257 ssize_t status;
2318 struct iov_iter i; 2258 struct iov_iter i;
2319 2259
@@ -2323,16 +2263,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2323 if (likely(status >= 0)) { 2263 if (likely(status >= 0)) {
2324 written += status; 2264 written += status;
2325 *ppos = pos + status; 2265 *ppos = pos + status;
2326
2327 /*
2328 * For now, when the user asks for O_SYNC, we'll actually give
2329 * O_DSYNC
2330 */
2331 if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2332 if (!a_ops->writepage || !is_sync_kiocb(iocb))
2333 status = generic_osync_inode(inode, mapping,
2334 OSYNC_METADATA|OSYNC_DATA);
2335 }
2336 } 2266 }
2337 2267
2338 /* 2268 /*
@@ -2348,9 +2278,27 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2348} 2278}
2349EXPORT_SYMBOL(generic_file_buffered_write); 2279EXPORT_SYMBOL(generic_file_buffered_write);
2350 2280
2351static ssize_t 2281/**
2352__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, 2282 * __generic_file_aio_write - write data to a file
2353 unsigned long nr_segs, loff_t *ppos) 2283 * @iocb: IO state structure (file, offset, etc.)
2284 * @iov: vector with data to write
2285 * @nr_segs: number of segments in the vector
2286 * @ppos: position where to write
2287 *
2288 * This function does all the work needed for actually writing data to a
2289 * file. It does all basic checks, removes SUID from the file, updates
2290 * modification times and calls proper subroutines depending on whether we
2291 * do direct IO or a standard buffered write.
2292 *
2293 * It expects i_mutex to be grabbed unless we work on a block device or similar
2294 * object which does not need locking at all.
2295 *
2296 * This function does *not* take care of syncing data in case of O_SYNC write.
2297 * A caller has to handle it. This is mainly due to the fact that we want to
2298 * avoid syncing under i_mutex.
2299 */
2300ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2301 unsigned long nr_segs, loff_t *ppos)
2354{ 2302{
2355 struct file *file = iocb->ki_filp; 2303 struct file *file = iocb->ki_filp;
2356 struct address_space * mapping = file->f_mapping; 2304 struct address_space * mapping = file->f_mapping;
@@ -2447,51 +2395,37 @@ out:
2447 current->backing_dev_info = NULL; 2395 current->backing_dev_info = NULL;
2448 return written ? written : err; 2396 return written ? written : err;
2449} 2397}
2398EXPORT_SYMBOL(__generic_file_aio_write);
2450 2399
2451ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, 2400/**
2452 const struct iovec *iov, unsigned long nr_segs, loff_t pos) 2401 * generic_file_aio_write - write data to a file
2453{ 2402 * @iocb: IO state structure
2454 struct file *file = iocb->ki_filp; 2403 * @iov: vector with data to write
2455 struct address_space *mapping = file->f_mapping; 2404 * @nr_segs: number of segments in the vector
2456 struct inode *inode = mapping->host; 2405 * @pos: position in file where to write
2457 ssize_t ret; 2406 *
2458 2407 * This is a wrapper around __generic_file_aio_write() to be used by most
2459 BUG_ON(iocb->ki_pos != pos); 2408 * filesystems. It takes care of syncing the file in case of O_SYNC file
2460 2409 * and acquires i_mutex as needed.
2461 ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, 2410 */
2462 &iocb->ki_pos);
2463
2464 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2465 ssize_t err;
2466
2467 err = sync_page_range_nolock(inode, mapping, pos, ret);
2468 if (err < 0)
2469 ret = err;
2470 }
2471 return ret;
2472}
2473EXPORT_SYMBOL(generic_file_aio_write_nolock);
2474
2475ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 2411ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2476 unsigned long nr_segs, loff_t pos) 2412 unsigned long nr_segs, loff_t pos)
2477{ 2413{
2478 struct file *file = iocb->ki_filp; 2414 struct file *file = iocb->ki_filp;
2479 struct address_space *mapping = file->f_mapping; 2415 struct inode *inode = file->f_mapping->host;
2480 struct inode *inode = mapping->host;
2481 ssize_t ret; 2416 ssize_t ret;
2482 2417
2483 BUG_ON(iocb->ki_pos != pos); 2418 BUG_ON(iocb->ki_pos != pos);
2484 2419
2485 mutex_lock(&inode->i_mutex); 2420 mutex_lock(&inode->i_mutex);
2486 ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, 2421 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
2487 &iocb->ki_pos);
2488 mutex_unlock(&inode->i_mutex); 2422 mutex_unlock(&inode->i_mutex);
2489 2423
2490 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2424 if (ret > 0 || ret == -EIOCBQUEUED) {
2491 ssize_t err; 2425 ssize_t err;
2492 2426
2493 err = sync_page_range(inode, mapping, pos, ret); 2427 err = generic_write_sync(file, pos, ret);
2494 if (err < 0) 2428 if (err < 0 && ret > 0)
2495 ret = err; 2429 ret = err;
2496 } 2430 }
2497 return ret; 2431 return ret;