diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-14 17:36:47 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-14 17:36:47 -0400 |
commit | 4142e0d1def2c0176c27fd2e810243045a62eb6d (patch) | |
tree | a21f76fafcd7609419a3ce610d8b9360748ccd76 | |
parent | 33f1de69312432baecb997a570b7d77c4d02d1ed (diff) | |
parent | 2daea67e966dc0c42067ebea015ddac6834cef88 (diff) |
Merge branch 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
* 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
fsync: wait for data writeout completion before calling ->fsync
vfs: Remove generic_osync_inode() and sync_page_range{_nolock}()
fat: Opencode sync_page_range_nolock()
pohmelfs: Use new syncing helper
xfs: Convert sync_page_range() to simple filemap_write_and_wait_range()
ocfs2: Update syncing after splicing to match generic version
ntfs: Use new syncing helpers and update comments
ext4: Remove syncing logic from ext4_file_write
ext3: Remove syncing logic from ext3_file_write
ext2: Update comment about generic_osync_inode
vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode
vfs: Rename generic_file_aio_write_nolock
ocfs2: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
pohmelfs: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
vfs: Remove syncing from generic_file_direct_write() and generic_file_buffered_write()
vfs: Export __generic_file_aio_write() and add some comments
vfs: Introduce filemap_fdatawait_range
-rw-r--r-- | drivers/char/raw.c | 2 | ||||
-rw-r--r-- | drivers/staging/pohmelfs/inode.c | 6 | ||||
-rw-r--r-- | fs/block_dev.c | 29 | ||||
-rw-r--r-- | fs/ext2/inode.c | 2 | ||||
-rw-r--r-- | fs/ext3/file.c | 61 | ||||
-rw-r--r-- | fs/ext4/file.c | 53 | ||||
-rw-r--r-- | fs/fat/file.c | 22 | ||||
-rw-r--r-- | fs/fat/misc.c | 4 | ||||
-rw-r--r-- | fs/fs-writeback.c | 54 | ||||
-rw-r--r-- | fs/ntfs/file.c | 16 | ||||
-rw-r--r-- | fs/ntfs/mft.c | 13 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 49 | ||||
-rw-r--r-- | fs/splice.c | 22 | ||||
-rw-r--r-- | fs/sync.c | 56 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_lrw.c | 3 | ||||
-rw-r--r-- | include/linux/fs.h | 18 | ||||
-rw-r--r-- | include/linux/writeback.h | 4 | ||||
-rw-r--r-- | mm/filemap.c | 170 |
18 files changed, 204 insertions, 380 deletions
diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 05f9d18b9361..40268db02e22 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c | |||
@@ -246,7 +246,7 @@ static const struct file_operations raw_fops = { | |||
246 | .read = do_sync_read, | 246 | .read = do_sync_read, |
247 | .aio_read = generic_file_aio_read, | 247 | .aio_read = generic_file_aio_read, |
248 | .write = do_sync_write, | 248 | .write = do_sync_write, |
249 | .aio_write = generic_file_aio_write_nolock, | 249 | .aio_write = blkdev_aio_write, |
250 | .open = raw_open, | 250 | .open = raw_open, |
251 | .release= raw_release, | 251 | .release= raw_release, |
252 | .ioctl = raw_ioctl, | 252 | .ioctl = raw_ioctl, |
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index e63c9bea6c54..d004a9ddddbc 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c | |||
@@ -921,16 +921,16 @@ ssize_t pohmelfs_write(struct file *file, const char __user *buf, | |||
921 | if (ret) | 921 | if (ret) |
922 | goto err_out_unlock; | 922 | goto err_out_unlock; |
923 | 923 | ||
924 | ret = generic_file_aio_write_nolock(&kiocb, &iov, 1, pos); | 924 | ret = __generic_file_aio_write(&kiocb, &iov, 1, &kiocb.ki_pos); |
925 | *ppos = kiocb.ki_pos; | 925 | *ppos = kiocb.ki_pos; |
926 | 926 | ||
927 | mutex_unlock(&inode->i_mutex); | 927 | mutex_unlock(&inode->i_mutex); |
928 | WARN_ON(ret < 0); | 928 | WARN_ON(ret < 0); |
929 | 929 | ||
930 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 930 | if (ret > 0) { |
931 | ssize_t err; | 931 | ssize_t err; |
932 | 932 | ||
933 | err = sync_page_range(inode, mapping, pos, ret); | 933 | err = generic_write_sync(file, pos, ret); |
934 | if (err < 0) | 934 | if (err < 0) |
935 | ret = err; | 935 | ret = err; |
936 | WARN_ON(ret < 0); | 936 | WARN_ON(ret < 0); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 94dfda24c06e..3581a4e53942 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1405,6 +1405,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
1405 | } | 1405 | } |
1406 | 1406 | ||
1407 | /* | 1407 | /* |
1408 | * Write data to the block device. Only intended for the block device itself | ||
1409 | * and the raw driver which basically is a fake block device. | ||
1410 | * | ||
1411 | * Does not take i_mutex for the write and thus is not for general purpose | ||
1412 | * use. | ||
1413 | */ | ||
1414 | ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | ||
1415 | unsigned long nr_segs, loff_t pos) | ||
1416 | { | ||
1417 | struct file *file = iocb->ki_filp; | ||
1418 | ssize_t ret; | ||
1419 | |||
1420 | BUG_ON(iocb->ki_pos != pos); | ||
1421 | |||
1422 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); | ||
1423 | if (ret > 0 || ret == -EIOCBQUEUED) { | ||
1424 | ssize_t err; | ||
1425 | |||
1426 | err = generic_write_sync(file, pos, ret); | ||
1427 | if (err < 0 && ret > 0) | ||
1428 | ret = err; | ||
1429 | } | ||
1430 | return ret; | ||
1431 | } | ||
1432 | EXPORT_SYMBOL_GPL(blkdev_aio_write); | ||
1433 | |||
1434 | /* | ||
1408 | * Try to release a page associated with block device when the system | 1435 | * Try to release a page associated with block device when the system |
1409 | * is under memory pressure. | 1436 | * is under memory pressure. |
1410 | */ | 1437 | */ |
@@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = { | |||
1436 | .read = do_sync_read, | 1463 | .read = do_sync_read, |
1437 | .write = do_sync_write, | 1464 | .write = do_sync_write, |
1438 | .aio_read = generic_file_aio_read, | 1465 | .aio_read = generic_file_aio_read, |
1439 | .aio_write = generic_file_aio_write_nolock, | 1466 | .aio_write = blkdev_aio_write, |
1440 | .mmap = generic_file_mmap, | 1467 | .mmap = generic_file_mmap, |
1441 | .fsync = block_fsync, | 1468 | .fsync = block_fsync, |
1442 | .unlocked_ioctl = block_ioctl, | 1469 | .unlocked_ioctl = block_ioctl, |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e27130341d4f..1c1638f873a4 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode, | |||
482 | unlock_buffer(bh); | 482 | unlock_buffer(bh); |
483 | mark_buffer_dirty_inode(bh, inode); | 483 | mark_buffer_dirty_inode(bh, inode); |
484 | /* We used to sync bh here if IS_SYNC(inode). | 484 | /* We used to sync bh here if IS_SYNC(inode). |
485 | * But we now rely upon generic_osync_inode() | 485 | * But we now rely upon generic_write_sync() |
486 | * and b_inode_buffers. But not for directories. | 486 | * and b_inode_buffers. But not for directories. |
487 | */ | 487 | */ |
488 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) | 488 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 299253214789..388bbdfa0b4e 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp) | |||
51 | return 0; | 51 | return 0; |
52 | } | 52 | } |
53 | 53 | ||
54 | static ssize_t | ||
55 | ext3_file_write(struct kiocb *iocb, const struct iovec *iov, | ||
56 | unsigned long nr_segs, loff_t pos) | ||
57 | { | ||
58 | struct file *file = iocb->ki_filp; | ||
59 | struct inode *inode = file->f_path.dentry->d_inode; | ||
60 | ssize_t ret; | ||
61 | int err; | ||
62 | |||
63 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
64 | |||
65 | /* | ||
66 | * Skip flushing if there was an error, or if nothing was written. | ||
67 | */ | ||
68 | if (ret <= 0) | ||
69 | return ret; | ||
70 | |||
71 | /* | ||
72 | * If the inode is IS_SYNC, or is O_SYNC and we are doing data | ||
73 | * journalling then we need to make sure that we force the transaction | ||
74 | * to disk to keep all metadata uptodate synchronously. | ||
75 | */ | ||
76 | if (file->f_flags & O_SYNC) { | ||
77 | /* | ||
78 | * If we are non-data-journaled, then the dirty data has | ||
79 | * already been flushed to backing store by generic_osync_inode, | ||
80 | * and the inode has been flushed too if there have been any | ||
81 | * modifications other than mere timestamp updates. | ||
82 | * | ||
83 | * Open question --- do we care about flushing timestamps too | ||
84 | * if the inode is IS_SYNC? | ||
85 | */ | ||
86 | if (!ext3_should_journal_data(inode)) | ||
87 | return ret; | ||
88 | |||
89 | goto force_commit; | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * So we know that there has been no forced data flush. If the inode | ||
94 | * is marked IS_SYNC, we need to force one ourselves. | ||
95 | */ | ||
96 | if (!IS_SYNC(inode)) | ||
97 | return ret; | ||
98 | |||
99 | /* | ||
100 | * Open question #2 --- should we force data to disk here too? If we | ||
101 | * don't, the only impact is that data=writeback filesystems won't | ||
102 | * flush data to disk automatically on IS_SYNC, only metadata (but | ||
103 | * historically, that is what ext2 has done.) | ||
104 | */ | ||
105 | |||
106 | force_commit: | ||
107 | err = ext3_force_commit(inode->i_sb); | ||
108 | if (err) | ||
109 | return err; | ||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | const struct file_operations ext3_file_operations = { | 54 | const struct file_operations ext3_file_operations = { |
114 | .llseek = generic_file_llseek, | 55 | .llseek = generic_file_llseek, |
115 | .read = do_sync_read, | 56 | .read = do_sync_read, |
116 | .write = do_sync_write, | 57 | .write = do_sync_write, |
117 | .aio_read = generic_file_aio_read, | 58 | .aio_read = generic_file_aio_read, |
118 | .aio_write = ext3_file_write, | 59 | .aio_write = generic_file_aio_write, |
119 | .unlocked_ioctl = ext3_ioctl, | 60 | .unlocked_ioctl = ext3_ioctl, |
120 | #ifdef CONFIG_COMPAT | 61 | #ifdef CONFIG_COMPAT |
121 | .compat_ioctl = ext3_compat_ioctl, | 62 | .compat_ioctl = ext3_compat_ioctl, |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 27f3c5354c0e..5ca3eca70a1e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -58,10 +58,7 @@ static ssize_t | |||
58 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | 58 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, |
59 | unsigned long nr_segs, loff_t pos) | 59 | unsigned long nr_segs, loff_t pos) |
60 | { | 60 | { |
61 | struct file *file = iocb->ki_filp; | 61 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
62 | struct inode *inode = file->f_path.dentry->d_inode; | ||
63 | ssize_t ret; | ||
64 | int err; | ||
65 | 62 | ||
66 | /* | 63 | /* |
67 | * If we have encountered a bitmap-format file, the size limit | 64 | * If we have encountered a bitmap-format file, the size limit |
@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
81 | } | 78 | } |
82 | } | 79 | } |
83 | 80 | ||
84 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | 81 | return generic_file_aio_write(iocb, iov, nr_segs, pos); |
85 | /* | ||
86 | * Skip flushing if there was an error, or if nothing was written. | ||
87 | */ | ||
88 | if (ret <= 0) | ||
89 | return ret; | ||
90 | |||
91 | /* | ||
92 | * If the inode is IS_SYNC, or is O_SYNC and we are doing data | ||
93 | * journalling then we need to make sure that we force the transaction | ||
94 | * to disk to keep all metadata uptodate synchronously. | ||
95 | */ | ||
96 | if (file->f_flags & O_SYNC) { | ||
97 | /* | ||
98 | * If we are non-data-journaled, then the dirty data has | ||
99 | * already been flushed to backing store by generic_osync_inode, | ||
100 | * and the inode has been flushed too if there have been any | ||
101 | * modifications other than mere timestamp updates. | ||
102 | * | ||
103 | * Open question --- do we care about flushing timestamps too | ||
104 | * if the inode is IS_SYNC? | ||
105 | */ | ||
106 | if (!ext4_should_journal_data(inode)) | ||
107 | return ret; | ||
108 | |||
109 | goto force_commit; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * So we know that there has been no forced data flush. If the inode | ||
114 | * is marked IS_SYNC, we need to force one ourselves. | ||
115 | */ | ||
116 | if (!IS_SYNC(inode)) | ||
117 | return ret; | ||
118 | |||
119 | /* | ||
120 | * Open question #2 --- should we force data to disk here too? If we | ||
121 | * don't, the only impact is that data=writeback filesystems won't | ||
122 | * flush data to disk automatically on IS_SYNC, only metadata (but | ||
123 | * historically, that is what ext2 has done.) | ||
124 | */ | ||
125 | |||
126 | force_commit: | ||
127 | err = ext4_force_commit(inode->i_sb); | ||
128 | if (err) | ||
129 | return err; | ||
130 | return ret; | ||
131 | } | 82 | } |
132 | 83 | ||
133 | static struct vm_operations_struct ext4_file_vm_ops = { | 84 | static struct vm_operations_struct ext4_file_vm_ops = { |
diff --git a/fs/fat/file.c b/fs/fat/file.c index f042b965c95c..e8c159de236b 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size) | |||
176 | 176 | ||
177 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; | 177 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; |
178 | mark_inode_dirty(inode); | 178 | mark_inode_dirty(inode); |
179 | if (IS_SYNC(inode)) | 179 | if (IS_SYNC(inode)) { |
180 | err = sync_page_range_nolock(inode, mapping, start, count); | 180 | int err2; |
181 | |||
182 | /* | ||
183 | * Opencode syncing since we don't have a file open to use | ||
184 | * standard fsync path. | ||
185 | */ | ||
186 | err = filemap_fdatawrite_range(mapping, start, | ||
187 | start + count - 1); | ||
188 | err2 = sync_mapping_buffers(mapping); | ||
189 | if (!err) | ||
190 | err = err2; | ||
191 | err2 = write_inode_now(inode, 1); | ||
192 | if (!err) | ||
193 | err = err2; | ||
194 | if (!err) { | ||
195 | err = filemap_fdatawait_range(mapping, start, | ||
196 | start + count - 1); | ||
197 | } | ||
198 | } | ||
181 | out: | 199 | out: |
182 | return err; | 200 | return err; |
183 | } | 201 | } |
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index a6c20473dfd7..4e35be873e09 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) | |||
119 | MSDOS_I(inode)->i_start = new_dclus; | 119 | MSDOS_I(inode)->i_start = new_dclus; |
120 | MSDOS_I(inode)->i_logstart = new_dclus; | 120 | MSDOS_I(inode)->i_logstart = new_dclus; |
121 | /* | 121 | /* |
122 | * Since generic_osync_inode() synchronize later if | 122 | * Since generic_write_sync() synchronizes regular files later, |
123 | * this is not directory, we don't here. | 123 | * we sync here only directories. |
124 | */ | 124 | */ |
125 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) { | 125 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) { |
126 | ret = fat_sync_inode(inode); | 126 | ret = fat_sync_inode(inode); |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index da86ef58e427..628235cf44b5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -1242,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) | |||
1242 | return ret; | 1242 | return ret; |
1243 | } | 1243 | } |
1244 | EXPORT_SYMBOL(sync_inode); | 1244 | EXPORT_SYMBOL(sync_inode); |
1245 | |||
1246 | /** | ||
1247 | * generic_osync_inode - flush all dirty data for a given inode to disk | ||
1248 | * @inode: inode to write | ||
1249 | * @mapping: the address_space that should be flushed | ||
1250 | * @what: what to write and wait upon | ||
1251 | * | ||
1252 | * This can be called by file_write functions for files which have the | ||
1253 | * O_SYNC flag set, to flush dirty writes to disk. | ||
1254 | * | ||
1255 | * @what is a bitmask, specifying which part of the inode's data should be | ||
1256 | * written and waited upon. | ||
1257 | * | ||
1258 | * OSYNC_DATA: i_mapping's dirty data | ||
1259 | * OSYNC_METADATA: the buffers at i_mapping->private_list | ||
1260 | * OSYNC_INODE: the inode itself | ||
1261 | */ | ||
1262 | |||
1263 | int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what) | ||
1264 | { | ||
1265 | int err = 0; | ||
1266 | int need_write_inode_now = 0; | ||
1267 | int err2; | ||
1268 | |||
1269 | if (what & OSYNC_DATA) | ||
1270 | err = filemap_fdatawrite(mapping); | ||
1271 | if (what & (OSYNC_METADATA|OSYNC_DATA)) { | ||
1272 | err2 = sync_mapping_buffers(mapping); | ||
1273 | if (!err) | ||
1274 | err = err2; | ||
1275 | } | ||
1276 | if (what & OSYNC_DATA) { | ||
1277 | err2 = filemap_fdatawait(mapping); | ||
1278 | if (!err) | ||
1279 | err = err2; | ||
1280 | } | ||
1281 | |||
1282 | spin_lock(&inode_lock); | ||
1283 | if ((inode->i_state & I_DIRTY) && | ||
1284 | ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC))) | ||
1285 | need_write_inode_now = 1; | ||
1286 | spin_unlock(&inode_lock); | ||
1287 | |||
1288 | if (need_write_inode_now) { | ||
1289 | err2 = write_inode_now(inode, 1); | ||
1290 | if (!err) | ||
1291 | err = err2; | ||
1292 | } | ||
1293 | else | ||
1294 | inode_sync_wait(inode); | ||
1295 | |||
1296 | return err; | ||
1297 | } | ||
1298 | EXPORT_SYMBOL(generic_osync_inode); | ||
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 3140a4429af1..4350d4993b18 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -2076,14 +2076,6 @@ err_out: | |||
2076 | *ppos = pos; | 2076 | *ppos = pos; |
2077 | if (cached_page) | 2077 | if (cached_page) |
2078 | page_cache_release(cached_page); | 2078 | page_cache_release(cached_page); |
2079 | /* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */ | ||
2080 | if (likely(!status)) { | ||
2081 | if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) { | ||
2082 | if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb)) | ||
2083 | status = generic_osync_inode(vi, mapping, | ||
2084 | OSYNC_METADATA|OSYNC_DATA); | ||
2085 | } | ||
2086 | } | ||
2087 | pagevec_lru_add_file(&lru_pvec); | 2079 | pagevec_lru_add_file(&lru_pvec); |
2088 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", | 2080 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", |
2089 | written ? "written" : "status", (unsigned long)written, | 2081 | written ? "written" : "status", (unsigned long)written, |
@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2145 | mutex_lock(&inode->i_mutex); | 2137 | mutex_lock(&inode->i_mutex); |
2146 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); | 2138 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); |
2147 | mutex_unlock(&inode->i_mutex); | 2139 | mutex_unlock(&inode->i_mutex); |
2148 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2140 | if (ret > 0) { |
2149 | int err = sync_page_range(inode, mapping, pos, ret); | 2141 | int err = generic_write_sync(file, pos, ret); |
2150 | if (err < 0) | 2142 | if (err < 0) |
2151 | ret = err; | 2143 | ret = err; |
2152 | } | 2144 | } |
@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov, | |||
2173 | if (ret == -EIOCBQUEUED) | 2165 | if (ret == -EIOCBQUEUED) |
2174 | ret = wait_on_sync_kiocb(&kiocb); | 2166 | ret = wait_on_sync_kiocb(&kiocb); |
2175 | mutex_unlock(&inode->i_mutex); | 2167 | mutex_unlock(&inode->i_mutex); |
2176 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2168 | if (ret > 0) { |
2177 | int err = sync_page_range(inode, mapping, *ppos - ret, ret); | 2169 | int err = generic_write_sync(file, *ppos - ret, ret); |
2178 | if (err < 0) | 2170 | if (err < 0) |
2179 | ret = err; | 2171 | ret = err; |
2180 | } | 2172 | } |
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 23bf68453d7d..1caa0ef0b2bb 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c | |||
@@ -384,13 +384,12 @@ unm_err_out: | |||
384 | * it is dirty in the inode meta data rather than the data page cache of the | 384 | * it is dirty in the inode meta data rather than the data page cache of the |
385 | * inode, and thus there are no data pages that need writing out. Therefore, a | 385 | * inode, and thus there are no data pages that need writing out. Therefore, a |
386 | * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the | 386 | * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the |
387 | * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to | 387 | * other hand, is not sufficient, because ->write_inode needs to be called even |
388 | * ensure ->write_inode is called from generic_osync_inode() and this needs to | 388 | * in case of fdatasync. This needs to happen or the file data would not |
389 | * happen or the file data would not necessarily hit the device synchronously, | 389 | * necessarily hit the device synchronously, even though the vfs inode has the |
390 | * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC | 390 | * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just |
391 | * simply "feels" better than just I_DIRTY_SYNC, since the file data has not | 391 | * I_DIRTY_SYNC, since the file data has not actually hit the block device yet, |
392 | * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own | 392 | * which is not what I_DIRTY_SYNC on its own would suggest. |
393 | * would suggest. | ||
394 | */ | 393 | */ |
395 | void __mark_mft_record_dirty(ntfs_inode *ni) | 394 | void __mark_mft_record_dirty(ntfs_inode *ni) |
396 | { | 395 | { |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index aa501d3f93f1..221c5e98957b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1871,8 +1871,7 @@ relock: | |||
1871 | goto out_dio; | 1871 | goto out_dio; |
1872 | } | 1872 | } |
1873 | } else { | 1873 | } else { |
1874 | written = generic_file_aio_write_nolock(iocb, iov, nr_segs, | 1874 | written = __generic_file_aio_write(iocb, iov, nr_segs, ppos); |
1875 | *ppos); | ||
1876 | } | 1875 | } |
1877 | 1876 | ||
1878 | out_dio: | 1877 | out_dio: |
@@ -1880,18 +1879,21 @@ out_dio: | |||
1880 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 1879 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
1881 | 1880 | ||
1882 | if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { | 1881 | if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { |
1883 | /* | 1882 | ret = filemap_fdatawrite_range(file->f_mapping, pos, |
1884 | * The generic write paths have handled getting data | 1883 | pos + count - 1); |
1885 | * to disk, but since we don't make use of the dirty | 1884 | if (ret < 0) |
1886 | * inode list, a manual journal commit is necessary | 1885 | written = ret; |
1887 | * here. | 1886 | |
1888 | */ | 1887 | if (!ret && (old_size != i_size_read(inode) || |
1889 | if (old_size != i_size_read(inode) || | 1888 | old_clusters != OCFS2_I(inode)->ip_clusters)) { |
1890 | old_clusters != OCFS2_I(inode)->ip_clusters) { | ||
1891 | ret = jbd2_journal_force_commit(osb->journal->j_journal); | 1889 | ret = jbd2_journal_force_commit(osb->journal->j_journal); |
1892 | if (ret < 0) | 1890 | if (ret < 0) |
1893 | written = ret; | 1891 | written = ret; |
1894 | } | 1892 | } |
1893 | |||
1894 | if (!ret) | ||
1895 | ret = filemap_fdatawait_range(file->f_mapping, pos, | ||
1896 | pos + count - 1); | ||
1895 | } | 1897 | } |
1896 | 1898 | ||
1897 | /* | 1899 | /* |
@@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
1991 | 1993 | ||
1992 | if (ret > 0) { | 1994 | if (ret > 0) { |
1993 | unsigned long nr_pages; | 1995 | unsigned long nr_pages; |
1996 | int err; | ||
1994 | 1997 | ||
1995 | *ppos += ret; | ||
1996 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1998 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
1997 | 1999 | ||
1998 | /* | 2000 | err = generic_write_sync(out, *ppos, ret); |
1999 | * If file or inode is SYNC and we actually wrote some data, | 2001 | if (err) |
2000 | * sync it. | 2002 | ret = err; |
2001 | */ | 2003 | else |
2002 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2004 | *ppos += ret; |
2003 | int err; | ||
2004 | |||
2005 | mutex_lock(&inode->i_mutex); | ||
2006 | err = ocfs2_rw_lock(inode, 1); | ||
2007 | if (err < 0) { | ||
2008 | mlog_errno(err); | ||
2009 | } else { | ||
2010 | err = generic_osync_inode(inode, mapping, | ||
2011 | OSYNC_METADATA|OSYNC_DATA); | ||
2012 | ocfs2_rw_unlock(inode, 1); | ||
2013 | } | ||
2014 | mutex_unlock(&inode->i_mutex); | ||
2015 | 2005 | ||
2016 | if (err) | ||
2017 | ret = err; | ||
2018 | } | ||
2019 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 2006 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); |
2020 | } | 2007 | } |
2021 | 2008 | ||
diff --git a/fs/splice.c b/fs/splice.c index 73766d24f97b..819023733f8e 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -976,25 +976,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
976 | 976 | ||
977 | if (ret > 0) { | 977 | if (ret > 0) { |
978 | unsigned long nr_pages; | 978 | unsigned long nr_pages; |
979 | int err; | ||
979 | 980 | ||
980 | *ppos += ret; | ||
981 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 981 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
982 | 982 | ||
983 | /* | 983 | err = generic_write_sync(out, *ppos, ret); |
984 | * If file or inode is SYNC and we actually wrote some data, | 984 | if (err) |
985 | * sync it. | 985 | ret = err; |
986 | */ | 986 | else |
987 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | 987 | *ppos += ret; |
988 | int err; | ||
989 | |||
990 | mutex_lock(&inode->i_mutex); | ||
991 | err = generic_osync_inode(inode, mapping, | ||
992 | OSYNC_METADATA|OSYNC_DATA); | ||
993 | mutex_unlock(&inode->i_mutex); | ||
994 | |||
995 | if (err) | ||
996 | ret = err; | ||
997 | } | ||
998 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 988 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); |
999 | } | 989 | } |
1000 | 990 | ||
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
@@ -178,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) | |||
178 | } | 178 | } |
179 | 179 | ||
180 | /** | 180 | /** |
181 | * vfs_fsync - perform a fsync or fdatasync on a file | 181 | * vfs_fsync_range - helper to sync a range of data & metadata to disk |
182 | * @file: file to sync | 182 | * @file: file to sync |
183 | * @dentry: dentry of @file | 183 | * @dentry: dentry of @file |
184 | * @data: only perform a fdatasync operation | 184 | * @start: offset in bytes of the beginning of data range to sync |
185 | * @end: offset in bytes of the end of data range (inclusive) | ||
186 | * @datasync: perform only datasync | ||
185 | * | 187 | * |
186 | * Write back data and metadata for @file to disk. If @datasync is | 188 | * Write back data in range @start..@end and metadata for @file to disk. If |
187 | * set only metadata needed to access modified file data is written. | 189 | * @datasync is set only metadata needed to access modified file data is |
190 | * written. | ||
188 | * | 191 | * |
189 | * In case this function is called from nfsd @file may be %NULL and | 192 | * In case this function is called from nfsd @file may be %NULL and |
190 | * only @dentry is set. This can only happen when the filesystem | 193 | * only @dentry is set. This can only happen when the filesystem |
191 | * implements the export_operations API. | 194 | * implements the export_operations API. |
192 | */ | 195 | */ |
193 | int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 196 | int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, |
197 | loff_t end, int datasync) | ||
194 | { | 198 | { |
195 | const struct file_operations *fop; | 199 | const struct file_operations *fop; |
196 | struct address_space *mapping; | 200 | struct address_space *mapping; |
@@ -214,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
214 | goto out; | 218 | goto out; |
215 | } | 219 | } |
216 | 220 | ||
217 | ret = filemap_fdatawrite(mapping); | 221 | ret = filemap_write_and_wait_range(mapping, start, end); |
218 | 222 | ||
219 | /* | 223 | /* |
220 | * We need to protect against concurrent writers, which could cause | 224 | * We need to protect against concurrent writers, which could cause |
@@ -225,12 +229,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
225 | if (!ret) | 229 | if (!ret) |
226 | ret = err; | 230 | ret = err; |
227 | mutex_unlock(&mapping->host->i_mutex); | 231 | mutex_unlock(&mapping->host->i_mutex); |
228 | err = filemap_fdatawait(mapping); | 232 | |
229 | if (!ret) | ||
230 | ret = err; | ||
231 | out: | 233 | out: |
232 | return ret; | 234 | return ret; |
233 | } | 235 | } |
236 | EXPORT_SYMBOL(vfs_fsync_range); | ||
237 | |||
238 | /** | ||
239 | * vfs_fsync - perform a fsync or fdatasync on a file | ||
240 | * @file: file to sync | ||
241 | * @dentry: dentry of @file | ||
242 | * @datasync: only perform a fdatasync operation | ||
243 | * | ||
244 | * Write back data and metadata for @file to disk. If @datasync is | ||
245 | * set only metadata needed to access modified file data is written. | ||
246 | * | ||
247 | * In case this function is called from nfsd @file may be %NULL and | ||
248 | * only @dentry is set. This can only happen when the filesystem | ||
249 | * implements the export_operations API. | ||
250 | */ | ||
251 | int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | ||
252 | { | ||
253 | return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync); | ||
254 | } | ||
234 | EXPORT_SYMBOL(vfs_fsync); | 255 | EXPORT_SYMBOL(vfs_fsync); |
235 | 256 | ||
236 | static int do_fsync(unsigned int fd, int datasync) | 257 | static int do_fsync(unsigned int fd, int datasync) |
@@ -256,6 +277,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) | |||
256 | return do_fsync(fd, 1); | 277 | return do_fsync(fd, 1); |
257 | } | 278 | } |
258 | 279 | ||
280 | /** | ||
281 | * generic_write_sync - perform syncing after a write if file / inode is sync | ||
282 | * @file: file to which the write happened | ||
283 | * @pos: offset where the write started | ||
284 | * @count: length of the write | ||
285 | * | ||
286 | * This is just a simple wrapper about our general syncing function. | ||
287 | */ | ||
288 | int generic_write_sync(struct file *file, loff_t pos, loff_t count) | ||
289 | { | ||
290 | if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host)) | ||
291 | return 0; | ||
292 | return vfs_fsync_range(file, file->f_path.dentry, pos, | ||
293 | pos + count - 1, 1); | ||
294 | } | ||
295 | EXPORT_SYMBOL(generic_write_sync); | ||
296 | |||
259 | /* | 297 | /* |
260 | * sys_sync_file_range() permits finely controlled syncing over a segment of | 298 | * sys_sync_file_range() permits finely controlled syncing over a segment of |
261 | * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is | 299 | * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 7078974a6eee..fde63a3c4ecc 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -817,7 +817,8 @@ write_retry: | |||
817 | xfs_iunlock(xip, iolock); | 817 | xfs_iunlock(xip, iolock); |
818 | if (need_i_mutex) | 818 | if (need_i_mutex) |
819 | mutex_unlock(&inode->i_mutex); | 819 | mutex_unlock(&inode->i_mutex); |
820 | error2 = sync_page_range(inode, mapping, pos, ret); | 820 | error2 = filemap_write_and_wait_range(mapping, pos, |
821 | pos + ret - 1); | ||
821 | if (!error) | 822 | if (!error) |
822 | error = error2; | 823 | error = error2; |
823 | if (need_i_mutex) | 824 | if (need_i_mutex) |
diff --git a/include/linux/fs.h b/include/linux/fs.h index a79f48373e7e..37f53216998a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1455,11 +1455,6 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | |||
1455 | #define DT_SOCK 12 | 1455 | #define DT_SOCK 12 |
1456 | #define DT_WHT 14 | 1456 | #define DT_WHT 14 |
1457 | 1457 | ||
1458 | #define OSYNC_METADATA (1<<0) | ||
1459 | #define OSYNC_DATA (1<<1) | ||
1460 | #define OSYNC_INODE (1<<2) | ||
1461 | int generic_osync_inode(struct inode *, struct address_space *, int); | ||
1462 | |||
1463 | /* | 1458 | /* |
1464 | * This is the "filldir" function type, used by readdir() to let | 1459 | * This is the "filldir" function type, used by readdir() to let |
1465 | * the kernel specify what kind of dirent layout it wants to have. | 1460 | * the kernel specify what kind of dirent layout it wants to have. |
@@ -2086,6 +2081,8 @@ extern int write_inode_now(struct inode *, int); | |||
2086 | extern int filemap_fdatawrite(struct address_space *); | 2081 | extern int filemap_fdatawrite(struct address_space *); |
2087 | extern int filemap_flush(struct address_space *); | 2082 | extern int filemap_flush(struct address_space *); |
2088 | extern int filemap_fdatawait(struct address_space *); | 2083 | extern int filemap_fdatawait(struct address_space *); |
2084 | extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, | ||
2085 | loff_t lend); | ||
2089 | extern int filemap_write_and_wait(struct address_space *mapping); | 2086 | extern int filemap_write_and_wait(struct address_space *mapping); |
2090 | extern int filemap_write_and_wait_range(struct address_space *mapping, | 2087 | extern int filemap_write_and_wait_range(struct address_space *mapping, |
2091 | loff_t lstart, loff_t lend); | 2088 | loff_t lstart, loff_t lend); |
@@ -2096,7 +2093,10 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, | |||
2096 | extern int filemap_fdatawrite_range(struct address_space *mapping, | 2093 | extern int filemap_fdatawrite_range(struct address_space *mapping, |
2097 | loff_t start, loff_t end); | 2094 | loff_t start, loff_t end); |
2098 | 2095 | ||
2096 | extern int vfs_fsync_range(struct file *file, struct dentry *dentry, | ||
2097 | loff_t start, loff_t end, int datasync); | ||
2099 | extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); | 2098 | extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); |
2099 | extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); | ||
2100 | extern void sync_supers(void); | 2100 | extern void sync_supers(void); |
2101 | extern void emergency_sync(void); | 2101 | extern void emergency_sync(void); |
2102 | extern void emergency_remount(void); | 2102 | extern void emergency_remount(void); |
@@ -2202,9 +2202,9 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); | |||
2202 | extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); | 2202 | extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); |
2203 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); | 2203 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); |
2204 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2204 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2205 | extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, | ||
2206 | loff_t *); | ||
2205 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2207 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2206 | extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, | ||
2207 | unsigned long, loff_t); | ||
2208 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, | 2208 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, |
2209 | unsigned long *, loff_t, loff_t *, size_t, size_t); | 2209 | unsigned long *, loff_t, loff_t *, size_t, size_t); |
2210 | extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, | 2210 | extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, |
@@ -2214,6 +2214,10 @@ extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t l | |||
2214 | extern int generic_segment_checks(const struct iovec *iov, | 2214 | extern int generic_segment_checks(const struct iovec *iov, |
2215 | unsigned long *nr_segs, size_t *count, int access_flags); | 2215 | unsigned long *nr_segs, size_t *count, int access_flags); |
2216 | 2216 | ||
2217 | /* fs/block_dev.c */ | ||
2218 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | ||
2219 | unsigned long nr_segs, loff_t pos); | ||
2220 | |||
2217 | /* fs/splice.c */ | 2221 | /* fs/splice.c */ |
2218 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, | 2222 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, |
2219 | struct pipe_inode_info *, size_t, unsigned int); | 2223 | struct pipe_inode_info *, size_t, unsigned int); |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 78b1e4684cc9..d347632f1861 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -150,10 +150,6 @@ int write_cache_pages(struct address_space *mapping, | |||
150 | struct writeback_control *wbc, writepage_t writepage, | 150 | struct writeback_control *wbc, writepage_t writepage, |
151 | void *data); | 151 | void *data); |
152 | int do_writepages(struct address_space *mapping, struct writeback_control *wbc); | 152 | int do_writepages(struct address_space *mapping, struct writeback_control *wbc); |
153 | int sync_page_range(struct inode *inode, struct address_space *mapping, | ||
154 | loff_t pos, loff_t count); | ||
155 | int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, | ||
156 | loff_t pos, loff_t count); | ||
157 | void set_page_dirty_balance(struct page *page, int page_mkwrite); | 153 | void set_page_dirty_balance(struct page *page, int page_mkwrite); |
158 | void writeback_set_ratelimit(void); | 154 | void writeback_set_ratelimit(void); |
159 | 155 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index ccea3b665c12..dd51c68e2b86 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -39,11 +39,10 @@ | |||
39 | /* | 39 | /* |
40 | * FIXME: remove all knowledge of the buffer layer from the core VM | 40 | * FIXME: remove all knowledge of the buffer layer from the core VM |
41 | */ | 41 | */ |
42 | #include <linux/buffer_head.h> /* for generic_osync_inode */ | 42 | #include <linux/buffer_head.h> /* for try_to_free_buffers */ |
43 | 43 | ||
44 | #include <asm/mman.h> | 44 | #include <asm/mman.h> |
45 | 45 | ||
46 | |||
47 | /* | 46 | /* |
48 | * Shared mappings implemented 30.11.1994. It's not fully working yet, | 47 | * Shared mappings implemented 30.11.1994. It's not fully working yet, |
49 | * though. | 48 | * though. |
@@ -307,68 +306,24 @@ int wait_on_page_writeback_range(struct address_space *mapping, | |||
307 | } | 306 | } |
308 | 307 | ||
309 | /** | 308 | /** |
310 | * sync_page_range - write and wait on all pages in the passed range | 309 | * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range |
311 | * @inode: target inode | 310 | * @mapping: address space structure to wait for |
312 | * @mapping: target address_space | 311 | * @start: offset in bytes where the range starts |
313 | * @pos: beginning offset in pages to write | 312 | * @end: offset in bytes where the range ends (inclusive) |
314 | * @count: number of bytes to write | ||
315 | * | ||
316 | * Write and wait upon all the pages in the passed range. This is a "data | ||
317 | * integrity" operation. It waits upon in-flight writeout before starting and | ||
318 | * waiting upon new writeout. If there was an IO error, return it. | ||
319 | * | 313 | * |
320 | * We need to re-take i_mutex during the generic_osync_inode list walk because | 314 | * Walk the list of under-writeback pages of the given address space |
321 | * it is otherwise livelockable. | 315 | * in the given range and wait for all of them. |
322 | */ | ||
323 | int sync_page_range(struct inode *inode, struct address_space *mapping, | ||
324 | loff_t pos, loff_t count) | ||
325 | { | ||
326 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | ||
327 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | ||
328 | int ret; | ||
329 | |||
330 | if (!mapping_cap_writeback_dirty(mapping) || !count) | ||
331 | return 0; | ||
332 | ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); | ||
333 | if (ret == 0) { | ||
334 | mutex_lock(&inode->i_mutex); | ||
335 | ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
336 | mutex_unlock(&inode->i_mutex); | ||
337 | } | ||
338 | if (ret == 0) | ||
339 | ret = wait_on_page_writeback_range(mapping, start, end); | ||
340 | return ret; | ||
341 | } | ||
342 | EXPORT_SYMBOL(sync_page_range); | ||
343 | |||
344 | /** | ||
345 | * sync_page_range_nolock - write & wait on all pages in the passed range without locking | ||
346 | * @inode: target inode | ||
347 | * @mapping: target address_space | ||
348 | * @pos: beginning offset in pages to write | ||
349 | * @count: number of bytes to write | ||
350 | * | 316 | * |
351 | * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea | 317 | * This is just a simple wrapper so that callers don't have to convert offsets |
352 | * as it forces O_SYNC writers to different parts of the same file | 318 | * to page indexes themselves. |
353 | * to be serialised right until io completion. | ||
354 | */ | 319 | */ |
355 | int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, | 320 | int filemap_fdatawait_range(struct address_space *mapping, loff_t start, |
356 | loff_t pos, loff_t count) | 321 | loff_t end) |
357 | { | 322 | { |
358 | pgoff_t start = pos >> PAGE_CACHE_SHIFT; | 323 | return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT, |
359 | pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | 324 | end >> PAGE_CACHE_SHIFT); |
360 | int ret; | ||
361 | |||
362 | if (!mapping_cap_writeback_dirty(mapping) || !count) | ||
363 | return 0; | ||
364 | ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); | ||
365 | if (ret == 0) | ||
366 | ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
367 | if (ret == 0) | ||
368 | ret = wait_on_page_writeback_range(mapping, start, end); | ||
369 | return ret; | ||
370 | } | 325 | } |
371 | EXPORT_SYMBOL(sync_page_range_nolock); | 326 | EXPORT_SYMBOL(filemap_fdatawait_range); |
372 | 327 | ||
373 | /** | 328 | /** |
374 | * filemap_fdatawait - wait for all under-writeback pages to complete | 329 | * filemap_fdatawait - wait for all under-writeback pages to complete |
@@ -2167,20 +2122,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
2167 | } | 2122 | } |
2168 | *ppos = end; | 2123 | *ppos = end; |
2169 | } | 2124 | } |
2170 | |||
2171 | /* | ||
2172 | * Sync the fs metadata but not the minor inode changes and | ||
2173 | * of course not the data as we did direct DMA for the IO. | ||
2174 | * i_mutex is held, which protects generic_osync_inode() from | ||
2175 | * livelocking. AIO O_DIRECT ops attempt to sync metadata here. | ||
2176 | */ | ||
2177 | out: | 2125 | out: |
2178 | if ((written >= 0 || written == -EIOCBQUEUED) && | ||
2179 | ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2180 | int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); | ||
2181 | if (err < 0) | ||
2182 | written = err; | ||
2183 | } | ||
2184 | return written; | 2126 | return written; |
2185 | } | 2127 | } |
2186 | EXPORT_SYMBOL(generic_file_direct_write); | 2128 | EXPORT_SYMBOL(generic_file_direct_write); |
@@ -2312,8 +2254,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2312 | { | 2254 | { |
2313 | struct file *file = iocb->ki_filp; | 2255 | struct file *file = iocb->ki_filp; |
2314 | struct address_space *mapping = file->f_mapping; | 2256 | struct address_space *mapping = file->f_mapping; |
2315 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
2316 | struct inode *inode = mapping->host; | ||
2317 | ssize_t status; | 2257 | ssize_t status; |
2318 | struct iov_iter i; | 2258 | struct iov_iter i; |
2319 | 2259 | ||
@@ -2323,16 +2263,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2323 | if (likely(status >= 0)) { | 2263 | if (likely(status >= 0)) { |
2324 | written += status; | 2264 | written += status; |
2325 | *ppos = pos + status; | 2265 | *ppos = pos + status; |
2326 | |||
2327 | /* | ||
2328 | * For now, when the user asks for O_SYNC, we'll actually give | ||
2329 | * O_DSYNC | ||
2330 | */ | ||
2331 | if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2332 | if (!a_ops->writepage || !is_sync_kiocb(iocb)) | ||
2333 | status = generic_osync_inode(inode, mapping, | ||
2334 | OSYNC_METADATA|OSYNC_DATA); | ||
2335 | } | ||
2336 | } | 2266 | } |
2337 | 2267 | ||
2338 | /* | 2268 | /* |
@@ -2348,9 +2278,27 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
2348 | } | 2278 | } |
2349 | EXPORT_SYMBOL(generic_file_buffered_write); | 2279 | EXPORT_SYMBOL(generic_file_buffered_write); |
2350 | 2280 | ||
2351 | static ssize_t | 2281 | /** |
2352 | __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | 2282 | * __generic_file_aio_write - write data to a file |
2353 | unsigned long nr_segs, loff_t *ppos) | 2283 | * @iocb: IO state structure (file, offset, etc.) |
2284 | * @iov: vector with data to write | ||
2285 | * @nr_segs: number of segments in the vector | ||
2286 | * @ppos: position where to write | ||
2287 | * | ||
2288 | * This function does all the work needed for actually writing data to a | ||
2289 | * file. It does all basic checks, removes SUID from the file, updates | ||
2290 | * modification times and calls proper subroutines depending on whether we | ||
2291 | * do direct IO or a standard buffered write. | ||
2292 | * | ||
2293 | * It expects i_mutex to be grabbed unless we work on a block device or similar | ||
2294 | * object which does not need locking at all. | ||
2295 | * | ||
2296 | * This function does *not* take care of syncing data in case of O_SYNC write. | ||
2297 | * A caller has to handle it. This is mainly due to the fact that we want to | ||
2298 | * avoid syncing under i_mutex. | ||
2299 | */ | ||
2300 | ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | ||
2301 | unsigned long nr_segs, loff_t *ppos) | ||
2354 | { | 2302 | { |
2355 | struct file *file = iocb->ki_filp; | 2303 | struct file *file = iocb->ki_filp; |
2356 | struct address_space * mapping = file->f_mapping; | 2304 | struct address_space * mapping = file->f_mapping; |
@@ -2447,51 +2395,37 @@ out: | |||
2447 | current->backing_dev_info = NULL; | 2395 | current->backing_dev_info = NULL; |
2448 | return written ? written : err; | 2396 | return written ? written : err; |
2449 | } | 2397 | } |
2398 | EXPORT_SYMBOL(__generic_file_aio_write); | ||
2450 | 2399 | ||
2451 | ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, | 2400 | /** |
2452 | const struct iovec *iov, unsigned long nr_segs, loff_t pos) | 2401 | * generic_file_aio_write - write data to a file |
2453 | { | 2402 | * @iocb: IO state structure |
2454 | struct file *file = iocb->ki_filp; | 2403 | * @iov: vector with data to write |
2455 | struct address_space *mapping = file->f_mapping; | 2404 | * @nr_segs: number of segments in the vector |
2456 | struct inode *inode = mapping->host; | 2405 | * @pos: position in file where to write |
2457 | ssize_t ret; | 2406 | * |
2458 | 2407 | * This is a wrapper around __generic_file_aio_write() to be used by most | |
2459 | BUG_ON(iocb->ki_pos != pos); | 2408 | * filesystems. It takes care of syncing the file in case of O_SYNC file |
2460 | 2409 | * and acquires i_mutex as needed. | |
2461 | ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, | 2410 | */ |
2462 | &iocb->ki_pos); | ||
2463 | |||
2464 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2465 | ssize_t err; | ||
2466 | |||
2467 | err = sync_page_range_nolock(inode, mapping, pos, ret); | ||
2468 | if (err < 0) | ||
2469 | ret = err; | ||
2470 | } | ||
2471 | return ret; | ||
2472 | } | ||
2473 | EXPORT_SYMBOL(generic_file_aio_write_nolock); | ||
2474 | |||
2475 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2411 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, |
2476 | unsigned long nr_segs, loff_t pos) | 2412 | unsigned long nr_segs, loff_t pos) |
2477 | { | 2413 | { |
2478 | struct file *file = iocb->ki_filp; | 2414 | struct file *file = iocb->ki_filp; |
2479 | struct address_space *mapping = file->f_mapping; | 2415 | struct inode *inode = file->f_mapping->host; |
2480 | struct inode *inode = mapping->host; | ||
2481 | ssize_t ret; | 2416 | ssize_t ret; |
2482 | 2417 | ||
2483 | BUG_ON(iocb->ki_pos != pos); | 2418 | BUG_ON(iocb->ki_pos != pos); |
2484 | 2419 | ||
2485 | mutex_lock(&inode->i_mutex); | 2420 | mutex_lock(&inode->i_mutex); |
2486 | ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, | 2421 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
2487 | &iocb->ki_pos); | ||
2488 | mutex_unlock(&inode->i_mutex); | 2422 | mutex_unlock(&inode->i_mutex); |
2489 | 2423 | ||
2490 | if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2424 | if (ret > 0 || ret == -EIOCBQUEUED) { |
2491 | ssize_t err; | 2425 | ssize_t err; |
2492 | 2426 | ||
2493 | err = sync_page_range(inode, mapping, pos, ret); | 2427 | err = generic_write_sync(file, pos, ret); |
2494 | if (err < 0) | 2428 | if (err < 0 && ret > 0) |
2495 | ret = err; | 2429 | ret = err; |
2496 | } | 2430 | } |
2497 | return ret; | 2431 | return ret; |