aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext3
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext3')
-rw-r--r--fs/ext3/acl.c97
-rw-r--r--fs/ext3/acl.h4
-rw-r--r--fs/ext3/balloc.c38
-rw-r--r--fs/ext3/file.c3
-rw-r--r--fs/ext3/fsync.c27
-rw-r--r--fs/ext3/ialloc.c4
-rw-r--r--fs/ext3/inode.c229
-rw-r--r--fs/ext3/ioctl.c4
-rw-r--r--fs/ext3/namei.c34
-rw-r--r--fs/ext3/super.c15
-rw-r--r--fs/ext3/xattr.c12
11 files changed, 290 insertions, 177 deletions
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 9d021c0d472..3091f62e55b 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -131,7 +131,7 @@ fail:
131 * 131 *
132 * inode->i_mutex: don't care 132 * inode->i_mutex: don't care
133 */ 133 */
134static struct posix_acl * 134struct posix_acl *
135ext3_get_acl(struct inode *inode, int type) 135ext3_get_acl(struct inode *inode, int type)
136{ 136{
137 int name_index; 137 int name_index;
@@ -199,12 +199,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
199 case ACL_TYPE_ACCESS: 199 case ACL_TYPE_ACCESS:
200 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; 200 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
201 if (acl) { 201 if (acl) {
202 mode_t mode = inode->i_mode; 202 error = posix_acl_equiv_mode(acl, &inode->i_mode);
203 error = posix_acl_equiv_mode(acl, &mode);
204 if (error < 0) 203 if (error < 0)
205 return error; 204 return error;
206 else { 205 else {
207 inode->i_mode = mode;
208 inode->i_ctime = CURRENT_TIME_SEC; 206 inode->i_ctime = CURRENT_TIME_SEC;
209 ext3_mark_inode_dirty(handle, inode); 207 ext3_mark_inode_dirty(handle, inode);
210 if (error == 0) 208 if (error == 0)
@@ -239,29 +237,6 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
239 return error; 237 return error;
240} 238}
241 239
242int
243ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
244{
245 struct posix_acl *acl;
246
247 if (flags & IPERM_FLAG_RCU) {
248 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
249 return -ECHILD;
250 return -EAGAIN;
251 }
252
253 acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
254 if (IS_ERR(acl))
255 return PTR_ERR(acl);
256 if (acl) {
257 int error = posix_acl_permission(inode, acl, mask);
258 posix_acl_release(acl);
259 return error;
260 }
261
262 return -EAGAIN;
263}
264
265/* 240/*
266 * Initialize the ACLs of a new inode. Called from ext3_new_inode. 241 * Initialize the ACLs of a new inode. Called from ext3_new_inode.
267 * 242 *
@@ -284,31 +259,20 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
284 inode->i_mode &= ~current_umask(); 259 inode->i_mode &= ~current_umask();
285 } 260 }
286 if (test_opt(inode->i_sb, POSIX_ACL) && acl) { 261 if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
287 struct posix_acl *clone;
288 mode_t mode;
289
290 if (S_ISDIR(inode->i_mode)) { 262 if (S_ISDIR(inode->i_mode)) {
291 error = ext3_set_acl(handle, inode, 263 error = ext3_set_acl(handle, inode,
292 ACL_TYPE_DEFAULT, acl); 264 ACL_TYPE_DEFAULT, acl);
293 if (error) 265 if (error)
294 goto cleanup; 266 goto cleanup;
295 } 267 }
296 clone = posix_acl_clone(acl, GFP_NOFS); 268 error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
297 error = -ENOMEM; 269 if (error < 0)
298 if (!clone) 270 return error;
299 goto cleanup; 271
300 272 if (error > 0) {
301 mode = inode->i_mode; 273 /* This is an extended ACL */
302 error = posix_acl_create_masq(clone, &mode); 274 error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
303 if (error >= 0) {
304 inode->i_mode = mode;
305 if (error > 0) {
306 /* This is an extended ACL */
307 error = ext3_set_acl(handle, inode,
308 ACL_TYPE_ACCESS, clone);
309 }
310 } 275 }
311 posix_acl_release(clone);
312 } 276 }
313cleanup: 277cleanup:
314 posix_acl_release(acl); 278 posix_acl_release(acl);
@@ -332,7 +296,9 @@ cleanup:
332int 296int
333ext3_acl_chmod(struct inode *inode) 297ext3_acl_chmod(struct inode *inode)
334{ 298{
335 struct posix_acl *acl, *clone; 299 struct posix_acl *acl;
300 handle_t *handle;
301 int retries = 0;
336 int error; 302 int error;
337 303
338 if (S_ISLNK(inode->i_mode)) 304 if (S_ISLNK(inode->i_mode))
@@ -342,31 +308,24 @@ ext3_acl_chmod(struct inode *inode)
342 acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); 308 acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
343 if (IS_ERR(acl) || !acl) 309 if (IS_ERR(acl) || !acl)
344 return PTR_ERR(acl); 310 return PTR_ERR(acl);
345 clone = posix_acl_clone(acl, GFP_KERNEL); 311 error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
346 posix_acl_release(acl); 312 if (error)
347 if (!clone) 313 return error;
348 return -ENOMEM; 314retry:
349 error = posix_acl_chmod_masq(clone, inode->i_mode); 315 handle = ext3_journal_start(inode,
350 if (!error) { 316 EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
351 handle_t *handle; 317 if (IS_ERR(handle)) {
352 int retries = 0; 318 error = PTR_ERR(handle);
353 319 ext3_std_error(inode->i_sb, error);
354 retry: 320 goto out;
355 handle = ext3_journal_start(inode,
356 EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
357 if (IS_ERR(handle)) {
358 error = PTR_ERR(handle);
359 ext3_std_error(inode->i_sb, error);
360 goto out;
361 }
362 error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
363 ext3_journal_stop(handle);
364 if (error == -ENOSPC &&
365 ext3_should_retry_alloc(inode->i_sb, &retries))
366 goto retry;
367 } 321 }
322 error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
323 ext3_journal_stop(handle);
324 if (error == -ENOSPC &&
325 ext3_should_retry_alloc(inode->i_sb, &retries))
326 goto retry;
368out: 327out:
369 posix_acl_release(clone); 328 posix_acl_release(acl);
370 return error; 329 return error;
371} 330}
372 331
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 5faf8048e90..dbc921e458c 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_check_acl (struct inode *, int, unsigned int); 57extern struct posix_acl *ext3_get_acl(struct inode *inode, int type);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
61#else /* CONFIG_EXT3_FS_POSIX_ACL */ 61#else /* CONFIG_EXT3_FS_POSIX_ACL */
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext3_check_acl NULL 63#define ext3_get_acl NULL
64 64
65static inline int 65static inline int
66ext3_acl_chmod(struct inode *inode) 66ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index fe52297e31a..6386d76f44a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -21,6 +21,7 @@
21#include <linux/quotaops.h> 21#include <linux/quotaops.h>
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <trace/events/ext3.h>
24 25
25/* 26/*
26 * balloc.c contains the blocks allocation and deallocation routines 27 * balloc.c contains the blocks allocation and deallocation routines
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
161 desc = ext3_get_group_desc(sb, block_group, NULL); 162 desc = ext3_get_group_desc(sb, block_group, NULL);
162 if (!desc) 163 if (!desc)
163 return NULL; 164 return NULL;
165 trace_ext3_read_block_bitmap(sb, block_group);
164 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); 166 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
165 bh = sb_getblk(sb, bitmap_blk); 167 bh = sb_getblk(sb, bitmap_blk);
166 if (unlikely(!bh)) { 168 if (unlikely(!bh)) {
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
351 struct rb_node * parent = NULL; 353 struct rb_node * parent = NULL;
352 struct ext3_reserve_window_node *this; 354 struct ext3_reserve_window_node *this;
353 355
356 trace_ext3_rsv_window_add(sb, rsv);
354 while (*p) 357 while (*p)
355 { 358 {
356 parent = *p; 359 parent = *p;
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
476 rsv = &block_i->rsv_window_node; 479 rsv = &block_i->rsv_window_node;
477 if (!rsv_is_empty(&rsv->rsv_window)) { 480 if (!rsv_is_empty(&rsv->rsv_window)) {
478 spin_lock(rsv_lock); 481 spin_lock(rsv_lock);
479 if (!rsv_is_empty(&rsv->rsv_window)) 482 if (!rsv_is_empty(&rsv->rsv_window)) {
483 trace_ext3_discard_reservation(inode, rsv);
480 rsv_window_remove(inode->i_sb, rsv); 484 rsv_window_remove(inode->i_sb, rsv);
485 }
481 spin_unlock(rsv_lock); 486 spin_unlock(rsv_lock);
482 } 487 }
483} 488}
@@ -683,14 +688,10 @@ error_return:
683void ext3_free_blocks(handle_t *handle, struct inode *inode, 688void ext3_free_blocks(handle_t *handle, struct inode *inode,
684 ext3_fsblk_t block, unsigned long count) 689 ext3_fsblk_t block, unsigned long count)
685{ 690{
686 struct super_block * sb; 691 struct super_block *sb = inode->i_sb;
687 unsigned long dquot_freed_blocks; 692 unsigned long dquot_freed_blocks;
688 693
689 sb = inode->i_sb; 694 trace_ext3_free_blocks(inode, block, count);
690 if (!sb) {
691 printk ("ext3_free_blocks: nonexistent device");
692 return;
693 }
694 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); 695 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
695 if (dquot_freed_blocks) 696 if (dquot_freed_blocks)
696 dquot_free_block(inode, dquot_freed_blocks); 697 dquot_free_block(inode, dquot_freed_blocks);
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
1136 else 1137 else
1137 start_block = grp_goal + group_first_block; 1138 start_block = grp_goal + group_first_block;
1138 1139
1140 trace_ext3_alloc_new_reservation(sb, start_block);
1139 size = my_rsv->rsv_goal_size; 1141 size = my_rsv->rsv_goal_size;
1140 1142
1141 if (!rsv_is_empty(&my_rsv->rsv_window)) { 1143 if (!rsv_is_empty(&my_rsv->rsv_window)) {
@@ -1230,8 +1232,11 @@ retry:
1230 * check if the first free block is within the 1232 * check if the first free block is within the
1231 * free space we just reserved 1233 * free space we just reserved
1232 */ 1234 */
1233 if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) 1235 if (start_block >= my_rsv->rsv_start &&
1236 start_block <= my_rsv->rsv_end) {
1237 trace_ext3_reserved(sb, start_block, my_rsv);
1234 return 0; /* success */ 1238 return 0; /* success */
1239 }
1235 /* 1240 /*
1236 * if the first free bit we found is out of the reservable space 1241 * if the first free bit we found is out of the reservable space
1237 * continue search for next reservable space, 1242 * continue search for next reservable space,
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1514 1519
1515 *errp = -ENOSPC; 1520 *errp = -ENOSPC;
1516 sb = inode->i_sb; 1521 sb = inode->i_sb;
1517 if (!sb) {
1518 printk("ext3_new_block: nonexistent device");
1519 return 0;
1520 }
1521 1522
1522 /* 1523 /*
1523 * Check quota for allocation of this block. 1524 * Check quota for allocation of this block.
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1528 return 0; 1529 return 0;
1529 } 1530 }
1530 1531
1532 trace_ext3_request_blocks(inode, goal, num);
1533
1531 sbi = EXT3_SB(sb); 1534 sbi = EXT3_SB(sb);
1532 es = EXT3_SB(sb)->s_es; 1535 es = sbi->s_es;
1533 ext3_debug("goal=%lu.\n", goal); 1536 ext3_debug("goal=%lu.\n", goal);
1534 /* 1537 /*
1535 * Allocate a block from reservation only when 1538 * Allocate a block from reservation only when
@@ -1742,6 +1745,10 @@ allocated:
1742 brelse(bitmap_bh); 1745 brelse(bitmap_bh);
1743 dquot_free_block(inode, *count-num); 1746 dquot_free_block(inode, *count-num);
1744 *count = num; 1747 *count = num;
1748
1749 trace_ext3_allocate_blocks(inode, goal, num,
1750 (unsigned long long)ret_block);
1751
1745 return ret_block; 1752 return ret_block;
1746 1753
1747io_error: 1754io_error:
@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
1996 if ((next - start) < minblocks) 2003 if ((next - start) < minblocks)
1997 goto free_extent; 2004 goto free_extent;
1998 2005
2006 trace_ext3_discard_blocks(sb, discard_block, next - start);
1999 /* Send the TRIM command down to the device */ 2007 /* Send the TRIM command down to the device */
2000 err = sb_issue_discard(sb, discard_block, next - start, 2008 err = sb_issue_discard(sb, discard_block, next - start,
2001 GFP_NOFS, 0); 2009 GFP_NOFS, 0);
@@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2100 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) 2108 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
2101 return -EINVAL; 2109 return -EINVAL;
2102 if (start >= max_blks) 2110 if (start >= max_blks)
2103 goto out; 2111 return -EINVAL;
2104 if (start + len > max_blks) 2112 if (start + len > max_blks)
2105 len = max_blks - start; 2113 len = max_blks - start;
2106 2114
@@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2148 2156
2149 if (ret >= 0) 2157 if (ret >= 0)
2150 ret = 0; 2158 ret = 0;
2151
2152out:
2153 range->len = trimmed * sb->s_blocksize; 2159 range->len = trimmed * sb->s_blocksize;
2154 2160
2155 return ret; 2161 return ret;
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index f55df0e61cb..724df69847d 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
71}; 71};
72 72
73const struct inode_operations ext3_file_inode_operations = { 73const struct inode_operations ext3_file_inode_operations = {
74 .truncate = ext3_truncate,
75 .setattr = ext3_setattr, 74 .setattr = ext3_setattr,
76#ifdef CONFIG_EXT3_FS_XATTR 75#ifdef CONFIG_EXT3_FS_XATTR
77 .setxattr = generic_setxattr, 76 .setxattr = generic_setxattr,
@@ -79,7 +78,7 @@ const struct inode_operations ext3_file_inode_operations = {
79 .listxattr = ext3_listxattr, 78 .listxattr = ext3_listxattr,
80 .removexattr = generic_removexattr, 79 .removexattr = generic_removexattr,
81#endif 80#endif
82 .check_acl = ext3_check_acl, 81 .get_acl = ext3_get_acl,
83 .fiemap = ext3_fiemap, 82 .fiemap = ext3_fiemap,
84}; 83};
85 84
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 09b13bb34c9..d494c554c6e 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -30,6 +30,7 @@
30#include <linux/jbd.h> 30#include <linux/jbd.h>
31#include <linux/ext3_fs.h> 31#include <linux/ext3_fs.h>
32#include <linux/ext3_jbd.h> 32#include <linux/ext3_jbd.h>
33#include <trace/events/ext3.h>
33 34
34/* 35/*
35 * akpm: A new design for ext3_sync_file(). 36 * akpm: A new design for ext3_sync_file().
@@ -43,7 +44,7 @@
43 * inode to disk. 44 * inode to disk.
44 */ 45 */
45 46
46int ext3_sync_file(struct file *file, int datasync) 47int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
47{ 48{
48 struct inode *inode = file->f_mapping->host; 49 struct inode *inode = file->f_mapping->host;
49 struct ext3_inode_info *ei = EXT3_I(inode); 50 struct ext3_inode_info *ei = EXT3_I(inode);
@@ -51,9 +52,22 @@ int ext3_sync_file(struct file *file, int datasync)
51 int ret, needs_barrier = 0; 52 int ret, needs_barrier = 0;
52 tid_t commit_tid; 53 tid_t commit_tid;
53 54
55 trace_ext3_sync_file_enter(file, datasync);
56
54 if (inode->i_sb->s_flags & MS_RDONLY) 57 if (inode->i_sb->s_flags & MS_RDONLY)
55 return 0; 58 return 0;
56 59
60 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
61 if (ret)
62 goto out;
63
64 /*
65 * Taking the mutex here just to keep consistent with how fsync was
66 * called previously, however it looks like we don't need to take
67 * i_mutex at all.
68 */
69 mutex_lock(&inode->i_mutex);
70
57 J_ASSERT(ext3_journal_current_handle() == NULL); 71 J_ASSERT(ext3_journal_current_handle() == NULL);
58 72
59 /* 73 /*
@@ -70,8 +84,11 @@ int ext3_sync_file(struct file *file, int datasync)
70 * (they were dirtied by commit). But that's OK - the blocks are 84 * (they were dirtied by commit). But that's OK - the blocks are
71 * safe in-journal, which is all fsync() needs to ensure. 85 * safe in-journal, which is all fsync() needs to ensure.
72 */ 86 */
73 if (ext3_should_journal_data(inode)) 87 if (ext3_should_journal_data(inode)) {
74 return ext3_force_commit(inode->i_sb); 88 mutex_unlock(&inode->i_mutex);
89 ret = ext3_force_commit(inode->i_sb);
90 goto out;
91 }
75 92
76 if (datasync) 93 if (datasync)
77 commit_tid = atomic_read(&ei->i_datasync_tid); 94 commit_tid = atomic_read(&ei->i_datasync_tid);
@@ -91,5 +108,9 @@ int ext3_sync_file(struct file *file, int datasync)
91 */ 108 */
92 if (needs_barrier) 109 if (needs_barrier)
93 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 110 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
111
112 mutex_unlock(&inode->i_mutex);
113out:
114 trace_ext3_sync_file_exit(inode, ret);
94 return ret; 115 return ret;
95} 116}
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bfc2dc43681..bf09cbf938c 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -23,6 +23,7 @@
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/random.h> 24#include <linux/random.h>
25#include <linux/bitops.h> 25#include <linux/bitops.h>
26#include <trace/events/ext3.h>
26 27
27#include <asm/byteorder.h> 28#include <asm/byteorder.h>
28 29
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
118 119
119 ino = inode->i_ino; 120 ino = inode->i_ino;
120 ext3_debug ("freeing inode %lu\n", ino); 121 ext3_debug ("freeing inode %lu\n", ino);
122 trace_ext3_free_inode(inode);
121 123
122 is_directory = S_ISDIR(inode->i_mode); 124 is_directory = S_ISDIR(inode->i_mode);
123 125
@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
426 return ERR_PTR(-EPERM); 428 return ERR_PTR(-EPERM);
427 429
428 sb = dir->i_sb; 430 sb = dir->i_sb;
431 trace_ext3_request_inode(dir, mode);
429 inode = new_inode(sb); 432 inode = new_inode(sb);
430 if (!inode) 433 if (!inode)
431 return ERR_PTR(-ENOMEM); 434 return ERR_PTR(-ENOMEM);
@@ -601,6 +604,7 @@ got:
601 } 604 }
602 605
603 ext3_debug("allocating inode %lu\n", inode->i_ino); 606 ext3_debug("allocating inode %lu\n", inode->i_ino);
607 trace_ext3_allocate_inode(inode, dir, mode);
604 goto really_out; 608 goto really_out;
605fail: 609fail:
606 ext3_std_error(sb, err); 610 ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3451d23c3ba..2ce3c52db32 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,10 +38,12 @@
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/fiemap.h> 39#include <linux/fiemap.h>
40#include <linux/namei.h> 40#include <linux/namei.h>
41#include <trace/events/ext3.h>
41#include "xattr.h" 42#include "xattr.h"
42#include "acl.h" 43#include "acl.h"
43 44
44static int ext3_writepage_trans_blocks(struct inode *inode); 45static int ext3_writepage_trans_blocks(struct inode *inode);
46static int ext3_block_truncate_page(struct inode *inode, loff_t from);
45 47
46/* 48/*
47 * Test whether an inode is a fast symlink. 49 * Test whether an inode is a fast symlink.
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
70 72
71 might_sleep(); 73 might_sleep();
72 74
75 trace_ext3_forget(inode, is_metadata, blocknr);
73 BUFFER_TRACE(bh, "enter"); 76 BUFFER_TRACE(bh, "enter");
74 77
75 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " 78 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
194 */ 197 */
195void ext3_evict_inode (struct inode *inode) 198void ext3_evict_inode (struct inode *inode)
196{ 199{
200 struct ext3_inode_info *ei = EXT3_I(inode);
197 struct ext3_block_alloc_info *rsv; 201 struct ext3_block_alloc_info *rsv;
198 handle_t *handle; 202 handle_t *handle;
199 int want_delete = 0; 203 int want_delete = 0;
200 204
205 trace_ext3_evict_inode(inode);
201 if (!inode->i_nlink && !is_bad_inode(inode)) { 206 if (!inode->i_nlink && !is_bad_inode(inode)) {
202 dquot_initialize(inode); 207 dquot_initialize(inode);
203 want_delete = 1; 208 want_delete = 1;
204 } 209 }
205 210
211 /*
212 * When journalling data dirty buffers are tracked only in the journal.
213 * So although mm thinks everything is clean and ready for reaping the
214 * inode might still have some pages to write in the running
215 * transaction or waiting to be checkpointed. Thus calling
216 * journal_invalidatepage() (via truncate_inode_pages()) to discard
217 * these buffers can cause data loss. Also even if we did not discard
218 * these buffers, we would have no way to find them after the inode
219 * is reaped and thus user could see stale data if he tries to read
220 * them before the transaction is checkpointed. So be careful and
221 * force everything to disk here... We use ei->i_datasync_tid to
222 * store the newest transaction containing inode's data.
223 *
224 * Note that directories do not have this problem because they don't
225 * use page cache.
226 */
227 if (inode->i_nlink && ext3_should_journal_data(inode) &&
228 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
229 tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
230 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
231
232 log_start_commit(journal, commit_tid);
233 log_wait_commit(journal, commit_tid);
234 filemap_write_and_wait(&inode->i_data);
235 }
206 truncate_inode_pages(&inode->i_data, 0); 236 truncate_inode_pages(&inode->i_data, 0);
207 237
208 ext3_discard_reservation(inode); 238 ext3_discard_reservation(inode);
209 rsv = EXT3_I(inode)->i_block_alloc_info; 239 rsv = ei->i_block_alloc_info;
210 EXT3_I(inode)->i_block_alloc_info = NULL; 240 ei->i_block_alloc_info = NULL;
211 if (unlikely(rsv)) 241 if (unlikely(rsv))
212 kfree(rsv); 242 kfree(rsv);
213 243
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
231 if (inode->i_blocks) 261 if (inode->i_blocks)
232 ext3_truncate(inode); 262 ext3_truncate(inode);
233 /* 263 /*
234 * Kill off the orphan record which ext3_truncate created. 264 * Kill off the orphan record created when the inode lost the last
235 * AKPM: I think this can be inside the above `if'. 265 * link. Note that ext3_orphan_del() has to be able to cope with the
236 * Note that ext3_orphan_del() has to be able to cope with the 266 * deletion of a non-existent orphan - ext3_truncate() could
237 * deletion of a non-existent orphan - this is because we don't 267 * have removed the record.
238 * know if ext3_truncate() actually created an orphan record.
239 * (Well, we could do this if we need to, but heck - it works)
240 */ 268 */
241 ext3_orphan_del(handle, inode); 269 ext3_orphan_del(handle, inode);
242 EXT3_I(inode)->i_dtime = get_seconds(); 270 ei->i_dtime = get_seconds();
243 271
244 /* 272 /*
245 * One subtle ordering requirement: if anything has gone wrong 273 * One subtle ordering requirement: if anything has gone wrong
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
842 ext3_fsblk_t first_block = 0; 870 ext3_fsblk_t first_block = 0;
843 871
844 872
873 trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
845 J_ASSERT(handle != NULL || create == 0); 874 J_ASSERT(handle != NULL || create == 0);
846 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); 875 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
847 876
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
886 if (!create || err == -EIO) 915 if (!create || err == -EIO)
887 goto cleanup; 916 goto cleanup;
888 917
918 /*
919 * Block out ext3_truncate while we alter the tree
920 */
889 mutex_lock(&ei->truncate_mutex); 921 mutex_lock(&ei->truncate_mutex);
890 922
891 /* 923 /*
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
934 */ 966 */
935 count = ext3_blks_to_allocate(partial, indirect_blks, 967 count = ext3_blks_to_allocate(partial, indirect_blks,
936 maxblocks, blocks_to_boundary); 968 maxblocks, blocks_to_boundary);
937 /*
938 * Block out ext3_truncate while we alter the tree
939 */
940 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, 969 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
941 offsets + (partial - chain), partial); 970 offsets + (partial - chain), partial);
942 971
@@ -970,6 +999,9 @@ cleanup:
970 } 999 }
971 BUFFER_TRACE(bh_result, "returned"); 1000 BUFFER_TRACE(bh_result, "returned");
972out: 1001out:
1002 trace_ext3_get_blocks_exit(inode, iblock,
1003 depth ? le32_to_cpu(chain[depth-1].key) : 0,
1004 count, err);
973 return err; 1005 return err;
974} 1006}
975 1007
@@ -1102,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
1102 return bh; 1134 return bh;
1103 if (buffer_uptodate(bh)) 1135 if (buffer_uptodate(bh))
1104 return bh; 1136 return bh;
1105 ll_rw_block(READ_META, 1, &bh); 1137 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
1106 wait_on_buffer(bh); 1138 wait_on_buffer(bh);
1107 if (buffer_uptodate(bh)) 1139 if (buffer_uptodate(bh))
1108 return bh; 1140 return bh;
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
1202 ext3_truncate(inode); 1234 ext3_truncate(inode);
1203} 1235}
1204 1236
1237/*
1238 * Truncate blocks that were not used by direct IO write. We have to zero out
1239 * the last file block as well because direct IO might have written to it.
1240 */
1241static void ext3_truncate_failed_direct_write(struct inode *inode)
1242{
1243 ext3_block_truncate_page(inode, inode->i_size);
1244 ext3_truncate(inode);
1245}
1246
1205static int ext3_write_begin(struct file *file, struct address_space *mapping, 1247static int ext3_write_begin(struct file *file, struct address_space *mapping,
1206 loff_t pos, unsigned len, unsigned flags, 1248 loff_t pos, unsigned len, unsigned flags,
1207 struct page **pagep, void **fsdata) 1249 struct page **pagep, void **fsdata)
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
1217 * we allocate blocks but write fails for some reason */ 1259 * we allocate blocks but write fails for some reason */
1218 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; 1260 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
1219 1261
1262 trace_ext3_write_begin(inode, pos, len, flags);
1263
1220 index = pos >> PAGE_CACHE_SHIFT; 1264 index = pos >> PAGE_CACHE_SHIFT;
1221 from = pos & (PAGE_CACHE_SIZE - 1); 1265 from = pos & (PAGE_CACHE_SIZE - 1);
1222 to = from + len; 1266 to = from + len;
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
1332 unsigned from, to; 1376 unsigned from, to;
1333 int ret = 0, ret2; 1377 int ret = 0, ret2;
1334 1378
1379 trace_ext3_ordered_write_end(inode, pos, len, copied);
1335 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 1380 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1336 1381
1337 from = pos & (PAGE_CACHE_SIZE - 1); 1382 from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
1367 struct inode *inode = file->f_mapping->host; 1412 struct inode *inode = file->f_mapping->host;
1368 int ret; 1413 int ret;
1369 1414
1415 trace_ext3_writeback_write_end(inode, pos, len, copied);
1370 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 1416 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1371 update_file_sizes(inode, pos, copied); 1417 update_file_sizes(inode, pos, copied);
1372 /* 1418 /*
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
1391{ 1437{
1392 handle_t *handle = ext3_journal_current_handle(); 1438 handle_t *handle = ext3_journal_current_handle();
1393 struct inode *inode = mapping->host; 1439 struct inode *inode = mapping->host;
1440 struct ext3_inode_info *ei = EXT3_I(inode);
1394 int ret = 0, ret2; 1441 int ret = 0, ret2;
1395 int partial = 0; 1442 int partial = 0;
1396 unsigned from, to; 1443 unsigned from, to;
1397 1444
1445 trace_ext3_journalled_write_end(inode, pos, len, copied);
1398 from = pos & (PAGE_CACHE_SIZE - 1); 1446 from = pos & (PAGE_CACHE_SIZE - 1);
1399 to = from + len; 1447 to = from + len;
1400 1448
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
1419 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1467 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1420 ext3_orphan_add(handle, inode); 1468 ext3_orphan_add(handle, inode);
1421 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1469 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1422 if (inode->i_size > EXT3_I(inode)->i_disksize) { 1470 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
1423 EXT3_I(inode)->i_disksize = inode->i_size; 1471 if (inode->i_size > ei->i_disksize) {
1472 ei->i_disksize = inode->i_size;
1424 ret2 = ext3_mark_inode_dirty(handle, inode); 1473 ret2 = ext3_mark_inode_dirty(handle, inode);
1425 if (!ret) 1474 if (!ret)
1426 ret = ret2; 1475 ret = ret2;
@@ -1568,7 +1617,13 @@ static int ext3_ordered_writepage(struct page *page,
1568 int err; 1617 int err;
1569 1618
1570 J_ASSERT(PageLocked(page)); 1619 J_ASSERT(PageLocked(page));
1571 WARN_ON_ONCE(IS_RDONLY(inode)); 1620 /*
1621 * We don't want to warn for emergency remount. The condition is
1622 * ordered to avoid dereferencing inode->i_sb in non-error case to
1623 * avoid slow-downs.
1624 */
1625 WARN_ON_ONCE(IS_RDONLY(inode) &&
1626 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1572 1627
1573 /* 1628 /*
1574 * We give up here if we're reentered, because it might be for a 1629 * We give up here if we're reentered, because it might be for a
@@ -1577,6 +1632,7 @@ static int ext3_ordered_writepage(struct page *page,
1577 if (ext3_journal_current_handle()) 1632 if (ext3_journal_current_handle())
1578 goto out_fail; 1633 goto out_fail;
1579 1634
1635 trace_ext3_ordered_writepage(page);
1580 if (!page_has_buffers(page)) { 1636 if (!page_has_buffers(page)) {
1581 create_empty_buffers(page, inode->i_sb->s_blocksize, 1637 create_empty_buffers(page, inode->i_sb->s_blocksize,
1582 (1 << BH_Dirty)|(1 << BH_Uptodate)); 1638 (1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1642,11 +1698,18 @@ static int ext3_writeback_writepage(struct page *page,
1642 int err; 1698 int err;
1643 1699
1644 J_ASSERT(PageLocked(page)); 1700 J_ASSERT(PageLocked(page));
1645 WARN_ON_ONCE(IS_RDONLY(inode)); 1701 /*
1702 * We don't want to warn for emergency remount. The condition is
1703 * ordered to avoid dereferencing inode->i_sb in non-error case to
1704 * avoid slow-downs.
1705 */
1706 WARN_ON_ONCE(IS_RDONLY(inode) &&
1707 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1646 1708
1647 if (ext3_journal_current_handle()) 1709 if (ext3_journal_current_handle())
1648 goto out_fail; 1710 goto out_fail;
1649 1711
1712 trace_ext3_writeback_writepage(page);
1650 if (page_has_buffers(page)) { 1713 if (page_has_buffers(page)) {
1651 if (!walk_page_buffers(NULL, page_buffers(page), 0, 1714 if (!walk_page_buffers(NULL, page_buffers(page), 0,
1652 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { 1715 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1684,11 +1747,18 @@ static int ext3_journalled_writepage(struct page *page,
1684 int err; 1747 int err;
1685 1748
1686 J_ASSERT(PageLocked(page)); 1749 J_ASSERT(PageLocked(page));
1687 WARN_ON_ONCE(IS_RDONLY(inode)); 1750 /*
1751 * We don't want to warn for emergency remount. The condition is
1752 * ordered to avoid dereferencing inode->i_sb in non-error case to
1753 * avoid slow-downs.
1754 */
1755 WARN_ON_ONCE(IS_RDONLY(inode) &&
1756 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1688 1757
1689 if (ext3_journal_current_handle()) 1758 if (ext3_journal_current_handle())
1690 goto no_write; 1759 goto no_write;
1691 1760
1761 trace_ext3_journalled_writepage(page);
1692 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1762 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1693 if (IS_ERR(handle)) { 1763 if (IS_ERR(handle)) {
1694 ret = PTR_ERR(handle); 1764 ret = PTR_ERR(handle);
@@ -1715,6 +1785,8 @@ static int ext3_journalled_writepage(struct page *page,
1715 if (ret == 0) 1785 if (ret == 0)
1716 ret = err; 1786 ret = err;
1717 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1787 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1788 atomic_set(&EXT3_I(inode)->i_datasync_tid,
1789 handle->h_transaction->t_tid);
1718 unlock_page(page); 1790 unlock_page(page);
1719 } else { 1791 } else {
1720 /* 1792 /*
@@ -1739,6 +1811,7 @@ out_unlock:
1739 1811
1740static int ext3_readpage(struct file *file, struct page *page) 1812static int ext3_readpage(struct file *file, struct page *page)
1741{ 1813{
1814 trace_ext3_readpage(page);
1742 return mpage_readpage(page, ext3_get_block); 1815 return mpage_readpage(page, ext3_get_block);
1743} 1816}
1744 1817
@@ -1753,6 +1826,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
1753{ 1826{
1754 journal_t *journal = EXT3_JOURNAL(page->mapping->host); 1827 journal_t *journal = EXT3_JOURNAL(page->mapping->host);
1755 1828
1829 trace_ext3_invalidatepage(page, offset);
1830
1756 /* 1831 /*
1757 * If it's a full truncate we just forget about the pending dirtying 1832 * If it's a full truncate we just forget about the pending dirtying
1758 */ 1833 */
@@ -1766,6 +1841,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
1766{ 1841{
1767 journal_t *journal = EXT3_JOURNAL(page->mapping->host); 1842 journal_t *journal = EXT3_JOURNAL(page->mapping->host);
1768 1843
1844 trace_ext3_releasepage(page);
1769 WARN_ON(PageChecked(page)); 1845 WARN_ON(PageChecked(page));
1770 if (!page_has_buffers(page)) 1846 if (!page_has_buffers(page))
1771 return 0; 1847 return 0;
@@ -1794,6 +1870,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1794 size_t count = iov_length(iov, nr_segs); 1870 size_t count = iov_length(iov, nr_segs);
1795 int retries = 0; 1871 int retries = 0;
1796 1872
1873 trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
1874
1797 if (rw == WRITE) { 1875 if (rw == WRITE) {
1798 loff_t final_size = offset + count; 1876 loff_t final_size = offset + count;
1799 1877
@@ -1816,9 +1894,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1816 } 1894 }
1817 1895
1818retry: 1896retry:
1819 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1897 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
1820 offset, nr_segs, 1898 ext3_get_block);
1821 ext3_get_block, NULL);
1822 /* 1899 /*
1823 * In case of error extending write may have instantiated a few 1900 * In case of error extending write may have instantiated a few
1824 * blocks outside i_size. Trim these off again. 1901 * blocks outside i_size. Trim these off again.
@@ -1828,7 +1905,7 @@ retry:
1828 loff_t end = offset + iov_length(iov, nr_segs); 1905 loff_t end = offset + iov_length(iov, nr_segs);
1829 1906
1830 if (end > isize) 1907 if (end > isize)
1831 vmtruncate(inode, isize); 1908 ext3_truncate_failed_direct_write(inode);
1832 } 1909 }
1833 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1910 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1834 goto retry; 1911 goto retry;
@@ -1842,7 +1919,7 @@ retry:
1842 /* This is really bad luck. We've written the data 1919 /* This is really bad luck. We've written the data
1843 * but cannot extend i_size. Truncate allocated blocks 1920 * but cannot extend i_size. Truncate allocated blocks
1844 * and pretend the write failed... */ 1921 * and pretend the write failed... */
1845 ext3_truncate(inode); 1922 ext3_truncate_failed_direct_write(inode);
1846 ret = PTR_ERR(handle); 1923 ret = PTR_ERR(handle);
1847 goto out; 1924 goto out;
1848 } 1925 }
@@ -1868,6 +1945,8 @@ retry:
1868 ret = err; 1945 ret = err;
1869 } 1946 }
1870out: 1947out:
1948 trace_ext3_direct_IO_exit(inode, offset,
1949 iov_length(iov, nr_segs), rw, ret);
1871 return ret; 1950 return ret;
1872} 1951}
1873 1952
@@ -1950,17 +2029,24 @@ void ext3_set_aops(struct inode *inode)
1950 * This is required during truncate. We need to physically zero the tail end 2029 * This is required during truncate. We need to physically zero the tail end
1951 * of that block so it doesn't yield old data if the file is later grown. 2030 * of that block so it doesn't yield old data if the file is later grown.
1952 */ 2031 */
1953static int ext3_block_truncate_page(handle_t *handle, struct page *page, 2032static int ext3_block_truncate_page(struct inode *inode, loff_t from)
1954 struct address_space *mapping, loff_t from)
1955{ 2033{
1956 ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; 2034 ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
1957 unsigned offset = from & (PAGE_CACHE_SIZE-1); 2035 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
1958 unsigned blocksize, iblock, length, pos; 2036 unsigned blocksize, iblock, length, pos;
1959 struct inode *inode = mapping->host; 2037 struct page *page;
2038 handle_t *handle = NULL;
1960 struct buffer_head *bh; 2039 struct buffer_head *bh;
1961 int err = 0; 2040 int err = 0;
1962 2041
2042 /* Truncated on block boundary - nothing to do */
1963 blocksize = inode->i_sb->s_blocksize; 2043 blocksize = inode->i_sb->s_blocksize;
2044 if ((from & (blocksize - 1)) == 0)
2045 return 0;
2046
2047 page = grab_cache_page(inode->i_mapping, index);
2048 if (!page)
2049 return -ENOMEM;
1964 length = blocksize - (offset & (blocksize - 1)); 2050 length = blocksize - (offset & (blocksize - 1));
1965 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 2051 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
1966 2052
@@ -2005,11 +2091,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
2005 goto unlock; 2091 goto unlock;
2006 } 2092 }
2007 2093
2094 /* data=writeback mode doesn't need transaction to zero-out data */
2095 if (!ext3_should_writeback_data(inode)) {
2096 /* We journal at most one block */
2097 handle = ext3_journal_start(inode, 1);
2098 if (IS_ERR(handle)) {
2099 clear_highpage(page);
2100 flush_dcache_page(page);
2101 err = PTR_ERR(handle);
2102 goto unlock;
2103 }
2104 }
2105
2008 if (ext3_should_journal_data(inode)) { 2106 if (ext3_should_journal_data(inode)) {
2009 BUFFER_TRACE(bh, "get write access"); 2107 BUFFER_TRACE(bh, "get write access");
2010 err = ext3_journal_get_write_access(handle, bh); 2108 err = ext3_journal_get_write_access(handle, bh);
2011 if (err) 2109 if (err)
2012 goto unlock; 2110 goto stop;
2013 } 2111 }
2014 2112
2015 zero_user(page, offset, length); 2113 zero_user(page, offset, length);
@@ -2023,6 +2121,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
2023 err = ext3_journal_dirty_data(handle, bh); 2121 err = ext3_journal_dirty_data(handle, bh);
2024 mark_buffer_dirty(bh); 2122 mark_buffer_dirty(bh);
2025 } 2123 }
2124stop:
2125 if (handle)
2126 ext3_journal_stop(handle);
2026 2127
2027unlock: 2128unlock:
2028 unlock_page(page); 2129 unlock_page(page);
@@ -2391,8 +2492,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2391 2492
2392int ext3_can_truncate(struct inode *inode) 2493int ext3_can_truncate(struct inode *inode)
2393{ 2494{
2394 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2395 return 0;
2396 if (S_ISREG(inode->i_mode)) 2495 if (S_ISREG(inode->i_mode))
2397 return 1; 2496 return 1;
2398 if (S_ISDIR(inode->i_mode)) 2497 if (S_ISDIR(inode->i_mode))
@@ -2436,7 +2535,6 @@ void ext3_truncate(struct inode *inode)
2436 struct ext3_inode_info *ei = EXT3_I(inode); 2535 struct ext3_inode_info *ei = EXT3_I(inode);
2437 __le32 *i_data = ei->i_data; 2536 __le32 *i_data = ei->i_data;
2438 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); 2537 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
2439 struct address_space *mapping = inode->i_mapping;
2440 int offsets[4]; 2538 int offsets[4];
2441 Indirect chain[4]; 2539 Indirect chain[4];
2442 Indirect *partial; 2540 Indirect *partial;
@@ -2444,7 +2542,8 @@ void ext3_truncate(struct inode *inode)
2444 int n; 2542 int n;
2445 long last_block; 2543 long last_block;
2446 unsigned blocksize = inode->i_sb->s_blocksize; 2544 unsigned blocksize = inode->i_sb->s_blocksize;
2447 struct page *page; 2545
2546 trace_ext3_truncate_enter(inode);
2448 2547
2449 if (!ext3_can_truncate(inode)) 2548 if (!ext3_can_truncate(inode))
2450 goto out_notrans; 2549 goto out_notrans;
@@ -2452,37 +2551,12 @@ void ext3_truncate(struct inode *inode)
2452 if (inode->i_size == 0 && ext3_should_writeback_data(inode)) 2551 if (inode->i_size == 0 && ext3_should_writeback_data(inode))
2453 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); 2552 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
2454 2553
2455 /*
2456 * We have to lock the EOF page here, because lock_page() nests
2457 * outside journal_start().
2458 */
2459 if ((inode->i_size & (blocksize - 1)) == 0) {
2460 /* Block boundary? Nothing to do */
2461 page = NULL;
2462 } else {
2463 page = grab_cache_page(mapping,
2464 inode->i_size >> PAGE_CACHE_SHIFT);
2465 if (!page)
2466 goto out_notrans;
2467 }
2468
2469 handle = start_transaction(inode); 2554 handle = start_transaction(inode);
2470 if (IS_ERR(handle)) { 2555 if (IS_ERR(handle))
2471 if (page) {
2472 clear_highpage(page);
2473 flush_dcache_page(page);
2474 unlock_page(page);
2475 page_cache_release(page);
2476 }
2477 goto out_notrans; 2556 goto out_notrans;
2478 }
2479 2557
2480 last_block = (inode->i_size + blocksize-1) 2558 last_block = (inode->i_size + blocksize-1)
2481 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); 2559 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
2482
2483 if (page)
2484 ext3_block_truncate_page(handle, page, mapping, inode->i_size);
2485
2486 n = ext3_block_to_path(inode, last_block, offsets, NULL); 2560 n = ext3_block_to_path(inode, last_block, offsets, NULL);
2487 if (n == 0) 2561 if (n == 0)
2488 goto out_stop; /* error */ 2562 goto out_stop; /* error */
@@ -2597,6 +2671,7 @@ out_stop:
2597 ext3_orphan_del(handle, inode); 2671 ext3_orphan_del(handle, inode);
2598 2672
2599 ext3_journal_stop(handle); 2673 ext3_journal_stop(handle);
2674 trace_ext3_truncate_exit(inode);
2600 return; 2675 return;
2601out_notrans: 2676out_notrans:
2602 /* 2677 /*
@@ -2605,6 +2680,7 @@ out_notrans:
2605 */ 2680 */
2606 if (inode->i_nlink) 2681 if (inode->i_nlink)
2607 ext3_orphan_del(NULL, inode); 2682 ext3_orphan_del(NULL, inode);
2683 trace_ext3_truncate_exit(inode);
2608} 2684}
2609 2685
2610static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, 2686static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2746,9 +2822,10 @@ make_io:
2746 * has in-inode xattrs, or we don't have this inode in memory. 2822 * has in-inode xattrs, or we don't have this inode in memory.
2747 * Read the block from disk. 2823 * Read the block from disk.
2748 */ 2824 */
2825 trace_ext3_load_inode(inode);
2749 get_bh(bh); 2826 get_bh(bh);
2750 bh->b_end_io = end_buffer_read_sync; 2827 bh->b_end_io = end_buffer_read_sync;
2751 submit_bh(READ_META, bh); 2828 submit_bh(READ | REQ_META | REQ_PRIO, bh);
2752 wait_on_buffer(bh); 2829 wait_on_buffer(bh);
2753 if (!buffer_uptodate(bh)) { 2830 if (!buffer_uptodate(bh)) {
2754 ext3_error(inode->i_sb, "ext3_get_inode_loc", 2831 ext3_error(inode->i_sb, "ext3_get_inode_loc",
@@ -3216,6 +3293,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3216 ext3_journal_stop(handle); 3293 ext3_journal_stop(handle);
3217 } 3294 }
3218 3295
3296 if (attr->ia_valid & ATTR_SIZE)
3297 inode_dio_wait(inode);
3298
3219 if (S_ISREG(inode->i_mode) && 3299 if (S_ISREG(inode->i_mode) &&
3220 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 3300 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
3221 handle_t *handle; 3301 handle_t *handle;
@@ -3227,18 +3307,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3227 } 3307 }
3228 3308
3229 error = ext3_orphan_add(handle, inode); 3309 error = ext3_orphan_add(handle, inode);
3310 if (error) {
3311 ext3_journal_stop(handle);
3312 goto err_out;
3313 }
3230 EXT3_I(inode)->i_disksize = attr->ia_size; 3314 EXT3_I(inode)->i_disksize = attr->ia_size;
3231 rc = ext3_mark_inode_dirty(handle, inode); 3315 error = ext3_mark_inode_dirty(handle, inode);
3232 if (!error)
3233 error = rc;
3234 ext3_journal_stop(handle); 3316 ext3_journal_stop(handle);
3317 if (error) {
3318 /* Some hard fs error must have happened. Bail out. */
3319 ext3_orphan_del(NULL, inode);
3320 goto err_out;
3321 }
3322 rc = ext3_block_truncate_page(inode, attr->ia_size);
3323 if (rc) {
3324 /* Cleanup orphan list and exit */
3325 handle = ext3_journal_start(inode, 3);
3326 if (IS_ERR(handle)) {
3327 ext3_orphan_del(NULL, inode);
3328 goto err_out;
3329 }
3330 ext3_orphan_del(handle, inode);
3331 ext3_journal_stop(handle);
3332 goto err_out;
3333 }
3235 } 3334 }
3236 3335
3237 if ((attr->ia_valid & ATTR_SIZE) && 3336 if ((attr->ia_valid & ATTR_SIZE) &&
3238 attr->ia_size != i_size_read(inode)) { 3337 attr->ia_size != i_size_read(inode)) {
3239 rc = vmtruncate(inode, attr->ia_size); 3338 truncate_setsize(inode, attr->ia_size);
3240 if (rc) 3339 ext3_truncate(inode);
3241 goto err_out;
3242 } 3340 }
3243 3341
3244 setattr_copy(inode, attr); 3342 setattr_copy(inode, attr);
@@ -3372,6 +3470,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
3372 int err; 3470 int err;
3373 3471
3374 might_sleep(); 3472 might_sleep();
3473 trace_ext3_mark_inode_dirty(inode, _RET_IP_);
3375 err = ext3_reserve_inode_write(handle, inode, &iloc); 3474 err = ext3_reserve_inode_write(handle, inode, &iloc);
3376 if (!err) 3475 if (!err)
3377 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 3476 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index f4090bd2f34..c7f43944f16 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -285,7 +285,7 @@ group_add_out:
285 if (!capable(CAP_SYS_ADMIN)) 285 if (!capable(CAP_SYS_ADMIN))
286 return -EPERM; 286 return -EPERM;
287 287
288 if (copy_from_user(&range, (struct fstrim_range *)arg, 288 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
289 sizeof(range))) 289 sizeof(range)))
290 return -EFAULT; 290 return -EFAULT;
291 291
@@ -293,7 +293,7 @@ group_add_out:
293 if (ret < 0) 293 if (ret < 0)
294 return ret; 294 return ret;
295 295
296 if (copy_to_user((struct fstrim_range *)arg, &range, 296 if (copy_to_user((struct fstrim_range __user *)arg, &range,
297 sizeof(range))) 297 sizeof(range)))
298 return -EFAULT; 298 return -EFAULT;
299 299
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 34b6d9bfc48..0629e09f651 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,7 @@
36#include <linux/quotaops.h> 36#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <trace/events/ext3.h>
39 40
40#include "namei.h" 41#include "namei.h"
41#include "xattr.h" 42#include "xattr.h"
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
287 while (len--) printk("%c", *name++); 288 while (len--) printk("%c", *name++);
288 ext3fs_dirhash(de->name, de->name_len, &h); 289 ext3fs_dirhash(de->name, de->name_len, &h);
289 printk(":%x.%u ", h.hash, 290 printk(":%x.%u ", h.hash,
290 ((char *) de - base)); 291 (unsigned) ((char *) de - base));
291 } 292 }
292 space += EXT3_DIR_REC_LEN(de->name_len); 293 space += EXT3_DIR_REC_LEN(de->name_len);
293 names++; 294 names++;
@@ -921,7 +922,8 @@ restart:
921 bh = ext3_getblk(NULL, dir, b++, 0, &err); 922 bh = ext3_getblk(NULL, dir, b++, 0, &err);
922 bh_use[ra_max] = bh; 923 bh_use[ra_max] = bh;
923 if (bh) 924 if (bh)
924 ll_rw_block(READ_META, 1, &bh); 925 ll_rw_block(READ | REQ_META | REQ_PRIO,
926 1, &bh);
925 } 927 }
926 } 928 }
927 if ((bh = bh_use[ra_ptr++]) == NULL) 929 if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1013,7 +1015,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
1013 1015
1014 *err = -ENOENT; 1016 *err = -ENOENT;
1015errout: 1017errout:
1016 dxtrace(printk("%s not found\n", name)); 1018 dxtrace(printk("%s not found\n", entry->name));
1017 dx_release (frames); 1019 dx_release (frames);
1018 return NULL; 1020 return NULL;
1019} 1021}
@@ -1038,15 +1040,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1038 return ERR_PTR(-EIO); 1040 return ERR_PTR(-EIO);
1039 } 1041 }
1040 inode = ext3_iget(dir->i_sb, ino); 1042 inode = ext3_iget(dir->i_sb, ino);
1041 if (IS_ERR(inode)) { 1043 if (inode == ERR_PTR(-ESTALE)) {
1042 if (PTR_ERR(inode) == -ESTALE) { 1044 ext3_error(dir->i_sb, __func__,
1043 ext3_error(dir->i_sb, __func__, 1045 "deleted inode referenced: %lu",
1044 "deleted inode referenced: %lu", 1046 ino);
1045 ino); 1047 return ERR_PTR(-EIO);
1046 return ERR_PTR(-EIO);
1047 } else {
1048 return ERR_CAST(inode);
1049 }
1050 } 1048 }
1051 } 1049 }
1052 return d_splice_alias(inode, dentry); 1050 return d_splice_alias(inode, dentry);
@@ -2144,6 +2142,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2144 struct ext3_dir_entry_2 * de; 2142 struct ext3_dir_entry_2 * de;
2145 handle_t *handle; 2143 handle_t *handle;
2146 2144
2145 trace_ext3_unlink_enter(dir, dentry);
2147 /* Initialize quotas before so that eventual writes go 2146 /* Initialize quotas before so that eventual writes go
2148 * in separate transaction */ 2147 * in separate transaction */
2149 dquot_initialize(dir); 2148 dquot_initialize(dir);
@@ -2189,6 +2188,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2189end_unlink: 2188end_unlink:
2190 ext3_journal_stop(handle); 2189 ext3_journal_stop(handle);
2191 brelse (bh); 2190 brelse (bh);
2191 trace_ext3_unlink_exit(dentry, retval);
2192 return retval; 2192 return retval;
2193} 2193}
2194 2194
@@ -2210,9 +2210,11 @@ static int ext3_symlink (struct inode * dir,
2210 /* 2210 /*
2211 * For non-fast symlinks, we just allocate inode and put it on 2211 * For non-fast symlinks, we just allocate inode and put it on
2212 * orphan list in the first transaction => we need bitmap, 2212 * orphan list in the first transaction => we need bitmap,
2213 * group descriptor, sb, inode block, quota blocks. 2213 * group descriptor, sb, inode block, quota blocks, and
2214 * possibly selinux xattr blocks.
2214 */ 2215 */
2215 credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); 2216 credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2217 EXT3_XATTR_TRANS_BLOCKS;
2216 } else { 2218 } else {
2217 /* 2219 /*
2218 * Fast symlink. We have to add entry to directory 2220 * Fast symlink. We have to add entry to directory
@@ -2533,7 +2535,7 @@ const struct inode_operations ext3_dir_inode_operations = {
2533 .listxattr = ext3_listxattr, 2535 .listxattr = ext3_listxattr,
2534 .removexattr = generic_removexattr, 2536 .removexattr = generic_removexattr,
2535#endif 2537#endif
2536 .check_acl = ext3_check_acl, 2538 .get_acl = ext3_get_acl,
2537}; 2539};
2538 2540
2539const struct inode_operations ext3_special_inode_operations = { 2541const struct inode_operations ext3_special_inode_operations = {
@@ -2544,5 +2546,5 @@ const struct inode_operations ext3_special_inode_operations = {
2544 .listxattr = ext3_listxattr, 2546 .listxattr = ext3_listxattr,
2545 .removexattr = generic_removexattr, 2547 .removexattr = generic_removexattr,
2546#endif 2548#endif
2547 .check_acl = ext3_check_acl, 2549 .get_acl = ext3_get_acl,
2548}; 2550};
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index aad153ef6b7..7beb69ae001 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,9 @@
44#include "acl.h" 44#include "acl.h"
45#include "namei.h" 45#include "namei.h"
46 46
47#define CREATE_TRACE_POINTS
48#include <trace/events/ext3.h>
49
47#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED 50#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
48 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA 51 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
49#else 52#else
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
497 return &ei->vfs_inode; 500 return &ei->vfs_inode;
498} 501}
499 502
503static int ext3_drop_inode(struct inode *inode)
504{
505 int drop = generic_drop_inode(inode);
506
507 trace_ext3_drop_inode(inode, drop);
508 return drop;
509}
510
500static void ext3_i_callback(struct rcu_head *head) 511static void ext3_i_callback(struct rcu_head *head)
501{ 512{
502 struct inode *inode = container_of(head, struct inode, i_rcu); 513 struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = {
788 .destroy_inode = ext3_destroy_inode, 799 .destroy_inode = ext3_destroy_inode,
789 .write_inode = ext3_write_inode, 800 .write_inode = ext3_write_inode,
790 .dirty_inode = ext3_dirty_inode, 801 .dirty_inode = ext3_dirty_inode,
802 .drop_inode = ext3_drop_inode,
791 .evict_inode = ext3_evict_inode, 803 .evict_inode = ext3_evict_inode,
792 .put_super = ext3_put_super, 804 .put_super = ext3_put_super,
793 .sync_fs = ext3_sync_fs, 805 .sync_fs = ext3_sync_fs,
@@ -1718,6 +1730,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1718 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1730 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1719 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1731 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1720 1732
1733 /* enable barriers by default */
1734 set_opt(sbi->s_mount_opt, BARRIER);
1721 set_opt(sbi->s_mount_opt, RESERVATION); 1735 set_opt(sbi->s_mount_opt, RESERVATION);
1722 1736
1723 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1737 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
@@ -2507,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
2507{ 2521{
2508 tid_t target; 2522 tid_t target;
2509 2523
2524 trace_ext3_sync_fs(sb, wait);
2510 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2525 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2511 if (wait) 2526 if (wait)
2512 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2527 log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 32e6cc23bd9..d565759d82e 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -803,8 +803,16 @@ inserted:
803 /* We need to allocate a new block */ 803 /* We need to allocate a new block */
804 ext3_fsblk_t goal = ext3_group_first_block_no(sb, 804 ext3_fsblk_t goal = ext3_group_first_block_no(sb,
805 EXT3_I(inode)->i_block_group); 805 EXT3_I(inode)->i_block_group);
806 ext3_fsblk_t block = ext3_new_block(handle, inode, 806 ext3_fsblk_t block;
807 goal, &error); 807
808 /*
809 * Protect us against concurrent allocations to the
810 * same inode from ext3_..._writepage(). Reservation
811 * code does not expect racing allocations.
812 */
813 mutex_lock(&EXT3_I(inode)->truncate_mutex);
814 block = ext3_new_block(handle, inode, goal, &error);
815 mutex_unlock(&EXT3_I(inode)->truncate_mutex);
808 if (error) 816 if (error)
809 goto cleanup; 817 goto cleanup;
810 ea_idebug(inode, "creating block %d", block); 818 ea_idebug(inode, "creating block %d", block);