diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-26 14:34:40 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-26 14:34:40 -0400 |
commit | 2ac232f37fa0e8551856a575fe299c47b65b4d66 (patch) | |
tree | 58ff15ecdbc383415a82ea678e5191db16a479f3 /fs | |
parent | fa8f53ace4af9470d8414427cb3dc3c0ffc4f182 (diff) | |
parent | 5cf49d763eb141d236e92be6d4a0dc94e31fa886 (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
jbd: change the field "b_cow_tid" of struct journal_head from type unsigned to tid_t
ext3.txt: update the links in the section "useful links" to the latest ones
ext3: Fix data corruption in inodes with journalled data
ext2: check xattr name_len before acquiring xattr_sem in ext2_xattr_get
ext3: Fix compilation with -DDX_DEBUG
quota: Remove unused declaration
jbd: Use WRITE_SYNC in journal checkpoint.
jbd: Fix oops in journal_remove_journal_head()
ext3: Return -EINVAL when start is beyond the end of fs in ext3_trim_fs()
ext3/ioctl.c: silence sparse warnings about different address spaces
ext3/ext4 Documentation: remove bh/nobh since it has been deprecated
ext3: Improve truncate error handling
ext3: use proper little-endian bitops
ext2: include fs.h into ext2_fs.h
ext3: Fix oops in ext3_try_to_allocate_with_rsv()
jbd: fix a bug of leaking jh->b_jcount
jbd: remove dependency on __GFP_NOFAIL
ext3: Convert ext3 to new truncate calling convention
jbd: Add fixed tracepoints
ext3: Add fixed tracepoints
Resolve conflicts in fs/ext3/fsync.c due to fsync locking push-down and
new fixed tracepoints.
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext2/xattr.c | 10 | ||||
-rw-r--r-- | fs/ext3/balloc.c | 38 | ||||
-rw-r--r-- | fs/ext3/file.c | 1 | ||||
-rw-r--r-- | fs/ext3/fsync.c | 11 | ||||
-rw-r--r-- | fs/ext3/ialloc.c | 4 | ||||
-rw-r--r-- | fs/ext3/inode.c | 193 | ||||
-rw-r--r-- | fs/ext3/ioctl.c | 4 | ||||
-rw-r--r-- | fs/ext3/namei.c | 7 | ||||
-rw-r--r-- | fs/ext3/super.c | 13 | ||||
-rw-r--r-- | fs/ext3/xattr.c | 12 | ||||
-rw-r--r-- | fs/jbd/checkpoint.c | 37 | ||||
-rw-r--r-- | fs/jbd/commit.c | 57 | ||||
-rw-r--r-- | fs/jbd/journal.c | 99 | ||||
-rw-r--r-- | fs/jbd/transaction.c | 83 |
14 files changed, 339 insertions, 230 deletions
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 529970617a2..d27b71f1d18 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c | |||
@@ -161,6 +161,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, | |||
161 | 161 | ||
162 | if (name == NULL) | 162 | if (name == NULL) |
163 | return -EINVAL; | 163 | return -EINVAL; |
164 | name_len = strlen(name); | ||
165 | if (name_len > 255) | ||
166 | return -ERANGE; | ||
167 | |||
164 | down_read(&EXT2_I(inode)->xattr_sem); | 168 | down_read(&EXT2_I(inode)->xattr_sem); |
165 | error = -ENODATA; | 169 | error = -ENODATA; |
166 | if (!EXT2_I(inode)->i_file_acl) | 170 | if (!EXT2_I(inode)->i_file_acl) |
@@ -181,12 +185,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", | |||
181 | error = -EIO; | 185 | error = -EIO; |
182 | goto cleanup; | 186 | goto cleanup; |
183 | } | 187 | } |
184 | /* find named attribute */ | ||
185 | name_len = strlen(name); | ||
186 | 188 | ||
187 | error = -ERANGE; | 189 | /* find named attribute */ |
188 | if (name_len > 255) | ||
189 | goto cleanup; | ||
190 | entry = FIRST_ENTRY(bh); | 190 | entry = FIRST_ENTRY(bh); |
191 | while (!IS_LAST_ENTRY(entry)) { | 191 | while (!IS_LAST_ENTRY(entry)) { |
192 | struct ext2_xattr_entry *next = | 192 | struct ext2_xattr_entry *next = |
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index fe52297e31a..6386d76f44a 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/quotaops.h> | 21 | #include <linux/quotaops.h> |
22 | #include <linux/buffer_head.h> | 22 | #include <linux/buffer_head.h> |
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <trace/events/ext3.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * balloc.c contains the blocks allocation and deallocation routines | 27 | * balloc.c contains the blocks allocation and deallocation routines |
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) | |||
161 | desc = ext3_get_group_desc(sb, block_group, NULL); | 162 | desc = ext3_get_group_desc(sb, block_group, NULL); |
162 | if (!desc) | 163 | if (!desc) |
163 | return NULL; | 164 | return NULL; |
165 | trace_ext3_read_block_bitmap(sb, block_group); | ||
164 | bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); | 166 | bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); |
165 | bh = sb_getblk(sb, bitmap_blk); | 167 | bh = sb_getblk(sb, bitmap_blk); |
166 | if (unlikely(!bh)) { | 168 | if (unlikely(!bh)) { |
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb, | |||
351 | struct rb_node * parent = NULL; | 353 | struct rb_node * parent = NULL; |
352 | struct ext3_reserve_window_node *this; | 354 | struct ext3_reserve_window_node *this; |
353 | 355 | ||
356 | trace_ext3_rsv_window_add(sb, rsv); | ||
354 | while (*p) | 357 | while (*p) |
355 | { | 358 | { |
356 | parent = *p; | 359 | parent = *p; |
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode) | |||
476 | rsv = &block_i->rsv_window_node; | 479 | rsv = &block_i->rsv_window_node; |
477 | if (!rsv_is_empty(&rsv->rsv_window)) { | 480 | if (!rsv_is_empty(&rsv->rsv_window)) { |
478 | spin_lock(rsv_lock); | 481 | spin_lock(rsv_lock); |
479 | if (!rsv_is_empty(&rsv->rsv_window)) | 482 | if (!rsv_is_empty(&rsv->rsv_window)) { |
483 | trace_ext3_discard_reservation(inode, rsv); | ||
480 | rsv_window_remove(inode->i_sb, rsv); | 484 | rsv_window_remove(inode->i_sb, rsv); |
485 | } | ||
481 | spin_unlock(rsv_lock); | 486 | spin_unlock(rsv_lock); |
482 | } | 487 | } |
483 | } | 488 | } |
@@ -683,14 +688,10 @@ error_return: | |||
683 | void ext3_free_blocks(handle_t *handle, struct inode *inode, | 688 | void ext3_free_blocks(handle_t *handle, struct inode *inode, |
684 | ext3_fsblk_t block, unsigned long count) | 689 | ext3_fsblk_t block, unsigned long count) |
685 | { | 690 | { |
686 | struct super_block * sb; | 691 | struct super_block *sb = inode->i_sb; |
687 | unsigned long dquot_freed_blocks; | 692 | unsigned long dquot_freed_blocks; |
688 | 693 | ||
689 | sb = inode->i_sb; | 694 | trace_ext3_free_blocks(inode, block, count); |
690 | if (!sb) { | ||
691 | printk ("ext3_free_blocks: nonexistent device"); | ||
692 | return; | ||
693 | } | ||
694 | ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); | 695 | ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); |
695 | if (dquot_freed_blocks) | 696 | if (dquot_freed_blocks) |
696 | dquot_free_block(inode, dquot_freed_blocks); | 697 | dquot_free_block(inode, dquot_freed_blocks); |
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | |||
1136 | else | 1137 | else |
1137 | start_block = grp_goal + group_first_block; | 1138 | start_block = grp_goal + group_first_block; |
1138 | 1139 | ||
1140 | trace_ext3_alloc_new_reservation(sb, start_block); | ||
1139 | size = my_rsv->rsv_goal_size; | 1141 | size = my_rsv->rsv_goal_size; |
1140 | 1142 | ||
1141 | if (!rsv_is_empty(&my_rsv->rsv_window)) { | 1143 | if (!rsv_is_empty(&my_rsv->rsv_window)) { |
@@ -1230,8 +1232,11 @@ retry: | |||
1230 | * check if the first free block is within the | 1232 | * check if the first free block is within the |
1231 | * free space we just reserved | 1233 | * free space we just reserved |
1232 | */ | 1234 | */ |
1233 | if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) | 1235 | if (start_block >= my_rsv->rsv_start && |
1236 | start_block <= my_rsv->rsv_end) { | ||
1237 | trace_ext3_reserved(sb, start_block, my_rsv); | ||
1234 | return 0; /* success */ | 1238 | return 0; /* success */ |
1239 | } | ||
1235 | /* | 1240 | /* |
1236 | * if the first free bit we found is out of the reservable space | 1241 | * if the first free bit we found is out of the reservable space |
1237 | * continue search for next reservable space, | 1242 | * continue search for next reservable space, |
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, | |||
1514 | 1519 | ||
1515 | *errp = -ENOSPC; | 1520 | *errp = -ENOSPC; |
1516 | sb = inode->i_sb; | 1521 | sb = inode->i_sb; |
1517 | if (!sb) { | ||
1518 | printk("ext3_new_block: nonexistent device"); | ||
1519 | return 0; | ||
1520 | } | ||
1521 | 1522 | ||
1522 | /* | 1523 | /* |
1523 | * Check quota for allocation of this block. | 1524 | * Check quota for allocation of this block. |
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, | |||
1528 | return 0; | 1529 | return 0; |
1529 | } | 1530 | } |
1530 | 1531 | ||
1532 | trace_ext3_request_blocks(inode, goal, num); | ||
1533 | |||
1531 | sbi = EXT3_SB(sb); | 1534 | sbi = EXT3_SB(sb); |
1532 | es = EXT3_SB(sb)->s_es; | 1535 | es = sbi->s_es; |
1533 | ext3_debug("goal=%lu.\n", goal); | 1536 | ext3_debug("goal=%lu.\n", goal); |
1534 | /* | 1537 | /* |
1535 | * Allocate a block from reservation only when | 1538 | * Allocate a block from reservation only when |
@@ -1742,6 +1745,10 @@ allocated: | |||
1742 | brelse(bitmap_bh); | 1745 | brelse(bitmap_bh); |
1743 | dquot_free_block(inode, *count-num); | 1746 | dquot_free_block(inode, *count-num); |
1744 | *count = num; | 1747 | *count = num; |
1748 | |||
1749 | trace_ext3_allocate_blocks(inode, goal, num, | ||
1750 | (unsigned long long)ret_block); | ||
1751 | |||
1745 | return ret_block; | 1752 | return ret_block; |
1746 | 1753 | ||
1747 | io_error: | 1754 | io_error: |
@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group, | |||
1996 | if ((next - start) < minblocks) | 2003 | if ((next - start) < minblocks) |
1997 | goto free_extent; | 2004 | goto free_extent; |
1998 | 2005 | ||
2006 | trace_ext3_discard_blocks(sb, discard_block, next - start); | ||
1999 | /* Send the TRIM command down to the device */ | 2007 | /* Send the TRIM command down to the device */ |
2000 | err = sb_issue_discard(sb, discard_block, next - start, | 2008 | err = sb_issue_discard(sb, discard_block, next - start, |
2001 | GFP_NOFS, 0); | 2009 | GFP_NOFS, 0); |
@@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
2100 | if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) | 2108 | if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) |
2101 | return -EINVAL; | 2109 | return -EINVAL; |
2102 | if (start >= max_blks) | 2110 | if (start >= max_blks) |
2103 | goto out; | 2111 | return -EINVAL; |
2104 | if (start + len > max_blks) | 2112 | if (start + len > max_blks) |
2105 | len = max_blks - start; | 2113 | len = max_blks - start; |
2106 | 2114 | ||
@@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
2148 | 2156 | ||
2149 | if (ret >= 0) | 2157 | if (ret >= 0) |
2150 | ret = 0; | 2158 | ret = 0; |
2151 | |||
2152 | out: | ||
2153 | range->len = trimmed * sb->s_blocksize; | 2159 | range->len = trimmed * sb->s_blocksize; |
2154 | 2160 | ||
2155 | return ret; | 2161 | return ret; |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 2be5b99097f..724df69847d 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = { | |||
71 | }; | 71 | }; |
72 | 72 | ||
73 | const struct inode_operations ext3_file_inode_operations = { | 73 | const struct inode_operations ext3_file_inode_operations = { |
74 | .truncate = ext3_truncate, | ||
75 | .setattr = ext3_setattr, | 74 | .setattr = ext3_setattr, |
76 | #ifdef CONFIG_EXT3_FS_XATTR | 75 | #ifdef CONFIG_EXT3_FS_XATTR |
77 | .setxattr = generic_setxattr, | 76 | .setxattr = generic_setxattr, |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 0bcf63adb80..d494c554c6e 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/jbd.h> | 30 | #include <linux/jbd.h> |
31 | #include <linux/ext3_fs.h> | 31 | #include <linux/ext3_fs.h> |
32 | #include <linux/ext3_jbd.h> | 32 | #include <linux/ext3_jbd.h> |
33 | #include <trace/events/ext3.h> | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * akpm: A new design for ext3_sync_file(). | 36 | * akpm: A new design for ext3_sync_file(). |
@@ -51,12 +52,14 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
51 | int ret, needs_barrier = 0; | 52 | int ret, needs_barrier = 0; |
52 | tid_t commit_tid; | 53 | tid_t commit_tid; |
53 | 54 | ||
55 | trace_ext3_sync_file_enter(file, datasync); | ||
56 | |||
54 | if (inode->i_sb->s_flags & MS_RDONLY) | 57 | if (inode->i_sb->s_flags & MS_RDONLY) |
55 | return 0; | 58 | return 0; |
56 | 59 | ||
57 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 60 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
58 | if (ret) | 61 | if (ret) |
59 | return ret; | 62 | goto out; |
60 | 63 | ||
61 | /* | 64 | /* |
62 | * Taking the mutex here just to keep consistent with how fsync was | 65 | * Taking the mutex here just to keep consistent with how fsync was |
@@ -83,7 +86,8 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
83 | */ | 86 | */ |
84 | if (ext3_should_journal_data(inode)) { | 87 | if (ext3_should_journal_data(inode)) { |
85 | mutex_unlock(&inode->i_mutex); | 88 | mutex_unlock(&inode->i_mutex); |
86 | return ext3_force_commit(inode->i_sb); | 89 | ret = ext3_force_commit(inode->i_sb); |
90 | goto out; | ||
87 | } | 91 | } |
88 | 92 | ||
89 | if (datasync) | 93 | if (datasync) |
@@ -104,6 +108,9 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
104 | */ | 108 | */ |
105 | if (needs_barrier) | 109 | if (needs_barrier) |
106 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 110 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
111 | |||
107 | mutex_unlock(&inode->i_mutex); | 112 | mutex_unlock(&inode->i_mutex); |
113 | out: | ||
114 | trace_ext3_sync_file_exit(inode, ret); | ||
108 | return ret; | 115 | return ret; |
109 | } | 116 | } |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index bfc2dc43681..bf09cbf938c 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/buffer_head.h> | 23 | #include <linux/buffer_head.h> |
24 | #include <linux/random.h> | 24 | #include <linux/random.h> |
25 | #include <linux/bitops.h> | 25 | #include <linux/bitops.h> |
26 | #include <trace/events/ext3.h> | ||
26 | 27 | ||
27 | #include <asm/byteorder.h> | 28 | #include <asm/byteorder.h> |
28 | 29 | ||
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) | |||
118 | 119 | ||
119 | ino = inode->i_ino; | 120 | ino = inode->i_ino; |
120 | ext3_debug ("freeing inode %lu\n", ino); | 121 | ext3_debug ("freeing inode %lu\n", ino); |
122 | trace_ext3_free_inode(inode); | ||
121 | 123 | ||
122 | is_directory = S_ISDIR(inode->i_mode); | 124 | is_directory = S_ISDIR(inode->i_mode); |
123 | 125 | ||
@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, | |||
426 | return ERR_PTR(-EPERM); | 428 | return ERR_PTR(-EPERM); |
427 | 429 | ||
428 | sb = dir->i_sb; | 430 | sb = dir->i_sb; |
431 | trace_ext3_request_inode(dir, mode); | ||
429 | inode = new_inode(sb); | 432 | inode = new_inode(sb); |
430 | if (!inode) | 433 | if (!inode) |
431 | return ERR_PTR(-ENOMEM); | 434 | return ERR_PTR(-ENOMEM); |
@@ -601,6 +604,7 @@ got: | |||
601 | } | 604 | } |
602 | 605 | ||
603 | ext3_debug("allocating inode %lu\n", inode->i_ino); | 606 | ext3_debug("allocating inode %lu\n", inode->i_ino); |
607 | trace_ext3_allocate_inode(inode, dir, mode); | ||
604 | goto really_out; | 608 | goto really_out; |
605 | fail: | 609 | fail: |
606 | ext3_std_error(sb, err); | 610 | ext3_std_error(sb, err); |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2978a2a17a5..04da6acde85 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -38,10 +38,12 @@ | |||
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/fiemap.h> | 39 | #include <linux/fiemap.h> |
40 | #include <linux/namei.h> | 40 | #include <linux/namei.h> |
41 | #include <trace/events/ext3.h> | ||
41 | #include "xattr.h" | 42 | #include "xattr.h" |
42 | #include "acl.h" | 43 | #include "acl.h" |
43 | 44 | ||
44 | static int ext3_writepage_trans_blocks(struct inode *inode); | 45 | static int ext3_writepage_trans_blocks(struct inode *inode); |
46 | static int ext3_block_truncate_page(struct inode *inode, loff_t from); | ||
45 | 47 | ||
46 | /* | 48 | /* |
47 | * Test whether an inode is a fast symlink. | 49 | * Test whether an inode is a fast symlink. |
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, | |||
70 | 72 | ||
71 | might_sleep(); | 73 | might_sleep(); |
72 | 74 | ||
75 | trace_ext3_forget(inode, is_metadata, blocknr); | ||
73 | BUFFER_TRACE(bh, "enter"); | 76 | BUFFER_TRACE(bh, "enter"); |
74 | 77 | ||
75 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " | 78 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " |
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode) | |||
194 | */ | 197 | */ |
195 | void ext3_evict_inode (struct inode *inode) | 198 | void ext3_evict_inode (struct inode *inode) |
196 | { | 199 | { |
200 | struct ext3_inode_info *ei = EXT3_I(inode); | ||
197 | struct ext3_block_alloc_info *rsv; | 201 | struct ext3_block_alloc_info *rsv; |
198 | handle_t *handle; | 202 | handle_t *handle; |
199 | int want_delete = 0; | 203 | int want_delete = 0; |
200 | 204 | ||
205 | trace_ext3_evict_inode(inode); | ||
201 | if (!inode->i_nlink && !is_bad_inode(inode)) { | 206 | if (!inode->i_nlink && !is_bad_inode(inode)) { |
202 | dquot_initialize(inode); | 207 | dquot_initialize(inode); |
203 | want_delete = 1; | 208 | want_delete = 1; |
204 | } | 209 | } |
205 | 210 | ||
211 | /* | ||
212 | * When journalling data, dirty buffers are tracked only in the journal. | ||
213 | * So although mm thinks everything is clean and ready for reaping the | ||
214 | * inode might still have some pages to write in the running | ||
215 | * transaction or waiting to be checkpointed. Thus calling | ||
216 | * journal_invalidatepage() (via truncate_inode_pages()) to discard | ||
217 | * these buffers can cause data loss. Also even if we did not discard | ||
218 | * these buffers, we would have no way to find them after the inode | ||
219 | * is reaped and thus a user could see stale data if they try to read | ||
220 | * them before the transaction is checkpointed. So be careful and | ||
221 | * force everything to disk here... We use ei->i_datasync_tid to | ||
222 | * store the newest transaction containing inode's data. | ||
223 | * | ||
224 | * Note that directories do not have this problem because they don't | ||
225 | * use page cache. | ||
226 | */ | ||
227 | if (inode->i_nlink && ext3_should_journal_data(inode) && | ||
228 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | ||
229 | tid_t commit_tid = atomic_read(&ei->i_datasync_tid); | ||
230 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; | ||
231 | |||
232 | log_start_commit(journal, commit_tid); | ||
233 | log_wait_commit(journal, commit_tid); | ||
234 | filemap_write_and_wait(&inode->i_data); | ||
235 | } | ||
206 | truncate_inode_pages(&inode->i_data, 0); | 236 | truncate_inode_pages(&inode->i_data, 0); |
207 | 237 | ||
208 | ext3_discard_reservation(inode); | 238 | ext3_discard_reservation(inode); |
209 | rsv = EXT3_I(inode)->i_block_alloc_info; | 239 | rsv = ei->i_block_alloc_info; |
210 | EXT3_I(inode)->i_block_alloc_info = NULL; | 240 | ei->i_block_alloc_info = NULL; |
211 | if (unlikely(rsv)) | 241 | if (unlikely(rsv)) |
212 | kfree(rsv); | 242 | kfree(rsv); |
213 | 243 | ||
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode) | |||
231 | if (inode->i_blocks) | 261 | if (inode->i_blocks) |
232 | ext3_truncate(inode); | 262 | ext3_truncate(inode); |
233 | /* | 263 | /* |
234 | * Kill off the orphan record which ext3_truncate created. | 264 | * Kill off the orphan record created when the inode lost the last |
235 | * AKPM: I think this can be inside the above `if'. | 265 | * link. Note that ext3_orphan_del() has to be able to cope with the |
236 | * Note that ext3_orphan_del() has to be able to cope with the | 266 | * deletion of a non-existent orphan - ext3_truncate() could |
237 | * deletion of a non-existent orphan - this is because we don't | 267 | * have removed the record. |
238 | * know if ext3_truncate() actually created an orphan record. | ||
239 | * (Well, we could do this if we need to, but heck - it works) | ||
240 | */ | 268 | */ |
241 | ext3_orphan_del(handle, inode); | 269 | ext3_orphan_del(handle, inode); |
242 | EXT3_I(inode)->i_dtime = get_seconds(); | 270 | ei->i_dtime = get_seconds(); |
243 | 271 | ||
244 | /* | 272 | /* |
245 | * One subtle ordering requirement: if anything has gone wrong | 273 | * One subtle ordering requirement: if anything has gone wrong |
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
842 | ext3_fsblk_t first_block = 0; | 870 | ext3_fsblk_t first_block = 0; |
843 | 871 | ||
844 | 872 | ||
873 | trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create); | ||
845 | J_ASSERT(handle != NULL || create == 0); | 874 | J_ASSERT(handle != NULL || create == 0); |
846 | depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); | 875 | depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); |
847 | 876 | ||
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
886 | if (!create || err == -EIO) | 915 | if (!create || err == -EIO) |
887 | goto cleanup; | 916 | goto cleanup; |
888 | 917 | ||
918 | /* | ||
919 | * Block out ext3_truncate while we alter the tree | ||
920 | */ | ||
889 | mutex_lock(&ei->truncate_mutex); | 921 | mutex_lock(&ei->truncate_mutex); |
890 | 922 | ||
891 | /* | 923 | /* |
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
934 | */ | 966 | */ |
935 | count = ext3_blks_to_allocate(partial, indirect_blks, | 967 | count = ext3_blks_to_allocate(partial, indirect_blks, |
936 | maxblocks, blocks_to_boundary); | 968 | maxblocks, blocks_to_boundary); |
937 | /* | ||
938 | * Block out ext3_truncate while we alter the tree | ||
939 | */ | ||
940 | err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, | 969 | err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, |
941 | offsets + (partial - chain), partial); | 970 | offsets + (partial - chain), partial); |
942 | 971 | ||
@@ -970,6 +999,9 @@ cleanup: | |||
970 | } | 999 | } |
971 | BUFFER_TRACE(bh_result, "returned"); | 1000 | BUFFER_TRACE(bh_result, "returned"); |
972 | out: | 1001 | out: |
1002 | trace_ext3_get_blocks_exit(inode, iblock, | ||
1003 | depth ? le32_to_cpu(chain[depth-1].key) : 0, | ||
1004 | count, err); | ||
973 | return err; | 1005 | return err; |
974 | } | 1006 | } |
975 | 1007 | ||
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode) | |||
1202 | ext3_truncate(inode); | 1234 | ext3_truncate(inode); |
1203 | } | 1235 | } |
1204 | 1236 | ||
1237 | /* | ||
1238 | * Truncate blocks that were not used by direct IO write. We have to zero out | ||
1239 | * the last file block as well because direct IO might have written to it. | ||
1240 | */ | ||
1241 | static void ext3_truncate_failed_direct_write(struct inode *inode) | ||
1242 | { | ||
1243 | ext3_block_truncate_page(inode, inode->i_size); | ||
1244 | ext3_truncate(inode); | ||
1245 | } | ||
1246 | |||
1205 | static int ext3_write_begin(struct file *file, struct address_space *mapping, | 1247 | static int ext3_write_begin(struct file *file, struct address_space *mapping, |
1206 | loff_t pos, unsigned len, unsigned flags, | 1248 | loff_t pos, unsigned len, unsigned flags, |
1207 | struct page **pagep, void **fsdata) | 1249 | struct page **pagep, void **fsdata) |
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping, | |||
1217 | * we allocate blocks but write fails for some reason */ | 1259 | * we allocate blocks but write fails for some reason */ |
1218 | int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; | 1260 | int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; |
1219 | 1261 | ||
1262 | trace_ext3_write_begin(inode, pos, len, flags); | ||
1263 | |||
1220 | index = pos >> PAGE_CACHE_SHIFT; | 1264 | index = pos >> PAGE_CACHE_SHIFT; |
1221 | from = pos & (PAGE_CACHE_SIZE - 1); | 1265 | from = pos & (PAGE_CACHE_SIZE - 1); |
1222 | to = from + len; | 1266 | to = from + len; |
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file, | |||
1332 | unsigned from, to; | 1376 | unsigned from, to; |
1333 | int ret = 0, ret2; | 1377 | int ret = 0, ret2; |
1334 | 1378 | ||
1379 | trace_ext3_ordered_write_end(inode, pos, len, copied); | ||
1335 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 1380 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
1336 | 1381 | ||
1337 | from = pos & (PAGE_CACHE_SIZE - 1); | 1382 | from = pos & (PAGE_CACHE_SIZE - 1); |
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file, | |||
1367 | struct inode *inode = file->f_mapping->host; | 1412 | struct inode *inode = file->f_mapping->host; |
1368 | int ret; | 1413 | int ret; |
1369 | 1414 | ||
1415 | trace_ext3_writeback_write_end(inode, pos, len, copied); | ||
1370 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 1416 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
1371 | update_file_sizes(inode, pos, copied); | 1417 | update_file_sizes(inode, pos, copied); |
1372 | /* | 1418 | /* |
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file, | |||
1391 | { | 1437 | { |
1392 | handle_t *handle = ext3_journal_current_handle(); | 1438 | handle_t *handle = ext3_journal_current_handle(); |
1393 | struct inode *inode = mapping->host; | 1439 | struct inode *inode = mapping->host; |
1440 | struct ext3_inode_info *ei = EXT3_I(inode); | ||
1394 | int ret = 0, ret2; | 1441 | int ret = 0, ret2; |
1395 | int partial = 0; | 1442 | int partial = 0; |
1396 | unsigned from, to; | 1443 | unsigned from, to; |
1397 | 1444 | ||
1445 | trace_ext3_journalled_write_end(inode, pos, len, copied); | ||
1398 | from = pos & (PAGE_CACHE_SIZE - 1); | 1446 | from = pos & (PAGE_CACHE_SIZE - 1); |
1399 | to = from + len; | 1447 | to = from + len; |
1400 | 1448 | ||
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file, | |||
1419 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) | 1467 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) |
1420 | ext3_orphan_add(handle, inode); | 1468 | ext3_orphan_add(handle, inode); |
1421 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); | 1469 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); |
1422 | if (inode->i_size > EXT3_I(inode)->i_disksize) { | 1470 | atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); |
1423 | EXT3_I(inode)->i_disksize = inode->i_size; | 1471 | if (inode->i_size > ei->i_disksize) { |
1472 | ei->i_disksize = inode->i_size; | ||
1424 | ret2 = ext3_mark_inode_dirty(handle, inode); | 1473 | ret2 = ext3_mark_inode_dirty(handle, inode); |
1425 | if (!ret) | 1474 | if (!ret) |
1426 | ret = ret2; | 1475 | ret = ret2; |
@@ -1577,6 +1626,7 @@ static int ext3_ordered_writepage(struct page *page, | |||
1577 | if (ext3_journal_current_handle()) | 1626 | if (ext3_journal_current_handle()) |
1578 | goto out_fail; | 1627 | goto out_fail; |
1579 | 1628 | ||
1629 | trace_ext3_ordered_writepage(page); | ||
1580 | if (!page_has_buffers(page)) { | 1630 | if (!page_has_buffers(page)) { |
1581 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 1631 | create_empty_buffers(page, inode->i_sb->s_blocksize, |
1582 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 1632 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
@@ -1647,6 +1697,7 @@ static int ext3_writeback_writepage(struct page *page, | |||
1647 | if (ext3_journal_current_handle()) | 1697 | if (ext3_journal_current_handle()) |
1648 | goto out_fail; | 1698 | goto out_fail; |
1649 | 1699 | ||
1700 | trace_ext3_writeback_writepage(page); | ||
1650 | if (page_has_buffers(page)) { | 1701 | if (page_has_buffers(page)) { |
1651 | if (!walk_page_buffers(NULL, page_buffers(page), 0, | 1702 | if (!walk_page_buffers(NULL, page_buffers(page), 0, |
1652 | PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { | 1703 | PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { |
@@ -1689,6 +1740,7 @@ static int ext3_journalled_writepage(struct page *page, | |||
1689 | if (ext3_journal_current_handle()) | 1740 | if (ext3_journal_current_handle()) |
1690 | goto no_write; | 1741 | goto no_write; |
1691 | 1742 | ||
1743 | trace_ext3_journalled_writepage(page); | ||
1692 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); | 1744 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); |
1693 | if (IS_ERR(handle)) { | 1745 | if (IS_ERR(handle)) { |
1694 | ret = PTR_ERR(handle); | 1746 | ret = PTR_ERR(handle); |
@@ -1715,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page, | |||
1715 | if (ret == 0) | 1767 | if (ret == 0) |
1716 | ret = err; | 1768 | ret = err; |
1717 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); | 1769 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); |
1770 | atomic_set(&EXT3_I(inode)->i_datasync_tid, | ||
1771 | handle->h_transaction->t_tid); | ||
1718 | unlock_page(page); | 1772 | unlock_page(page); |
1719 | } else { | 1773 | } else { |
1720 | /* | 1774 | /* |
@@ -1739,6 +1793,7 @@ out_unlock: | |||
1739 | 1793 | ||
1740 | static int ext3_readpage(struct file *file, struct page *page) | 1794 | static int ext3_readpage(struct file *file, struct page *page) |
1741 | { | 1795 | { |
1796 | trace_ext3_readpage(page); | ||
1742 | return mpage_readpage(page, ext3_get_block); | 1797 | return mpage_readpage(page, ext3_get_block); |
1743 | } | 1798 | } |
1744 | 1799 | ||
@@ -1753,6 +1808,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset) | |||
1753 | { | 1808 | { |
1754 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1809 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1755 | 1810 | ||
1811 | trace_ext3_invalidatepage(page, offset); | ||
1812 | |||
1756 | /* | 1813 | /* |
1757 | * If it's a full truncate we just forget about the pending dirtying | 1814 | * If it's a full truncate we just forget about the pending dirtying |
1758 | */ | 1815 | */ |
@@ -1766,6 +1823,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait) | |||
1766 | { | 1823 | { |
1767 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1824 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1768 | 1825 | ||
1826 | trace_ext3_releasepage(page); | ||
1769 | WARN_ON(PageChecked(page)); | 1827 | WARN_ON(PageChecked(page)); |
1770 | if (!page_has_buffers(page)) | 1828 | if (!page_has_buffers(page)) |
1771 | return 0; | 1829 | return 0; |
@@ -1794,6 +1852,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, | |||
1794 | size_t count = iov_length(iov, nr_segs); | 1852 | size_t count = iov_length(iov, nr_segs); |
1795 | int retries = 0; | 1853 | int retries = 0; |
1796 | 1854 | ||
1855 | trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | ||
1856 | |||
1797 | if (rw == WRITE) { | 1857 | if (rw == WRITE) { |
1798 | loff_t final_size = offset + count; | 1858 | loff_t final_size = offset + count; |
1799 | 1859 | ||
@@ -1827,7 +1887,7 @@ retry: | |||
1827 | loff_t end = offset + iov_length(iov, nr_segs); | 1887 | loff_t end = offset + iov_length(iov, nr_segs); |
1828 | 1888 | ||
1829 | if (end > isize) | 1889 | if (end > isize) |
1830 | vmtruncate(inode, isize); | 1890 | ext3_truncate_failed_direct_write(inode); |
1831 | } | 1891 | } |
1832 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) | 1892 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) |
1833 | goto retry; | 1893 | goto retry; |
@@ -1841,7 +1901,7 @@ retry: | |||
1841 | /* This is really bad luck. We've written the data | 1901 | /* This is really bad luck. We've written the data |
1842 | * but cannot extend i_size. Truncate allocated blocks | 1902 | * but cannot extend i_size. Truncate allocated blocks |
1843 | * and pretend the write failed... */ | 1903 | * and pretend the write failed... */ |
1844 | ext3_truncate(inode); | 1904 | ext3_truncate_failed_direct_write(inode); |
1845 | ret = PTR_ERR(handle); | 1905 | ret = PTR_ERR(handle); |
1846 | goto out; | 1906 | goto out; |
1847 | } | 1907 | } |
@@ -1867,6 +1927,8 @@ retry: | |||
1867 | ret = err; | 1927 | ret = err; |
1868 | } | 1928 | } |
1869 | out: | 1929 | out: |
1930 | trace_ext3_direct_IO_exit(inode, offset, | ||
1931 | iov_length(iov, nr_segs), rw, ret); | ||
1870 | return ret; | 1932 | return ret; |
1871 | } | 1933 | } |
1872 | 1934 | ||
@@ -1949,17 +2011,24 @@ void ext3_set_aops(struct inode *inode) | |||
1949 | * This is required during truncate. We need to physically zero the tail end | 2011 | * This is required during truncate. We need to physically zero the tail end |
1950 | * of that block so it doesn't yield old data if the file is later grown. | 2012 | * of that block so it doesn't yield old data if the file is later grown. |
1951 | */ | 2013 | */ |
1952 | static int ext3_block_truncate_page(handle_t *handle, struct page *page, | 2014 | static int ext3_block_truncate_page(struct inode *inode, loff_t from) |
1953 | struct address_space *mapping, loff_t from) | ||
1954 | { | 2015 | { |
1955 | ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 2016 | ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
1956 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 2017 | unsigned offset = from & (PAGE_CACHE_SIZE - 1); |
1957 | unsigned blocksize, iblock, length, pos; | 2018 | unsigned blocksize, iblock, length, pos; |
1958 | struct inode *inode = mapping->host; | 2019 | struct page *page; |
2020 | handle_t *handle = NULL; | ||
1959 | struct buffer_head *bh; | 2021 | struct buffer_head *bh; |
1960 | int err = 0; | 2022 | int err = 0; |
1961 | 2023 | ||
2024 | /* Truncated on block boundary - nothing to do */ | ||
1962 | blocksize = inode->i_sb->s_blocksize; | 2025 | blocksize = inode->i_sb->s_blocksize; |
2026 | if ((from & (blocksize - 1)) == 0) | ||
2027 | return 0; | ||
2028 | |||
2029 | page = grab_cache_page(inode->i_mapping, index); | ||
2030 | if (!page) | ||
2031 | return -ENOMEM; | ||
1963 | length = blocksize - (offset & (blocksize - 1)); | 2032 | length = blocksize - (offset & (blocksize - 1)); |
1964 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 2033 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
1965 | 2034 | ||
@@ -2004,11 +2073,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
2004 | goto unlock; | 2073 | goto unlock; |
2005 | } | 2074 | } |
2006 | 2075 | ||
2076 | /* data=writeback mode doesn't need transaction to zero-out data */ | ||
2077 | if (!ext3_should_writeback_data(inode)) { | ||
2078 | /* We journal at most one block */ | ||
2079 | handle = ext3_journal_start(inode, 1); | ||
2080 | if (IS_ERR(handle)) { | ||
2081 | clear_highpage(page); | ||
2082 | flush_dcache_page(page); | ||
2083 | err = PTR_ERR(handle); | ||
2084 | goto unlock; | ||
2085 | } | ||
2086 | } | ||
2087 | |||
2007 | if (ext3_should_journal_data(inode)) { | 2088 | if (ext3_should_journal_data(inode)) { |
2008 | BUFFER_TRACE(bh, "get write access"); | 2089 | BUFFER_TRACE(bh, "get write access"); |
2009 | err = ext3_journal_get_write_access(handle, bh); | 2090 | err = ext3_journal_get_write_access(handle, bh); |
2010 | if (err) | 2091 | if (err) |
2011 | goto unlock; | 2092 | goto stop; |
2012 | } | 2093 | } |
2013 | 2094 | ||
2014 | zero_user(page, offset, length); | 2095 | zero_user(page, offset, length); |
@@ -2022,6 +2103,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
2022 | err = ext3_journal_dirty_data(handle, bh); | 2103 | err = ext3_journal_dirty_data(handle, bh); |
2023 | mark_buffer_dirty(bh); | 2104 | mark_buffer_dirty(bh); |
2024 | } | 2105 | } |
2106 | stop: | ||
2107 | if (handle) | ||
2108 | ext3_journal_stop(handle); | ||
2025 | 2109 | ||
2026 | unlock: | 2110 | unlock: |
2027 | unlock_page(page); | 2111 | unlock_page(page); |
@@ -2390,8 +2474,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
2390 | 2474 | ||
2391 | int ext3_can_truncate(struct inode *inode) | 2475 | int ext3_can_truncate(struct inode *inode) |
2392 | { | 2476 | { |
2393 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
2394 | return 0; | ||
2395 | if (S_ISREG(inode->i_mode)) | 2477 | if (S_ISREG(inode->i_mode)) |
2396 | return 1; | 2478 | return 1; |
2397 | if (S_ISDIR(inode->i_mode)) | 2479 | if (S_ISDIR(inode->i_mode)) |
@@ -2435,7 +2517,6 @@ void ext3_truncate(struct inode *inode) | |||
2435 | struct ext3_inode_info *ei = EXT3_I(inode); | 2517 | struct ext3_inode_info *ei = EXT3_I(inode); |
2436 | __le32 *i_data = ei->i_data; | 2518 | __le32 *i_data = ei->i_data; |
2437 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); | 2519 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); |
2438 | struct address_space *mapping = inode->i_mapping; | ||
2439 | int offsets[4]; | 2520 | int offsets[4]; |
2440 | Indirect chain[4]; | 2521 | Indirect chain[4]; |
2441 | Indirect *partial; | 2522 | Indirect *partial; |
@@ -2443,7 +2524,8 @@ void ext3_truncate(struct inode *inode) | |||
2443 | int n; | 2524 | int n; |
2444 | long last_block; | 2525 | long last_block; |
2445 | unsigned blocksize = inode->i_sb->s_blocksize; | 2526 | unsigned blocksize = inode->i_sb->s_blocksize; |
2446 | struct page *page; | 2527 | |
2528 | trace_ext3_truncate_enter(inode); | ||
2447 | 2529 | ||
2448 | if (!ext3_can_truncate(inode)) | 2530 | if (!ext3_can_truncate(inode)) |
2449 | goto out_notrans; | 2531 | goto out_notrans; |
@@ -2451,37 +2533,12 @@ void ext3_truncate(struct inode *inode) | |||
2451 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) | 2533 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) |
2452 | ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); | 2534 | ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); |
2453 | 2535 | ||
2454 | /* | ||
2455 | * We have to lock the EOF page here, because lock_page() nests | ||
2456 | * outside journal_start(). | ||
2457 | */ | ||
2458 | if ((inode->i_size & (blocksize - 1)) == 0) { | ||
2459 | /* Block boundary? Nothing to do */ | ||
2460 | page = NULL; | ||
2461 | } else { | ||
2462 | page = grab_cache_page(mapping, | ||
2463 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
2464 | if (!page) | ||
2465 | goto out_notrans; | ||
2466 | } | ||
2467 | |||
2468 | handle = start_transaction(inode); | 2536 | handle = start_transaction(inode); |
2469 | if (IS_ERR(handle)) { | 2537 | if (IS_ERR(handle)) |
2470 | if (page) { | ||
2471 | clear_highpage(page); | ||
2472 | flush_dcache_page(page); | ||
2473 | unlock_page(page); | ||
2474 | page_cache_release(page); | ||
2475 | } | ||
2476 | goto out_notrans; | 2538 | goto out_notrans; |
2477 | } | ||
2478 | 2539 | ||
2479 | last_block = (inode->i_size + blocksize-1) | 2540 | last_block = (inode->i_size + blocksize-1) |
2480 | >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); | 2541 | >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); |
2481 | |||
2482 | if (page) | ||
2483 | ext3_block_truncate_page(handle, page, mapping, inode->i_size); | ||
2484 | |||
2485 | n = ext3_block_to_path(inode, last_block, offsets, NULL); | 2542 | n = ext3_block_to_path(inode, last_block, offsets, NULL); |
2486 | if (n == 0) | 2543 | if (n == 0) |
2487 | goto out_stop; /* error */ | 2544 | goto out_stop; /* error */ |
@@ -2596,6 +2653,7 @@ out_stop: | |||
2596 | ext3_orphan_del(handle, inode); | 2653 | ext3_orphan_del(handle, inode); |
2597 | 2654 | ||
2598 | ext3_journal_stop(handle); | 2655 | ext3_journal_stop(handle); |
2656 | trace_ext3_truncate_exit(inode); | ||
2599 | return; | 2657 | return; |
2600 | out_notrans: | 2658 | out_notrans: |
2601 | /* | 2659 | /* |
@@ -2604,6 +2662,7 @@ out_notrans: | |||
2604 | */ | 2662 | */ |
2605 | if (inode->i_nlink) | 2663 | if (inode->i_nlink) |
2606 | ext3_orphan_del(NULL, inode); | 2664 | ext3_orphan_del(NULL, inode); |
2665 | trace_ext3_truncate_exit(inode); | ||
2607 | } | 2666 | } |
2608 | 2667 | ||
2609 | static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, | 2668 | static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, |
@@ -2745,6 +2804,7 @@ make_io: | |||
2745 | * has in-inode xattrs, or we don't have this inode in memory. | 2804 | * has in-inode xattrs, or we don't have this inode in memory. |
2746 | * Read the block from disk. | 2805 | * Read the block from disk. |
2747 | */ | 2806 | */ |
2807 | trace_ext3_load_inode(inode); | ||
2748 | get_bh(bh); | 2808 | get_bh(bh); |
2749 | bh->b_end_io = end_buffer_read_sync; | 2809 | bh->b_end_io = end_buffer_read_sync; |
2750 | submit_bh(READ_META, bh); | 2810 | submit_bh(READ_META, bh); |
@@ -3229,18 +3289,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) | |||
3229 | } | 3289 | } |
3230 | 3290 | ||
3231 | error = ext3_orphan_add(handle, inode); | 3291 | error = ext3_orphan_add(handle, inode); |
3292 | if (error) { | ||
3293 | ext3_journal_stop(handle); | ||
3294 | goto err_out; | ||
3295 | } | ||
3232 | EXT3_I(inode)->i_disksize = attr->ia_size; | 3296 | EXT3_I(inode)->i_disksize = attr->ia_size; |
3233 | rc = ext3_mark_inode_dirty(handle, inode); | 3297 | error = ext3_mark_inode_dirty(handle, inode); |
3234 | if (!error) | ||
3235 | error = rc; | ||
3236 | ext3_journal_stop(handle); | 3298 | ext3_journal_stop(handle); |
3299 | if (error) { | ||
3300 | /* Some hard fs error must have happened. Bail out. */ | ||
3301 | ext3_orphan_del(NULL, inode); | ||
3302 | goto err_out; | ||
3303 | } | ||
3304 | rc = ext3_block_truncate_page(inode, attr->ia_size); | ||
3305 | if (rc) { | ||
3306 | /* Cleanup orphan list and exit */ | ||
3307 | handle = ext3_journal_start(inode, 3); | ||
3308 | if (IS_ERR(handle)) { | ||
3309 | ext3_orphan_del(NULL, inode); | ||
3310 | goto err_out; | ||
3311 | } | ||
3312 | ext3_orphan_del(handle, inode); | ||
3313 | ext3_journal_stop(handle); | ||
3314 | goto err_out; | ||
3315 | } | ||
3237 | } | 3316 | } |
3238 | 3317 | ||
3239 | if ((attr->ia_valid & ATTR_SIZE) && | 3318 | if ((attr->ia_valid & ATTR_SIZE) && |
3240 | attr->ia_size != i_size_read(inode)) { | 3319 | attr->ia_size != i_size_read(inode)) { |
3241 | rc = vmtruncate(inode, attr->ia_size); | 3320 | truncate_setsize(inode, attr->ia_size); |
3242 | if (rc) | 3321 | ext3_truncate(inode); |
3243 | goto err_out; | ||
3244 | } | 3322 | } |
3245 | 3323 | ||
3246 | setattr_copy(inode, attr); | 3324 | setattr_copy(inode, attr); |
@@ -3374,6 +3452,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
3374 | int err; | 3452 | int err; |
3375 | 3453 | ||
3376 | might_sleep(); | 3454 | might_sleep(); |
3455 | trace_ext3_mark_inode_dirty(inode, _RET_IP_); | ||
3377 | err = ext3_reserve_inode_write(handle, inode, &iloc); | 3456 | err = ext3_reserve_inode_write(handle, inode, &iloc); |
3378 | if (!err) | 3457 | if (!err) |
3379 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); | 3458 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); |
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index f4090bd2f34..c7f43944f16 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c | |||
@@ -285,7 +285,7 @@ group_add_out: | |||
285 | if (!capable(CAP_SYS_ADMIN)) | 285 | if (!capable(CAP_SYS_ADMIN)) |
286 | return -EPERM; | 286 | return -EPERM; |
287 | 287 | ||
288 | if (copy_from_user(&range, (struct fstrim_range *)arg, | 288 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, |
289 | sizeof(range))) | 289 | sizeof(range))) |
290 | return -EFAULT; | 290 | return -EFAULT; |
291 | 291 | ||
@@ -293,7 +293,7 @@ group_add_out: | |||
293 | if (ret < 0) | 293 | if (ret < 0) |
294 | return ret; | 294 | return ret; |
295 | 295 | ||
296 | if (copy_to_user((struct fstrim_range *)arg, &range, | 296 | if (copy_to_user((struct fstrim_range __user *)arg, &range, |
297 | sizeof(range))) | 297 | sizeof(range))) |
298 | return -EFAULT; | 298 | return -EFAULT; |
299 | 299 | ||
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 3b57230a17b..6e18a0b7750 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/quotaops.h> | 36 | #include <linux/quotaops.h> |
37 | #include <linux/buffer_head.h> | 37 | #include <linux/buffer_head.h> |
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <trace/events/ext3.h> | ||
39 | 40 | ||
40 | #include "namei.h" | 41 | #include "namei.h" |
41 | #include "xattr.h" | 42 | #include "xattr.h" |
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent | |||
287 | while (len--) printk("%c", *name++); | 288 | while (len--) printk("%c", *name++); |
288 | ext3fs_dirhash(de->name, de->name_len, &h); | 289 | ext3fs_dirhash(de->name, de->name_len, &h); |
289 | printk(":%x.%u ", h.hash, | 290 | printk(":%x.%u ", h.hash, |
290 | ((char *) de - base)); | 291 | (unsigned) ((char *) de - base)); |
291 | } | 292 | } |
292 | space += EXT3_DIR_REC_LEN(de->name_len); | 293 | space += EXT3_DIR_REC_LEN(de->name_len); |
293 | names++; | 294 | names++; |
@@ -1013,7 +1014,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, | |||
1013 | 1014 | ||
1014 | *err = -ENOENT; | 1015 | *err = -ENOENT; |
1015 | errout: | 1016 | errout: |
1016 | dxtrace(printk("%s not found\n", name)); | 1017 | dxtrace(printk("%s not found\n", entry->name)); |
1017 | dx_release (frames); | 1018 | dx_release (frames); |
1018 | return NULL; | 1019 | return NULL; |
1019 | } | 1020 | } |
@@ -2140,6 +2141,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) | |||
2140 | struct ext3_dir_entry_2 * de; | 2141 | struct ext3_dir_entry_2 * de; |
2141 | handle_t *handle; | 2142 | handle_t *handle; |
2142 | 2143 | ||
2144 | trace_ext3_unlink_enter(dir, dentry); | ||
2143 | /* Initialize quotas before so that eventual writes go | 2145 | /* Initialize quotas before so that eventual writes go |
2144 | * in separate transaction */ | 2146 | * in separate transaction */ |
2145 | dquot_initialize(dir); | 2147 | dquot_initialize(dir); |
@@ -2185,6 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) | |||
2185 | end_unlink: | 2187 | end_unlink: |
2186 | ext3_journal_stop(handle); | 2188 | ext3_journal_stop(handle); |
2187 | brelse (bh); | 2189 | brelse (bh); |
2190 | trace_ext3_unlink_exit(dentry, retval); | ||
2188 | return retval; | 2191 | return retval; |
2189 | } | 2192 | } |
2190 | 2193 | ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index b57ea2f9126..7beb69ae001 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -44,6 +44,9 @@ | |||
44 | #include "acl.h" | 44 | #include "acl.h" |
45 | #include "namei.h" | 45 | #include "namei.h" |
46 | 46 | ||
47 | #define CREATE_TRACE_POINTS | ||
48 | #include <trace/events/ext3.h> | ||
49 | |||
47 | #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED | 50 | #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED |
48 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA | 51 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA |
49 | #else | 52 | #else |
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) | |||
497 | return &ei->vfs_inode; | 500 | return &ei->vfs_inode; |
498 | } | 501 | } |
499 | 502 | ||
503 | static int ext3_drop_inode(struct inode *inode) | ||
504 | { | ||
505 | int drop = generic_drop_inode(inode); | ||
506 | |||
507 | trace_ext3_drop_inode(inode, drop); | ||
508 | return drop; | ||
509 | } | ||
510 | |||
500 | static void ext3_i_callback(struct rcu_head *head) | 511 | static void ext3_i_callback(struct rcu_head *head) |
501 | { | 512 | { |
502 | struct inode *inode = container_of(head, struct inode, i_rcu); | 513 | struct inode *inode = container_of(head, struct inode, i_rcu); |
@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = { | |||
788 | .destroy_inode = ext3_destroy_inode, | 799 | .destroy_inode = ext3_destroy_inode, |
789 | .write_inode = ext3_write_inode, | 800 | .write_inode = ext3_write_inode, |
790 | .dirty_inode = ext3_dirty_inode, | 801 | .dirty_inode = ext3_dirty_inode, |
802 | .drop_inode = ext3_drop_inode, | ||
791 | .evict_inode = ext3_evict_inode, | 803 | .evict_inode = ext3_evict_inode, |
792 | .put_super = ext3_put_super, | 804 | .put_super = ext3_put_super, |
793 | .sync_fs = ext3_sync_fs, | 805 | .sync_fs = ext3_sync_fs, |
@@ -2509,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait) | |||
2509 | { | 2521 | { |
2510 | tid_t target; | 2522 | tid_t target; |
2511 | 2523 | ||
2524 | trace_ext3_sync_fs(sb, wait); | ||
2512 | if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { | 2525 | if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { |
2513 | if (wait) | 2526 | if (wait) |
2514 | log_wait_commit(EXT3_SB(sb)->s_journal, target); | 2527 | log_wait_commit(EXT3_SB(sb)->s_journal, target); |
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 32e6cc23bd9..d565759d82e 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c | |||
@@ -803,8 +803,16 @@ inserted: | |||
803 | /* We need to allocate a new block */ | 803 | /* We need to allocate a new block */ |
804 | ext3_fsblk_t goal = ext3_group_first_block_no(sb, | 804 | ext3_fsblk_t goal = ext3_group_first_block_no(sb, |
805 | EXT3_I(inode)->i_block_group); | 805 | EXT3_I(inode)->i_block_group); |
806 | ext3_fsblk_t block = ext3_new_block(handle, inode, | 806 | ext3_fsblk_t block; |
807 | goal, &error); | 807 | |
808 | /* | ||
809 | * Protect us against concurrent allocations to the | ||
810 | * same inode from ext3_..._writepage(). Reservation | ||
811 | * code does not expect racing allocations. | ||
812 | */ | ||
813 | mutex_lock(&EXT3_I(inode)->truncate_mutex); | ||
814 | block = ext3_new_block(handle, inode, goal, &error); | ||
815 | mutex_unlock(&EXT3_I(inode)->truncate_mutex); | ||
808 | if (error) | 816 | if (error) |
809 | goto cleanup; | 817 | goto cleanup; |
810 | ea_idebug(inode, "creating block %d", block); | 818 | ea_idebug(inode, "creating block %d", block); |
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index e4b87bc1fa5..f94fc48ff3a 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/jbd.h> | 22 | #include <linux/jbd.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/blkdev.h> | ||
26 | #include <trace/events/jbd.h> | ||
25 | 27 | ||
26 | /* | 28 | /* |
27 | * Unlink a buffer from a transaction checkpoint list. | 29 | * Unlink a buffer from a transaction checkpoint list. |
@@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
95 | 97 | ||
96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
97 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | ||
101 | * Get our reference so that bh cannot be freed before | ||
102 | * we unlock it | ||
103 | */ | ||
104 | get_bh(bh); | ||
98 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
99 | ret = __journal_remove_checkpoint(jh) + 1; | 106 | ret = __journal_remove_checkpoint(jh) + 1; |
100 | jbd_unlock_bh_state(bh); | 107 | jbd_unlock_bh_state(bh); |
101 | journal_remove_journal_head(bh); | ||
102 | BUFFER_TRACE(bh, "release"); | 108 | BUFFER_TRACE(bh, "release"); |
103 | __brelse(bh); | 109 | __brelse(bh); |
104 | } else { | 110 | } else { |
@@ -220,8 +226,8 @@ restart: | |||
220 | spin_lock(&journal->j_list_lock); | 226 | spin_lock(&journal->j_list_lock); |
221 | goto restart; | 227 | goto restart; |
222 | } | 228 | } |
229 | get_bh(bh); | ||
223 | if (buffer_locked(bh)) { | 230 | if (buffer_locked(bh)) { |
224 | get_bh(bh); | ||
225 | spin_unlock(&journal->j_list_lock); | 231 | spin_unlock(&journal->j_list_lock); |
226 | jbd_unlock_bh_state(bh); | 232 | jbd_unlock_bh_state(bh); |
227 | wait_on_buffer(bh); | 233 | wait_on_buffer(bh); |
@@ -240,7 +246,6 @@ restart: | |||
240 | */ | 246 | */ |
241 | released = __journal_remove_checkpoint(jh); | 247 | released = __journal_remove_checkpoint(jh); |
242 | jbd_unlock_bh_state(bh); | 248 | jbd_unlock_bh_state(bh); |
243 | journal_remove_journal_head(bh); | ||
244 | __brelse(bh); | 249 | __brelse(bh); |
245 | } | 250 | } |
246 | 251 | ||
@@ -253,9 +258,12 @@ static void | |||
253 | __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | 258 | __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) |
254 | { | 259 | { |
255 | int i; | 260 | int i; |
261 | struct blk_plug plug; | ||
256 | 262 | ||
263 | blk_start_plug(&plug); | ||
257 | for (i = 0; i < *batch_count; i++) | 264 | for (i = 0; i < *batch_count; i++) |
258 | write_dirty_buffer(bhs[i], WRITE); | 265 | write_dirty_buffer(bhs[i], WRITE_SYNC); |
266 | blk_finish_plug(&plug); | ||
259 | 267 | ||
260 | for (i = 0; i < *batch_count; i++) { | 268 | for (i = 0; i < *batch_count; i++) { |
261 | struct buffer_head *bh = bhs[i]; | 269 | struct buffer_head *bh = bhs[i]; |
@@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
304 | ret = 1; | 312 | ret = 1; |
305 | if (unlikely(buffer_write_io_error(bh))) | 313 | if (unlikely(buffer_write_io_error(bh))) |
306 | ret = -EIO; | 314 | ret = -EIO; |
315 | get_bh(bh); | ||
307 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 316 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
308 | BUFFER_TRACE(bh, "remove from checkpoint"); | 317 | BUFFER_TRACE(bh, "remove from checkpoint"); |
309 | __journal_remove_checkpoint(jh); | 318 | __journal_remove_checkpoint(jh); |
310 | spin_unlock(&journal->j_list_lock); | 319 | spin_unlock(&journal->j_list_lock); |
311 | jbd_unlock_bh_state(bh); | 320 | jbd_unlock_bh_state(bh); |
312 | journal_remove_journal_head(bh); | ||
313 | __brelse(bh); | 321 | __brelse(bh); |
314 | } else { | 322 | } else { |
315 | /* | 323 | /* |
@@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal) | |||
358 | * journal straight away. | 366 | * journal straight away. |
359 | */ | 367 | */ |
360 | result = cleanup_journal_tail(journal); | 368 | result = cleanup_journal_tail(journal); |
369 | trace_jbd_checkpoint(journal, result); | ||
361 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); | 370 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); |
362 | if (result <= 0) | 371 | if (result <= 0) |
363 | return result; | 372 | return result; |
@@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal) | |||
503 | if (blocknr < journal->j_tail) | 512 | if (blocknr < journal->j_tail) |
504 | freed = freed + journal->j_last - journal->j_first; | 513 | freed = freed + journal->j_last - journal->j_first; |
505 | 514 | ||
515 | trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed); | ||
506 | jbd_debug(1, | 516 | jbd_debug(1, |
507 | "Cleaning journal tail from %d to %d (offset %u), " | 517 | "Cleaning journal tail from %d to %d (offset %u), " |
508 | "freeing %u\n", | 518 | "freeing %u\n", |
@@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal) | |||
523 | /* | 533 | /* |
524 | * journal_clean_one_cp_list | 534 | * journal_clean_one_cp_list |
525 | * | 535 | * |
526 | * Find all the written-back checkpoint buffers in the given list and release them. | 536 | * Find all the written-back checkpoint buffers in the given list and release |
537 | * them. | ||
527 | * | 538 | * |
528 | * Called with the journal locked. | ||
529 | * Called with j_list_lock held. | 539 | * Called with j_list_lock held. |
530 | * Returns number of buffers reaped (for debug) | 540 | * Returns number of buffers reaped (for debug) |
531 | */ | 541 | */ |
@@ -632,8 +642,8 @@ out: | |||
632 | * checkpoint lists. | 642 | * checkpoint lists. |
633 | * | 643 | * |
634 | * The function returns 1 if it frees the transaction, 0 otherwise. | 644 | * The function returns 1 if it frees the transaction, 0 otherwise. |
645 | * The function can free jh and bh. | ||
635 | * | 646 | * |
636 | * This function is called with the journal locked. | ||
637 | * This function is called with j_list_lock held. | 647 | * This function is called with j_list_lock held. |
638 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | 648 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) |
639 | */ | 649 | */ |
@@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
652 | } | 662 | } |
653 | journal = transaction->t_journal; | 663 | journal = transaction->t_journal; |
654 | 664 | ||
665 | JBUFFER_TRACE(jh, "removing from transaction"); | ||
655 | __buffer_unlink(jh); | 666 | __buffer_unlink(jh); |
656 | jh->b_cp_transaction = NULL; | 667 | jh->b_cp_transaction = NULL; |
668 | journal_put_journal_head(jh); | ||
657 | 669 | ||
658 | if (transaction->t_checkpoint_list != NULL || | 670 | if (transaction->t_checkpoint_list != NULL || |
659 | transaction->t_checkpoint_io_list != NULL) | 671 | transaction->t_checkpoint_io_list != NULL) |
660 | goto out; | 672 | goto out; |
661 | JBUFFER_TRACE(jh, "transaction has no more buffers"); | ||
662 | 673 | ||
663 | /* | 674 | /* |
664 | * There is one special case to worry about: if we have just pulled the | 675 | * There is one special case to worry about: if we have just pulled the |
@@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
669 | * The locking here around t_state is a bit sleazy. | 680 | * The locking here around t_state is a bit sleazy. |
670 | * See the comment at the end of journal_commit_transaction(). | 681 | * See the comment at the end of journal_commit_transaction(). |
671 | */ | 682 | */ |
672 | if (transaction->t_state != T_FINISHED) { | 683 | if (transaction->t_state != T_FINISHED) |
673 | JBUFFER_TRACE(jh, "belongs to running/committing transaction"); | ||
674 | goto out; | 684 | goto out; |
675 | } | ||
676 | 685 | ||
677 | /* OK, that was the last buffer for the transaction: we can now | 686 | /* OK, that was the last buffer for the transaction: we can now |
678 | safely remove this transaction from the log */ | 687 | safely remove this transaction from the log */ |
@@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
684 | wake_up(&journal->j_wait_logspace); | 693 | wake_up(&journal->j_wait_logspace); |
685 | ret = 1; | 694 | ret = 1; |
686 | out: | 695 | out: |
687 | JBUFFER_TRACE(jh, "exit"); | ||
688 | return ret; | 696 | return ret; |
689 | } | 697 | } |
690 | 698 | ||
@@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh, | |||
703 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); | 711 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); |
704 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); | 712 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
705 | 713 | ||
714 | /* Get reference for checkpointing transaction */ | ||
715 | journal_grab_journal_head(jh2bh(jh)); | ||
706 | jh->b_cp_transaction = transaction; | 716 | jh->b_cp_transaction = transaction; |
707 | 717 | ||
708 | if (!transaction->t_checkpoint_list) { | 718 | if (!transaction->t_checkpoint_list) { |
@@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) | |||
752 | J_ASSERT(journal->j_committing_transaction != transaction); | 762 | J_ASSERT(journal->j_committing_transaction != transaction); |
753 | J_ASSERT(journal->j_running_transaction != transaction); | 763 | J_ASSERT(journal->j_running_transaction != transaction); |
754 | 764 | ||
765 | trace_jbd_drop_transaction(journal, transaction); | ||
755 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); | 766 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); |
756 | kfree(transaction); | 767 | kfree(transaction); |
757 | } | 768 | } |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 72ffa974b0b..8799207df05 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/pagemap.h> | 21 | #include <linux/pagemap.h> |
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <trace/events/jbd.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Default IO end handler for temporary BJ_IO buffer_heads. | 27 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -204,6 +205,8 @@ write_out_data: | |||
204 | if (!trylock_buffer(bh)) { | 205 | if (!trylock_buffer(bh)) { |
205 | BUFFER_TRACE(bh, "needs blocking lock"); | 206 | BUFFER_TRACE(bh, "needs blocking lock"); |
206 | spin_unlock(&journal->j_list_lock); | 207 | spin_unlock(&journal->j_list_lock); |
208 | trace_jbd_do_submit_data(journal, | ||
209 | commit_transaction); | ||
207 | /* Write out all data to prevent deadlocks */ | 210 | /* Write out all data to prevent deadlocks */ |
208 | journal_do_submit_data(wbuf, bufs, write_op); | 211 | journal_do_submit_data(wbuf, bufs, write_op); |
209 | bufs = 0; | 212 | bufs = 0; |
@@ -236,6 +239,8 @@ write_out_data: | |||
236 | jbd_unlock_bh_state(bh); | 239 | jbd_unlock_bh_state(bh); |
237 | if (bufs == journal->j_wbufsize) { | 240 | if (bufs == journal->j_wbufsize) { |
238 | spin_unlock(&journal->j_list_lock); | 241 | spin_unlock(&journal->j_list_lock); |
242 | trace_jbd_do_submit_data(journal, | ||
243 | commit_transaction); | ||
239 | journal_do_submit_data(wbuf, bufs, write_op); | 244 | journal_do_submit_data(wbuf, bufs, write_op); |
240 | bufs = 0; | 245 | bufs = 0; |
241 | goto write_out_data; | 246 | goto write_out_data; |
@@ -253,10 +258,6 @@ write_out_data: | |||
253 | jbd_unlock_bh_state(bh); | 258 | jbd_unlock_bh_state(bh); |
254 | if (locked) | 259 | if (locked) |
255 | unlock_buffer(bh); | 260 | unlock_buffer(bh); |
256 | journal_remove_journal_head(bh); | ||
257 | /* One for our safety reference, other for | ||
258 | * journal_remove_journal_head() */ | ||
259 | put_bh(bh); | ||
260 | release_data_buffer(bh); | 261 | release_data_buffer(bh); |
261 | } | 262 | } |
262 | 263 | ||
@@ -266,6 +267,7 @@ write_out_data: | |||
266 | } | 267 | } |
267 | } | 268 | } |
268 | spin_unlock(&journal->j_list_lock); | 269 | spin_unlock(&journal->j_list_lock); |
270 | trace_jbd_do_submit_data(journal, commit_transaction); | ||
269 | journal_do_submit_data(wbuf, bufs, write_op); | 271 | journal_do_submit_data(wbuf, bufs, write_op); |
270 | 272 | ||
271 | return err; | 273 | return err; |
@@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal) | |||
316 | commit_transaction = journal->j_running_transaction; | 318 | commit_transaction = journal->j_running_transaction; |
317 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 319 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
318 | 320 | ||
321 | trace_jbd_start_commit(journal, commit_transaction); | ||
319 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 322 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
320 | commit_transaction->t_tid); | 323 | commit_transaction->t_tid); |
321 | 324 | ||
322 | spin_lock(&journal->j_state_lock); | 325 | spin_lock(&journal->j_state_lock); |
323 | commit_transaction->t_state = T_LOCKED; | 326 | commit_transaction->t_state = T_LOCKED; |
324 | 327 | ||
328 | trace_jbd_commit_locking(journal, commit_transaction); | ||
325 | spin_lock(&commit_transaction->t_handle_lock); | 329 | spin_lock(&commit_transaction->t_handle_lock); |
326 | while (commit_transaction->t_updates) { | 330 | while (commit_transaction->t_updates) { |
327 | DEFINE_WAIT(wait); | 331 | DEFINE_WAIT(wait); |
@@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal) | |||
392 | */ | 396 | */ |
393 | journal_switch_revoke_table(journal); | 397 | journal_switch_revoke_table(journal); |
394 | 398 | ||
399 | trace_jbd_commit_flushing(journal, commit_transaction); | ||
395 | commit_transaction->t_state = T_FLUSH; | 400 | commit_transaction->t_state = T_FLUSH; |
396 | journal->j_committing_transaction = commit_transaction; | 401 | journal->j_committing_transaction = commit_transaction; |
397 | journal->j_running_transaction = NULL; | 402 | journal->j_running_transaction = NULL; |
@@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal) | |||
446 | } | 451 | } |
447 | if (buffer_jbd(bh) && bh2jh(bh) == jh && | 452 | if (buffer_jbd(bh) && bh2jh(bh) == jh && |
448 | jh->b_transaction == commit_transaction && | 453 | jh->b_transaction == commit_transaction && |
449 | jh->b_jlist == BJ_Locked) { | 454 | jh->b_jlist == BJ_Locked) |
450 | __journal_unfile_buffer(jh); | 455 | __journal_unfile_buffer(jh); |
451 | jbd_unlock_bh_state(bh); | 456 | jbd_unlock_bh_state(bh); |
452 | journal_remove_journal_head(bh); | ||
453 | put_bh(bh); | ||
454 | } else { | ||
455 | jbd_unlock_bh_state(bh); | ||
456 | } | ||
457 | release_data_buffer(bh); | 457 | release_data_buffer(bh); |
458 | cond_resched_lock(&journal->j_list_lock); | 458 | cond_resched_lock(&journal->j_list_lock); |
459 | } | 459 | } |
@@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal) | |||
493 | commit_transaction->t_state = T_COMMIT; | 493 | commit_transaction->t_state = T_COMMIT; |
494 | spin_unlock(&journal->j_state_lock); | 494 | spin_unlock(&journal->j_state_lock); |
495 | 495 | ||
496 | trace_jbd_commit_logging(journal, commit_transaction); | ||
496 | J_ASSERT(commit_transaction->t_nr_buffers <= | 497 | J_ASSERT(commit_transaction->t_nr_buffers <= |
497 | commit_transaction->t_outstanding_credits); | 498 | commit_transaction->t_outstanding_credits); |
498 | 499 | ||
@@ -797,10 +798,16 @@ restart_loop: | |||
797 | while (commit_transaction->t_forget) { | 798 | while (commit_transaction->t_forget) { |
798 | transaction_t *cp_transaction; | 799 | transaction_t *cp_transaction; |
799 | struct buffer_head *bh; | 800 | struct buffer_head *bh; |
801 | int try_to_free = 0; | ||
800 | 802 | ||
801 | jh = commit_transaction->t_forget; | 803 | jh = commit_transaction->t_forget; |
802 | spin_unlock(&journal->j_list_lock); | 804 | spin_unlock(&journal->j_list_lock); |
803 | bh = jh2bh(jh); | 805 | bh = jh2bh(jh); |
806 | /* | ||
807 | * Get a reference so that bh cannot be freed before we are | ||
808 | * done with it. | ||
809 | */ | ||
810 | get_bh(bh); | ||
804 | jbd_lock_bh_state(bh); | 811 | jbd_lock_bh_state(bh); |
805 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || | 812 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || |
806 | jh->b_transaction == journal->j_running_transaction); | 813 | jh->b_transaction == journal->j_running_transaction); |
@@ -858,28 +865,27 @@ restart_loop: | |||
858 | __journal_insert_checkpoint(jh, commit_transaction); | 865 | __journal_insert_checkpoint(jh, commit_transaction); |
859 | if (is_journal_aborted(journal)) | 866 | if (is_journal_aborted(journal)) |
860 | clear_buffer_jbddirty(bh); | 867 | clear_buffer_jbddirty(bh); |
861 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | ||
862 | __journal_refile_buffer(jh); | ||
863 | jbd_unlock_bh_state(bh); | ||
864 | } else { | 868 | } else { |
865 | J_ASSERT_BH(bh, !buffer_dirty(bh)); | 869 | J_ASSERT_BH(bh, !buffer_dirty(bh)); |
866 | /* The buffer on BJ_Forget list and not jbddirty means | 870 | /* |
871 | * The buffer on BJ_Forget list and not jbddirty means | ||
867 | * it has been freed by this transaction and hence it | 872 | * it has been freed by this transaction and hence it |
868 | * could not have been reallocated until this | 873 | * could not have been reallocated until this |
869 | * transaction has committed. *BUT* it could be | 874 | * transaction has committed. *BUT* it could be |
870 | * reallocated once we have written all the data to | 875 | * reallocated once we have written all the data to |
871 | * disk and before we process the buffer on BJ_Forget | 876 | * disk and before we process the buffer on BJ_Forget |
872 | * list. */ | 877 | * list. |
873 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | 878 | */ |
874 | __journal_refile_buffer(jh); | 879 | if (!jh->b_next_transaction) |
875 | if (!jh->b_transaction) { | 880 | try_to_free = 1; |
876 | jbd_unlock_bh_state(bh); | ||
877 | /* needs a brelse */ | ||
878 | journal_remove_journal_head(bh); | ||
879 | release_buffer_page(bh); | ||
880 | } else | ||
881 | jbd_unlock_bh_state(bh); | ||
882 | } | 881 | } |
882 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | ||
883 | __journal_refile_buffer(jh); | ||
884 | jbd_unlock_bh_state(bh); | ||
885 | if (try_to_free) | ||
886 | release_buffer_page(bh); | ||
887 | else | ||
888 | __brelse(bh); | ||
883 | cond_resched_lock(&journal->j_list_lock); | 889 | cond_resched_lock(&journal->j_list_lock); |
884 | } | 890 | } |
885 | spin_unlock(&journal->j_list_lock); | 891 | spin_unlock(&journal->j_list_lock); |
@@ -946,6 +952,7 @@ restart_loop: | |||
946 | } | 952 | } |
947 | spin_unlock(&journal->j_list_lock); | 953 | spin_unlock(&journal->j_list_lock); |
948 | 954 | ||
955 | trace_jbd_end_commit(journal, commit_transaction); | ||
949 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 956 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
950 | journal->j_commit_sequence, journal->j_tail_sequence); | 957 | journal->j_commit_sequence, journal->j_tail_sequence); |
951 | 958 | ||
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index e2d4285fbe9..9fe061fb877 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -38,6 +38,9 @@ | |||
38 | #include <linux/debugfs.h> | 38 | #include <linux/debugfs.h> |
39 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
40 | 40 | ||
41 | #define CREATE_TRACE_POINTS | ||
42 | #include <trace/events/jbd.h> | ||
43 | |||
41 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
42 | #include <asm/page.h> | 45 | #include <asm/page.h> |
43 | 46 | ||
@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait) | |||
1065 | } else | 1068 | } else |
1066 | write_dirty_buffer(bh, WRITE); | 1069 | write_dirty_buffer(bh, WRITE); |
1067 | 1070 | ||
1071 | trace_jbd_update_superblock_end(journal, wait); | ||
1068 | out: | 1072 | out: |
1069 | /* If we have just flushed the log (by marking s_start==0), then | 1073 | /* If we have just flushed the log (by marking s_start==0), then |
1070 | * any future commit will have to be careful to update the | 1074 | * any future commit will have to be careful to update the |
@@ -1799,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
1799 | * When a buffer has its BH_JBD bit set it is immune from being released by | 1803 | * When a buffer has its BH_JBD bit set it is immune from being released by |
1800 | * core kernel code, mainly via ->b_count. | 1804 | * core kernel code, mainly via ->b_count. |
1801 | * | 1805 | * |
1802 | * A journal_head may be detached from its buffer_head when the journal_head's | 1806 | * A journal_head is detached from its buffer_head when the journal_head's |
1803 | * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. | 1807 | * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint |
1804 | * Various places in JBD call journal_remove_journal_head() to indicate that the | 1808 | * transaction (b_cp_transaction) hold their references to b_jcount. |
1805 | * journal_head can be dropped if needed. | ||
1806 | * | 1809 | * |
1807 | * Various places in the kernel want to attach a journal_head to a buffer_head | 1810 | * Various places in the kernel want to attach a journal_head to a buffer_head |
1808 | * _before_ attaching the journal_head to a transaction. To protect the | 1811 | * _before_ attaching the journal_head to a transaction. To protect the |
@@ -1815,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
1815 | * (Attach a journal_head if needed. Increments b_jcount) | 1818 | * (Attach a journal_head if needed. Increments b_jcount) |
1816 | * struct journal_head *jh = journal_add_journal_head(bh); | 1819 | * struct journal_head *jh = journal_add_journal_head(bh); |
1817 | * ... | 1820 | * ... |
1818 | * jh->b_transaction = xxx; | 1821 | * (Get another reference for transaction) |
1819 | * journal_put_journal_head(jh); | 1822 | * journal_grab_journal_head(bh); |
1820 | * | 1823 | * jh->b_transaction = xxx; |
1821 | * Now, the journal_head's b_jcount is zero, but it is safe from being released | 1824 | * (Put original reference) |
1822 | * because it has a non-zero b_transaction. | 1825 | * journal_put_journal_head(jh); |
1823 | */ | 1826 | */ |
1824 | 1827 | ||
1825 | /* | 1828 | /* |
1826 | * Give a buffer_head a journal_head. | 1829 | * Give a buffer_head a journal_head. |
1827 | * | 1830 | * |
1828 | * Doesn't need the journal lock. | ||
1829 | * May sleep. | 1831 | * May sleep. |
1830 | */ | 1832 | */ |
1831 | struct journal_head *journal_add_journal_head(struct buffer_head *bh) | 1833 | struct journal_head *journal_add_journal_head(struct buffer_head *bh) |
@@ -1889,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
1889 | struct journal_head *jh = bh2jh(bh); | 1891 | struct journal_head *jh = bh2jh(bh); |
1890 | 1892 | ||
1891 | J_ASSERT_JH(jh, jh->b_jcount >= 0); | 1893 | J_ASSERT_JH(jh, jh->b_jcount >= 0); |
1892 | 1894 | J_ASSERT_JH(jh, jh->b_transaction == NULL); | |
1893 | get_bh(bh); | 1895 | J_ASSERT_JH(jh, jh->b_next_transaction == NULL); |
1894 | if (jh->b_jcount == 0) { | 1896 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
1895 | if (jh->b_transaction == NULL && | 1897 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); |
1896 | jh->b_next_transaction == NULL && | 1898 | J_ASSERT_BH(bh, buffer_jbd(bh)); |
1897 | jh->b_cp_transaction == NULL) { | 1899 | J_ASSERT_BH(bh, jh2bh(jh) == bh); |
1898 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); | 1900 | BUFFER_TRACE(bh, "remove journal_head"); |
1899 | J_ASSERT_BH(bh, buffer_jbd(bh)); | 1901 | if (jh->b_frozen_data) { |
1900 | J_ASSERT_BH(bh, jh2bh(jh) == bh); | 1902 | printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); |
1901 | BUFFER_TRACE(bh, "remove journal_head"); | 1903 | jbd_free(jh->b_frozen_data, bh->b_size); |
1902 | if (jh->b_frozen_data) { | ||
1903 | printk(KERN_WARNING "%s: freeing " | ||
1904 | "b_frozen_data\n", | ||
1905 | __func__); | ||
1906 | jbd_free(jh->b_frozen_data, bh->b_size); | ||
1907 | } | ||
1908 | if (jh->b_committed_data) { | ||
1909 | printk(KERN_WARNING "%s: freeing " | ||
1910 | "b_committed_data\n", | ||
1911 | __func__); | ||
1912 | jbd_free(jh->b_committed_data, bh->b_size); | ||
1913 | } | ||
1914 | bh->b_private = NULL; | ||
1915 | jh->b_bh = NULL; /* debug, really */ | ||
1916 | clear_buffer_jbd(bh); | ||
1917 | __brelse(bh); | ||
1918 | journal_free_journal_head(jh); | ||
1919 | } else { | ||
1920 | BUFFER_TRACE(bh, "journal_head was locked"); | ||
1921 | } | ||
1922 | } | 1904 | } |
1905 | if (jh->b_committed_data) { | ||
1906 | printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); | ||
1907 | jbd_free(jh->b_committed_data, bh->b_size); | ||
1908 | } | ||
1909 | bh->b_private = NULL; | ||
1910 | jh->b_bh = NULL; /* debug, really */ | ||
1911 | clear_buffer_jbd(bh); | ||
1912 | journal_free_journal_head(jh); | ||
1923 | } | 1913 | } |
1924 | 1914 | ||
1925 | /* | 1915 | /* |
1926 | * journal_remove_journal_head(): if the buffer isn't attached to a transaction | 1916 | * Drop a reference on the passed journal_head. If it fell to zero then |
1927 | * and has a zero b_jcount then remove and release its journal_head. If we did | ||
1928 | * see that the buffer is not used by any transaction we also "logically" | ||
1929 | * decrement ->b_count. | ||
1930 | * | ||
1931 | * We in fact take an additional increment on ->b_count as a convenience, | ||
1932 | * because the caller usually wants to do additional things with the bh | ||
1933 | * after calling here. | ||
1934 | * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some | ||
1935 | * time. Once the caller has run __brelse(), the buffer is eligible for | ||
1936 | * reaping by try_to_free_buffers(). | ||
1937 | */ | ||
1938 | void journal_remove_journal_head(struct buffer_head *bh) | ||
1939 | { | ||
1940 | jbd_lock_bh_journal_head(bh); | ||
1941 | __journal_remove_journal_head(bh); | ||
1942 | jbd_unlock_bh_journal_head(bh); | ||
1943 | } | ||
1944 | |||
1945 | /* | ||
1946 | * Drop a reference on the passed journal_head. If it fell to zero then try to | ||
1947 | * release the journal_head from the buffer_head. | 1917 | * release the journal_head from the buffer_head. |
1948 | */ | 1918 | */ |
1949 | void journal_put_journal_head(struct journal_head *jh) | 1919 | void journal_put_journal_head(struct journal_head *jh) |
@@ -1953,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh) | |||
1953 | jbd_lock_bh_journal_head(bh); | 1923 | jbd_lock_bh_journal_head(bh); |
1954 | J_ASSERT_JH(jh, jh->b_jcount > 0); | 1924 | J_ASSERT_JH(jh, jh->b_jcount > 0); |
1955 | --jh->b_jcount; | 1925 | --jh->b_jcount; |
1956 | if (!jh->b_jcount && !jh->b_transaction) { | 1926 | if (!jh->b_jcount) { |
1957 | __journal_remove_journal_head(bh); | 1927 | __journal_remove_journal_head(bh); |
1928 | jbd_unlock_bh_journal_head(bh); | ||
1958 | __brelse(bh); | 1929 | __brelse(bh); |
1959 | } | 1930 | } else |
1960 | jbd_unlock_bh_journal_head(bh); | 1931 | jbd_unlock_bh_journal_head(bh); |
1961 | } | 1932 | } |
1962 | 1933 | ||
1963 | /* | 1934 | /* |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index f7ee81a065d..7e59c6e66f9 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | 28 | #include <linux/hrtimer.h> |
29 | #include <linux/backing-dev.h> | ||
29 | 30 | ||
30 | static void __journal_temp_unlink_buffer(struct journal_head *jh); | 31 | static void __journal_temp_unlink_buffer(struct journal_head *jh); |
31 | 32 | ||
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle) | |||
99 | 100 | ||
100 | alloc_transaction: | 101 | alloc_transaction: |
101 | if (!journal->j_running_transaction) { | 102 | if (!journal->j_running_transaction) { |
102 | new_transaction = kzalloc(sizeof(*new_transaction), | 103 | new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS); |
103 | GFP_NOFS|__GFP_NOFAIL); | ||
104 | if (!new_transaction) { | 104 | if (!new_transaction) { |
105 | ret = -ENOMEM; | 105 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
106 | goto out; | 106 | goto alloc_transaction; |
107 | } | 107 | } |
108 | } | 108 | } |
109 | 109 | ||
@@ -696,7 +696,6 @@ repeat: | |||
696 | if (!jh->b_transaction) { | 696 | if (!jh->b_transaction) { |
697 | JBUFFER_TRACE(jh, "no transaction"); | 697 | JBUFFER_TRACE(jh, "no transaction"); |
698 | J_ASSERT_JH(jh, !jh->b_next_transaction); | 698 | J_ASSERT_JH(jh, !jh->b_next_transaction); |
699 | jh->b_transaction = transaction; | ||
700 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 699 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
701 | spin_lock(&journal->j_list_lock); | 700 | spin_lock(&journal->j_list_lock); |
702 | __journal_file_buffer(jh, transaction, BJ_Reserved); | 701 | __journal_file_buffer(jh, transaction, BJ_Reserved); |
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
818 | * committed and so it's safe to clear the dirty bit. | 817 | * committed and so it's safe to clear the dirty bit. |
819 | */ | 818 | */ |
820 | clear_buffer_dirty(jh2bh(jh)); | 819 | clear_buffer_dirty(jh2bh(jh)); |
821 | jh->b_transaction = transaction; | ||
822 | 820 | ||
823 | /* first access by this transaction */ | 821 | /* first access by this transaction */ |
824 | jh->b_modified = 0; | 822 | jh->b_modified = 0; |
@@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
844 | */ | 842 | */ |
845 | JBUFFER_TRACE(jh, "cancelling revoke"); | 843 | JBUFFER_TRACE(jh, "cancelling revoke"); |
846 | journal_cancel_revoke(handle, jh); | 844 | journal_cancel_revoke(handle, jh); |
847 | journal_put_journal_head(jh); | ||
848 | out: | 845 | out: |
846 | journal_put_journal_head(jh); | ||
849 | return err; | 847 | return err; |
850 | } | 848 | } |
851 | 849 | ||
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1069 | ret = -EIO; | 1067 | ret = -EIO; |
1070 | goto no_journal; | 1068 | goto no_journal; |
1071 | } | 1069 | } |
1072 | 1070 | /* We might have slept so buffer could be refiled now */ | |
1073 | if (jh->b_transaction != NULL) { | 1071 | if (jh->b_transaction != NULL && |
1072 | jh->b_transaction != handle->h_transaction) { | ||
1074 | JBUFFER_TRACE(jh, "unfile from commit"); | 1073 | JBUFFER_TRACE(jh, "unfile from commit"); |
1075 | __journal_temp_unlink_buffer(jh); | 1074 | __journal_temp_unlink_buffer(jh); |
1076 | /* It still points to the committing | 1075 | /* It still points to the committing |
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1091 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { | 1090 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { |
1092 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); | 1091 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); |
1093 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); | 1092 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); |
1094 | __journal_temp_unlink_buffer(jh); | ||
1095 | jh->b_transaction = handle->h_transaction; | ||
1096 | JBUFFER_TRACE(jh, "file as data"); | 1093 | JBUFFER_TRACE(jh, "file as data"); |
1097 | __journal_file_buffer(jh, handle->h_transaction, | 1094 | __journal_file_buffer(jh, handle->h_transaction, |
1098 | BJ_SyncData); | 1095 | BJ_SyncData); |
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1300 | __journal_file_buffer(jh, transaction, BJ_Forget); | 1297 | __journal_file_buffer(jh, transaction, BJ_Forget); |
1301 | } else { | 1298 | } else { |
1302 | __journal_unfile_buffer(jh); | 1299 | __journal_unfile_buffer(jh); |
1303 | journal_remove_journal_head(bh); | ||
1304 | __brelse(bh); | ||
1305 | if (!buffer_jbd(bh)) { | 1300 | if (!buffer_jbd(bh)) { |
1306 | spin_unlock(&journal->j_list_lock); | 1301 | spin_unlock(&journal->j_list_lock); |
1307 | jbd_unlock_bh_state(bh); | 1302 | jbd_unlock_bh_state(bh); |
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) | |||
1622 | mark_buffer_dirty(bh); /* Expose it to the VM */ | 1617 | mark_buffer_dirty(bh); /* Expose it to the VM */ |
1623 | } | 1618 | } |
1624 | 1619 | ||
1620 | /* | ||
1621 | * Remove buffer from all transactions. | ||
1622 | * | ||
1623 | * Called with bh_state lock and j_list_lock | ||
1624 | * | ||
1625 | * jh and bh may be already freed when this function returns. | ||
1626 | */ | ||
1625 | void __journal_unfile_buffer(struct journal_head *jh) | 1627 | void __journal_unfile_buffer(struct journal_head *jh) |
1626 | { | 1628 | { |
1627 | __journal_temp_unlink_buffer(jh); | 1629 | __journal_temp_unlink_buffer(jh); |
1628 | jh->b_transaction = NULL; | 1630 | jh->b_transaction = NULL; |
1631 | journal_put_journal_head(jh); | ||
1629 | } | 1632 | } |
1630 | 1633 | ||
1631 | void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) | 1634 | void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) |
1632 | { | 1635 | { |
1633 | jbd_lock_bh_state(jh2bh(jh)); | 1636 | struct buffer_head *bh = jh2bh(jh); |
1637 | |||
1638 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
1639 | get_bh(bh); | ||
1640 | jbd_lock_bh_state(bh); | ||
1634 | spin_lock(&journal->j_list_lock); | 1641 | spin_lock(&journal->j_list_lock); |
1635 | __journal_unfile_buffer(jh); | 1642 | __journal_unfile_buffer(jh); |
1636 | spin_unlock(&journal->j_list_lock); | 1643 | spin_unlock(&journal->j_list_lock); |
1637 | jbd_unlock_bh_state(jh2bh(jh)); | 1644 | jbd_unlock_bh_state(bh); |
1645 | __brelse(bh); | ||
1638 | } | 1646 | } |
1639 | 1647 | ||
1640 | /* | 1648 | /* |
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1661 | /* A written-back ordered data buffer */ | 1669 | /* A written-back ordered data buffer */ |
1662 | JBUFFER_TRACE(jh, "release data"); | 1670 | JBUFFER_TRACE(jh, "release data"); |
1663 | __journal_unfile_buffer(jh); | 1671 | __journal_unfile_buffer(jh); |
1664 | journal_remove_journal_head(bh); | ||
1665 | __brelse(bh); | ||
1666 | } | 1672 | } |
1667 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1673 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1668 | /* written-back checkpointed metadata buffer */ | 1674 | /* written-back checkpointed metadata buffer */ |
1669 | if (jh->b_jlist == BJ_None) { | 1675 | if (jh->b_jlist == BJ_None) { |
1670 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1676 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1671 | __journal_remove_checkpoint(jh); | 1677 | __journal_remove_checkpoint(jh); |
1672 | journal_remove_journal_head(bh); | ||
1673 | __brelse(bh); | ||
1674 | } | 1678 | } |
1675 | } | 1679 | } |
1676 | spin_unlock(&journal->j_list_lock); | 1680 | spin_unlock(&journal->j_list_lock); |
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal, | |||
1733 | /* | 1737 | /* |
1734 | * We take our own ref against the journal_head here to avoid | 1738 | * We take our own ref against the journal_head here to avoid |
1735 | * having to add tons of locking around each instance of | 1739 | * having to add tons of locking around each instance of |
1736 | * journal_remove_journal_head() and journal_put_journal_head(). | 1740 | * journal_put_journal_head(). |
1737 | */ | 1741 | */ |
1738 | jh = journal_grab_journal_head(bh); | 1742 | jh = journal_grab_journal_head(bh); |
1739 | if (!jh) | 1743 | if (!jh) |
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1770 | int may_free = 1; | 1774 | int may_free = 1; |
1771 | struct buffer_head *bh = jh2bh(jh); | 1775 | struct buffer_head *bh = jh2bh(jh); |
1772 | 1776 | ||
1773 | __journal_unfile_buffer(jh); | ||
1774 | |||
1775 | if (jh->b_cp_transaction) { | 1777 | if (jh->b_cp_transaction) { |
1776 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1778 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1779 | __journal_temp_unlink_buffer(jh); | ||
1777 | /* | 1780 | /* |
1778 | * We don't want to write the buffer anymore, clear the | 1781 | * We don't want to write the buffer anymore, clear the |
1779 | * bit so that we don't confuse checks in | 1782 | * bit so that we don't confuse checks in |
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1784 | may_free = 0; | 1787 | may_free = 0; |
1785 | } else { | 1788 | } else { |
1786 | JBUFFER_TRACE(jh, "on running transaction"); | 1789 | JBUFFER_TRACE(jh, "on running transaction"); |
1787 | journal_remove_journal_head(bh); | 1790 | __journal_unfile_buffer(jh); |
1788 | __brelse(bh); | ||
1789 | } | 1791 | } |
1790 | return may_free; | 1792 | return may_free; |
1791 | } | 1793 | } |
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh, | |||
2070 | 2072 | ||
2071 | if (jh->b_transaction) | 2073 | if (jh->b_transaction) |
2072 | __journal_temp_unlink_buffer(jh); | 2074 | __journal_temp_unlink_buffer(jh); |
2075 | else | ||
2076 | journal_grab_journal_head(bh); | ||
2073 | jh->b_transaction = transaction; | 2077 | jh->b_transaction = transaction; |
2074 | 2078 | ||
2075 | switch (jlist) { | 2079 | switch (jlist) { |
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh, | |||
2127 | * already started to be used by a subsequent transaction, refile the | 2131 | * already started to be used by a subsequent transaction, refile the |
2128 | * buffer on that transaction's metadata list. | 2132 | * buffer on that transaction's metadata list. |
2129 | * | 2133 | * |
2130 | * Called under journal->j_list_lock | 2134 | * Called under j_list_lock |
2131 | * | ||
2132 | * Called under jbd_lock_bh_state(jh2bh(jh)) | 2135 | * Called under jbd_lock_bh_state(jh2bh(jh)) |
2136 | * | ||
2137 | * jh and bh may be already free when this function returns | ||
2133 | */ | 2138 | */ |
2134 | void __journal_refile_buffer(struct journal_head *jh) | 2139 | void __journal_refile_buffer(struct journal_head *jh) |
2135 | { | 2140 | { |
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh) | |||
2153 | 2158 | ||
2154 | was_dirty = test_clear_buffer_jbddirty(bh); | 2159 | was_dirty = test_clear_buffer_jbddirty(bh); |
2155 | __journal_temp_unlink_buffer(jh); | 2160 | __journal_temp_unlink_buffer(jh); |
2161 | /* | ||
2162 | * We set b_transaction here because b_next_transaction will inherit | ||
2163 | * our jh reference and thus __journal_file_buffer() must not take a | ||
2164 | * new one. | ||
2165 | */ | ||
2156 | jh->b_transaction = jh->b_next_transaction; | 2166 | jh->b_transaction = jh->b_next_transaction; |
2157 | jh->b_next_transaction = NULL; | 2167 | jh->b_next_transaction = NULL; |
2158 | if (buffer_freed(bh)) | 2168 | if (buffer_freed(bh)) |
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh) | |||
2169 | } | 2179 | } |
2170 | 2180 | ||
2171 | /* | 2181 | /* |
2172 | * For the unlocked version of this call, also make sure that any | 2182 | * __journal_refile_buffer() with necessary locking added. We take our bh |
2173 | * hanging journal_head is cleaned up if necessary. | 2183 | * reference so that we can safely unlock bh. |
2174 | * | 2184 | * |
2175 | * __journal_refile_buffer is usually called as part of a single locked | 2185 | * The jh and bh may be freed by this call. |
2176 | * operation on a buffer_head, in which the caller is probably going to | ||
2177 | * be hooking the journal_head onto other lists. In that case it is up | ||
2178 | * to the caller to remove the journal_head if necessary. For the | ||
2179 | * unlocked journal_refile_buffer call, the caller isn't going to be | ||
2180 | * doing anything else to the buffer so we need to do the cleanup | ||
2181 | * ourselves to avoid a jh leak. | ||
2182 | * | ||
2183 | * *** The journal_head may be freed by this call! *** | ||
2184 | */ | 2186 | */ |
2185 | void journal_refile_buffer(journal_t *journal, struct journal_head *jh) | 2187 | void journal_refile_buffer(journal_t *journal, struct journal_head *jh) |
2186 | { | 2188 | { |
2187 | struct buffer_head *bh = jh2bh(jh); | 2189 | struct buffer_head *bh = jh2bh(jh); |
2188 | 2190 | ||
2191 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
2192 | get_bh(bh); | ||
2189 | jbd_lock_bh_state(bh); | 2193 | jbd_lock_bh_state(bh); |
2190 | spin_lock(&journal->j_list_lock); | 2194 | spin_lock(&journal->j_list_lock); |
2191 | |||
2192 | __journal_refile_buffer(jh); | 2195 | __journal_refile_buffer(jh); |
2193 | jbd_unlock_bh_state(bh); | 2196 | jbd_unlock_bh_state(bh); |
2194 | journal_remove_journal_head(bh); | ||
2195 | |||
2196 | spin_unlock(&journal->j_list_lock); | 2197 | spin_unlock(&journal->j_list_lock); |
2197 | __brelse(bh); | 2198 | __brelse(bh); |
2198 | } | 2199 | } |