aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/ext3.txt13
-rw-r--r--Documentation/filesystems/ext4.txt23
-rw-r--r--fs/ext2/xattr.c10
-rw-r--r--fs/ext3/balloc.c38
-rw-r--r--fs/ext3/file.c1
-rw-r--r--fs/ext3/fsync.c11
-rw-r--r--fs/ext3/ialloc.c4
-rw-r--r--fs/ext3/inode.c193
-rw-r--r--fs/ext3/ioctl.c4
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c13
-rw-r--r--fs/ext3/xattr.c12
-rw-r--r--fs/jbd/checkpoint.c37
-rw-r--r--fs/jbd/commit.c57
-rw-r--r--fs/jbd/journal.c99
-rw-r--r--fs/jbd/transaction.c83
-rw-r--r--include/linux/ext2_fs.h1
-rw-r--r--include/linux/ext3_fs.h7
-rw-r--r--include/linux/jbd.h1
-rw-r--r--include/linux/journal-head.h2
-rw-r--r--include/linux/quota.h8
-rw-r--r--include/trace/events/ext3.h864
-rw-r--r--include/trace/events/jbd.h203
23 files changed, 1420 insertions, 271 deletions
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 272f80d5f96..22f3a0eda1d 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -147,15 +147,6 @@ grpjquota=<file> during journal replay. They replace the above
147 package for more details 147 package for more details
148 (http://sourceforge.net/projects/linuxquota). 148 (http://sourceforge.net/projects/linuxquota).
149 149
150bh (*) ext3 associates buffer heads to data pages to
151nobh (a) cache disk block mapping information
152 (b) link pages into transaction to provide
153 ordering guarantees.
154 "bh" option forces use of buffer heads.
155 "nobh" option tries to avoid associating buffer
156 heads (supported only for "writeback" mode).
157
158
159Specification 150Specification
160============= 151=============
161Ext3 shares all disk implementation with the ext2 filesystem, and adds 152Ext3 shares all disk implementation with the ext2 filesystem, and adds
@@ -227,5 +218,5 @@ kernel source: <file:fs/ext3/>
227programs: http://e2fsprogs.sourceforge.net/ 218programs: http://e2fsprogs.sourceforge.net/
228 http://ext2resize.sourceforge.net 219 http://ext2resize.sourceforge.net
229 220
230useful links: http://www.ibm.com/developerworks/library/l-fs7.html 221useful links: http://www.ibm.com/developerworks/library/l-fs7/index.html
231 http://www.ibm.com/developerworks/library/l-fs8.html 222 http://www.ibm.com/developerworks/library/l-fs8/index.html
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 3ae9bc94352..232a575a0c4 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -68,12 +68,12 @@ Note: More extensive information for getting started with ext4 can be
68 '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems 68 '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems
69 for a fair comparison. When tuning ext3 for best benchmark numbers, 69 for a fair comparison. When tuning ext3 for best benchmark numbers,
70 it is often worthwhile to try changing the data journaling mode; '-o 70 it is often worthwhile to try changing the data journaling mode; '-o
71 data=writeback,nobh' can be faster for some workloads. (Note 71 data=writeback' can be faster for some workloads. (Note however that
72 however that running mounted with data=writeback can potentially 72 running mounted with data=writeback can potentially leave stale data
73 leave stale data exposed in recently written files in case of an 73 exposed in recently written files in case of an unclean shutdown,
74 unclean shutdown, which could be a security exposure in some 74 which could be a security exposure in some situations.) Configuring
75 situations.) Configuring the filesystem with a large journal can 75 the filesystem with a large journal can also be helpful for
76 also be helpful for metadata-intensive workloads. 76 metadata-intensive workloads.
77 77
782. Features 782. Features
79=========== 79===========
@@ -272,14 +272,6 @@ grpjquota=<file> during journal replay. They replace the above
272 package for more details 272 package for more details
273 (http://sourceforge.net/projects/linuxquota). 273 (http://sourceforge.net/projects/linuxquota).
274 274
275bh (*) ext4 associates buffer heads to data pages to
276nobh (a) cache disk block mapping information
277 (b) link pages into transaction to provide
278 ordering guarantees.
279 "bh" option forces use of buffer heads.
280 "nobh" option tries to avoid associating buffer
281 heads (supported only for "writeback" mode).
282
283stripe=n Number of filesystem blocks that mballoc will try 275stripe=n Number of filesystem blocks that mballoc will try
284 to use for allocation size and alignment. For RAID5/6 276 to use for allocation size and alignment. For RAID5/6
285 systems this should be the number of data 277 systems this should be the number of data
@@ -393,8 +385,7 @@ dioread_nolock locking. If the dioread_nolock option is specified
393 write and convert the extent to initialized after IO 385 write and convert the extent to initialized after IO
394 completes. This approach allows ext4 code to avoid 386 completes. This approach allows ext4 code to avoid
395 using inode mutex, which improves scalability on high 387 using inode mutex, which improves scalability on high
396 speed storages. However this does not work with nobh 388 speed storages. However this does not work with
397 option and the mount will fail. Nor does it work with
398 data journaling and dioread_nolock option will be 389 data journaling and dioread_nolock option will be
399 ignored with kernel warning. Note that dioread_nolock 390 ignored with kernel warning. Note that dioread_nolock
400 code path is only used for extent-based files. 391 code path is only used for extent-based files.
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 529970617a2..d27b71f1d18 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -161,6 +161,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
161 161
162 if (name == NULL) 162 if (name == NULL)
163 return -EINVAL; 163 return -EINVAL;
164 name_len = strlen(name);
165 if (name_len > 255)
166 return -ERANGE;
167
164 down_read(&EXT2_I(inode)->xattr_sem); 168 down_read(&EXT2_I(inode)->xattr_sem);
165 error = -ENODATA; 169 error = -ENODATA;
166 if (!EXT2_I(inode)->i_file_acl) 170 if (!EXT2_I(inode)->i_file_acl)
@@ -181,12 +185,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
181 error = -EIO; 185 error = -EIO;
182 goto cleanup; 186 goto cleanup;
183 } 187 }
184 /* find named attribute */
185 name_len = strlen(name);
186 188
187 error = -ERANGE; 189 /* find named attribute */
188 if (name_len > 255)
189 goto cleanup;
190 entry = FIRST_ENTRY(bh); 190 entry = FIRST_ENTRY(bh);
191 while (!IS_LAST_ENTRY(entry)) { 191 while (!IS_LAST_ENTRY(entry)) {
192 struct ext2_xattr_entry *next = 192 struct ext2_xattr_entry *next =
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index fe52297e31a..6386d76f44a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -21,6 +21,7 @@
21#include <linux/quotaops.h> 21#include <linux/quotaops.h>
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <trace/events/ext3.h>
24 25
25/* 26/*
26 * balloc.c contains the blocks allocation and deallocation routines 27 * balloc.c contains the blocks allocation and deallocation routines
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
161 desc = ext3_get_group_desc(sb, block_group, NULL); 162 desc = ext3_get_group_desc(sb, block_group, NULL);
162 if (!desc) 163 if (!desc)
163 return NULL; 164 return NULL;
165 trace_ext3_read_block_bitmap(sb, block_group);
164 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); 166 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
165 bh = sb_getblk(sb, bitmap_blk); 167 bh = sb_getblk(sb, bitmap_blk);
166 if (unlikely(!bh)) { 168 if (unlikely(!bh)) {
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
351 struct rb_node * parent = NULL; 353 struct rb_node * parent = NULL;
352 struct ext3_reserve_window_node *this; 354 struct ext3_reserve_window_node *this;
353 355
356 trace_ext3_rsv_window_add(sb, rsv);
354 while (*p) 357 while (*p)
355 { 358 {
356 parent = *p; 359 parent = *p;
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
476 rsv = &block_i->rsv_window_node; 479 rsv = &block_i->rsv_window_node;
477 if (!rsv_is_empty(&rsv->rsv_window)) { 480 if (!rsv_is_empty(&rsv->rsv_window)) {
478 spin_lock(rsv_lock); 481 spin_lock(rsv_lock);
479 if (!rsv_is_empty(&rsv->rsv_window)) 482 if (!rsv_is_empty(&rsv->rsv_window)) {
483 trace_ext3_discard_reservation(inode, rsv);
480 rsv_window_remove(inode->i_sb, rsv); 484 rsv_window_remove(inode->i_sb, rsv);
485 }
481 spin_unlock(rsv_lock); 486 spin_unlock(rsv_lock);
482 } 487 }
483} 488}
@@ -683,14 +688,10 @@ error_return:
683void ext3_free_blocks(handle_t *handle, struct inode *inode, 688void ext3_free_blocks(handle_t *handle, struct inode *inode,
684 ext3_fsblk_t block, unsigned long count) 689 ext3_fsblk_t block, unsigned long count)
685{ 690{
686 struct super_block * sb; 691 struct super_block *sb = inode->i_sb;
687 unsigned long dquot_freed_blocks; 692 unsigned long dquot_freed_blocks;
688 693
689 sb = inode->i_sb; 694 trace_ext3_free_blocks(inode, block, count);
690 if (!sb) {
691 printk ("ext3_free_blocks: nonexistent device");
692 return;
693 }
694 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); 695 ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
695 if (dquot_freed_blocks) 696 if (dquot_freed_blocks)
696 dquot_free_block(inode, dquot_freed_blocks); 697 dquot_free_block(inode, dquot_freed_blocks);
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
1136 else 1137 else
1137 start_block = grp_goal + group_first_block; 1138 start_block = grp_goal + group_first_block;
1138 1139
1140 trace_ext3_alloc_new_reservation(sb, start_block);
1139 size = my_rsv->rsv_goal_size; 1141 size = my_rsv->rsv_goal_size;
1140 1142
1141 if (!rsv_is_empty(&my_rsv->rsv_window)) { 1143 if (!rsv_is_empty(&my_rsv->rsv_window)) {
@@ -1230,8 +1232,11 @@ retry:
1230 * check if the first free block is within the 1232 * check if the first free block is within the
1231 * free space we just reserved 1233 * free space we just reserved
1232 */ 1234 */
1233 if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) 1235 if (start_block >= my_rsv->rsv_start &&
1236 start_block <= my_rsv->rsv_end) {
1237 trace_ext3_reserved(sb, start_block, my_rsv);
1234 return 0; /* success */ 1238 return 0; /* success */
1239 }
1235 /* 1240 /*
1236 * if the first free bit we found is out of the reservable space 1241 * if the first free bit we found is out of the reservable space
1237 * continue search for next reservable space, 1242 * continue search for next reservable space,
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1514 1519
1515 *errp = -ENOSPC; 1520 *errp = -ENOSPC;
1516 sb = inode->i_sb; 1521 sb = inode->i_sb;
1517 if (!sb) {
1518 printk("ext3_new_block: nonexistent device");
1519 return 0;
1520 }
1521 1522
1522 /* 1523 /*
1523 * Check quota for allocation of this block. 1524 * Check quota for allocation of this block.
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1528 return 0; 1529 return 0;
1529 } 1530 }
1530 1531
1532 trace_ext3_request_blocks(inode, goal, num);
1533
1531 sbi = EXT3_SB(sb); 1534 sbi = EXT3_SB(sb);
1532 es = EXT3_SB(sb)->s_es; 1535 es = sbi->s_es;
1533 ext3_debug("goal=%lu.\n", goal); 1536 ext3_debug("goal=%lu.\n", goal);
1534 /* 1537 /*
1535 * Allocate a block from reservation only when 1538 * Allocate a block from reservation only when
@@ -1742,6 +1745,10 @@ allocated:
1742 brelse(bitmap_bh); 1745 brelse(bitmap_bh);
1743 dquot_free_block(inode, *count-num); 1746 dquot_free_block(inode, *count-num);
1744 *count = num; 1747 *count = num;
1748
1749 trace_ext3_allocate_blocks(inode, goal, num,
1750 (unsigned long long)ret_block);
1751
1745 return ret_block; 1752 return ret_block;
1746 1753
1747io_error: 1754io_error:
@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
1996 if ((next - start) < minblocks) 2003 if ((next - start) < minblocks)
1997 goto free_extent; 2004 goto free_extent;
1998 2005
2006 trace_ext3_discard_blocks(sb, discard_block, next - start);
1999 /* Send the TRIM command down to the device */ 2007 /* Send the TRIM command down to the device */
2000 err = sb_issue_discard(sb, discard_block, next - start, 2008 err = sb_issue_discard(sb, discard_block, next - start,
2001 GFP_NOFS, 0); 2009 GFP_NOFS, 0);
@@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2100 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) 2108 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
2101 return -EINVAL; 2109 return -EINVAL;
2102 if (start >= max_blks) 2110 if (start >= max_blks)
2103 goto out; 2111 return -EINVAL;
2104 if (start + len > max_blks) 2112 if (start + len > max_blks)
2105 len = max_blks - start; 2113 len = max_blks - start;
2106 2114
@@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2148 2156
2149 if (ret >= 0) 2157 if (ret >= 0)
2150 ret = 0; 2158 ret = 0;
2151
2152out:
2153 range->len = trimmed * sb->s_blocksize; 2159 range->len = trimmed * sb->s_blocksize;
2154 2160
2155 return ret; 2161 return ret;
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 2be5b99097f..724df69847d 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
71}; 71};
72 72
73const struct inode_operations ext3_file_inode_operations = { 73const struct inode_operations ext3_file_inode_operations = {
74 .truncate = ext3_truncate,
75 .setattr = ext3_setattr, 74 .setattr = ext3_setattr,
76#ifdef CONFIG_EXT3_FS_XATTR 75#ifdef CONFIG_EXT3_FS_XATTR
77 .setxattr = generic_setxattr, 76 .setxattr = generic_setxattr,
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 0bcf63adb80..d494c554c6e 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -30,6 +30,7 @@
30#include <linux/jbd.h> 30#include <linux/jbd.h>
31#include <linux/ext3_fs.h> 31#include <linux/ext3_fs.h>
32#include <linux/ext3_jbd.h> 32#include <linux/ext3_jbd.h>
33#include <trace/events/ext3.h>
33 34
34/* 35/*
35 * akpm: A new design for ext3_sync_file(). 36 * akpm: A new design for ext3_sync_file().
@@ -51,12 +52,14 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
51 int ret, needs_barrier = 0; 52 int ret, needs_barrier = 0;
52 tid_t commit_tid; 53 tid_t commit_tid;
53 54
55 trace_ext3_sync_file_enter(file, datasync);
56
54 if (inode->i_sb->s_flags & MS_RDONLY) 57 if (inode->i_sb->s_flags & MS_RDONLY)
55 return 0; 58 return 0;
56 59
57 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 60 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
58 if (ret) 61 if (ret)
59 return ret; 62 goto out;
60 63
61 /* 64 /*
62 * Taking the mutex here just to keep consistent with how fsync was 65 * Taking the mutex here just to keep consistent with how fsync was
@@ -83,7 +86,8 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
83 */ 86 */
84 if (ext3_should_journal_data(inode)) { 87 if (ext3_should_journal_data(inode)) {
85 mutex_unlock(&inode->i_mutex); 88 mutex_unlock(&inode->i_mutex);
86 return ext3_force_commit(inode->i_sb); 89 ret = ext3_force_commit(inode->i_sb);
90 goto out;
87 } 91 }
88 92
89 if (datasync) 93 if (datasync)
@@ -104,6 +108,9 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
104 */ 108 */
105 if (needs_barrier) 109 if (needs_barrier)
106 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 110 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
111
107 mutex_unlock(&inode->i_mutex); 112 mutex_unlock(&inode->i_mutex);
113out:
114 trace_ext3_sync_file_exit(inode, ret);
108 return ret; 115 return ret;
109} 116}
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bfc2dc43681..bf09cbf938c 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -23,6 +23,7 @@
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/random.h> 24#include <linux/random.h>
25#include <linux/bitops.h> 25#include <linux/bitops.h>
26#include <trace/events/ext3.h>
26 27
27#include <asm/byteorder.h> 28#include <asm/byteorder.h>
28 29
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
118 119
119 ino = inode->i_ino; 120 ino = inode->i_ino;
120 ext3_debug ("freeing inode %lu\n", ino); 121 ext3_debug ("freeing inode %lu\n", ino);
122 trace_ext3_free_inode(inode);
121 123
122 is_directory = S_ISDIR(inode->i_mode); 124 is_directory = S_ISDIR(inode->i_mode);
123 125
@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
426 return ERR_PTR(-EPERM); 428 return ERR_PTR(-EPERM);
427 429
428 sb = dir->i_sb; 430 sb = dir->i_sb;
431 trace_ext3_request_inode(dir, mode);
429 inode = new_inode(sb); 432 inode = new_inode(sb);
430 if (!inode) 433 if (!inode)
431 return ERR_PTR(-ENOMEM); 434 return ERR_PTR(-ENOMEM);
@@ -601,6 +604,7 @@ got:
601 } 604 }
602 605
603 ext3_debug("allocating inode %lu\n", inode->i_ino); 606 ext3_debug("allocating inode %lu\n", inode->i_ino);
607 trace_ext3_allocate_inode(inode, dir, mode);
604 goto really_out; 608 goto really_out;
605fail: 609fail:
606 ext3_std_error(sb, err); 610 ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2978a2a17a5..04da6acde85 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,10 +38,12 @@
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/fiemap.h> 39#include <linux/fiemap.h>
40#include <linux/namei.h> 40#include <linux/namei.h>
41#include <trace/events/ext3.h>
41#include "xattr.h" 42#include "xattr.h"
42#include "acl.h" 43#include "acl.h"
43 44
44static int ext3_writepage_trans_blocks(struct inode *inode); 45static int ext3_writepage_trans_blocks(struct inode *inode);
46static int ext3_block_truncate_page(struct inode *inode, loff_t from);
45 47
46/* 48/*
47 * Test whether an inode is a fast symlink. 49 * Test whether an inode is a fast symlink.
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
70 72
71 might_sleep(); 73 might_sleep();
72 74
75 trace_ext3_forget(inode, is_metadata, blocknr);
73 BUFFER_TRACE(bh, "enter"); 76 BUFFER_TRACE(bh, "enter");
74 77
75 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " 78 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
194 */ 197 */
195void ext3_evict_inode (struct inode *inode) 198void ext3_evict_inode (struct inode *inode)
196{ 199{
200 struct ext3_inode_info *ei = EXT3_I(inode);
197 struct ext3_block_alloc_info *rsv; 201 struct ext3_block_alloc_info *rsv;
198 handle_t *handle; 202 handle_t *handle;
199 int want_delete = 0; 203 int want_delete = 0;
200 204
205 trace_ext3_evict_inode(inode);
201 if (!inode->i_nlink && !is_bad_inode(inode)) { 206 if (!inode->i_nlink && !is_bad_inode(inode)) {
202 dquot_initialize(inode); 207 dquot_initialize(inode);
203 want_delete = 1; 208 want_delete = 1;
204 } 209 }
205 210
211 /*
212 * When journalling data dirty buffers are tracked only in the journal.
213 * So although mm thinks everything is clean and ready for reaping the
214 * inode might still have some pages to write in the running
215 * transaction or waiting to be checkpointed. Thus calling
216 * journal_invalidatepage() (via truncate_inode_pages()) to discard
217 * these buffers can cause data loss. Also even if we did not discard
218 * these buffers, we would have no way to find them after the inode
219 * is reaped and thus user could see stale data if he tries to read
220 * them before the transaction is checkpointed. So be careful and
221 * force everything to disk here... We use ei->i_datasync_tid to
222 * store the newest transaction containing inode's data.
223 *
224 * Note that directories do not have this problem because they don't
225 * use page cache.
226 */
227 if (inode->i_nlink && ext3_should_journal_data(inode) &&
228 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
229 tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
230 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
231
232 log_start_commit(journal, commit_tid);
233 log_wait_commit(journal, commit_tid);
234 filemap_write_and_wait(&inode->i_data);
235 }
206 truncate_inode_pages(&inode->i_data, 0); 236 truncate_inode_pages(&inode->i_data, 0);
207 237
208 ext3_discard_reservation(inode); 238 ext3_discard_reservation(inode);
209 rsv = EXT3_I(inode)->i_block_alloc_info; 239 rsv = ei->i_block_alloc_info;
210 EXT3_I(inode)->i_block_alloc_info = NULL; 240 ei->i_block_alloc_info = NULL;
211 if (unlikely(rsv)) 241 if (unlikely(rsv))
212 kfree(rsv); 242 kfree(rsv);
213 243
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
231 if (inode->i_blocks) 261 if (inode->i_blocks)
232 ext3_truncate(inode); 262 ext3_truncate(inode);
233 /* 263 /*
234 * Kill off the orphan record which ext3_truncate created. 264 * Kill off the orphan record created when the inode lost the last
235 * AKPM: I think this can be inside the above `if'. 265 * link. Note that ext3_orphan_del() has to be able to cope with the
236 * Note that ext3_orphan_del() has to be able to cope with the 266 * deletion of a non-existent orphan - ext3_truncate() could
237 * deletion of a non-existent orphan - this is because we don't 267 * have removed the record.
238 * know if ext3_truncate() actually created an orphan record.
239 * (Well, we could do this if we need to, but heck - it works)
240 */ 268 */
241 ext3_orphan_del(handle, inode); 269 ext3_orphan_del(handle, inode);
242 EXT3_I(inode)->i_dtime = get_seconds(); 270 ei->i_dtime = get_seconds();
243 271
244 /* 272 /*
245 * One subtle ordering requirement: if anything has gone wrong 273 * One subtle ordering requirement: if anything has gone wrong
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
842 ext3_fsblk_t first_block = 0; 870 ext3_fsblk_t first_block = 0;
843 871
844 872
873 trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
845 J_ASSERT(handle != NULL || create == 0); 874 J_ASSERT(handle != NULL || create == 0);
846 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); 875 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
847 876
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
886 if (!create || err == -EIO) 915 if (!create || err == -EIO)
887 goto cleanup; 916 goto cleanup;
888 917
918 /*
919 * Block out ext3_truncate while we alter the tree
920 */
889 mutex_lock(&ei->truncate_mutex); 921 mutex_lock(&ei->truncate_mutex);
890 922
891 /* 923 /*
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
934 */ 966 */
935 count = ext3_blks_to_allocate(partial, indirect_blks, 967 count = ext3_blks_to_allocate(partial, indirect_blks,
936 maxblocks, blocks_to_boundary); 968 maxblocks, blocks_to_boundary);
937 /*
938 * Block out ext3_truncate while we alter the tree
939 */
940 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, 969 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
941 offsets + (partial - chain), partial); 970 offsets + (partial - chain), partial);
942 971
@@ -970,6 +999,9 @@ cleanup:
970 } 999 }
971 BUFFER_TRACE(bh_result, "returned"); 1000 BUFFER_TRACE(bh_result, "returned");
972out: 1001out:
1002 trace_ext3_get_blocks_exit(inode, iblock,
1003 depth ? le32_to_cpu(chain[depth-1].key) : 0,
1004 count, err);
973 return err; 1005 return err;
974} 1006}
975 1007
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
1202 ext3_truncate(inode); 1234 ext3_truncate(inode);
1203} 1235}
1204 1236
1237/*
1238 * Truncate blocks that were not used by direct IO write. We have to zero out
1239 * the last file block as well because direct IO might have written to it.
1240 */
1241static void ext3_truncate_failed_direct_write(struct inode *inode)
1242{
1243 ext3_block_truncate_page(inode, inode->i_size);
1244 ext3_truncate(inode);
1245}
1246
1205static int ext3_write_begin(struct file *file, struct address_space *mapping, 1247static int ext3_write_begin(struct file *file, struct address_space *mapping,
1206 loff_t pos, unsigned len, unsigned flags, 1248 loff_t pos, unsigned len, unsigned flags,
1207 struct page **pagep, void **fsdata) 1249 struct page **pagep, void **fsdata)
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
1217 * we allocate blocks but write fails for some reason */ 1259 * we allocate blocks but write fails for some reason */
1218 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; 1260 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
1219 1261
1262 trace_ext3_write_begin(inode, pos, len, flags);
1263
1220 index = pos >> PAGE_CACHE_SHIFT; 1264 index = pos >> PAGE_CACHE_SHIFT;
1221 from = pos & (PAGE_CACHE_SIZE - 1); 1265 from = pos & (PAGE_CACHE_SIZE - 1);
1222 to = from + len; 1266 to = from + len;
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
1332 unsigned from, to; 1376 unsigned from, to;
1333 int ret = 0, ret2; 1377 int ret = 0, ret2;
1334 1378
1379 trace_ext3_ordered_write_end(inode, pos, len, copied);
1335 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 1380 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1336 1381
1337 from = pos & (PAGE_CACHE_SIZE - 1); 1382 from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
1367 struct inode *inode = file->f_mapping->host; 1412 struct inode *inode = file->f_mapping->host;
1368 int ret; 1413 int ret;
1369 1414
1415 trace_ext3_writeback_write_end(inode, pos, len, copied);
1370 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 1416 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1371 update_file_sizes(inode, pos, copied); 1417 update_file_sizes(inode, pos, copied);
1372 /* 1418 /*
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
1391{ 1437{
1392 handle_t *handle = ext3_journal_current_handle(); 1438 handle_t *handle = ext3_journal_current_handle();
1393 struct inode *inode = mapping->host; 1439 struct inode *inode = mapping->host;
1440 struct ext3_inode_info *ei = EXT3_I(inode);
1394 int ret = 0, ret2; 1441 int ret = 0, ret2;
1395 int partial = 0; 1442 int partial = 0;
1396 unsigned from, to; 1443 unsigned from, to;
1397 1444
1445 trace_ext3_journalled_write_end(inode, pos, len, copied);
1398 from = pos & (PAGE_CACHE_SIZE - 1); 1446 from = pos & (PAGE_CACHE_SIZE - 1);
1399 to = from + len; 1447 to = from + len;
1400 1448
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
1419 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1467 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1420 ext3_orphan_add(handle, inode); 1468 ext3_orphan_add(handle, inode);
1421 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1469 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1422 if (inode->i_size > EXT3_I(inode)->i_disksize) { 1470 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
1423 EXT3_I(inode)->i_disksize = inode->i_size; 1471 if (inode->i_size > ei->i_disksize) {
1472 ei->i_disksize = inode->i_size;
1424 ret2 = ext3_mark_inode_dirty(handle, inode); 1473 ret2 = ext3_mark_inode_dirty(handle, inode);
1425 if (!ret) 1474 if (!ret)
1426 ret = ret2; 1475 ret = ret2;
@@ -1577,6 +1626,7 @@ static int ext3_ordered_writepage(struct page *page,
1577 if (ext3_journal_current_handle()) 1626 if (ext3_journal_current_handle())
1578 goto out_fail; 1627 goto out_fail;
1579 1628
1629 trace_ext3_ordered_writepage(page);
1580 if (!page_has_buffers(page)) { 1630 if (!page_has_buffers(page)) {
1581 create_empty_buffers(page, inode->i_sb->s_blocksize, 1631 create_empty_buffers(page, inode->i_sb->s_blocksize,
1582 (1 << BH_Dirty)|(1 << BH_Uptodate)); 1632 (1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1647,6 +1697,7 @@ static int ext3_writeback_writepage(struct page *page,
1647 if (ext3_journal_current_handle()) 1697 if (ext3_journal_current_handle())
1648 goto out_fail; 1698 goto out_fail;
1649 1699
1700 trace_ext3_writeback_writepage(page);
1650 if (page_has_buffers(page)) { 1701 if (page_has_buffers(page)) {
1651 if (!walk_page_buffers(NULL, page_buffers(page), 0, 1702 if (!walk_page_buffers(NULL, page_buffers(page), 0,
1652 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { 1703 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1689,6 +1740,7 @@ static int ext3_journalled_writepage(struct page *page,
1689 if (ext3_journal_current_handle()) 1740 if (ext3_journal_current_handle())
1690 goto no_write; 1741 goto no_write;
1691 1742
1743 trace_ext3_journalled_writepage(page);
1692 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1744 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1693 if (IS_ERR(handle)) { 1745 if (IS_ERR(handle)) {
1694 ret = PTR_ERR(handle); 1746 ret = PTR_ERR(handle);
@@ -1715,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page,
1715 if (ret == 0) 1767 if (ret == 0)
1716 ret = err; 1768 ret = err;
1717 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1769 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1770 atomic_set(&EXT3_I(inode)->i_datasync_tid,
1771 handle->h_transaction->t_tid);
1718 unlock_page(page); 1772 unlock_page(page);
1719 } else { 1773 } else {
1720 /* 1774 /*
@@ -1739,6 +1793,7 @@ out_unlock:
1739 1793
1740static int ext3_readpage(struct file *file, struct page *page) 1794static int ext3_readpage(struct file *file, struct page *page)
1741{ 1795{
1796 trace_ext3_readpage(page);
1742 return mpage_readpage(page, ext3_get_block); 1797 return mpage_readpage(page, ext3_get_block);
1743} 1798}
1744 1799
@@ -1753,6 +1808,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
1753{ 1808{
1754 journal_t *journal = EXT3_JOURNAL(page->mapping->host); 1809 journal_t *journal = EXT3_JOURNAL(page->mapping->host);
1755 1810
1811 trace_ext3_invalidatepage(page, offset);
1812
1756 /* 1813 /*
1757 * If it's a full truncate we just forget about the pending dirtying 1814 * If it's a full truncate we just forget about the pending dirtying
1758 */ 1815 */
@@ -1766,6 +1823,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
1766{ 1823{
1767 journal_t *journal = EXT3_JOURNAL(page->mapping->host); 1824 journal_t *journal = EXT3_JOURNAL(page->mapping->host);
1768 1825
1826 trace_ext3_releasepage(page);
1769 WARN_ON(PageChecked(page)); 1827 WARN_ON(PageChecked(page));
1770 if (!page_has_buffers(page)) 1828 if (!page_has_buffers(page))
1771 return 0; 1829 return 0;
@@ -1794,6 +1852,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1794 size_t count = iov_length(iov, nr_segs); 1852 size_t count = iov_length(iov, nr_segs);
1795 int retries = 0; 1853 int retries = 0;
1796 1854
1855 trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
1856
1797 if (rw == WRITE) { 1857 if (rw == WRITE) {
1798 loff_t final_size = offset + count; 1858 loff_t final_size = offset + count;
1799 1859
@@ -1827,7 +1887,7 @@ retry:
1827 loff_t end = offset + iov_length(iov, nr_segs); 1887 loff_t end = offset + iov_length(iov, nr_segs);
1828 1888
1829 if (end > isize) 1889 if (end > isize)
1830 vmtruncate(inode, isize); 1890 ext3_truncate_failed_direct_write(inode);
1831 } 1891 }
1832 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1892 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1833 goto retry; 1893 goto retry;
@@ -1841,7 +1901,7 @@ retry:
1841 /* This is really bad luck. We've written the data 1901 /* This is really bad luck. We've written the data
1842 * but cannot extend i_size. Truncate allocated blocks 1902 * but cannot extend i_size. Truncate allocated blocks
1843 * and pretend the write failed... */ 1903 * and pretend the write failed... */
1844 ext3_truncate(inode); 1904 ext3_truncate_failed_direct_write(inode);
1845 ret = PTR_ERR(handle); 1905 ret = PTR_ERR(handle);
1846 goto out; 1906 goto out;
1847 } 1907 }
@@ -1867,6 +1927,8 @@ retry:
1867 ret = err; 1927 ret = err;
1868 } 1928 }
1869out: 1929out:
1930 trace_ext3_direct_IO_exit(inode, offset,
1931 iov_length(iov, nr_segs), rw, ret);
1870 return ret; 1932 return ret;
1871} 1933}
1872 1934
@@ -1949,17 +2011,24 @@ void ext3_set_aops(struct inode *inode)
1949 * This required during truncate. We need to physically zero the tail end 2011 * This required during truncate. We need to physically zero the tail end
1950 * of that block so it doesn't yield old data if the file is later grown. 2012 * of that block so it doesn't yield old data if the file is later grown.
1951 */ 2013 */
1952static int ext3_block_truncate_page(handle_t *handle, struct page *page, 2014static int ext3_block_truncate_page(struct inode *inode, loff_t from)
1953 struct address_space *mapping, loff_t from)
1954{ 2015{
1955 ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; 2016 ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
1956 unsigned offset = from & (PAGE_CACHE_SIZE-1); 2017 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
1957 unsigned blocksize, iblock, length, pos; 2018 unsigned blocksize, iblock, length, pos;
1958 struct inode *inode = mapping->host; 2019 struct page *page;
2020 handle_t *handle = NULL;
1959 struct buffer_head *bh; 2021 struct buffer_head *bh;
1960 int err = 0; 2022 int err = 0;
1961 2023
2024 /* Truncated on block boundary - nothing to do */
1962 blocksize = inode->i_sb->s_blocksize; 2025 blocksize = inode->i_sb->s_blocksize;
2026 if ((from & (blocksize - 1)) == 0)
2027 return 0;
2028
2029 page = grab_cache_page(inode->i_mapping, index);
2030 if (!page)
2031 return -ENOMEM;
1963 length = blocksize - (offset & (blocksize - 1)); 2032 length = blocksize - (offset & (blocksize - 1));
1964 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 2033 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
1965 2034
@@ -2004,11 +2073,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
2004 goto unlock; 2073 goto unlock;
2005 } 2074 }
2006 2075
2076 /* data=writeback mode doesn't need transaction to zero-out data */
2077 if (!ext3_should_writeback_data(inode)) {
2078 /* We journal at most one block */
2079 handle = ext3_journal_start(inode, 1);
2080 if (IS_ERR(handle)) {
2081 clear_highpage(page);
2082 flush_dcache_page(page);
2083 err = PTR_ERR(handle);
2084 goto unlock;
2085 }
2086 }
2087
2007 if (ext3_should_journal_data(inode)) { 2088 if (ext3_should_journal_data(inode)) {
2008 BUFFER_TRACE(bh, "get write access"); 2089 BUFFER_TRACE(bh, "get write access");
2009 err = ext3_journal_get_write_access(handle, bh); 2090 err = ext3_journal_get_write_access(handle, bh);
2010 if (err) 2091 if (err)
2011 goto unlock; 2092 goto stop;
2012 } 2093 }
2013 2094
2014 zero_user(page, offset, length); 2095 zero_user(page, offset, length);
@@ -2022,6 +2103,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
2022 err = ext3_journal_dirty_data(handle, bh); 2103 err = ext3_journal_dirty_data(handle, bh);
2023 mark_buffer_dirty(bh); 2104 mark_buffer_dirty(bh);
2024 } 2105 }
2106stop:
2107 if (handle)
2108 ext3_journal_stop(handle);
2025 2109
2026unlock: 2110unlock:
2027 unlock_page(page); 2111 unlock_page(page);
@@ -2390,8 +2474,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2390 2474
2391int ext3_can_truncate(struct inode *inode) 2475int ext3_can_truncate(struct inode *inode)
2392{ 2476{
2393 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2394 return 0;
2395 if (S_ISREG(inode->i_mode)) 2477 if (S_ISREG(inode->i_mode))
2396 return 1; 2478 return 1;
2397 if (S_ISDIR(inode->i_mode)) 2479 if (S_ISDIR(inode->i_mode))
@@ -2435,7 +2517,6 @@ void ext3_truncate(struct inode *inode)
2435 struct ext3_inode_info *ei = EXT3_I(inode); 2517 struct ext3_inode_info *ei = EXT3_I(inode);
2436 __le32 *i_data = ei->i_data; 2518 __le32 *i_data = ei->i_data;
2437 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); 2519 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
2438 struct address_space *mapping = inode->i_mapping;
2439 int offsets[4]; 2520 int offsets[4];
2440 Indirect chain[4]; 2521 Indirect chain[4];
2441 Indirect *partial; 2522 Indirect *partial;
@@ -2443,7 +2524,8 @@ void ext3_truncate(struct inode *inode)
2443 int n; 2524 int n;
2444 long last_block; 2525 long last_block;
2445 unsigned blocksize = inode->i_sb->s_blocksize; 2526 unsigned blocksize = inode->i_sb->s_blocksize;
2446 struct page *page; 2527
2528 trace_ext3_truncate_enter(inode);
2447 2529
2448 if (!ext3_can_truncate(inode)) 2530 if (!ext3_can_truncate(inode))
2449 goto out_notrans; 2531 goto out_notrans;
@@ -2451,37 +2533,12 @@ void ext3_truncate(struct inode *inode)
2451 if (inode->i_size == 0 && ext3_should_writeback_data(inode)) 2533 if (inode->i_size == 0 && ext3_should_writeback_data(inode))
2452 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); 2534 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
2453 2535
2454 /*
2455 * We have to lock the EOF page here, because lock_page() nests
2456 * outside journal_start().
2457 */
2458 if ((inode->i_size & (blocksize - 1)) == 0) {
2459 /* Block boundary? Nothing to do */
2460 page = NULL;
2461 } else {
2462 page = grab_cache_page(mapping,
2463 inode->i_size >> PAGE_CACHE_SHIFT);
2464 if (!page)
2465 goto out_notrans;
2466 }
2467
2468 handle = start_transaction(inode); 2536 handle = start_transaction(inode);
2469 if (IS_ERR(handle)) { 2537 if (IS_ERR(handle))
2470 if (page) {
2471 clear_highpage(page);
2472 flush_dcache_page(page);
2473 unlock_page(page);
2474 page_cache_release(page);
2475 }
2476 goto out_notrans; 2538 goto out_notrans;
2477 }
2478 2539
2479 last_block = (inode->i_size + blocksize-1) 2540 last_block = (inode->i_size + blocksize-1)
2480 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); 2541 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
2481
2482 if (page)
2483 ext3_block_truncate_page(handle, page, mapping, inode->i_size);
2484
2485 n = ext3_block_to_path(inode, last_block, offsets, NULL); 2542 n = ext3_block_to_path(inode, last_block, offsets, NULL);
2486 if (n == 0) 2543 if (n == 0)
2487 goto out_stop; /* error */ 2544 goto out_stop; /* error */
@@ -2596,6 +2653,7 @@ out_stop:
2596 ext3_orphan_del(handle, inode); 2653 ext3_orphan_del(handle, inode);
2597 2654
2598 ext3_journal_stop(handle); 2655 ext3_journal_stop(handle);
2656 trace_ext3_truncate_exit(inode);
2599 return; 2657 return;
2600out_notrans: 2658out_notrans:
2601 /* 2659 /*
@@ -2604,6 +2662,7 @@ out_notrans:
2604 */ 2662 */
2605 if (inode->i_nlink) 2663 if (inode->i_nlink)
2606 ext3_orphan_del(NULL, inode); 2664 ext3_orphan_del(NULL, inode);
2665 trace_ext3_truncate_exit(inode);
2607} 2666}
2608 2667
2609static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, 2668static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2745,6 +2804,7 @@ make_io:
2745 * has in-inode xattrs, or we don't have this inode in memory. 2804 * has in-inode xattrs, or we don't have this inode in memory.
2746 * Read the block from disk. 2805 * Read the block from disk.
2747 */ 2806 */
2807 trace_ext3_load_inode(inode);
2748 get_bh(bh); 2808 get_bh(bh);
2749 bh->b_end_io = end_buffer_read_sync; 2809 bh->b_end_io = end_buffer_read_sync;
2750 submit_bh(READ_META, bh); 2810 submit_bh(READ_META, bh);
@@ -3229,18 +3289,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3229 } 3289 }
3230 3290
3231 error = ext3_orphan_add(handle, inode); 3291 error = ext3_orphan_add(handle, inode);
3292 if (error) {
3293 ext3_journal_stop(handle);
3294 goto err_out;
3295 }
3232 EXT3_I(inode)->i_disksize = attr->ia_size; 3296 EXT3_I(inode)->i_disksize = attr->ia_size;
3233 rc = ext3_mark_inode_dirty(handle, inode); 3297 error = ext3_mark_inode_dirty(handle, inode);
3234 if (!error)
3235 error = rc;
3236 ext3_journal_stop(handle); 3298 ext3_journal_stop(handle);
3299 if (error) {
3300 /* Some hard fs error must have happened. Bail out. */
3301 ext3_orphan_del(NULL, inode);
3302 goto err_out;
3303 }
3304 rc = ext3_block_truncate_page(inode, attr->ia_size);
3305 if (rc) {
3306 /* Cleanup orphan list and exit */
3307 handle = ext3_journal_start(inode, 3);
3308 if (IS_ERR(handle)) {
3309 ext3_orphan_del(NULL, inode);
3310 goto err_out;
3311 }
3312 ext3_orphan_del(handle, inode);
3313 ext3_journal_stop(handle);
3314 goto err_out;
3315 }
3237 } 3316 }
3238 3317
3239 if ((attr->ia_valid & ATTR_SIZE) && 3318 if ((attr->ia_valid & ATTR_SIZE) &&
3240 attr->ia_size != i_size_read(inode)) { 3319 attr->ia_size != i_size_read(inode)) {
3241 rc = vmtruncate(inode, attr->ia_size); 3320 truncate_setsize(inode, attr->ia_size);
3242 if (rc) 3321 ext3_truncate(inode);
3243 goto err_out;
3244 } 3322 }
3245 3323
3246 setattr_copy(inode, attr); 3324 setattr_copy(inode, attr);
@@ -3374,6 +3452,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
3374 int err; 3452 int err;
3375 3453
3376 might_sleep(); 3454 might_sleep();
3455 trace_ext3_mark_inode_dirty(inode, _RET_IP_);
3377 err = ext3_reserve_inode_write(handle, inode, &iloc); 3456 err = ext3_reserve_inode_write(handle, inode, &iloc);
3378 if (!err) 3457 if (!err)
3379 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 3458 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index f4090bd2f34..c7f43944f16 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -285,7 +285,7 @@ group_add_out:
285 if (!capable(CAP_SYS_ADMIN)) 285 if (!capable(CAP_SYS_ADMIN))
286 return -EPERM; 286 return -EPERM;
287 287
288 if (copy_from_user(&range, (struct fstrim_range *)arg, 288 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
289 sizeof(range))) 289 sizeof(range)))
290 return -EFAULT; 290 return -EFAULT;
291 291
@@ -293,7 +293,7 @@ group_add_out:
293 if (ret < 0) 293 if (ret < 0)
294 return ret; 294 return ret;
295 295
296 if (copy_to_user((struct fstrim_range *)arg, &range, 296 if (copy_to_user((struct fstrim_range __user *)arg, &range,
297 sizeof(range))) 297 sizeof(range)))
298 return -EFAULT; 298 return -EFAULT;
299 299
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 3b57230a17b..6e18a0b7750 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,7 @@
36#include <linux/quotaops.h> 36#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <trace/events/ext3.h>
39 40
40#include "namei.h" 41#include "namei.h"
41#include "xattr.h" 42#include "xattr.h"
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
287 while (len--) printk("%c", *name++); 288 while (len--) printk("%c", *name++);
288 ext3fs_dirhash(de->name, de->name_len, &h); 289 ext3fs_dirhash(de->name, de->name_len, &h);
289 printk(":%x.%u ", h.hash, 290 printk(":%x.%u ", h.hash,
290 ((char *) de - base)); 291 (unsigned) ((char *) de - base));
291 } 292 }
292 space += EXT3_DIR_REC_LEN(de->name_len); 293 space += EXT3_DIR_REC_LEN(de->name_len);
293 names++; 294 names++;
@@ -1013,7 +1014,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
1013 1014
1014 *err = -ENOENT; 1015 *err = -ENOENT;
1015errout: 1016errout:
1016 dxtrace(printk("%s not found\n", name)); 1017 dxtrace(printk("%s not found\n", entry->name));
1017 dx_release (frames); 1018 dx_release (frames);
1018 return NULL; 1019 return NULL;
1019} 1020}
@@ -2140,6 +2141,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2140 struct ext3_dir_entry_2 * de; 2141 struct ext3_dir_entry_2 * de;
2141 handle_t *handle; 2142 handle_t *handle;
2142 2143
2144 trace_ext3_unlink_enter(dir, dentry);
2143 /* Initialize quotas before so that eventual writes go 2145 /* Initialize quotas before so that eventual writes go
2144 * in separate transaction */ 2146 * in separate transaction */
2145 dquot_initialize(dir); 2147 dquot_initialize(dir);
@@ -2185,6 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2185end_unlink: 2187end_unlink:
2186 ext3_journal_stop(handle); 2188 ext3_journal_stop(handle);
2187 brelse (bh); 2189 brelse (bh);
2190 trace_ext3_unlink_exit(dentry, retval);
2188 return retval; 2191 return retval;
2189} 2192}
2190 2193
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b57ea2f9126..7beb69ae001 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,9 @@
44#include "acl.h" 44#include "acl.h"
45#include "namei.h" 45#include "namei.h"
46 46
47#define CREATE_TRACE_POINTS
48#include <trace/events/ext3.h>
49
47#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED 50#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
48 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA 51 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
49#else 52#else
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
497 return &ei->vfs_inode; 500 return &ei->vfs_inode;
498} 501}
499 502
503static int ext3_drop_inode(struct inode *inode)
504{
505 int drop = generic_drop_inode(inode);
506
507 trace_ext3_drop_inode(inode, drop);
508 return drop;
509}
510
500static void ext3_i_callback(struct rcu_head *head) 511static void ext3_i_callback(struct rcu_head *head)
501{ 512{
502 struct inode *inode = container_of(head, struct inode, i_rcu); 513 struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = {
788 .destroy_inode = ext3_destroy_inode, 799 .destroy_inode = ext3_destroy_inode,
789 .write_inode = ext3_write_inode, 800 .write_inode = ext3_write_inode,
790 .dirty_inode = ext3_dirty_inode, 801 .dirty_inode = ext3_dirty_inode,
802 .drop_inode = ext3_drop_inode,
791 .evict_inode = ext3_evict_inode, 803 .evict_inode = ext3_evict_inode,
792 .put_super = ext3_put_super, 804 .put_super = ext3_put_super,
793 .sync_fs = ext3_sync_fs, 805 .sync_fs = ext3_sync_fs,
@@ -2509,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
2509{ 2521{
2510 tid_t target; 2522 tid_t target;
2511 2523
2524 trace_ext3_sync_fs(sb, wait);
2512 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2525 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2513 if (wait) 2526 if (wait)
2514 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2527 log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 32e6cc23bd9..d565759d82e 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -803,8 +803,16 @@ inserted:
803 /* We need to allocate a new block */ 803 /* We need to allocate a new block */
804 ext3_fsblk_t goal = ext3_group_first_block_no(sb, 804 ext3_fsblk_t goal = ext3_group_first_block_no(sb,
805 EXT3_I(inode)->i_block_group); 805 EXT3_I(inode)->i_block_group);
806 ext3_fsblk_t block = ext3_new_block(handle, inode, 806 ext3_fsblk_t block;
807 goal, &error); 807
808 /*
 809 * Protect us against concurrent allocations to the
810 * same inode from ext3_..._writepage(). Reservation
811 * code does not expect racing allocations.
812 */
813 mutex_lock(&EXT3_I(inode)->truncate_mutex);
814 block = ext3_new_block(handle, inode, goal, &error);
815 mutex_unlock(&EXT3_I(inode)->truncate_mutex);
808 if (error) 816 if (error)
809 goto cleanup; 817 goto cleanup;
810 ea_idebug(inode, "creating block %d", block); 818 ea_idebug(inode, "creating block %d", block);
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e4b87bc1fa5..f94fc48ff3a 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -22,6 +22,8 @@
22#include <linux/jbd.h> 22#include <linux/jbd.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/blkdev.h>
26#include <trace/events/jbd.h>
25 27
26/* 28/*
27 * Unlink a buffer from a transaction checkpoint list. 29 * Unlink a buffer from a transaction checkpoint list.
@@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
95 97
96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && 98 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
97 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 99 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
100 /*
101 * Get our reference so that bh cannot be freed before
102 * we unlock it
103 */
104 get_bh(bh);
98 JBUFFER_TRACE(jh, "remove from checkpoint list"); 105 JBUFFER_TRACE(jh, "remove from checkpoint list");
99 ret = __journal_remove_checkpoint(jh) + 1; 106 ret = __journal_remove_checkpoint(jh) + 1;
100 jbd_unlock_bh_state(bh); 107 jbd_unlock_bh_state(bh);
101 journal_remove_journal_head(bh);
102 BUFFER_TRACE(bh, "release"); 108 BUFFER_TRACE(bh, "release");
103 __brelse(bh); 109 __brelse(bh);
104 } else { 110 } else {
@@ -220,8 +226,8 @@ restart:
220 spin_lock(&journal->j_list_lock); 226 spin_lock(&journal->j_list_lock);
221 goto restart; 227 goto restart;
222 } 228 }
229 get_bh(bh);
223 if (buffer_locked(bh)) { 230 if (buffer_locked(bh)) {
224 get_bh(bh);
225 spin_unlock(&journal->j_list_lock); 231 spin_unlock(&journal->j_list_lock);
226 jbd_unlock_bh_state(bh); 232 jbd_unlock_bh_state(bh);
227 wait_on_buffer(bh); 233 wait_on_buffer(bh);
@@ -240,7 +246,6 @@ restart:
240 */ 246 */
241 released = __journal_remove_checkpoint(jh); 247 released = __journal_remove_checkpoint(jh);
242 jbd_unlock_bh_state(bh); 248 jbd_unlock_bh_state(bh);
243 journal_remove_journal_head(bh);
244 __brelse(bh); 249 __brelse(bh);
245 } 250 }
246 251
@@ -253,9 +258,12 @@ static void
253__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) 258__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
254{ 259{
255 int i; 260 int i;
261 struct blk_plug plug;
256 262
263 blk_start_plug(&plug);
257 for (i = 0; i < *batch_count; i++) 264 for (i = 0; i < *batch_count; i++)
258 write_dirty_buffer(bhs[i], WRITE); 265 write_dirty_buffer(bhs[i], WRITE_SYNC);
266 blk_finish_plug(&plug);
259 267
260 for (i = 0; i < *batch_count; i++) { 268 for (i = 0; i < *batch_count; i++) {
261 struct buffer_head *bh = bhs[i]; 269 struct buffer_head *bh = bhs[i];
@@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
304 ret = 1; 312 ret = 1;
305 if (unlikely(buffer_write_io_error(bh))) 313 if (unlikely(buffer_write_io_error(bh)))
306 ret = -EIO; 314 ret = -EIO;
315 get_bh(bh);
307 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 316 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
308 BUFFER_TRACE(bh, "remove from checkpoint"); 317 BUFFER_TRACE(bh, "remove from checkpoint");
309 __journal_remove_checkpoint(jh); 318 __journal_remove_checkpoint(jh);
310 spin_unlock(&journal->j_list_lock); 319 spin_unlock(&journal->j_list_lock);
311 jbd_unlock_bh_state(bh); 320 jbd_unlock_bh_state(bh);
312 journal_remove_journal_head(bh);
313 __brelse(bh); 321 __brelse(bh);
314 } else { 322 } else {
315 /* 323 /*
@@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal)
358 * journal straight away. 366 * journal straight away.
359 */ 367 */
360 result = cleanup_journal_tail(journal); 368 result = cleanup_journal_tail(journal);
369 trace_jbd_checkpoint(journal, result);
361 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 370 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
362 if (result <= 0) 371 if (result <= 0)
363 return result; 372 return result;
@@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal)
503 if (blocknr < journal->j_tail) 512 if (blocknr < journal->j_tail)
504 freed = freed + journal->j_last - journal->j_first; 513 freed = freed + journal->j_last - journal->j_first;
505 514
515 trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
506 jbd_debug(1, 516 jbd_debug(1,
507 "Cleaning journal tail from %d to %d (offset %u), " 517 "Cleaning journal tail from %d to %d (offset %u), "
508 "freeing %u\n", 518 "freeing %u\n",
@@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal)
523/* 533/*
524 * journal_clean_one_cp_list 534 * journal_clean_one_cp_list
525 * 535 *
526 * Find all the written-back checkpoint buffers in the given list and release them. 536 * Find all the written-back checkpoint buffers in the given list and release
537 * them.
527 * 538 *
528 * Called with the journal locked.
529 * Called with j_list_lock held. 539 * Called with j_list_lock held.
 530 * Returns number of buffers reaped (for debug) 540 * Returns number of buffers reaped (for debug)
531 */ 541 */
@@ -632,8 +642,8 @@ out:
632 * checkpoint lists. 642 * checkpoint lists.
633 * 643 *
634 * The function returns 1 if it frees the transaction, 0 otherwise. 644 * The function returns 1 if it frees the transaction, 0 otherwise.
645 * The function can free jh and bh.
635 * 646 *
636 * This function is called with the journal locked.
637 * This function is called with j_list_lock held. 647 * This function is called with j_list_lock held.
638 * This function is called with jbd_lock_bh_state(jh2bh(jh)) 648 * This function is called with jbd_lock_bh_state(jh2bh(jh))
639 */ 649 */
@@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
652 } 662 }
653 journal = transaction->t_journal; 663 journal = transaction->t_journal;
654 664
665 JBUFFER_TRACE(jh, "removing from transaction");
655 __buffer_unlink(jh); 666 __buffer_unlink(jh);
656 jh->b_cp_transaction = NULL; 667 jh->b_cp_transaction = NULL;
668 journal_put_journal_head(jh);
657 669
658 if (transaction->t_checkpoint_list != NULL || 670 if (transaction->t_checkpoint_list != NULL ||
659 transaction->t_checkpoint_io_list != NULL) 671 transaction->t_checkpoint_io_list != NULL)
660 goto out; 672 goto out;
661 JBUFFER_TRACE(jh, "transaction has no more buffers");
662 673
663 /* 674 /*
664 * There is one special case to worry about: if we have just pulled the 675 * There is one special case to worry about: if we have just pulled the
@@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
669 * The locking here around t_state is a bit sleazy. 680 * The locking here around t_state is a bit sleazy.
670 * See the comment at the end of journal_commit_transaction(). 681 * See the comment at the end of journal_commit_transaction().
671 */ 682 */
672 if (transaction->t_state != T_FINISHED) { 683 if (transaction->t_state != T_FINISHED)
673 JBUFFER_TRACE(jh, "belongs to running/committing transaction");
674 goto out; 684 goto out;
675 }
676 685
677 /* OK, that was the last buffer for the transaction: we can now 686 /* OK, that was the last buffer for the transaction: we can now
678 safely remove this transaction from the log */ 687 safely remove this transaction from the log */
@@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
684 wake_up(&journal->j_wait_logspace); 693 wake_up(&journal->j_wait_logspace);
685 ret = 1; 694 ret = 1;
686out: 695out:
687 JBUFFER_TRACE(jh, "exit");
688 return ret; 696 return ret;
689} 697}
690 698
@@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
703 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); 711 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
704 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); 712 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
705 713
714 /* Get reference for checkpointing transaction */
715 journal_grab_journal_head(jh2bh(jh));
706 jh->b_cp_transaction = transaction; 716 jh->b_cp_transaction = transaction;
707 717
708 if (!transaction->t_checkpoint_list) { 718 if (!transaction->t_checkpoint_list) {
@@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
752 J_ASSERT(journal->j_committing_transaction != transaction); 762 J_ASSERT(journal->j_committing_transaction != transaction);
753 J_ASSERT(journal->j_running_transaction != transaction); 763 J_ASSERT(journal->j_running_transaction != transaction);
754 764
765 trace_jbd_drop_transaction(journal, transaction);
755 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); 766 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
756 kfree(transaction); 767 kfree(transaction);
757} 768}
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 72ffa974b0b..8799207df05 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -21,6 +21,7 @@
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include <linux/bio.h> 22#include <linux/bio.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <trace/events/jbd.h>
24 25
25/* 26/*
26 * Default IO end handler for temporary BJ_IO buffer_heads. 27 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -204,6 +205,8 @@ write_out_data:
204 if (!trylock_buffer(bh)) { 205 if (!trylock_buffer(bh)) {
205 BUFFER_TRACE(bh, "needs blocking lock"); 206 BUFFER_TRACE(bh, "needs blocking lock");
206 spin_unlock(&journal->j_list_lock); 207 spin_unlock(&journal->j_list_lock);
208 trace_jbd_do_submit_data(journal,
209 commit_transaction);
207 /* Write out all data to prevent deadlocks */ 210 /* Write out all data to prevent deadlocks */
208 journal_do_submit_data(wbuf, bufs, write_op); 211 journal_do_submit_data(wbuf, bufs, write_op);
209 bufs = 0; 212 bufs = 0;
@@ -236,6 +239,8 @@ write_out_data:
236 jbd_unlock_bh_state(bh); 239 jbd_unlock_bh_state(bh);
237 if (bufs == journal->j_wbufsize) { 240 if (bufs == journal->j_wbufsize) {
238 spin_unlock(&journal->j_list_lock); 241 spin_unlock(&journal->j_list_lock);
242 trace_jbd_do_submit_data(journal,
243 commit_transaction);
239 journal_do_submit_data(wbuf, bufs, write_op); 244 journal_do_submit_data(wbuf, bufs, write_op);
240 bufs = 0; 245 bufs = 0;
241 goto write_out_data; 246 goto write_out_data;
@@ -253,10 +258,6 @@ write_out_data:
253 jbd_unlock_bh_state(bh); 258 jbd_unlock_bh_state(bh);
254 if (locked) 259 if (locked)
255 unlock_buffer(bh); 260 unlock_buffer(bh);
256 journal_remove_journal_head(bh);
257 /* One for our safety reference, other for
258 * journal_remove_journal_head() */
259 put_bh(bh);
260 release_data_buffer(bh); 261 release_data_buffer(bh);
261 } 262 }
262 263
@@ -266,6 +267,7 @@ write_out_data:
266 } 267 }
267 } 268 }
268 spin_unlock(&journal->j_list_lock); 269 spin_unlock(&journal->j_list_lock);
270 trace_jbd_do_submit_data(journal, commit_transaction);
269 journal_do_submit_data(wbuf, bufs, write_op); 271 journal_do_submit_data(wbuf, bufs, write_op);
270 272
271 return err; 273 return err;
@@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal)
316 commit_transaction = journal->j_running_transaction; 318 commit_transaction = journal->j_running_transaction;
317 J_ASSERT(commit_transaction->t_state == T_RUNNING); 319 J_ASSERT(commit_transaction->t_state == T_RUNNING);
318 320
321 trace_jbd_start_commit(journal, commit_transaction);
319 jbd_debug(1, "JBD: starting commit of transaction %d\n", 322 jbd_debug(1, "JBD: starting commit of transaction %d\n",
320 commit_transaction->t_tid); 323 commit_transaction->t_tid);
321 324
322 spin_lock(&journal->j_state_lock); 325 spin_lock(&journal->j_state_lock);
323 commit_transaction->t_state = T_LOCKED; 326 commit_transaction->t_state = T_LOCKED;
324 327
328 trace_jbd_commit_locking(journal, commit_transaction);
325 spin_lock(&commit_transaction->t_handle_lock); 329 spin_lock(&commit_transaction->t_handle_lock);
326 while (commit_transaction->t_updates) { 330 while (commit_transaction->t_updates) {
327 DEFINE_WAIT(wait); 331 DEFINE_WAIT(wait);
@@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal)
392 */ 396 */
393 journal_switch_revoke_table(journal); 397 journal_switch_revoke_table(journal);
394 398
399 trace_jbd_commit_flushing(journal, commit_transaction);
395 commit_transaction->t_state = T_FLUSH; 400 commit_transaction->t_state = T_FLUSH;
396 journal->j_committing_transaction = commit_transaction; 401 journal->j_committing_transaction = commit_transaction;
397 journal->j_running_transaction = NULL; 402 journal->j_running_transaction = NULL;
@@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
446 } 451 }
447 if (buffer_jbd(bh) && bh2jh(bh) == jh && 452 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
448 jh->b_transaction == commit_transaction && 453 jh->b_transaction == commit_transaction &&
449 jh->b_jlist == BJ_Locked) { 454 jh->b_jlist == BJ_Locked)
450 __journal_unfile_buffer(jh); 455 __journal_unfile_buffer(jh);
451 jbd_unlock_bh_state(bh); 456 jbd_unlock_bh_state(bh);
452 journal_remove_journal_head(bh);
453 put_bh(bh);
454 } else {
455 jbd_unlock_bh_state(bh);
456 }
457 release_data_buffer(bh); 457 release_data_buffer(bh);
458 cond_resched_lock(&journal->j_list_lock); 458 cond_resched_lock(&journal->j_list_lock);
459 } 459 }
@@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal)
493 commit_transaction->t_state = T_COMMIT; 493 commit_transaction->t_state = T_COMMIT;
494 spin_unlock(&journal->j_state_lock); 494 spin_unlock(&journal->j_state_lock);
495 495
496 trace_jbd_commit_logging(journal, commit_transaction);
496 J_ASSERT(commit_transaction->t_nr_buffers <= 497 J_ASSERT(commit_transaction->t_nr_buffers <=
497 commit_transaction->t_outstanding_credits); 498 commit_transaction->t_outstanding_credits);
498 499
@@ -797,10 +798,16 @@ restart_loop:
797 while (commit_transaction->t_forget) { 798 while (commit_transaction->t_forget) {
798 transaction_t *cp_transaction; 799 transaction_t *cp_transaction;
799 struct buffer_head *bh; 800 struct buffer_head *bh;
801 int try_to_free = 0;
800 802
801 jh = commit_transaction->t_forget; 803 jh = commit_transaction->t_forget;
802 spin_unlock(&journal->j_list_lock); 804 spin_unlock(&journal->j_list_lock);
803 bh = jh2bh(jh); 805 bh = jh2bh(jh);
806 /*
807 * Get a reference so that bh cannot be freed before we are
808 * done with it.
809 */
810 get_bh(bh);
804 jbd_lock_bh_state(bh); 811 jbd_lock_bh_state(bh);
805 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || 812 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
806 jh->b_transaction == journal->j_running_transaction); 813 jh->b_transaction == journal->j_running_transaction);
@@ -858,28 +865,27 @@ restart_loop:
858 __journal_insert_checkpoint(jh, commit_transaction); 865 __journal_insert_checkpoint(jh, commit_transaction);
859 if (is_journal_aborted(journal)) 866 if (is_journal_aborted(journal))
860 clear_buffer_jbddirty(bh); 867 clear_buffer_jbddirty(bh);
861 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
862 __journal_refile_buffer(jh);
863 jbd_unlock_bh_state(bh);
864 } else { 868 } else {
865 J_ASSERT_BH(bh, !buffer_dirty(bh)); 869 J_ASSERT_BH(bh, !buffer_dirty(bh));
866 /* The buffer on BJ_Forget list and not jbddirty means 870 /*
871 * The buffer on BJ_Forget list and not jbddirty means
867 * it has been freed by this transaction and hence it 872 * it has been freed by this transaction and hence it
868 * could not have been reallocated until this 873 * could not have been reallocated until this
869 * transaction has committed. *BUT* it could be 874 * transaction has committed. *BUT* it could be
870 * reallocated once we have written all the data to 875 * reallocated once we have written all the data to
871 * disk and before we process the buffer on BJ_Forget 876 * disk and before we process the buffer on BJ_Forget
872 * list. */ 877 * list.
873 JBUFFER_TRACE(jh, "refile or unfile freed buffer"); 878 */
874 __journal_refile_buffer(jh); 879 if (!jh->b_next_transaction)
875 if (!jh->b_transaction) { 880 try_to_free = 1;
876 jbd_unlock_bh_state(bh);
877 /* needs a brelse */
878 journal_remove_journal_head(bh);
879 release_buffer_page(bh);
880 } else
881 jbd_unlock_bh_state(bh);
882 } 881 }
882 JBUFFER_TRACE(jh, "refile or unfile freed buffer");
883 __journal_refile_buffer(jh);
884 jbd_unlock_bh_state(bh);
885 if (try_to_free)
886 release_buffer_page(bh);
887 else
888 __brelse(bh);
883 cond_resched_lock(&journal->j_list_lock); 889 cond_resched_lock(&journal->j_list_lock);
884 } 890 }
885 spin_unlock(&journal->j_list_lock); 891 spin_unlock(&journal->j_list_lock);
@@ -946,6 +952,7 @@ restart_loop:
946 } 952 }
947 spin_unlock(&journal->j_list_lock); 953 spin_unlock(&journal->j_list_lock);
948 954
955 trace_jbd_end_commit(journal, commit_transaction);
949 jbd_debug(1, "JBD: commit %d complete, head %d\n", 956 jbd_debug(1, "JBD: commit %d complete, head %d\n",
950 journal->j_commit_sequence, journal->j_tail_sequence); 957 journal->j_commit_sequence, journal->j_tail_sequence);
951 958
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index e2d4285fbe9..9fe061fb877 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -38,6 +38,9 @@
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40 40
41#define CREATE_TRACE_POINTS
42#include <trace/events/jbd.h>
43
41#include <asm/uaccess.h> 44#include <asm/uaccess.h>
42#include <asm/page.h> 45#include <asm/page.h>
43 46
@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait)
1065 } else 1068 } else
1066 write_dirty_buffer(bh, WRITE); 1069 write_dirty_buffer(bh, WRITE);
1067 1070
1071 trace_jbd_update_superblock_end(journal, wait);
1068out: 1072out:
1069 /* If we have just flushed the log (by marking s_start==0), then 1073 /* If we have just flushed the log (by marking s_start==0), then
1070 * any future commit will have to be careful to update the 1074 * any future commit will have to be careful to update the
@@ -1799,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
1799 * When a buffer has its BH_JBD bit set it is immune from being released by 1803 * When a buffer has its BH_JBD bit set it is immune from being released by
1800 * core kernel code, mainly via ->b_count. 1804 * core kernel code, mainly via ->b_count.
1801 * 1805 *
1802 * A journal_head may be detached from its buffer_head when the journal_head's 1806 * A journal_head is detached from its buffer_head when the journal_head's
1803 * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. 1807 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
1804 * Various places in JBD call journal_remove_journal_head() to indicate that the 1808 * transaction (b_cp_transaction) hold their references to b_jcount.
1805 * journal_head can be dropped if needed.
1806 * 1809 *
1807 * Various places in the kernel want to attach a journal_head to a buffer_head 1810 * Various places in the kernel want to attach a journal_head to a buffer_head
1808 * _before_ attaching the journal_head to a transaction. To protect the 1811 * _before_ attaching the journal_head to a transaction. To protect the
@@ -1815,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
1815 * (Attach a journal_head if needed. Increments b_jcount) 1818 * (Attach a journal_head if needed. Increments b_jcount)
1816 * struct journal_head *jh = journal_add_journal_head(bh); 1819 * struct journal_head *jh = journal_add_journal_head(bh);
1817 * ... 1820 * ...
1818 * jh->b_transaction = xxx; 1821 * (Get another reference for transaction)
1819 * journal_put_journal_head(jh); 1822 * journal_grab_journal_head(bh);
1820 * 1823 * jh->b_transaction = xxx;
1821 * Now, the journal_head's b_jcount is zero, but it is safe from being released 1824 * (Put original reference)
1822 * because it has a non-zero b_transaction. 1825 * journal_put_journal_head(jh);
1823 */ 1826 */
1824 1827
1825/* 1828/*
1826 * Give a buffer_head a journal_head. 1829 * Give a buffer_head a journal_head.
1827 * 1830 *
1828 * Doesn't need the journal lock.
1829 * May sleep. 1831 * May sleep.
1830 */ 1832 */
1831struct journal_head *journal_add_journal_head(struct buffer_head *bh) 1833struct journal_head *journal_add_journal_head(struct buffer_head *bh)
@@ -1889,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1889 struct journal_head *jh = bh2jh(bh); 1891 struct journal_head *jh = bh2jh(bh);
1890 1892
1891 J_ASSERT_JH(jh, jh->b_jcount >= 0); 1893 J_ASSERT_JH(jh, jh->b_jcount >= 0);
1892 1894 J_ASSERT_JH(jh, jh->b_transaction == NULL);
1893 get_bh(bh); 1895 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1894 if (jh->b_jcount == 0) { 1896 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
1895 if (jh->b_transaction == NULL && 1897 J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
1896 jh->b_next_transaction == NULL && 1898 J_ASSERT_BH(bh, buffer_jbd(bh));
1897 jh->b_cp_transaction == NULL) { 1899 J_ASSERT_BH(bh, jh2bh(jh) == bh);
1898 J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 1900 BUFFER_TRACE(bh, "remove journal_head");
1899 J_ASSERT_BH(bh, buffer_jbd(bh)); 1901 if (jh->b_frozen_data) {
1900 J_ASSERT_BH(bh, jh2bh(jh) == bh); 1902 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
1901 BUFFER_TRACE(bh, "remove journal_head"); 1903 jbd_free(jh->b_frozen_data, bh->b_size);
1902 if (jh->b_frozen_data) {
1903 printk(KERN_WARNING "%s: freeing "
1904 "b_frozen_data\n",
1905 __func__);
1906 jbd_free(jh->b_frozen_data, bh->b_size);
1907 }
1908 if (jh->b_committed_data) {
1909 printk(KERN_WARNING "%s: freeing "
1910 "b_committed_data\n",
1911 __func__);
1912 jbd_free(jh->b_committed_data, bh->b_size);
1913 }
1914 bh->b_private = NULL;
1915 jh->b_bh = NULL; /* debug, really */
1916 clear_buffer_jbd(bh);
1917 __brelse(bh);
1918 journal_free_journal_head(jh);
1919 } else {
1920 BUFFER_TRACE(bh, "journal_head was locked");
1921 }
1922 } 1904 }
1905 if (jh->b_committed_data) {
1906 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
1907 jbd_free(jh->b_committed_data, bh->b_size);
1908 }
1909 bh->b_private = NULL;
1910 jh->b_bh = NULL; /* debug, really */
1911 clear_buffer_jbd(bh);
1912 journal_free_journal_head(jh);
1923} 1913}
1924 1914
1925/* 1915/*
1926 * journal_remove_journal_head(): if the buffer isn't attached to a transaction 1916 * Drop a reference on the passed journal_head. If it fell to zero then
1927 * and has a zero b_jcount then remove and release its journal_head. If we did
1928 * see that the buffer is not used by any transaction we also "logically"
1929 * decrement ->b_count.
1930 *
1931 * We in fact take an additional increment on ->b_count as a convenience,
1932 * because the caller usually wants to do additional things with the bh
1933 * after calling here.
1934 * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
1935 * time. Once the caller has run __brelse(), the buffer is eligible for
1936 * reaping by try_to_free_buffers().
1937 */
1938void journal_remove_journal_head(struct buffer_head *bh)
1939{
1940 jbd_lock_bh_journal_head(bh);
1941 __journal_remove_journal_head(bh);
1942 jbd_unlock_bh_journal_head(bh);
1943}
1944
1945/*
1946 * Drop a reference on the passed journal_head. If it fell to zero then try to
1947 * release the journal_head from the buffer_head. 1917 * release the journal_head from the buffer_head.
1948 */ 1918 */
1949void journal_put_journal_head(struct journal_head *jh) 1919void journal_put_journal_head(struct journal_head *jh)
@@ -1953,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh)
1953 jbd_lock_bh_journal_head(bh); 1923 jbd_lock_bh_journal_head(bh);
1954 J_ASSERT_JH(jh, jh->b_jcount > 0); 1924 J_ASSERT_JH(jh, jh->b_jcount > 0);
1955 --jh->b_jcount; 1925 --jh->b_jcount;
1956 if (!jh->b_jcount && !jh->b_transaction) { 1926 if (!jh->b_jcount) {
1957 __journal_remove_journal_head(bh); 1927 __journal_remove_journal_head(bh);
1928 jbd_unlock_bh_journal_head(bh);
1958 __brelse(bh); 1929 __brelse(bh);
1959 } 1930 } else
1960 jbd_unlock_bh_journal_head(bh); 1931 jbd_unlock_bh_journal_head(bh);
1961} 1932}
1962 1933
1963/* 1934/*
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f7ee81a065d..7e59c6e66f9 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,6 +26,7 @@
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/backing-dev.h>
29 30
30static void __journal_temp_unlink_buffer(struct journal_head *jh); 31static void __journal_temp_unlink_buffer(struct journal_head *jh);
31 32
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
99 100
100alloc_transaction: 101alloc_transaction:
101 if (!journal->j_running_transaction) { 102 if (!journal->j_running_transaction) {
102 new_transaction = kzalloc(sizeof(*new_transaction), 103 new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
103 GFP_NOFS|__GFP_NOFAIL);
104 if (!new_transaction) { 104 if (!new_transaction) {
105 ret = -ENOMEM; 105 congestion_wait(BLK_RW_ASYNC, HZ/50);
106 goto out; 106 goto alloc_transaction;
107 } 107 }
108 } 108 }
109 109
@@ -696,7 +696,6 @@ repeat:
696 if (!jh->b_transaction) { 696 if (!jh->b_transaction) {
697 JBUFFER_TRACE(jh, "no transaction"); 697 JBUFFER_TRACE(jh, "no transaction");
698 J_ASSERT_JH(jh, !jh->b_next_transaction); 698 J_ASSERT_JH(jh, !jh->b_next_transaction);
699 jh->b_transaction = transaction;
700 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 699 JBUFFER_TRACE(jh, "file as BJ_Reserved");
701 spin_lock(&journal->j_list_lock); 700 spin_lock(&journal->j_list_lock);
702 __journal_file_buffer(jh, transaction, BJ_Reserved); 701 __journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
818 * committed and so it's safe to clear the dirty bit. 817 * committed and so it's safe to clear the dirty bit.
819 */ 818 */
820 clear_buffer_dirty(jh2bh(jh)); 819 clear_buffer_dirty(jh2bh(jh));
821 jh->b_transaction = transaction;
822 820
823 /* first access by this transaction */ 821 /* first access by this transaction */
824 jh->b_modified = 0; 822 jh->b_modified = 0;
@@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
844 */ 842 */
845 JBUFFER_TRACE(jh, "cancelling revoke"); 843 JBUFFER_TRACE(jh, "cancelling revoke");
846 journal_cancel_revoke(handle, jh); 844 journal_cancel_revoke(handle, jh);
847 journal_put_journal_head(jh);
848out: 845out:
846 journal_put_journal_head(jh);
849 return err; 847 return err;
850} 848}
851 849
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1069 ret = -EIO; 1067 ret = -EIO;
1070 goto no_journal; 1068 goto no_journal;
1071 } 1069 }
1072 1070 /* We might have slept so buffer could be refiled now */
1073 if (jh->b_transaction != NULL) { 1071 if (jh->b_transaction != NULL &&
1072 jh->b_transaction != handle->h_transaction) {
1074 JBUFFER_TRACE(jh, "unfile from commit"); 1073 JBUFFER_TRACE(jh, "unfile from commit");
1075 __journal_temp_unlink_buffer(jh); 1074 __journal_temp_unlink_buffer(jh);
1076 /* It still points to the committing 1075 /* It still points to the committing
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1091 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { 1090 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
1092 JBUFFER_TRACE(jh, "not on correct data list: unfile"); 1091 JBUFFER_TRACE(jh, "not on correct data list: unfile");
1093 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); 1092 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
1094 __journal_temp_unlink_buffer(jh);
1095 jh->b_transaction = handle->h_transaction;
1096 JBUFFER_TRACE(jh, "file as data"); 1093 JBUFFER_TRACE(jh, "file as data");
1097 __journal_file_buffer(jh, handle->h_transaction, 1094 __journal_file_buffer(jh, handle->h_transaction,
1098 BJ_SyncData); 1095 BJ_SyncData);
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1300 __journal_file_buffer(jh, transaction, BJ_Forget); 1297 __journal_file_buffer(jh, transaction, BJ_Forget);
1301 } else { 1298 } else {
1302 __journal_unfile_buffer(jh); 1299 __journal_unfile_buffer(jh);
1303 journal_remove_journal_head(bh);
1304 __brelse(bh);
1305 if (!buffer_jbd(bh)) { 1300 if (!buffer_jbd(bh)) {
1306 spin_unlock(&journal->j_list_lock); 1301 spin_unlock(&journal->j_list_lock);
1307 jbd_unlock_bh_state(bh); 1302 jbd_unlock_bh_state(bh);
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
1622 mark_buffer_dirty(bh); /* Expose it to the VM */ 1617 mark_buffer_dirty(bh); /* Expose it to the VM */
1623} 1618}
1624 1619
1620/*
1621 * Remove buffer from all transactions.
1622 *
1623 * Called with bh_state lock and j_list_lock
1624 *
1625 * jh and bh may be already freed when this function returns.
1626 */
1625void __journal_unfile_buffer(struct journal_head *jh) 1627void __journal_unfile_buffer(struct journal_head *jh)
1626{ 1628{
1627 __journal_temp_unlink_buffer(jh); 1629 __journal_temp_unlink_buffer(jh);
1628 jh->b_transaction = NULL; 1630 jh->b_transaction = NULL;
1631 journal_put_journal_head(jh);
1629} 1632}
1630 1633
1631void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) 1634void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1632{ 1635{
1633 jbd_lock_bh_state(jh2bh(jh)); 1636 struct buffer_head *bh = jh2bh(jh);
1637
1638 /* Get reference so that buffer cannot be freed before we unlock it */
1639 get_bh(bh);
1640 jbd_lock_bh_state(bh);
1634 spin_lock(&journal->j_list_lock); 1641 spin_lock(&journal->j_list_lock);
1635 __journal_unfile_buffer(jh); 1642 __journal_unfile_buffer(jh);
1636 spin_unlock(&journal->j_list_lock); 1643 spin_unlock(&journal->j_list_lock);
1637 jbd_unlock_bh_state(jh2bh(jh)); 1644 jbd_unlock_bh_state(bh);
1645 __brelse(bh);
1638} 1646}
1639 1647
1640/* 1648/*
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1661 /* A written-back ordered data buffer */ 1669 /* A written-back ordered data buffer */
1662 JBUFFER_TRACE(jh, "release data"); 1670 JBUFFER_TRACE(jh, "release data");
1663 __journal_unfile_buffer(jh); 1671 __journal_unfile_buffer(jh);
1664 journal_remove_journal_head(bh);
1665 __brelse(bh);
1666 } 1672 }
1667 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1673 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1668 /* written-back checkpointed metadata buffer */ 1674 /* written-back checkpointed metadata buffer */
1669 if (jh->b_jlist == BJ_None) { 1675 if (jh->b_jlist == BJ_None) {
1670 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1676 JBUFFER_TRACE(jh, "remove from checkpoint list");
1671 __journal_remove_checkpoint(jh); 1677 __journal_remove_checkpoint(jh);
1672 journal_remove_journal_head(bh);
1673 __brelse(bh);
1674 } 1678 }
1675 } 1679 }
1676 spin_unlock(&journal->j_list_lock); 1680 spin_unlock(&journal->j_list_lock);
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
1733 /* 1737 /*
1734 * We take our own ref against the journal_head here to avoid 1738 * We take our own ref against the journal_head here to avoid
1735 * having to add tons of locking around each instance of 1739 * having to add tons of locking around each instance of
1736 * journal_remove_journal_head() and journal_put_journal_head(). 1740 * journal_put_journal_head().
1737 */ 1741 */
1738 jh = journal_grab_journal_head(bh); 1742 jh = journal_grab_journal_head(bh);
1739 if (!jh) 1743 if (!jh)
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1770 int may_free = 1; 1774 int may_free = 1;
1771 struct buffer_head *bh = jh2bh(jh); 1775 struct buffer_head *bh = jh2bh(jh);
1772 1776
1773 __journal_unfile_buffer(jh);
1774
1775 if (jh->b_cp_transaction) { 1777 if (jh->b_cp_transaction) {
1776 JBUFFER_TRACE(jh, "on running+cp transaction"); 1778 JBUFFER_TRACE(jh, "on running+cp transaction");
1779 __journal_temp_unlink_buffer(jh);
1777 /* 1780 /*
1778 * We don't want to write the buffer anymore, clear the 1781 * We don't want to write the buffer anymore, clear the
1779 * bit so that we don't confuse checks in 1782 * bit so that we don't confuse checks in
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1784 may_free = 0; 1787 may_free = 0;
1785 } else { 1788 } else {
1786 JBUFFER_TRACE(jh, "on running transaction"); 1789 JBUFFER_TRACE(jh, "on running transaction");
1787 journal_remove_journal_head(bh); 1790 __journal_unfile_buffer(jh);
1788 __brelse(bh);
1789 } 1791 }
1790 return may_free; 1792 return may_free;
1791} 1793}
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
2070 2072
2071 if (jh->b_transaction) 2073 if (jh->b_transaction)
2072 __journal_temp_unlink_buffer(jh); 2074 __journal_temp_unlink_buffer(jh);
2075 else
2076 journal_grab_journal_head(bh);
2073 jh->b_transaction = transaction; 2077 jh->b_transaction = transaction;
2074 2078
2075 switch (jlist) { 2079 switch (jlist) {
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
2127 * already started to be used by a subsequent transaction, refile the 2131 * already started to be used by a subsequent transaction, refile the
2128 * buffer on that transaction's metadata list. 2132 * buffer on that transaction's metadata list.
2129 * 2133 *
2130 * Called under journal->j_list_lock 2134 * Called under j_list_lock
2131 *
2132 * Called under jbd_lock_bh_state(jh2bh(jh)) 2135 * Called under jbd_lock_bh_state(jh2bh(jh))
2136 *
2137 * jh and bh may be already free when this function returns
2133 */ 2138 */
2134void __journal_refile_buffer(struct journal_head *jh) 2139void __journal_refile_buffer(struct journal_head *jh)
2135{ 2140{
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
2153 2158
2154 was_dirty = test_clear_buffer_jbddirty(bh); 2159 was_dirty = test_clear_buffer_jbddirty(bh);
2155 __journal_temp_unlink_buffer(jh); 2160 __journal_temp_unlink_buffer(jh);
2161 /*
2162 * We set b_transaction here because b_next_transaction will inherit
2163 * our jh reference and thus __journal_file_buffer() must not take a
2164 * new one.
2165 */
2156 jh->b_transaction = jh->b_next_transaction; 2166 jh->b_transaction = jh->b_next_transaction;
2157 jh->b_next_transaction = NULL; 2167 jh->b_next_transaction = NULL;
2158 if (buffer_freed(bh)) 2168 if (buffer_freed(bh))
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
2169} 2179}
2170 2180
2171/* 2181/*
2172 * For the unlocked version of this call, also make sure that any 2182 * __journal_refile_buffer() with necessary locking added. We take our bh
2173 * hanging journal_head is cleaned up if necessary. 2183 * reference so that we can safely unlock bh.
2174 * 2184 *
2175 * __journal_refile_buffer is usually called as part of a single locked 2185 * The jh and bh may be freed by this call.
2176 * operation on a buffer_head, in which the caller is probably going to
2177 * be hooking the journal_head onto other lists. In that case it is up
2178 * to the caller to remove the journal_head if necessary. For the
2179 * unlocked journal_refile_buffer call, the caller isn't going to be
2180 * doing anything else to the buffer so we need to do the cleanup
2181 * ourselves to avoid a jh leak.
2182 *
2183 * *** The journal_head may be freed by this call! ***
2184 */ 2186 */
2185void journal_refile_buffer(journal_t *journal, struct journal_head *jh) 2187void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2186{ 2188{
2187 struct buffer_head *bh = jh2bh(jh); 2189 struct buffer_head *bh = jh2bh(jh);
2188 2190
2191 /* Get reference so that buffer cannot be freed before we unlock it */
2192 get_bh(bh);
2189 jbd_lock_bh_state(bh); 2193 jbd_lock_bh_state(bh);
2190 spin_lock(&journal->j_list_lock); 2194 spin_lock(&journal->j_list_lock);
2191
2192 __journal_refile_buffer(jh); 2195 __journal_refile_buffer(jh);
2193 jbd_unlock_bh_state(bh); 2196 jbd_unlock_bh_state(bh);
2194 journal_remove_journal_head(bh);
2195
2196 spin_unlock(&journal->j_list_lock); 2197 spin_unlock(&journal->j_list_lock);
2197 __brelse(bh); 2198 __brelse(bh);
2198} 2199}
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 2dfa7076e8b..53792bf36c7 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -18,6 +18,7 @@
18 18
19#include <linux/types.h> 19#include <linux/types.h>
20#include <linux/magic.h> 20#include <linux/magic.h>
21#include <linux/fs.h>
21 22
22/* 23/*
23 * The second extended filesystem constants/structures 24 * The second extended filesystem constants/structures
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 0c473fd79ac..67a803aee61 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -418,12 +418,11 @@ struct ext3_inode {
418#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS 418#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
419#endif 419#endif
420 420
421#define ext3_set_bit __test_and_set_bit_le 421#define ext3_set_bit __set_bit_le
422#define ext3_set_bit_atomic ext2_set_bit_atomic 422#define ext3_set_bit_atomic ext2_set_bit_atomic
423#define ext3_clear_bit __test_and_clear_bit_le 423#define ext3_clear_bit __clear_bit_le
424#define ext3_clear_bit_atomic ext2_clear_bit_atomic 424#define ext3_clear_bit_atomic ext2_clear_bit_atomic
425#define ext3_test_bit test_bit_le 425#define ext3_test_bit test_bit_le
426#define ext3_find_first_zero_bit find_first_zero_bit_le
427#define ext3_find_next_zero_bit find_next_zero_bit_le 426#define ext3_find_next_zero_bit find_next_zero_bit_le
428 427
429/* 428/*
@@ -913,7 +912,7 @@ extern void ext3_dirty_inode(struct inode *, int);
913extern int ext3_change_inode_journal_flag(struct inode *, int); 912extern int ext3_change_inode_journal_flag(struct inode *, int);
914extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); 913extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
915extern int ext3_can_truncate(struct inode *inode); 914extern int ext3_can_truncate(struct inode *inode);
916extern void ext3_truncate (struct inode *); 915extern void ext3_truncate(struct inode *inode);
917extern void ext3_set_inode_flags(struct inode *); 916extern void ext3_set_inode_flags(struct inode *);
918extern void ext3_get_inode_flags(struct ext3_inode_info *); 917extern void ext3_get_inode_flags(struct ext3_inode_info *);
919extern void ext3_set_aops(struct inode *inode); 918extern void ext3_set_aops(struct inode *inode);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index e06965081ba..e6a5e34bed4 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -940,7 +940,6 @@ extern int journal_force_commit(journal_t *);
940 */ 940 */
941struct journal_head *journal_add_journal_head(struct buffer_head *bh); 941struct journal_head *journal_add_journal_head(struct buffer_head *bh);
942struct journal_head *journal_grab_journal_head(struct buffer_head *bh); 942struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
943void journal_remove_journal_head(struct buffer_head *bh);
944void journal_put_journal_head(struct journal_head *jh); 943void journal_put_journal_head(struct journal_head *jh);
945 944
946/* 945/*
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index 44e95d0a721..423cb6d78ee 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -45,7 +45,7 @@ struct journal_head {
45 * has been cowed 45 * has been cowed
46 * [jbd_lock_bh_state()] 46 * [jbd_lock_bh_state()]
47 */ 47 */
48 unsigned b_cow_tid; 48 tid_t b_cow_tid;
49 49
50 /* 50 /*
51 * Copy of the buffer data frozen for writing to the log. 51 * Copy of the buffer data frozen for writing to the log.
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 9a85412e0db..313b7defc08 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -415,13 +415,5 @@ struct quota_module_name {
415 {QFMT_VFS_V0, "quota_v2"},\ 415 {QFMT_VFS_V0, "quota_v2"},\
416 {0, NULL}} 416 {0, NULL}}
417 417
418#else
419
420# /* nodep */ include <sys/cdefs.h>
421
422__BEGIN_DECLS
423long quotactl __P ((unsigned int, const char *, int, caddr_t));
424__END_DECLS
425
426#endif /* __KERNEL__ */ 418#endif /* __KERNEL__ */
427#endif /* _QUOTA_ */ 419#endif /* _QUOTA_ */
diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h
new file mode 100644
index 00000000000..7b53c0573dc
--- /dev/null
+++ b/include/trace/events/ext3.h
@@ -0,0 +1,864 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ext3
3
4#if !defined(_TRACE_EXT3_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_EXT3_H
6
7#include <linux/tracepoint.h>
8
9TRACE_EVENT(ext3_free_inode,
10 TP_PROTO(struct inode *inode),
11
12 TP_ARGS(inode),
13
14 TP_STRUCT__entry(
15 __field( dev_t, dev )
16 __field( ino_t, ino )
17 __field( umode_t, mode )
18 __field( uid_t, uid )
19 __field( gid_t, gid )
20 __field( blkcnt_t, blocks )
21 ),
22
23 TP_fast_assign(
24 __entry->dev = inode->i_sb->s_dev;
25 __entry->ino = inode->i_ino;
26 __entry->mode = inode->i_mode;
27 __entry->uid = inode->i_uid;
28 __entry->gid = inode->i_gid;
29 __entry->blocks = inode->i_blocks;
30 ),
31
32 TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %lu",
33 MAJOR(__entry->dev), MINOR(__entry->dev),
34 (unsigned long) __entry->ino,
35 __entry->mode, __entry->uid, __entry->gid,
36 (unsigned long) __entry->blocks)
37);
38
39TRACE_EVENT(ext3_request_inode,
40 TP_PROTO(struct inode *dir, int mode),
41
42 TP_ARGS(dir, mode),
43
44 TP_STRUCT__entry(
45 __field( dev_t, dev )
46 __field( ino_t, dir )
47 __field( umode_t, mode )
48 ),
49
50 TP_fast_assign(
51 __entry->dev = dir->i_sb->s_dev;
52 __entry->dir = dir->i_ino;
53 __entry->mode = mode;
54 ),
55
56 TP_printk("dev %d,%d dir %lu mode 0%o",
57 MAJOR(__entry->dev), MINOR(__entry->dev),
58 (unsigned long) __entry->dir, __entry->mode)
59);
60
61TRACE_EVENT(ext3_allocate_inode,
62 TP_PROTO(struct inode *inode, struct inode *dir, int mode),
63
64 TP_ARGS(inode, dir, mode),
65
66 TP_STRUCT__entry(
67 __field( dev_t, dev )
68 __field( ino_t, ino )
69 __field( ino_t, dir )
70 __field( umode_t, mode )
71 ),
72
73 TP_fast_assign(
74 __entry->dev = inode->i_sb->s_dev;
75 __entry->ino = inode->i_ino;
76 __entry->dir = dir->i_ino;
77 __entry->mode = mode;
78 ),
79
80 TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
81 MAJOR(__entry->dev), MINOR(__entry->dev),
82 (unsigned long) __entry->ino,
83 (unsigned long) __entry->dir, __entry->mode)
84);
85
86TRACE_EVENT(ext3_evict_inode,
87 TP_PROTO(struct inode *inode),
88
89 TP_ARGS(inode),
90
91 TP_STRUCT__entry(
92 __field( dev_t, dev )
93 __field( ino_t, ino )
94 __field( int, nlink )
95 ),
96
97 TP_fast_assign(
98 __entry->dev = inode->i_sb->s_dev;
99 __entry->ino = inode->i_ino;
100 __entry->nlink = inode->i_nlink;
101 ),
102
103 TP_printk("dev %d,%d ino %lu nlink %d",
104 MAJOR(__entry->dev), MINOR(__entry->dev),
105 (unsigned long) __entry->ino, __entry->nlink)
106);
107
108TRACE_EVENT(ext3_drop_inode,
109 TP_PROTO(struct inode *inode, int drop),
110
111 TP_ARGS(inode, drop),
112
113 TP_STRUCT__entry(
114 __field( dev_t, dev )
115 __field( ino_t, ino )
116 __field( int, drop )
117 ),
118
119 TP_fast_assign(
120 __entry->dev = inode->i_sb->s_dev;
121 __entry->ino = inode->i_ino;
122 __entry->drop = drop;
123 ),
124
125 TP_printk("dev %d,%d ino %lu drop %d",
126 MAJOR(__entry->dev), MINOR(__entry->dev),
127 (unsigned long) __entry->ino, __entry->drop)
128);
129
130TRACE_EVENT(ext3_mark_inode_dirty,
131 TP_PROTO(struct inode *inode, unsigned long IP),
132
133 TP_ARGS(inode, IP),
134
135 TP_STRUCT__entry(
136 __field( dev_t, dev )
137 __field( ino_t, ino )
138 __field(unsigned long, ip )
139 ),
140
141 TP_fast_assign(
142 __entry->dev = inode->i_sb->s_dev;
143 __entry->ino = inode->i_ino;
144 __entry->ip = IP;
145 ),
146
147 TP_printk("dev %d,%d ino %lu caller %pF",
148 MAJOR(__entry->dev), MINOR(__entry->dev),
149 (unsigned long) __entry->ino, (void *)__entry->ip)
150);
151
152TRACE_EVENT(ext3_write_begin,
153 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
154 unsigned int flags),
155
156 TP_ARGS(inode, pos, len, flags),
157
158 TP_STRUCT__entry(
159 __field( dev_t, dev )
160 __field( ino_t, ino )
161 __field( loff_t, pos )
162 __field( unsigned int, len )
163 __field( unsigned int, flags )
164 ),
165
166 TP_fast_assign(
167 __entry->dev = inode->i_sb->s_dev;
168 __entry->ino = inode->i_ino;
169 __entry->pos = pos;
170 __entry->len = len;
171 __entry->flags = flags;
172 ),
173
174 TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
175 MAJOR(__entry->dev), MINOR(__entry->dev),
176 (unsigned long) __entry->ino,
177 (unsigned long long) __entry->pos, __entry->len,
178 __entry->flags)
179);
180
181DECLARE_EVENT_CLASS(ext3__write_end,
182 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
183 unsigned int copied),
184
185 TP_ARGS(inode, pos, len, copied),
186
187 TP_STRUCT__entry(
188 __field( dev_t, dev )
189 __field( ino_t, ino )
190 __field( loff_t, pos )
191 __field( unsigned int, len )
192 __field( unsigned int, copied )
193 ),
194
195 TP_fast_assign(
196 __entry->dev = inode->i_sb->s_dev;
197 __entry->ino = inode->i_ino;
198 __entry->pos = pos;
199 __entry->len = len;
200 __entry->copied = copied;
201 ),
202
203 TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
204 MAJOR(__entry->dev), MINOR(__entry->dev),
205 (unsigned long) __entry->ino,
206 (unsigned long long) __entry->pos, __entry->len,
207 __entry->copied)
208);
209
210DEFINE_EVENT(ext3__write_end, ext3_ordered_write_end,
211
212 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
213 unsigned int copied),
214
215 TP_ARGS(inode, pos, len, copied)
216);
217
218DEFINE_EVENT(ext3__write_end, ext3_writeback_write_end,
219
220 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
221 unsigned int copied),
222
223 TP_ARGS(inode, pos, len, copied)
224);
225
226DEFINE_EVENT(ext3__write_end, ext3_journalled_write_end,
227
228 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
229 unsigned int copied),
230
231 TP_ARGS(inode, pos, len, copied)
232);
233
234DECLARE_EVENT_CLASS(ext3__page_op,
235 TP_PROTO(struct page *page),
236
237 TP_ARGS(page),
238
239 TP_STRUCT__entry(
240 __field( dev_t, dev )
241 __field( ino_t, ino )
242 __field( pgoff_t, index )
243
244 ),
245
246 TP_fast_assign(
247 __entry->index = page->index;
248 __entry->ino = page->mapping->host->i_ino;
249 __entry->dev = page->mapping->host->i_sb->s_dev;
250 ),
251
252 TP_printk("dev %d,%d ino %lu page_index %lu",
253 MAJOR(__entry->dev), MINOR(__entry->dev),
254 (unsigned long) __entry->ino, __entry->index)
255);
256
257DEFINE_EVENT(ext3__page_op, ext3_ordered_writepage,
258
259 TP_PROTO(struct page *page),
260
261 TP_ARGS(page)
262);
263
264DEFINE_EVENT(ext3__page_op, ext3_writeback_writepage,
265
266 TP_PROTO(struct page *page),
267
268 TP_ARGS(page)
269);
270
271DEFINE_EVENT(ext3__page_op, ext3_journalled_writepage,
272
273 TP_PROTO(struct page *page),
274
275 TP_ARGS(page)
276);
277
278DEFINE_EVENT(ext3__page_op, ext3_readpage,
279
280 TP_PROTO(struct page *page),
281
282 TP_ARGS(page)
283);
284
285DEFINE_EVENT(ext3__page_op, ext3_releasepage,
286
287 TP_PROTO(struct page *page),
288
289 TP_ARGS(page)
290);
291
292TRACE_EVENT(ext3_invalidatepage,
293 TP_PROTO(struct page *page, unsigned long offset),
294
295 TP_ARGS(page, offset),
296
297 TP_STRUCT__entry(
298 __field( pgoff_t, index )
299 __field( unsigned long, offset )
300 __field( ino_t, ino )
301 __field( dev_t, dev )
302
303 ),
304
305 TP_fast_assign(
306 __entry->index = page->index;
307 __entry->offset = offset;
308 __entry->ino = page->mapping->host->i_ino;
309 __entry->dev = page->mapping->host->i_sb->s_dev;
310 ),
311
312 TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
313 MAJOR(__entry->dev), MINOR(__entry->dev),
314 (unsigned long) __entry->ino,
315 __entry->index, __entry->offset)
316);
317
/*
 * Block-discard event: records the device plus the starting block number
 * and the number of blocks being discarded.
 */
TRACE_EVENT(ext3_discard_blocks,
	TP_PROTO(struct super_block *sb, unsigned long blk,
			unsigned long count),

	TP_ARGS(sb, blk, count),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( unsigned long, blk )
		__field( unsigned long, count )

	),

	TP_fast_assign(
		__entry->dev = sb->s_dev;
		__entry->blk = blk;
		__entry->count = count;
	),

	TP_printk("dev %d,%d blk %lu count %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->blk, __entry->count)
);
341
342TRACE_EVENT(ext3_request_blocks,
343 TP_PROTO(struct inode *inode, unsigned long goal,
344 unsigned long count),
345
346 TP_ARGS(inode, goal, count),
347
348 TP_STRUCT__entry(
349 __field( dev_t, dev )
350 __field( ino_t, ino )
351 __field( unsigned long, count )
352 __field( unsigned long, goal )
353 ),
354
355 TP_fast_assign(
356 __entry->dev = inode->i_sb->s_dev;
357 __entry->ino = inode->i_ino;
358 __entry->count = count;
359 __entry->goal = goal;
360 ),
361
362 TP_printk("dev %d,%d ino %lu count %lu goal %lu ",
363 MAJOR(__entry->dev), MINOR(__entry->dev),
364 (unsigned long) __entry->ino,
365 __entry->count, __entry->goal)
366);
367
/*
 * Block allocation result event: records the device, inode, the block
 * actually allocated, the count requested and the original goal block.
 */
TRACE_EVENT(ext3_allocate_blocks,
	TP_PROTO(struct inode *inode, unsigned long goal,
		 unsigned long count, unsigned long block),

	TP_ARGS(inode, goal, count, block),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( ino_t, ino )
		__field( unsigned long, block )
		__field( unsigned long, count )
		__field( unsigned long, goal )
	),

	TP_fast_assign(
		__entry->dev = inode->i_sb->s_dev;
		__entry->ino = inode->i_ino;
		__entry->block = block;
		__entry->count = count;
		__entry->goal = goal;
	),

	TP_printk("dev %d,%d ino %lu count %lu block %lu goal %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->count, __entry->block,
		  __entry->goal)
);
396
/*
 * Block-free event: records the device, inode (with its mode, printed in
 * octal), the starting block and the number of blocks being freed.
 */
TRACE_EVENT(ext3_free_blocks,
	TP_PROTO(struct inode *inode, unsigned long block,
		 unsigned long count),

	TP_ARGS(inode, block, count),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( ino_t, ino )
		__field( umode_t, mode )
		__field( unsigned long, block )
		__field( unsigned long, count )
	),

	TP_fast_assign(
		__entry->dev = inode->i_sb->s_dev;
		__entry->ino = inode->i_ino;
		__entry->mode = inode->i_mode;
		__entry->block = block;
		__entry->count = count;
	),

	TP_printk("dev %d,%d ino %lu mode 0%o block %lu count %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->mode, __entry->block, __entry->count)
);
424
425TRACE_EVENT(ext3_sync_file_enter,
426 TP_PROTO(struct file *file, int datasync),
427
428 TP_ARGS(file, datasync),
429
430 TP_STRUCT__entry(
431 __field( dev_t, dev )
432 __field( ino_t, ino )
433 __field( ino_t, parent )
434 __field( int, datasync )
435 ),
436
437 TP_fast_assign(
438 struct dentry *dentry = file->f_path.dentry;
439
440 __entry->dev = dentry->d_inode->i_sb->s_dev;
441 __entry->ino = dentry->d_inode->i_ino;
442 __entry->datasync = datasync;
443 __entry->parent = dentry->d_parent->d_inode->i_ino;
444 ),
445
446 TP_printk("dev %d,%d ino %lu parent %ld datasync %d ",
447 MAJOR(__entry->dev), MINOR(__entry->dev),
448 (unsigned long) __entry->ino,
449 (unsigned long) __entry->parent, __entry->datasync)
450);
451
/*
 * fsync exit event: records the device, inode and the return code of the
 * sync operation.
 */
TRACE_EVENT(ext3_sync_file_exit,
	TP_PROTO(struct inode *inode, int ret),

	TP_ARGS(inode, ret),

	TP_STRUCT__entry(
		__field( int, ret )
		__field( ino_t, ino )
		__field( dev_t, dev )
	),

	TP_fast_assign(
		__entry->ret = ret;
		__entry->ino = inode->i_ino;
		__entry->dev = inode->i_sb->s_dev;
	),

	TP_printk("dev %d,%d ino %lu ret %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->ret)
);
474
/*
 * Filesystem sync event: records the device and whether the sync is
 * requested to wait for completion.
 */
TRACE_EVENT(ext3_sync_fs,
	TP_PROTO(struct super_block *sb, int wait),

	TP_ARGS(sb, wait),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( int, wait )

	),

	TP_fast_assign(
		__entry->dev = sb->s_dev;
		__entry->wait = wait;
	),

	TP_printk("dev %d,%d wait %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->wait)
);
495
/*
 * Reservation-window add event: records the device and the [start, end]
 * block range of the reservation window being added.
 */
TRACE_EVENT(ext3_rsv_window_add,
	TP_PROTO(struct super_block *sb,
		 struct ext3_reserve_window_node *rsv_node),

	TP_ARGS(sb, rsv_node),

	TP_STRUCT__entry(
		__field( unsigned long, start )
		__field( unsigned long, end )
		__field( dev_t, dev )
	),

	TP_fast_assign(
		__entry->dev = sb->s_dev;
		__entry->start = rsv_node->rsv_window._rsv_start;
		__entry->end = rsv_node->rsv_window._rsv_end;
	),

	TP_printk("dev %d,%d start %lu end %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->start, __entry->end)
);
518
/*
 * Reservation-discard event: records the device, the inode owning the
 * reservation and the [start, end] block range being discarded.
 */
TRACE_EVENT(ext3_discard_reservation,
	TP_PROTO(struct inode *inode,
		 struct ext3_reserve_window_node *rsv_node),

	TP_ARGS(inode, rsv_node),

	TP_STRUCT__entry(
		__field( unsigned long, start )
		__field( unsigned long, end )
		__field( ino_t, ino )
		__field( dev_t, dev )
	),

	TP_fast_assign(
		__entry->start = rsv_node->rsv_window._rsv_start;
		__entry->end = rsv_node->rsv_window._rsv_end;
		__entry->ino = inode->i_ino;
		__entry->dev = inode->i_sb->s_dev;
	),

	TP_printk("dev %d,%d ino %lu start %lu end %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long)__entry->ino, __entry->start,
		  __entry->end)
);
544
/*
 * New-reservation event: records the device and the goal block around
 * which a new reservation window is being allocated.
 */
TRACE_EVENT(ext3_alloc_new_reservation,
	TP_PROTO(struct super_block *sb, unsigned long goal),

	TP_ARGS(sb, goal),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( unsigned long, goal )
	),

	TP_fast_assign(
		__entry->dev = sb->s_dev;
		__entry->goal = goal;
	),

	TP_printk("dev %d,%d goal %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->goal)
);
564
565TRACE_EVENT(ext3_reserved,
566 TP_PROTO(struct super_block *sb, unsigned long block,
567 struct ext3_reserve_window_node *rsv_node),
568
569 TP_ARGS(sb, block, rsv_node),
570
571 TP_STRUCT__entry(
572 __field( unsigned long, block )
573 __field( unsigned long, start )
574 __field( unsigned long, end )
575 __field( dev_t, dev )
576 ),
577
578 TP_fast_assign(
579 __entry->block = block;
580 __entry->start = rsv_node->rsv_window._rsv_start;
581 __entry->end = rsv_node->rsv_window._rsv_end;
582 __entry->dev = sb->s_dev;
583 ),
584
585 TP_printk("dev %d,%d block %lu, start %lu end %lu",
586 MAJOR(__entry->dev), MINOR(__entry->dev),
587 __entry->block, __entry->start, __entry->end)
588);
589
/*
 * Buffer-forget event: records the device, inode (with mode), whether the
 * forgotten block is metadata, and the block number.
 */
TRACE_EVENT(ext3_forget,
	TP_PROTO(struct inode *inode, int is_metadata, unsigned long block),

	TP_ARGS(inode, is_metadata, block),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( ino_t, ino )
		__field( umode_t, mode )
		__field( int, is_metadata )
		__field( unsigned long, block )
	),

	TP_fast_assign(
		__entry->dev = inode->i_sb->s_dev;
		__entry->ino = inode->i_ino;
		__entry->mode = inode->i_mode;
		__entry->is_metadata = is_metadata;
		__entry->block = block;
	),

	TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->mode, __entry->is_metadata, __entry->block)
);
616
/*
 * Block-bitmap read event: records the device and the block group whose
 * bitmap is being read.
 */
TRACE_EVENT(ext3_read_block_bitmap,
	TP_PROTO(struct super_block *sb, unsigned int group),

	TP_ARGS(sb, group),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( __u32, group )

	),

	TP_fast_assign(
		__entry->dev = sb->s_dev;
		__entry->group = group;
	),

	TP_printk("dev %d,%d group %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->group)
);
637
/*
 * Direct-I/O entry event: records the device, inode, file position,
 * request length and read/write direction.
 */
TRACE_EVENT(ext3_direct_IO_enter,
	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),

	TP_ARGS(inode, offset, len, rw),

	TP_STRUCT__entry(
		__field( ino_t, ino )
		__field( dev_t, dev )
		__field( loff_t, pos )
		__field( unsigned long, len )
		__field( int, rw )
	),

	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__entry->dev = inode->i_sb->s_dev;
		__entry->pos = offset;
		__entry->len = len;
		__entry->rw = rw;
	),

	TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  (unsigned long long) __entry->pos, __entry->len,
		  __entry->rw)
);
665
/*
 * Direct-I/O exit event: same fields as the enter event plus the return
 * value of the direct-I/O operation.
 */
TRACE_EVENT(ext3_direct_IO_exit,
	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
		 int rw, int ret),

	TP_ARGS(inode, offset, len, rw, ret),

	TP_STRUCT__entry(
		__field( ino_t, ino )
		__field( dev_t, dev )
		__field( loff_t, pos )
		__field( unsigned long, len )
		__field( int, rw )
		__field( int, ret )
	),

	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__entry->dev = inode->i_sb->s_dev;
		__entry->pos = offset;
		__entry->len = len;
		__entry->rw = rw;
		__entry->ret = ret;
	),

	TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  (unsigned long long) __entry->pos, __entry->len,
		  __entry->rw, __entry->ret)
);
696
697TRACE_EVENT(ext3_unlink_enter,
698 TP_PROTO(struct inode *parent, struct dentry *dentry),
699
700 TP_ARGS(parent, dentry),
701
702 TP_STRUCT__entry(
703 __field( ino_t, parent )
704 __field( ino_t, ino )
705 __field( loff_t, size )
706 __field( dev_t, dev )
707 ),
708
709 TP_fast_assign(
710 __entry->parent = parent->i_ino;
711 __entry->ino = dentry->d_inode->i_ino;
712 __entry->size = dentry->d_inode->i_size;
713 __entry->dev = dentry->d_inode->i_sb->s_dev;
714 ),
715
716 TP_printk("dev %d,%d ino %lu size %lld parent %ld",
717 MAJOR(__entry->dev), MINOR(__entry->dev),
718 (unsigned long) __entry->ino,
719 (unsigned long long)__entry->size,
720 (unsigned long) __entry->parent)
721);
722
/*
 * Unlink exit event: records the device, inode and the return code of the
 * unlink operation.
 */
TRACE_EVENT(ext3_unlink_exit,
	TP_PROTO(struct dentry *dentry, int ret),

	TP_ARGS(dentry, ret),

	TP_STRUCT__entry(
		__field( ino_t, ino )
		__field( dev_t, dev )
		__field( int, ret )
	),

	TP_fast_assign(
		__entry->ino = dentry->d_inode->i_ino;
		__entry->dev = dentry->d_inode->i_sb->s_dev;
		__entry->ret = ret;
	),

	TP_printk("dev %d,%d ino %lu ret %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->ret)
);
745
/*
 * Shared event class for truncate enter/exit: records the device, inode
 * and its current block count.
 */
DECLARE_EVENT_CLASS(ext3__truncate,
	TP_PROTO(struct inode *inode),

	TP_ARGS(inode),

	TP_STRUCT__entry(
		__field( ino_t, ino )
		__field( dev_t, dev )
		__field( blkcnt_t, blocks )
	),

	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__entry->dev = inode->i_sb->s_dev;
		__entry->blocks = inode->i_blocks;
	),

	TP_printk("dev %d,%d ino %lu blocks %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino, (unsigned long) __entry->blocks)
);
767
/* Truncate entry event; uses the ext3__truncate class (dev, ino, blocks). */
DEFINE_EVENT(ext3__truncate, ext3_truncate_enter,

	TP_PROTO(struct inode *inode),

	TP_ARGS(inode)
);
774
/* Truncate exit event; uses the ext3__truncate class (dev, ino, blocks). */
DEFINE_EVENT(ext3__truncate, ext3_truncate_exit,

	TP_PROTO(struct inode *inode),

	TP_ARGS(inode)
);
781
/*
 * get_blocks entry event: records the device, inode, starting logical
 * block, mapping length and the create flag.
 * NOTE(review): create is stored as int but printed with %u — harmless
 * for the 0/1 values a create flag takes, but %d would be strictly correct.
 */
TRACE_EVENT(ext3_get_blocks_enter,
	TP_PROTO(struct inode *inode, unsigned long lblk,
		 unsigned long len, int create),

	TP_ARGS(inode, lblk, len, create),

	TP_STRUCT__entry(
		__field( ino_t, ino )
		__field( dev_t, dev )
		__field( unsigned long, lblk )
		__field( unsigned long, len )
		__field( int, create )
	),

	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__entry->dev = inode->i_sb->s_dev;
		__entry->lblk = lblk;
		__entry->len = len;
		__entry->create = create;
	),

	TP_printk("dev %d,%d ino %lu lblk %lu len %lu create %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->lblk, __entry->len, __entry->create)
);
809
/*
 * get_blocks exit event: records the device, inode, the logical block,
 * the physical block it mapped to, the length and the return value.
 */
TRACE_EVENT(ext3_get_blocks_exit,
	TP_PROTO(struct inode *inode, unsigned long lblk,
		 unsigned long pblk, unsigned long len, int ret),

	TP_ARGS(inode, lblk, pblk, len, ret),

	TP_STRUCT__entry(
		__field( ino_t, ino )
		__field( dev_t, dev )
		__field( unsigned long, lblk )
		__field( unsigned long, pblk )
		__field( unsigned long, len )
		__field( int, ret )
	),

	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__entry->dev = inode->i_sb->s_dev;
		__entry->lblk = lblk;
		__entry->pblk = pblk;
		__entry->len = len;
		__entry->ret = ret;
	),

	TP_printk("dev %d,%d ino %lu lblk %lu pblk %lu len %lu ret %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  __entry->lblk, __entry->pblk,
		  __entry->len, __entry->ret)
);
840
/*
 * Inode-load event: records the device and inode number being read in.
 */
TRACE_EVENT(ext3_load_inode,
	TP_PROTO(struct inode *inode),

	TP_ARGS(inode),

	TP_STRUCT__entry(
		__field( ino_t, ino )
		__field( dev_t, dev )
	),

	TP_fast_assign(
		__entry->ino = inode->i_ino;
		__entry->dev = inode->i_sb->s_dev;
	),

	TP_printk("dev %d,%d ino %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino)
);
860
861#endif /* _TRACE_EXT3_H */
862
863/* This part must be outside protection */
864#include <trace/define_trace.h>
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h
new file mode 100644
index 00000000000..aff64d82d71
--- /dev/null
+++ b/include/trace/events/jbd.h
@@ -0,0 +1,203 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM jbd
3
4#if !defined(_TRACE_JBD_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_JBD_H
6
7#include <linux/jbd.h>
8#include <linux/tracepoint.h>
9
/*
 * Checkpoint event: records the journal's backing device and the result
 * code of the checkpoint pass.
 */
TRACE_EVENT(jbd_checkpoint,

	TP_PROTO(journal_t *journal, int result),

	TP_ARGS(journal, result),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( int, result )
	),

	TP_fast_assign(
		__entry->dev = journal->j_fs_dev->bd_dev;
		__entry->result = result;
	),

	TP_printk("dev %d,%d result %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->result)
);
30
/*
 * Shared event class for the commit-phase events: records the journal's
 * backing device, the transaction id and the transaction's
 * t_synchronous_commit flag.
 */
DECLARE_EVENT_CLASS(jbd_commit,

	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),

	TP_ARGS(journal, commit_transaction),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( char, sync_commit )
		__field( int, transaction )
	),

	TP_fast_assign(
		__entry->dev = journal->j_fs_dev->bd_dev;
		__entry->sync_commit = commit_transaction->t_synchronous_commit;
		__entry->transaction = commit_transaction->t_tid;
	),

	TP_printk("dev %d,%d transaction %d sync %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->transaction, __entry->sync_commit)
);
53
/* Commit start; uses the jbd_commit class (dev, tid, sync flag). */
DEFINE_EVENT(jbd_commit, jbd_start_commit,

	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),

	TP_ARGS(journal, commit_transaction)
);
60
/* Commit locking phase; uses the jbd_commit class (dev, tid, sync flag). */
DEFINE_EVENT(jbd_commit, jbd_commit_locking,

	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),

	TP_ARGS(journal, commit_transaction)
);
67
/* Commit flushing phase; uses the jbd_commit class (dev, tid, sync flag). */
DEFINE_EVENT(jbd_commit, jbd_commit_flushing,

	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),

	TP_ARGS(journal, commit_transaction)
);
74
/* Commit logging phase; uses the jbd_commit class (dev, tid, sync flag). */
DEFINE_EVENT(jbd_commit, jbd_commit_logging,

	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),

	TP_ARGS(journal, commit_transaction)
);
81
/*
 * Transaction-drop event: same fields as the jbd_commit class (dev, tid,
 * sync flag), defined standalone rather than via the class.
 */
TRACE_EVENT(jbd_drop_transaction,

	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),

	TP_ARGS(journal, commit_transaction),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( char, sync_commit )
		__field( int, transaction )
	),

	TP_fast_assign(
		__entry->dev = journal->j_fs_dev->bd_dev;
		__entry->sync_commit = commit_transaction->t_synchronous_commit;
		__entry->transaction = commit_transaction->t_tid;
	),

	TP_printk("dev %d,%d transaction %d sync %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->transaction, __entry->sync_commit)
);
104
/*
 * Commit end event: in addition to dev/tid/sync, samples the journal's
 * j_tail_sequence at commit completion and reports it as "head".
 */
TRACE_EVENT(jbd_end_commit,
	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),

	TP_ARGS(journal, commit_transaction),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( char, sync_commit )
		__field( int, transaction )
		__field( int, head )
	),

	TP_fast_assign(
		__entry->dev = journal->j_fs_dev->bd_dev;
		__entry->sync_commit = commit_transaction->t_synchronous_commit;
		__entry->transaction = commit_transaction->t_tid;
		__entry->head = journal->j_tail_sequence;
	),

	TP_printk("dev %d,%d transaction %d sync %d head %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->transaction, __entry->sync_commit, __entry->head)
);
128
/*
 * Data-submission event: records the journal's backing device, the
 * transaction id and the transaction's synchronous-commit flag.
 */
TRACE_EVENT(jbd_do_submit_data,
	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),

	TP_ARGS(journal, commit_transaction),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( char, sync_commit )
		__field( int, transaction )
	),

	TP_fast_assign(
		__entry->dev = journal->j_fs_dev->bd_dev;
		__entry->sync_commit = commit_transaction->t_synchronous_commit;
		__entry->transaction = commit_transaction->t_tid;
	),

	TP_printk("dev %d,%d transaction %d sync %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->transaction, __entry->sync_commit)
);
150
/*
 * Journal-tail cleanup event: reports the tail advance as
 * "from <old j_tail_sequence> to <first_tid>", plus the new tail block
 * offset and the number of journal blocks freed.
 */
TRACE_EVENT(jbd_cleanup_journal_tail,

	TP_PROTO(journal_t *journal, tid_t first_tid,
		 unsigned long block_nr, unsigned long freed),

	TP_ARGS(journal, first_tid, block_nr, freed),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( tid_t, tail_sequence )
		__field( tid_t, first_tid )
		__field(unsigned long, block_nr )
		__field(unsigned long, freed )
	),

	TP_fast_assign(
		__entry->dev = journal->j_fs_dev->bd_dev;
		__entry->tail_sequence = journal->j_tail_sequence;
		__entry->first_tid = first_tid;
		__entry->block_nr = block_nr;
		__entry->freed = freed;
	),

	TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->tail_sequence, __entry->first_tid,
		  __entry->block_nr, __entry->freed)
);
179
/*
 * Journal-superblock update completion event: records the journal's
 * backing device and whether the update was issued with a wait.
 */
TRACE_EVENT(jbd_update_superblock_end,
	TP_PROTO(journal_t *journal, int wait),

	TP_ARGS(journal, wait),

	TP_STRUCT__entry(
		__field( dev_t, dev )
		__field( int, wait )
	),

	TP_fast_assign(
		__entry->dev = journal->j_fs_dev->bd_dev;
		__entry->wait = wait;
	),

	TP_printk("dev %d,%d wait %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->wait)
);
199
200#endif /* _TRACE_JBD_H */
201
202/* This part must be outside protection */
203#include <trace/define_trace.h>