-rw-r--r--   Documentation/filesystems/ext3.txt |   6
-rw-r--r--   fs/ext2/balloc.c                   |   4
-rw-r--r--   fs/ext2/ialloc.c                   |   2
-rw-r--r--   fs/ext2/super.c                    |  18
-rw-r--r--   fs/ext2/xattr.c                    |   1
-rw-r--r--   fs/ext3/dir.c                      | 167
-rw-r--r--   fs/ext3/ext3.h                     |   6
-rw-r--r--   fs/ext3/hash.c                     |   4
-rw-r--r--   fs/ext3/ialloc.c                   |  20
-rw-r--r--   fs/ext3/super.c                    |   6
-rw-r--r--   fs/ext4/super.c                    |   6
-rw-r--r--   fs/jbd/checkpoint.c                |  23
-rw-r--r--   fs/jbd/commit.c                    |  21
-rw-r--r--   fs/jbd/journal.c                   | 206
-rw-r--r--   fs/jbd/transaction.c               |   2
-rw-r--r--   fs/quota/dquot.c                   |  32
-rw-r--r--   fs/reiserfs/super.c                |   6
-rw-r--r--   include/linux/jbd.h                |  18
-rw-r--r--   include/trace/events/jbd.h         |  39
19 files changed, 342 insertions, 245 deletions
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index b100adc38adb..293855e95000 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -59,9 +59,9 @@ commit=nrsec (*) Ext3 can be told to sync all its data and metadata
 			Setting it to very large values will improve
 			performance.
 
-barrier=<0(*)|1>	This enables/disables the use of write barriers in
-barrier			the jbd code. barrier=0 disables, barrier=1 enables.
-nobarrier	(*)	This also requires an IO stack which can support
+barrier=<0|1(*)>	This enables/disables the use of write barriers in
+barrier	(*)		the jbd code. barrier=0 disables, barrier=1 enables.
+nobarrier		This also requires an IO stack which can support
 			barriers, and if jbd gets an error on a barrier
 			write, it will disable again with a warning.
 			Write barriers enforce proper on-disk ordering
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 030c6d277e14..1c3613998862 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -165,7 +165,6 @@ static void release_blocks(struct super_block *sb, int count)
 		struct ext2_sb_info *sbi = EXT2_SB(sb);
 
 		percpu_counter_add(&sbi->s_freeblocks_counter, count);
-		sb->s_dirt = 1;
 	}
 }
 
@@ -180,7 +179,6 @@ static void group_adjust_blocks(struct super_block *sb, int group_no,
 		free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
 		desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
 		spin_unlock(sb_bgl_lock(sbi, group_no));
-		sb->s_dirt = 1;
 		mark_buffer_dirty(bh);
 	}
 }
@@ -479,7 +477,7 @@ void ext2_discard_reservation(struct inode *inode)
 }
 
 /**
- * ext2_free_blocks_sb() -- Free given blocks and update quota and i_blocks
+ * ext2_free_blocks() -- Free given blocks and update quota and i_blocks
  * @inode:	inode
  * @block:	start physcial block to free
  * @count:	number of blocks to free
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 8b15cf8cef37..c13eb7b91a11 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -81,7 +81,6 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir)
 	spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
 	if (dir)
 		percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
-	sb->s_dirt = 1;
 	mark_buffer_dirty(bh);
 }
 
@@ -543,7 +542,6 @@ got:
 	}
 	spin_unlock(sb_bgl_lock(sbi, group));
 
-	sb->s_dirt = 1;
 	mark_buffer_dirty(bh2);
 	if (test_opt(sb, GRPID)) {
 		inode->i_mode = mode;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 38f816071ddb..b3621cb7ea31 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -130,9 +130,6 @@ static void ext2_put_super (struct super_block * sb)
 
 	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
-	if (sb->s_dirt)
-		ext2_write_super(sb);
-
 	ext2_xattr_put_super(sb);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		struct ext2_super_block *es = sbi->s_es;
@@ -307,7 +304,6 @@ static const struct super_operations ext2_sops = {
 	.write_inode	= ext2_write_inode,
 	.evict_inode	= ext2_evict_inode,
 	.put_super	= ext2_put_super,
-	.write_super	= ext2_write_super,
 	.sync_fs	= ext2_sync_fs,
 	.statfs		= ext2_statfs,
 	.remount_fs	= ext2_remount,
@@ -358,11 +354,6 @@ static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid,
 				    ext2_nfs_get_inode);
 }
 
-/* Yes, most of these are left as NULL!!
- * A NULL value implies the default, which works with ext2-like file
- * systems, but can be improved upon.
- * Currently only get_parent is required.
- */
 static const struct export_operations ext2_export_ops = {
 	.fh_to_dentry = ext2_fh_to_dentry,
 	.fh_to_parent = ext2_fh_to_parent,
@@ -1176,7 +1167,6 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
 	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
 	if (wait)
 		sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
-	sb->s_dirt = 0;
 }
 
 /*
@@ -1209,8 +1199,6 @@ void ext2_write_super(struct super_block *sb)
 {
 	if (!(sb->s_flags & MS_RDONLY))
 		ext2_sync_fs(sb, 1);
-	else
-		sb->s_dirt = 0;
 }
 
 static int ext2_remount (struct super_block * sb, int * flags, char * data)
@@ -1456,7 +1444,6 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 	struct buffer_head tmp_bh;
 	struct buffer_head *bh;
 
-	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
@@ -1486,16 +1473,13 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 		blk++;
 	}
 out:
-	if (len == towrite) {
-		mutex_unlock(&inode->i_mutex);
+	if (len == towrite)
 		return err;
-	}
 	if (inode->i_size < off+len-towrite)
 		i_size_write(inode, off+len-towrite);
 	inode->i_version++;
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(inode);
-	mutex_unlock(&inode->i_mutex);
 	return len - towrite;
 }
 
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 6dcafc7efdfd..b6754dbbce3c 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -339,7 +339,6 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
 	spin_lock(&EXT2_SB(sb)->s_lock);
 	EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
 	spin_unlock(&EXT2_SB(sb)->s_lock);
-	sb->s_dirt = 1;
 	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
 }
 
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index cc761ad8fa57..92490e9f85ca 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -21,30 +21,15 @@
  *
  */
 
+#include <linux/compat.h>
 #include "ext3.h"
 
 static unsigned char ext3_filetype_table[] = {
 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 };
 
-static int ext3_readdir(struct file *, void *, filldir_t);
 static int ext3_dx_readdir(struct file * filp,
 			   void * dirent, filldir_t filldir);
-static int ext3_release_dir (struct inode * inode,
-			     struct file * filp);
-
-const struct file_operations ext3_dir_operations = {
-	.llseek		= generic_file_llseek,
-	.read		= generic_read_dir,
-	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
-	.unlocked_ioctl = ext3_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= ext3_compat_ioctl,
-#endif
-	.fsync		= ext3_sync_file,	/* BKL held */
-	.release	= ext3_release_dir,
-};
-
 
 static unsigned char get_dtype(struct super_block *sb, int filetype)
 {
@@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
 	return (ext3_filetype_table[filetype]);
 }
 
+/**
+ * Check if the given dir-inode refers to an htree-indexed directory
+ * (or a directory which chould potentially get coverted to use htree
+ * indexing).
+ *
+ * Return 1 if it is a dx dir, 0 if not
+ */
+static int is_dx_dir(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
+		     EXT3_FEATURE_COMPAT_DIR_INDEX) &&
+	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
+	     ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+		return 1;
+
+	return 0;
+}
 
 int ext3_check_dir_entry (const char * function, struct inode * dir,
 			  struct ext3_dir_entry_2 * de,
@@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp,
 	unsigned long offset;
 	int i, stored;
 	struct ext3_dir_entry_2 *de;
-	struct super_block *sb;
 	int err;
 	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 	int dir_has_error = 0;
 
-	sb = inode->i_sb;
-
-	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
-		    EXT3_FEATURE_COMPAT_DIR_INDEX) &&
-	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
-	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
+	if (is_dx_dir(inode)) {
 		err = ext3_dx_readdir(filp, dirent, filldir);
 		if (err != ERR_BAD_DX_DIR) {
 			ret = err;
@@ -227,22 +226,87 @@ out:
 	return ret;
 }
 
+static inline int is_32bit_api(void)
+{
+#ifdef CONFIG_COMPAT
+	return is_compat_task();
+#else
+	return (BITS_PER_LONG == 32);
+#endif
+}
+
 /*
  * These functions convert from the major/minor hash to an f_pos
- * value.
+ * value for dx directories
  *
- * Currently we only use major hash numer. This is unfortunate, but
- * on 32-bit machines, the same VFS interface is used for lseek and
- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
- * lseek/telldir/seekdir will blow out spectacularly, and from within
- * the ext2 low-level routine, we don't know if we're being called by
- * a 64-bit version of the system call or the 32-bit version of the
- * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
- * cookie.  Sigh.
+ * Upper layer (for example NFS) should specify FMODE_32BITHASH or
+ * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
+ * directly on both 32-bit and 64-bit nodes, under such case, neither
+ * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
  */
-#define hash2pos(major, minor)	(major >> 1)
-#define pos2maj_hash(pos)	((pos << 1) & 0xffffffff)
-#define pos2min_hash(pos)	(0)
+static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
+{
+	if ((filp->f_mode & FMODE_32BITHASH) ||
+	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+		return major >> 1;
+	else
+		return ((__u64)(major >> 1) << 32) | (__u64)minor;
+}
+
+static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
+{
+	if ((filp->f_mode & FMODE_32BITHASH) ||
+	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+		return (pos << 1) & 0xffffffff;
+	else
+		return ((pos >> 32) << 1) & 0xffffffff;
+}
+
+static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
+{
+	if ((filp->f_mode & FMODE_32BITHASH) ||
+	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+		return 0;
+	else
+		return pos & 0xffffffff;
+}
+
+/*
+ * Return 32- or 64-bit end-of-file for dx directories
+ */
+static inline loff_t ext3_get_htree_eof(struct file *filp)
+{
+	if ((filp->f_mode & FMODE_32BITHASH) ||
+	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+		return EXT3_HTREE_EOF_32BIT;
+	else
+		return EXT3_HTREE_EOF_64BIT;
+}
+
+
+/*
+ * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
+ * non-htree and htree directories, where the "offset" is in terms
+ * of the filename hash value instead of the byte offset.
+ *
+ * Because we may return a 64-bit hash that is well beyond s_maxbytes,
+ * we need to pass the max hash as the maximum allowable offset in
+ * the htree directory case.
+ *
+ * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
+ *       will be invalid once the directory was converted into a dx directory
+ */
+loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct inode *inode = file->f_mapping->host;
+	int dx_dir = is_dx_dir(inode);
+
+	if (likely(dx_dir))
+		return generic_file_llseek_size(file, offset, origin,
+						ext3_get_htree_eof(file));
+	else
+		return generic_file_llseek(file, offset, origin);
+}
 
 /*
  * This structure holds the nodes of the red-black tree used to store
@@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root)
 }
 
 
-static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
+							   loff_t pos)
 {
 	struct dir_private_info *p;
 
 	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
 	if (!p)
 		return NULL;
-	p->curr_hash = pos2maj_hash(pos);
-	p->curr_minor_hash = pos2min_hash(pos);
+	p->curr_hash = pos2maj_hash(filp, pos);
+	p->curr_minor_hash = pos2min_hash(filp, pos);
 	return p;
 }
 
@@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent,
 		printk("call_filldir: called with null fname?!?\n");
 		return 0;
 	}
-	curr_pos = hash2pos(fname->hash, fname->minor_hash);
+	curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
 	while (fname) {
 		error = filldir(dirent, fname->name,
 				fname->name_len, curr_pos,
@@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp,
 	int	ret;
 
 	if (!info) {
-		info = ext3_htree_create_dir_info(filp->f_pos);
+		info = ext3_htree_create_dir_info(filp, filp->f_pos);
 		if (!info)
 			return -ENOMEM;
 		filp->private_data = info;
 	}
 
-	if (filp->f_pos == EXT3_HTREE_EOF)
+	if (filp->f_pos == ext3_get_htree_eof(filp))
 		return 0;	/* EOF */
 
 	/* Some one has messed with f_pos; reset the world */
@@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp,
 		free_rb_tree_fname(&info->root);
 		info->curr_node = NULL;
 		info->extra_fname = NULL;
-		info->curr_hash = pos2maj_hash(filp->f_pos);
-		info->curr_minor_hash = pos2min_hash(filp->f_pos);
+		info->curr_hash = pos2maj_hash(filp, filp->f_pos);
+		info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
 	}
 
 	/*
@@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp,
 		if (ret < 0)
 			return ret;
 		if (ret == 0) {
-			filp->f_pos = EXT3_HTREE_EOF;
+			filp->f_pos = ext3_get_htree_eof(filp);
 			break;
 		}
 		info->curr_node = rb_first(&info->root);
@@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp,
 			info->curr_minor_hash = fname->minor_hash;
 		} else {
 			if (info->next_hash == ~0) {
-				filp->f_pos = EXT3_HTREE_EOF;
+				filp->f_pos = ext3_get_htree_eof(filp);
 				break;
 			}
 			info->curr_hash = info->next_hash;
@@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
 
 	return 0;
 }
+
+const struct file_operations ext3_dir_operations = {
+	.llseek		= ext3_dir_llseek,
+	.read		= generic_read_dir,
+	.readdir	= ext3_readdir,
+	.unlocked_ioctl = ext3_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext3_compat_ioctl,
+#endif
+	.fsync		= ext3_sync_file,
+	.release	= ext3_release_dir,
+};
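Note on the helpers introduced in fs/ext3/dir.c above: hash2pos() and its two inverses pack the directory entry's major/minor name hash into the f_pos cookie, using all 64 bits only when the reader can cope with a 64-bit offset. The stand-alone C sketch below mirrors that packing logic for illustration only; the use_32bit flag is an assumption standing in for the kernel's FMODE_32BITHASH / FMODE_64BITHASH / is_32bit_api() checks, and plain stdint types replace the kernel ones.

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified copies of the new hash<->f_pos helpers; "use_32bit" stands
     * in for the FMODE_32BITHASH / is_32bit_api() test in the patch. */
    static uint64_t hash2pos(int use_32bit, uint32_t major, uint32_t minor)
    {
        if (use_32bit)
            return major >> 1;
        return ((uint64_t)(major >> 1) << 32) | minor;
    }

    static uint32_t pos2maj_hash(int use_32bit, uint64_t pos)
    {
        if (use_32bit)
            return (uint32_t)((pos << 1) & 0xffffffff);
        return (uint32_t)(((pos >> 32) << 1) & 0xffffffff);
    }

    static uint32_t pos2min_hash(int use_32bit, uint64_t pos)
    {
        return use_32bit ? 0 : (uint32_t)(pos & 0xffffffff);
    }

    int main(void)
    {
        uint32_t major = 0xdeadbee0, minor = 0x12345678;
        uint64_t pos = hash2pos(0, major, minor);

        /* The round trip loses only bit 0 of the major hash, which
         * ext3fs_dirhash() clears anyway before the hash is used. */
        printf("pos=%#llx major=%#x minor=%#x\n",
               (unsigned long long)pos,
               pos2maj_hash(0, pos), pos2min_hash(0, pos));
        return 0;
    }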
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h
index 7977973a24f0..e85ff15a060e 100644
--- a/fs/ext3/ext3.h
+++ b/fs/ext3/ext3.h
@@ -920,7 +920,11 @@ struct dx_hash_info
 	u32		*seed;
 };
 
-#define EXT3_HTREE_EOF	0x7fffffff
+
+/* 32 and 64 bit signed EOF for dx directories */
+#define EXT3_HTREE_EOF_32BIT   ((1UL  << (32 - 1)) - 1)
+#define EXT3_HTREE_EOF_64BIT   ((1ULL << (64 - 1)) - 1)
+
 
 /*
  * Control parameters used by ext3_htree_next_block
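For reference, the two macros added above evaluate to the largest positive signed 32-bit and 64-bit offsets, so the 32-bit value is identical to the old EXT3_HTREE_EOF constant. A tiny user-space check of those expressions (the local HTREE_EOF_* names are used only so the snippet stays self-contained):

    #include <assert.h>

    /* Same expressions as the new EXT3_HTREE_EOF_{32,64}BIT macros above. */
    #define HTREE_EOF_32BIT ((1UL  << (32 - 1)) - 1)
    #define HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)

    int main(void)
    {
        /* 0x7fffffff: identical to the removed EXT3_HTREE_EOF value. */
        assert(HTREE_EOF_32BIT == 0x7fffffffUL);
        /* Largest positive 64-bit (loff_t-sized) offset. */
        assert(HTREE_EOF_64BIT == 0x7fffffffffffffffULL);
        return 0;
    }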
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index d10231ddcf8a..ede315cdf126 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
 		return -1;
 	}
 	hash = hash & ~1;
-	if (hash == (EXT3_HTREE_EOF << 1))
-		hash = (EXT3_HTREE_EOF-1) << 1;
+	if (hash == (EXT3_HTREE_EOF_32BIT << 1))
+		hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
 	hinfo->hash = hash;
 	hinfo->minor_hash = minor_hash;
 	return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index e3c39e4cec19..082afd78b107 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -180,8 +180,7 @@ error_return:
 * It's OK to put directory into a group unless
 * it has too many directories already (max_dirs) or
 * it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks) or
- * it's already running too large debt (max_debt).
+ * it has too few free blocks left (min_blocks).
 * Parent's group is preferred, if it doesn't satisfy these
 * conditions we search cyclically through the rest. If none
 * of the groups look good we just look for a group with more
@@ -191,21 +190,16 @@ error_return:
 * when we allocate an inode, within 0--255.
 */
 
-#define INODE_COST 64
-#define BLOCK_COST 256
-
 static int find_group_orlov(struct super_block *sb, struct inode *parent)
 {
 	int parent_group = EXT3_I(parent)->i_block_group;
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
-	struct ext3_super_block *es = sbi->s_es;
 	int ngroups = sbi->s_groups_count;
 	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
 	unsigned int freei, avefreei;
 	ext3_fsblk_t freeb, avefreeb;
-	ext3_fsblk_t blocks_per_dir;
 	unsigned int ndirs;
-	int max_debt, max_dirs, min_inodes;
+	int max_dirs, min_inodes;
 	ext3_grpblk_t min_blocks;
 	int group = -1, i;
 	struct ext3_group_desc *desc;
@@ -242,20 +236,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
 			goto fallback;
 	}
 
-	blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
-
 	max_dirs = ndirs / ngroups + inodes_per_group / 16;
 	min_inodes = avefreei - inodes_per_group / 4;
 	min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
 
-	max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
-	if (max_debt * INODE_COST > inodes_per_group)
-		max_debt = inodes_per_group / INODE_COST;
-	if (max_debt > 255)
-		max_debt = 255;
-	if (max_debt == 0)
-		max_debt = 1;
-
 	for (i = 0; i < ngroups; i++) {
 		group = (parent_group + i) % ngroups;
 		desc = ext3_get_group_desc (sb, group, NULL);
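With the debt heuristic removed above, find_group_orlov() keeps only three admission criteria for a new directory's block group (not too many directories, enough free inodes, enough free blocks), searched cyclically starting at the parent's group. A simplified user-space sketch of that selection loop follows; the group_stat structure, its field names and the numbers are invented for illustration and are not the kernel's group descriptors.

    #include <stdio.h>

    /* Invented per-group statistics, only to demonstrate the shape of the
     * remaining Orlov checks; not the kernel's on-disk group descriptors. */
    struct group_stat {
        unsigned int used_dirs;
        unsigned int free_inodes;
        unsigned int free_blocks;
    };

    static int group_acceptable(const struct group_stat *g,
                                unsigned int max_dirs,
                                unsigned int min_inodes,
                                unsigned int min_blocks)
    {
        /* Mirrors the three surviving criteria: not crowded with
         * directories, enough free inodes, enough free blocks. */
        return g->used_dirs < max_dirs &&
               g->free_inodes >= min_inodes &&
               g->free_blocks >= min_blocks;
    }

    int main(void)
    {
        struct group_stat groups[] = {
            { .used_dirs = 50, .free_inodes = 10,  .free_blocks = 900  },
            { .used_dirs = 12, .free_inodes = 400, .free_blocks = 5000 },
        };
        int parent_group = 0, ngroups = 2, i;

        /* Search cyclically from the parent's group, as the kernel loop does. */
        for (i = 0; i < ngroups; i++) {
            int group = (parent_group + i) % ngroups;

            if (group_acceptable(&groups[group], 40, 100, 1000)) {
                printf("picked group %d\n", group);
                return 0;
            }
        }
        printf("no group matched, fall back to any group with free space\n");
        return 0;
    }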
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 94ef7e616129..8c3a44b7c375 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -3015,7 +3015,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 			(unsigned long long)off, (unsigned long long)len);
 		return -EIO;
 	}
-	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	bh = ext3_bread(handle, inode, blk, 1, &err);
 	if (!bh)
 		goto out;
@@ -3039,10 +3038,8 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	}
 	brelse(bh);
 out:
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
+	if (err)
 		return err;
-	}
 	if (inode->i_size < off + len) {
 		i_size_write(inode, off + len);
 		EXT3_I(inode)->i_disksize = inode->i_size;
@@ -3050,7 +3047,6 @@ out:
 	inode->i_version++;
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	ext3_mark_inode_dirty(handle, inode);
-	mutex_unlock(&inode->i_mutex);
 	return len;
 }
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 436b4223df66..1867a98e0c49 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4758,7 +4758,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 		return -EIO;
 	}
 
-	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	bh = ext4_bread(handle, inode, blk, 1, &err);
 	if (!bh)
 		goto out;
@@ -4774,16 +4773,13 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	err = ext4_handle_dirty_metadata(handle, NULL, bh);
 	brelse(bh);
 out:
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
+	if (err)
 		return err;
-	}
 	if (inode->i_size < off + len) {
 		i_size_write(inode, off + len);
 		EXT4_I(inode)->i_disksize = inode->i_size;
 		ext4_mark_inode_dirty(handle, inode);
 	}
-	mutex_unlock(&inode->i_mutex);
 	return len;
 }
 
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 05f0754f2b46..08c03044abdd 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -508,20 +508,19 @@ int cleanup_journal_tail(journal_t *journal)
 	/*
 	 * We need to make sure that any blocks that were recently written out
 	 * --- perhaps by log_do_checkpoint() --- are flushed out before we
-	 * drop the transactions from the journal. It's unlikely this will be
-	 * necessary, especially with an appropriately sized journal, but we
-	 * need this to guarantee correctness.  Fortunately
-	 * cleanup_journal_tail() doesn't get called all that often.
+	 * drop the transactions from the journal. Similarly we need to be sure
+	 * superblock makes it to disk before next transaction starts reusing
+	 * freed space (otherwise we could replay some blocks of the new
+	 * transaction thinking they belong to the old one). So we use
+	 * WRITE_FLUSH_FUA. It's unlikely this will be necessary, especially
+	 * with an appropriately sized journal, but we need this to guarantee
+	 * correctness. Fortunately cleanup_journal_tail() doesn't get called
+	 * all that often.
 	 */
-	if (journal->j_flags & JFS_BARRIER)
-		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+	journal_update_sb_log_tail(journal, first_tid, blocknr,
+				   WRITE_FLUSH_FUA);
 
 	spin_lock(&journal->j_state_lock);
-	if (!tid_gt(first_tid, journal->j_tail_sequence)) {
-		spin_unlock(&journal->j_state_lock);
-		/* Someone else cleaned up journal so return 0 */
-		return 0;
-	}
 	/* OK, update the superblock to recover the freed space.
 	 * Physical blocks come first: have we wrapped beyond the end of
 	 * the log? */
@@ -539,8 +538,6 @@ int cleanup_journal_tail(journal_t *journal)
 	journal->j_tail_sequence = first_tid;
 	journal->j_tail = blocknr;
 	spin_unlock(&journal->j_state_lock);
-	if (!(journal->j_flags & JFS_ABORT))
-		journal_update_superblock(journal, 1);
 	return 0;
 }
 
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index f2b9a571f4cf..52c15c776029 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -298,6 +298,7 @@ void journal_commit_transaction(journal_t *journal)
 	int tag_flag;
 	int i;
 	struct blk_plug plug;
+	int write_op = WRITE;
 
 	/*
 	 * First job: lock down the current transaction and wait for
@@ -307,7 +308,16 @@ void journal_commit_transaction(journal_t *journal)
 	/* Do we need to erase the effects of a prior journal_flush? */
 	if (journal->j_flags & JFS_FLUSHED) {
 		jbd_debug(3, "super block updated\n");
-		journal_update_superblock(journal, 1);
+		mutex_lock(&journal->j_checkpoint_mutex);
+		/*
+		 * We hold j_checkpoint_mutex so tail cannot change under us.
+		 * We don't need any special data guarantees for writing sb
+		 * since journal is empty and it is ok for write to be
+		 * flushed only with transaction commit.
+		 */
+		journal_update_sb_log_tail(journal, journal->j_tail_sequence,
+					   journal->j_tail, WRITE_SYNC);
+		mutex_unlock(&journal->j_checkpoint_mutex);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
 	}
@@ -413,13 +423,16 @@ void journal_commit_transaction(journal_t *journal)
 
 	jbd_debug (3, "JBD: commit phase 2\n");
 
+	if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid))
+		write_op = WRITE_SYNC;
+
 	/*
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
 	blk_start_plug(&plug);
 	err = journal_submit_data_buffers(journal, commit_transaction,
-					  WRITE_SYNC);
+					  write_op);
 	blk_finish_plug(&plug);
 
 	/*
@@ -478,7 +491,7 @@ void journal_commit_transaction(journal_t *journal)
 
 	blk_start_plug(&plug);
 
-	journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC);
+	journal_write_revoke_records(journal, commit_transaction, write_op);
 
 	/*
 	 * If we found any dirty or locked buffers, then we should have
@@ -649,7 +662,7 @@ start_journal_io:
 			clear_buffer_dirty(bh);
 			set_buffer_uptodate(bh);
 			bh->b_end_io = journal_end_buffer_io_sync;
-			submit_bh(WRITE_SYNC, bh);
+			submit_bh(write_op, bh);
 		}
 		cond_resched();
 
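The write_op logic added above means the commit path only pays for WRITE_SYNC when some task has actually called log_wait_commit() for this transaction or a later one; otherwise the journal I/O stays plain asynchronous WRITE. A minimal sketch of that decision, with tid_geq() written in the same wrap-safe style as include/linux/jbd.h and stand-in values for the write flags:

    #include <stdio.h>

    typedef unsigned int tid_t;

    /* Wrap-safe comparison in the same style as include/linux/jbd.h. */
    static int tid_geq(tid_t x, tid_t y)
    {
        int difference = (int)(x - y);
        return difference >= 0;
    }

    /* Stand-in values; the kernel uses the block layer's WRITE/WRITE_SYNC. */
    enum { PLAIN_WRITE, SYNC_WRITE };

    static int pick_write_op(tid_t commit_waited, tid_t committing_tid)
    {
        /* Somebody called log_wait_commit() for this tid or a later one,
         * so the commit I/O should not linger behind background writes. */
        if (tid_geq(commit_waited, committing_tid))
            return SYNC_WRITE;
        /* Purely background commit: plain asynchronous WRITE is enough. */
        return PLAIN_WRITE;
    }

    int main(void)
    {
        printf("%d\n", pick_write_op(42, 42)); /* waiter present -> 1 */
        printf("%d\n", pick_write_op(40, 41)); /* background     -> 0 */
        return 0;
    }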
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 0971e9217808..425c2f2cf170 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -563,6 +563,8 @@ int log_wait_commit(journal_t *journal, tid_t tid)
 	spin_unlock(&journal->j_state_lock);
 #endif
 	spin_lock(&journal->j_state_lock);
+	if (!tid_geq(journal->j_commit_waited, tid))
+		journal->j_commit_waited = tid;
 	while (tid_gt(tid, journal->j_commit_sequence)) {
 		jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
 			  tid, journal->j_commit_sequence);
@@ -921,8 +923,33 @@ static int journal_reset(journal_t *journal)
 
 	journal->j_max_transaction_buffers = journal->j_maxlen / 4;
 
-	/* Add the dynamic fields and write it to disk. */
-	journal_update_superblock(journal, 1);
+	/*
+	 * As a special case, if the on-disk copy is already marked as needing
+	 * no recovery (s_start == 0), then we can safely defer the superblock
+	 * update until the next commit by setting JFS_FLUSHED.  This avoids
+	 * attempting a write to a potential-readonly device.
+	 */
+	if (sb->s_start == 0) {
+		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
+			"(start %u, seq %d, errno %d)\n",
+			journal->j_tail, journal->j_tail_sequence,
+			journal->j_errno);
+		journal->j_flags |= JFS_FLUSHED;
+	} else {
+		/* Lock here to make assertions happy... */
+		mutex_lock(&journal->j_checkpoint_mutex);
+		/*
+		 * Update log tail information. We use WRITE_FUA since new
+		 * transaction will start reusing journal space and so we
+		 * must make sure information about current log tail is on
+		 * disk before that.
+		 */
+		journal_update_sb_log_tail(journal,
+					   journal->j_tail_sequence,
+					   journal->j_tail,
+					   WRITE_FUA);
+		mutex_unlock(&journal->j_checkpoint_mutex);
+	}
 	return journal_start_thread(journal);
 }
 
@@ -999,35 +1026,15 @@ int journal_create(journal_t *journal)
 	return journal_reset(journal);
 }
 
-/**
- * void journal_update_superblock() - Update journal sb on disk.
- * @journal: The journal to update.
- * @wait: Set to '0' if you don't want to wait for IO completion.
- *
- * Update a journal's dynamic superblock fields and write it to disk,
- * optionally waiting for the IO to complete.
- */
-void journal_update_superblock(journal_t *journal, int wait)
+static void journal_write_superblock(journal_t *journal, int write_op)
 {
-	journal_superblock_t *sb = journal->j_superblock;
 	struct buffer_head *bh = journal->j_sb_buffer;
+	int ret;
 
-	/*
-	 * As a special case, if the on-disk copy is already marked as needing
-	 * no recovery (s_start == 0) and there are no outstanding transactions
-	 * in the filesystem, then we can safely defer the superblock update
-	 * until the next commit by setting JFS_FLUSHED.  This avoids
-	 * attempting a write to a potential-readonly device.
-	 */
-	if (sb->s_start == 0 && journal->j_tail_sequence ==
-				journal->j_transaction_sequence) {
-		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
-			"(start %u, seq %d, errno %d)\n",
-			journal->j_tail, journal->j_tail_sequence,
-			journal->j_errno);
-		goto out;
-	}
-
+	trace_journal_write_superblock(journal, write_op);
+	if (!(journal->j_flags & JFS_BARRIER))
+		write_op &= ~(REQ_FUA | REQ_FLUSH);
+	lock_buffer(bh);
 	if (buffer_write_io_error(bh)) {
 		char b[BDEVNAME_SIZE];
 		/*
@@ -1045,42 +1052,100 @@ void journal_update_superblock(journal_t *journal, int wait)
 		set_buffer_uptodate(bh);
 	}
 
+	get_bh(bh);
+	bh->b_end_io = end_buffer_write_sync;
+	ret = submit_bh(write_op, bh);
+	wait_on_buffer(bh);
+	if (buffer_write_io_error(bh)) {
+		clear_buffer_write_io_error(bh);
+		set_buffer_uptodate(bh);
+		ret = -EIO;
+	}
+	if (ret) {
+		char b[BDEVNAME_SIZE];
+		printk(KERN_ERR "JBD: Error %d detected "
+		       "when updating journal superblock for %s.\n",
+		       ret, journal_dev_name(journal, b));
+	}
+}
+
+/**
+ * journal_update_sb_log_tail() - Update log tail in journal sb on disk.
+ * @journal: The journal to update.
+ * @tail_tid: TID of the new transaction at the tail of the log
+ * @tail_block: The first block of the transaction at the tail of the log
+ * @write_op: With which operation should we write the journal sb
+ *
+ * Update a journal's superblock information about log tail and write it to
+ * disk, waiting for the IO to complete.
+ */
+void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+				unsigned int tail_block, int write_op)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+	jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n",
+		  tail_block, tail_tid);
+
+	sb->s_sequence = cpu_to_be32(tail_tid);
+	sb->s_start    = cpu_to_be32(tail_block);
+
+	journal_write_superblock(journal, write_op);
+
+	/* Log is no longer empty */
+	spin_lock(&journal->j_state_lock);
+	WARN_ON(!sb->s_sequence);
+	journal->j_flags &= ~JFS_FLUSHED;
+	spin_unlock(&journal->j_state_lock);
+}
+
+/**
+ * mark_journal_empty() - Mark on disk journal as empty.
+ * @journal: The journal to update.
+ *
+ * Update a journal's dynamic superblock fields to show that journal is empty.
+ * Write updated superblock to disk waiting for IO to complete.
+ */
+static void mark_journal_empty(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 	spin_lock(&journal->j_state_lock);
-	jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
-		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+	jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n",
+		  journal->j_tail_sequence);
 
 	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
-	sb->s_start    = cpu_to_be32(journal->j_tail);
-	sb->s_errno    = cpu_to_be32(journal->j_errno);
+	sb->s_start    = cpu_to_be32(0);
 	spin_unlock(&journal->j_state_lock);
 
-	BUFFER_TRACE(bh, "marking dirty");
-	mark_buffer_dirty(bh);
-	if (wait) {
-		sync_dirty_buffer(bh);
-		if (buffer_write_io_error(bh)) {
-			char b[BDEVNAME_SIZE];
-			printk(KERN_ERR "JBD: I/O error detected "
-			       "when updating journal superblock for %s.\n",
-			       journal_dev_name(journal, b));
-			clear_buffer_write_io_error(bh);
-			set_buffer_uptodate(bh);
-		}
-	} else
-		write_dirty_buffer(bh, WRITE);
+	journal_write_superblock(journal, WRITE_FUA);
 
-	trace_jbd_update_superblock_end(journal, wait);
-out:
-	/* If we have just flushed the log (by marking s_start==0), then
-	 * any future commit will have to be careful to update the
-	 * superblock again to re-record the true start of the log. */
+	spin_lock(&journal->j_state_lock);
+	/* Log is empty */
+	journal->j_flags |= JFS_FLUSHED;
+	spin_unlock(&journal->j_state_lock);
+}
+
+/**
+ * journal_update_sb_errno() - Update error in the journal.
+ * @journal: The journal to update.
+ *
+ * Update a journal's errno. Write updated superblock to disk waiting for IO
+ * to complete.
+ */
+static void journal_update_sb_errno(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
 
 	spin_lock(&journal->j_state_lock);
-	if (sb->s_start)
-		journal->j_flags &= ~JFS_FLUSHED;
-	else
-		journal->j_flags |= JFS_FLUSHED;
+	jbd_debug(1, "JBD: updating superblock error (errno %d)\n",
+		  journal->j_errno);
+	sb->s_errno = cpu_to_be32(journal->j_errno);
 	spin_unlock(&journal->j_state_lock);
+
+	journal_write_superblock(journal, WRITE_SYNC);
 }
 
 /*
@@ -1251,6 +1316,8 @@ int journal_destroy(journal_t *journal)
 
 	/* Force any old transactions to disk */
 
+	/* We cannot race with anybody but must keep assertions happy */
+	mutex_lock(&journal->j_checkpoint_mutex);
 	/* Totally anal locking here... */
 	spin_lock(&journal->j_list_lock);
 	while (journal->j_checkpoint_transactions != NULL) {
@@ -1266,16 +1333,14 @@ int journal_destroy(journal_t *journal)
 
 	if (journal->j_sb_buffer) {
 		if (!is_journal_aborted(journal)) {
-			/* We can now mark the journal as empty. */
-			journal->j_tail = 0;
 			journal->j_tail_sequence =
 				++journal->j_transaction_sequence;
-			journal_update_superblock(journal, 1);
-		} else {
+			mark_journal_empty(journal);
+		} else
 			err = -EIO;
-		}
 		brelse(journal->j_sb_buffer);
 	}
+	mutex_unlock(&journal->j_checkpoint_mutex);
 
 	if (journal->j_inode)
 		iput(journal->j_inode);
@@ -1455,7 +1520,6 @@ int journal_flush(journal_t *journal)
 {
 	int err = 0;
 	transaction_t *transaction = NULL;
-	unsigned int old_tail;
 
 	spin_lock(&journal->j_state_lock);
 
@@ -1490,6 +1554,7 @@ int journal_flush(journal_t *journal)
 	if (is_journal_aborted(journal))
 		return -EIO;
 
+	mutex_lock(&journal->j_checkpoint_mutex);
 	cleanup_journal_tail(journal);
 
 	/* Finally, mark the journal as really needing no recovery.
@@ -1497,14 +1562,9 @@ int journal_flush(journal_t *journal)
 	 * the magic code for a fully-recovered superblock.  Any future
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
+	mark_journal_empty(journal);
+	mutex_unlock(&journal->j_checkpoint_mutex);
 	spin_lock(&journal->j_state_lock);
-	old_tail = journal->j_tail;
-	journal->j_tail = 0;
-	spin_unlock(&journal->j_state_lock);
-	journal_update_superblock(journal, 1);
-	spin_lock(&journal->j_state_lock);
-	journal->j_tail = old_tail;
-
 	J_ASSERT(!journal->j_running_transaction);
 	J_ASSERT(!journal->j_committing_transaction);
 	J_ASSERT(!journal->j_checkpoint_transactions);
@@ -1544,8 +1604,12 @@ int journal_wipe(journal_t *journal, int write)
 		write ? "Clearing" : "Ignoring");
 
 	err = journal_skip_recovery(journal);
-	if (write)
-		journal_update_superblock(journal, 1);
+	if (write) {
+		/* Lock to make assertions happy... */
+		mutex_lock(&journal->j_checkpoint_mutex);
+		mark_journal_empty(journal);
+		mutex_unlock(&journal->j_checkpoint_mutex);
+	}
 
 no_recovery:
 	return err;
@@ -1613,7 +1677,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
 		__journal_abort_hard(journal);
 
 	if (errno)
-		journal_update_superblock(journal, 1);
+		journal_update_sb_errno(journal);
 }
 
 /**
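One detail of journal_write_superblock() above is worth spelling out: if the journal was set up without barrier support (JFS_BARRIER clear), the FUA and FLUSH bits are masked off the requested operation, so the superblock write silently degrades to an ordinary synchronous write. A small sketch of that masking with made-up flag values (the real REQ_FLUSH, REQ_FUA and WRITE_FUA constants come from the block layer and differ from these):

    #include <stdio.h>

    /* Made-up flag values for illustration only; not the block layer's. */
    #define REQ_FLUSH  (1 << 0)
    #define REQ_FUA    (1 << 1)
    #define WRITE_SYNC (1 << 2)
    #define WRITE_FUA  (WRITE_SYNC | REQ_FLUSH | REQ_FUA)

    static int effective_write_op(int write_op, int barriers_enabled)
    {
        /* Without barrier support the device cannot honour FLUSH/FUA, so
         * the superblock write falls back to an ordinary sync write. */
        if (!barriers_enabled)
            write_op &= ~(REQ_FUA | REQ_FLUSH);
        return write_op;
    }

    int main(void)
    {
        printf("with barriers:    %#x\n", effective_write_op(WRITE_FUA, 1));
        printf("without barriers: %#x\n", effective_write_op(WRITE_FUA, 0));
        return 0;
    }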
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index b2a7e5244e39..febc10db5ced 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1433,8 +1433,6 @@ int journal_stop(handle_t *handle)
 		}
 	}
 
-	if (handle->h_sync)
-		transaction->t_synchronous_commit = 1;
 	current->journal_info = NULL;
 	spin_lock(&journal->j_state_lock);
 	spin_lock(&transaction->t_handle_lock);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d69a1d1d7e15..10cbe841cb7e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -116,15 +116,15 @@ | |||
116 | * spinlock to internal buffers before writing. | 116 | * spinlock to internal buffers before writing. |
117 | * | 117 | * |
118 | * Lock ordering (including related VFS locks) is the following: | 118 | * Lock ordering (including related VFS locks) is the following: |
119 | * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > | 119 | * dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock > |
120 | * dqio_mutex | 120 | * dqio_mutex |
121 | * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc. | ||
121 | * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem > | 122 | * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem > |
122 | * dqptr_sem. But filesystem has to count with the fact that functions such as | 123 | * dqptr_sem. But filesystem has to count with the fact that functions such as |
123 | * dquot_alloc_space() acquire dqptr_sem and they usually have to be called | 124 | * dquot_alloc_space() acquire dqptr_sem and they usually have to be called |
124 | * from inside a transaction to keep filesystem consistency after a crash. Also | 125 | * from inside a transaction to keep filesystem consistency after a crash. Also |
125 | * filesystems usually want to do some IO on dquot from ->mark_dirty which is | 126 | * filesystems usually want to do some IO on dquot from ->mark_dirty which is |
126 | * called with dqptr_sem held. | 127 | * called with dqptr_sem held. |
127 | * i_mutex on quota files is special (it's below dqio_mutex) | ||
128 | */ | 128 | */ |
129 | 129 | ||
130 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); | 130 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); |
@@ -638,7 +638,7 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait) | |||
638 | dqstats_inc(DQST_SYNCS); | 638 | dqstats_inc(DQST_SYNCS); |
639 | mutex_unlock(&dqopt->dqonoff_mutex); | 639 | mutex_unlock(&dqopt->dqonoff_mutex); |
640 | 640 | ||
641 | if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) | 641 | if (!wait || (dqopt->flags & DQUOT_QUOTA_SYS_FILE)) |
642 | return 0; | 642 | return 0; |
643 | 643 | ||
644 | /* This is not very clever (and fast) but currently I don't know about | 644 | /* This is not very clever (and fast) but currently I don't know about |
@@ -652,18 +652,17 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait) | |||
652 | * Now when everything is written we can discard the pagecache so | 652 | * Now when everything is written we can discard the pagecache so |
653 | * that userspace sees the changes. | 653 | * that userspace sees the changes. |
654 | */ | 654 | */ |
655 | mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); | 655 | mutex_lock(&dqopt->dqonoff_mutex); |
656 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | 656 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) { |
657 | if (type != -1 && cnt != type) | 657 | if (type != -1 && cnt != type) |
658 | continue; | 658 | continue; |
659 | if (!sb_has_quota_active(sb, cnt)) | 659 | if (!sb_has_quota_active(sb, cnt)) |
660 | continue; | 660 | continue; |
661 | mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, | 661 | mutex_lock(&dqopt->files[cnt]->i_mutex); |
662 | I_MUTEX_QUOTA); | 662 | truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); |
663 | truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); | 663 | mutex_unlock(&dqopt->files[cnt]->i_mutex); |
664 | mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex); | ||
665 | } | 664 | } |
666 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); | 665 | mutex_unlock(&dqopt->dqonoff_mutex); |
667 | 666 | ||
668 | return 0; | 667 | return 0; |
669 | } | 668 | } |
@@ -907,14 +906,14 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
907 | spin_unlock(&inode->i_lock); | 906 | spin_unlock(&inode->i_lock); |
908 | continue; | 907 | continue; |
909 | } | 908 | } |
910 | #ifdef CONFIG_QUOTA_DEBUG | ||
911 | if (unlikely(inode_get_rsv_space(inode) > 0)) | ||
912 | reserved = 1; | ||
913 | #endif | ||
914 | __iget(inode); | 909 | __iget(inode); |
915 | spin_unlock(&inode->i_lock); | 910 | spin_unlock(&inode->i_lock); |
916 | spin_unlock(&inode_sb_list_lock); | 911 | spin_unlock(&inode_sb_list_lock); |
917 | 912 | ||
913 | #ifdef CONFIG_QUOTA_DEBUG | ||
914 | if (unlikely(inode_get_rsv_space(inode) > 0)) | ||
915 | reserved = 1; | ||
916 | #endif | ||
918 | iput(old_inode); | 917 | iput(old_inode); |
919 | __dquot_initialize(inode, type); | 918 | __dquot_initialize(inode, type); |
920 | 919 | ||
@@ -2037,8 +2036,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) | |||
2037 | /* If quota was reenabled in the meantime, we have | 2036 | /* If quota was reenabled in the meantime, we have |
2038 | * nothing to do */ | 2037 | * nothing to do */ |
2039 | if (!sb_has_quota_loaded(sb, cnt)) { | 2038 | if (!sb_has_quota_loaded(sb, cnt)) { |
2040 | mutex_lock_nested(&toputinode[cnt]->i_mutex, | 2039 | mutex_lock(&toputinode[cnt]->i_mutex); |
2041 | I_MUTEX_QUOTA); | ||
2042 | toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | | 2040 | toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | |
2043 | S_NOATIME | S_NOQUOTA); | 2041 | S_NOATIME | S_NOQUOTA); |
2044 | truncate_inode_pages(&toputinode[cnt]->i_data, | 2042 | truncate_inode_pages(&toputinode[cnt]->i_data, |
@@ -2133,7 +2131,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, | |||
2133 | /* We don't want quota and atime on quota files (deadlocks | 2131 | /* We don't want quota and atime on quota files (deadlocks |
2134 | * possible) Also nobody should write to the file - we use | 2132 | * possible) Also nobody should write to the file - we use |
2135 | * special IO operations which ignore the immutable bit. */ | 2133 | * special IO operations which ignore the immutable bit. */ |
2136 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); | 2134 | mutex_lock(&inode->i_mutex); |
2137 | oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | | 2135 | oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | |
2138 | S_NOQUOTA); | 2136 | S_NOQUOTA); |
2139 | inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; | 2137 | inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; |
@@ -2180,7 +2178,7 @@ out_file_init: | |||
2180 | iput(inode); | 2178 | iput(inode); |
2181 | out_lock: | 2179 | out_lock: |
2182 | if (oldflags != -1) { | 2180 | if (oldflags != -1) { |
2183 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); | 2181 | mutex_lock(&inode->i_mutex); |
2184 | /* Set the flags back (in the case of accidental quotaon() | 2182 | /* Set the flags back (in the case of accidental quotaon() |
2185 | * on a wrong file we don't want to mess up the flags) */ | 2183 | * on a wrong file we don't want to mess up the flags) */ |
2186 | inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); | 2184 | inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); |
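The fs/quota/dquot.c hunks above cache the sb_dqopt(sb) pointer, flush dirty dquots, and then truncate the quota files' pagecache so userspace sees the on-disk state, while the nested I_MUTEX_QUOTA lock subclass is replaced by a plain mutex_lock() on the quota file's i_mutex. For context, dquot_quota_sync() is the path a userspace quota-sync request ends up in. Below is a minimal, hedged sketch of issuing such a request through quotactl(2); this is the standard Linux userspace interface, not code from this patch.

/* Sketch: ask the kernel to sync quota information from userspace.
 * Q_SYNC with a NULL device syncs every filesystem with active quotas;
 * the id and addr arguments are ignored for this command. */
#include <sys/quota.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	if (quotactl(QCMD(Q_SYNC, USRQUOTA), NULL, 0, NULL) != 0) {
		fprintf(stderr, "quotactl(Q_SYNC): %s\n", strerror(errno));
		return 1;
	}
	return 0;
}

Running this eventually reaches the filesystem's ->quota_sync method, i.e. dquot_quota_sync() for filesystems using the generic quota code.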
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 8b7616ef06d8..c07b7d709447 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -2270,7 +2270,6 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | |||
2270 | (unsigned long long)off, (unsigned long long)len); | 2270 | (unsigned long long)off, (unsigned long long)len); |
2271 | return -EIO; | 2271 | return -EIO; |
2272 | } | 2272 | } |
2273 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); | ||
2274 | while (towrite > 0) { | 2273 | while (towrite > 0) { |
2275 | tocopy = sb->s_blocksize - offset < towrite ? | 2274 | tocopy = sb->s_blocksize - offset < towrite ? |
2276 | sb->s_blocksize - offset : towrite; | 2275 | sb->s_blocksize - offset : towrite; |
@@ -2302,16 +2301,13 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | |||
2302 | blk++; | 2301 | blk++; |
2303 | } | 2302 | } |
2304 | out: | 2303 | out: |
2305 | if (len == towrite) { | 2304 | if (len == towrite) |
2306 | mutex_unlock(&inode->i_mutex); | ||
2307 | return err; | 2305 | return err; |
2308 | } | ||
2309 | if (inode->i_size < off + len - towrite) | 2306 | if (inode->i_size < off + len - towrite) |
2310 | i_size_write(inode, off + len - towrite); | 2307 | i_size_write(inode, off + len - towrite); |
2311 | inode->i_version++; | 2308 | inode->i_version++; |
2312 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2309 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
2313 | mark_inode_dirty(inode); | 2310 | mark_inode_dirty(inode); |
2314 | mutex_unlock(&inode->i_mutex); | ||
2315 | return len - towrite; | 2311 | return len - towrite; |
2316 | } | 2312 | } |
2317 | 2313 | ||
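The reiserfs_quota_write() hunks above drop the quota file's i_mutex around the copy loop, and only fragments of that loop are visible in the diff. As a reference, here is a simplified, hedged sketch of the generic block-by-block ->quota_write() pattern such implementations follow; it is not the reiserfs code itself (reiserfs additionally maps file blocks through its own get_block path and journals the buffers), and it treats the quota file's blocks as directly addressable for brevity.

/* Simplified sketch of a block-by-block ->quota_write() loop (hedged:
 * no journalling, minimal error handling).  sb_bread(),
 * mark_buffer_dirty() and flush_dcache_page() are the real
 * buffer-cache primitives used by such implementations. */
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/highmem.h>
#include <linux/string.h>

static ssize_t quota_write_sketch(struct super_block *sb, const char *data,
				  size_t len, loff_t off)
{
	unsigned long blk = off >> sb->s_blocksize_bits;
	int offset = off & (sb->s_blocksize - 1);
	size_t towrite = len;

	while (towrite > 0) {
		size_t tocopy = min_t(size_t, sb->s_blocksize - offset, towrite);
		struct buffer_head *bh = sb_bread(sb, blk);

		if (!bh)
			break;	/* the real code would return -EIO here */
		memcpy(bh->b_data + offset, data, tocopy);
		flush_dcache_page(bh->b_page);
		mark_buffer_dirty(bh);
		brelse(bh);
		offset = 0;
		towrite -= tocopy;
		data += tocopy;
		blk++;
	}
	return len - towrite;
}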
diff --git a/include/linux/jbd.h b/include/linux/jbd.h index d211732b9e99..c8f32975f0e4 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h | |||
@@ -479,12 +479,6 @@ struct transaction_s | |||
479 | * How many handles used this transaction? [t_handle_lock] | 479 | * How many handles used this transaction? [t_handle_lock] |
480 | */ | 480 | */ |
481 | int t_handle_count; | 481 | int t_handle_count; |
482 | |||
483 | /* | ||
484 | * This transaction is being forced and some process is | ||
485 | * waiting for it to finish. | ||
486 | */ | ||
487 | unsigned int t_synchronous_commit:1; | ||
488 | }; | 482 | }; |
489 | 483 | ||
490 | /** | 484 | /** |
@@ -531,6 +525,8 @@ struct transaction_s | |||
531 | * transaction | 525 | * transaction |
532 | * @j_commit_request: Sequence number of the most recent transaction wanting | 526 | * @j_commit_request: Sequence number of the most recent transaction wanting |
533 | * commit | 527 | * commit |
528 | * @j_commit_waited: Sequence number of the most recent transaction someone | ||
529 | * is waiting for to commit. | ||
534 | * @j_uuid: Uuid of client object. | 530 | * @j_uuid: Uuid of client object. |
535 | * @j_task: Pointer to the current commit thread for this journal | 531 | * @j_task: Pointer to the current commit thread for this journal |
536 | * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a | 532 | * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a |
@@ -696,6 +692,13 @@ struct journal_s | |||
696 | tid_t j_commit_request; | 692 | tid_t j_commit_request; |
697 | 693 | ||
698 | /* | 694 | /* |
695 | * Sequence number of the most recent transaction someone is waiting | ||
696 | * for to commit. | ||
697 | * [j_state_lock] | ||
698 | */ | ||
699 | tid_t j_commit_waited; | ||
700 | |||
701 | /* | ||
699 | * Journal uuid: identifies the object (filesystem, LVM volume etc) | 702 | * Journal uuid: identifies the object (filesystem, LVM volume etc) |
700 | * backed by this journal. This will eventually be replaced by an array | 703 | * backed by this journal. This will eventually be replaced by an array |
701 | * of uuids, allowing us to index multiple devices within a single | 704 | * of uuids, allowing us to index multiple devices within a single |
@@ -861,7 +864,8 @@ extern int journal_destroy (journal_t *); | |||
861 | extern int journal_recover (journal_t *journal); | 864 | extern int journal_recover (journal_t *journal); |
862 | extern int journal_wipe (journal_t *, int); | 865 | extern int journal_wipe (journal_t *, int); |
863 | extern int journal_skip_recovery (journal_t *); | 866 | extern int journal_skip_recovery (journal_t *); |
864 | extern void journal_update_superblock (journal_t *, int); | 867 | extern void journal_update_sb_log_tail (journal_t *, tid_t, unsigned int, |
868 | int); | ||
865 | extern void journal_abort (journal_t *, int); | 869 | extern void journal_abort (journal_t *, int); |
866 | extern int journal_errno (journal_t *); | 870 | extern int journal_errno (journal_t *); |
867 | extern void journal_ack_err (journal_t *); | 871 | extern void journal_ack_err (journal_t *); |
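The include/linux/jbd.h hunks above drop the per-transaction t_synchronous_commit flag and add a journal-wide j_commit_waited tid protected by j_state_lock. A plausible, hedged sketch of how such a field is typically used follows; the helper names are hypothetical, while tid_gt()/tid_geq(), j_state_lock and t_tid are existing jbd primitives. The idea is that a waiter records the tid it blocks on, and the commit path can then decide to issue its writes synchronously while that tid is still wanted.

/* Illustrative only: how a "highest tid someone is waiting for" field can
 * replace a per-transaction sync flag.  Not the exact patch code. */
#include <linux/jbd.h>

static void note_commit_waiter(journal_t *journal, tid_t tid)
{
	spin_lock(&journal->j_state_lock);
	if (tid_gt(tid, journal->j_commit_waited))
		journal->j_commit_waited = tid;	/* [j_state_lock] */
	spin_unlock(&journal->j_state_lock);
}

static int someone_waits_for(journal_t *journal, transaction_t *transaction)
{
	/* True when a waiter registered this tid or a later one, so the
	 * commit code may prefer WRITE_SYNC over plain WRITE. */
	return tid_geq(journal->j_commit_waited, transaction->t_tid);
}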
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h index aff64d82d713..da6f2591c25e 100644 --- a/include/trace/events/jbd.h +++ b/include/trace/events/jbd.h | |||
@@ -36,19 +36,17 @@ DECLARE_EVENT_CLASS(jbd_commit, | |||
36 | 36 | ||
37 | TP_STRUCT__entry( | 37 | TP_STRUCT__entry( |
38 | __field( dev_t, dev ) | 38 | __field( dev_t, dev ) |
39 | __field( char, sync_commit ) | ||
40 | __field( int, transaction ) | 39 | __field( int, transaction ) |
41 | ), | 40 | ), |
42 | 41 | ||
43 | TP_fast_assign( | 42 | TP_fast_assign( |
44 | __entry->dev = journal->j_fs_dev->bd_dev; | 43 | __entry->dev = journal->j_fs_dev->bd_dev; |
45 | __entry->sync_commit = commit_transaction->t_synchronous_commit; | ||
46 | __entry->transaction = commit_transaction->t_tid; | 44 | __entry->transaction = commit_transaction->t_tid; |
47 | ), | 45 | ), |
48 | 46 | ||
49 | TP_printk("dev %d,%d transaction %d sync %d", | 47 | TP_printk("dev %d,%d transaction %d", |
50 | MAJOR(__entry->dev), MINOR(__entry->dev), | 48 | MAJOR(__entry->dev), MINOR(__entry->dev), |
51 | __entry->transaction, __entry->sync_commit) | 49 | __entry->transaction) |
52 | ); | 50 | ); |
53 | 51 | ||
54 | DEFINE_EVENT(jbd_commit, jbd_start_commit, | 52 | DEFINE_EVENT(jbd_commit, jbd_start_commit, |
@@ -87,19 +85,17 @@ TRACE_EVENT(jbd_drop_transaction, | |||
87 | 85 | ||
88 | TP_STRUCT__entry( | 86 | TP_STRUCT__entry( |
89 | __field( dev_t, dev ) | 87 | __field( dev_t, dev ) |
90 | __field( char, sync_commit ) | ||
91 | __field( int, transaction ) | 88 | __field( int, transaction ) |
92 | ), | 89 | ), |
93 | 90 | ||
94 | TP_fast_assign( | 91 | TP_fast_assign( |
95 | __entry->dev = journal->j_fs_dev->bd_dev; | 92 | __entry->dev = journal->j_fs_dev->bd_dev; |
96 | __entry->sync_commit = commit_transaction->t_synchronous_commit; | ||
97 | __entry->transaction = commit_transaction->t_tid; | 93 | __entry->transaction = commit_transaction->t_tid; |
98 | ), | 94 | ), |
99 | 95 | ||
100 | TP_printk("dev %d,%d transaction %d sync %d", | 96 | TP_printk("dev %d,%d transaction %d", |
101 | MAJOR(__entry->dev), MINOR(__entry->dev), | 97 | MAJOR(__entry->dev), MINOR(__entry->dev), |
102 | __entry->transaction, __entry->sync_commit) | 98 | __entry->transaction) |
103 | ); | 99 | ); |
104 | 100 | ||
105 | TRACE_EVENT(jbd_end_commit, | 101 | TRACE_EVENT(jbd_end_commit, |
@@ -109,21 +105,19 @@ TRACE_EVENT(jbd_end_commit, | |||
109 | 105 | ||
110 | TP_STRUCT__entry( | 106 | TP_STRUCT__entry( |
111 | __field( dev_t, dev ) | 107 | __field( dev_t, dev ) |
112 | __field( char, sync_commit ) | ||
113 | __field( int, transaction ) | 108 | __field( int, transaction ) |
114 | __field( int, head ) | 109 | __field( int, head ) |
115 | ), | 110 | ), |
116 | 111 | ||
117 | TP_fast_assign( | 112 | TP_fast_assign( |
118 | __entry->dev = journal->j_fs_dev->bd_dev; | 113 | __entry->dev = journal->j_fs_dev->bd_dev; |
119 | __entry->sync_commit = commit_transaction->t_synchronous_commit; | ||
120 | __entry->transaction = commit_transaction->t_tid; | 114 | __entry->transaction = commit_transaction->t_tid; |
121 | __entry->head = journal->j_tail_sequence; | 115 | __entry->head = journal->j_tail_sequence; |
122 | ), | 116 | ), |
123 | 117 | ||
124 | TP_printk("dev %d,%d transaction %d sync %d head %d", | 118 | TP_printk("dev %d,%d transaction %d head %d", |
125 | MAJOR(__entry->dev), MINOR(__entry->dev), | 119 | MAJOR(__entry->dev), MINOR(__entry->dev), |
126 | __entry->transaction, __entry->sync_commit, __entry->head) | 120 | __entry->transaction, __entry->head) |
127 | ); | 121 | ); |
128 | 122 | ||
129 | TRACE_EVENT(jbd_do_submit_data, | 123 | TRACE_EVENT(jbd_do_submit_data, |
@@ -133,19 +127,17 @@ TRACE_EVENT(jbd_do_submit_data, | |||
133 | 127 | ||
134 | TP_STRUCT__entry( | 128 | TP_STRUCT__entry( |
135 | __field( dev_t, dev ) | 129 | __field( dev_t, dev ) |
136 | __field( char, sync_commit ) | ||
137 | __field( int, transaction ) | 130 | __field( int, transaction ) |
138 | ), | 131 | ), |
139 | 132 | ||
140 | TP_fast_assign( | 133 | TP_fast_assign( |
141 | __entry->dev = journal->j_fs_dev->bd_dev; | 134 | __entry->dev = journal->j_fs_dev->bd_dev; |
142 | __entry->sync_commit = commit_transaction->t_synchronous_commit; | ||
143 | __entry->transaction = commit_transaction->t_tid; | 135 | __entry->transaction = commit_transaction->t_tid; |
144 | ), | 136 | ), |
145 | 137 | ||
146 | TP_printk("dev %d,%d transaction %d sync %d", | 138 | TP_printk("dev %d,%d transaction %d", |
147 | MAJOR(__entry->dev), MINOR(__entry->dev), | 139 | MAJOR(__entry->dev), MINOR(__entry->dev), |
148 | __entry->transaction, __entry->sync_commit) | 140 | __entry->transaction) |
149 | ); | 141 | ); |
150 | 142 | ||
151 | TRACE_EVENT(jbd_cleanup_journal_tail, | 143 | TRACE_EVENT(jbd_cleanup_journal_tail, |
@@ -177,24 +169,23 @@ TRACE_EVENT(jbd_cleanup_journal_tail, | |||
177 | __entry->block_nr, __entry->freed) | 169 | __entry->block_nr, __entry->freed) |
178 | ); | 170 | ); |
179 | 171 | ||
180 | TRACE_EVENT(jbd_update_superblock_end, | 172 | TRACE_EVENT(journal_write_superblock, |
181 | TP_PROTO(journal_t *journal, int wait), | 173 | TP_PROTO(journal_t *journal, int write_op), |
182 | 174 | ||
183 | TP_ARGS(journal, wait), | 175 | TP_ARGS(journal, write_op), |
184 | 176 | ||
185 | TP_STRUCT__entry( | 177 | TP_STRUCT__entry( |
186 | __field( dev_t, dev ) | 178 | __field( dev_t, dev ) |
187 | __field( int, wait ) | 179 | __field( int, write_op ) |
188 | ), | 180 | ), |
189 | 181 | ||
190 | TP_fast_assign( | 182 | TP_fast_assign( |
191 | __entry->dev = journal->j_fs_dev->bd_dev; | 183 | __entry->dev = journal->j_fs_dev->bd_dev; |
192 | __entry->wait = wait; | 184 | __entry->write_op = write_op; |
193 | ), | 185 | ), |
194 | 186 | ||
195 | TP_printk("dev %d,%d wait %d", | 187 | TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), |
196 | MAJOR(__entry->dev), MINOR(__entry->dev), | 188 | MINOR(__entry->dev), __entry->write_op) |
197 | __entry->wait) | ||
198 | ); | 189 | ); |
199 | 190 | ||
200 | #endif /* _TRACE_JBD_H */ | 191 | #endif /* _TRACE_JBD_H */ |
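The old jbd_update_superblock_end event logged only whether the update waited; the renamed journal_write_superblock event instead records the block-layer write_op used for the superblock write, printed as a hex flag value. Once defined via TRACE_EVENT(), the hook is invoked as trace_journal_write_superblock(). Below is a hedged sketch of a call site: the surrounding helper is simplified and illustrative, not the exact fs/jbd/journal.c code.

/* Sketch: firing the renamed tracepoint from the journal-superblock write
 * path.  trace_journal_write_superblock() is the call name generated from
 * the TRACE_EVENT() definition above. */
#include <linux/jbd.h>
#include <linux/buffer_head.h>
#include <trace/events/jbd.h>

static void write_journal_sb(journal_t *journal, int write_op)
{
	struct buffer_head *bh = journal->j_sb_buffer;

	trace_journal_write_superblock(journal, write_op);
	lock_buffer(bh);
	get_bh(bh);
	bh->b_end_io = end_buffer_write_sync;
	/* write_op may be plain WRITE, WRITE_SYNC or WRITE_FUA, which is
	 * why the event records it rather than a boolean "wait" flag. */
	submit_bh(write_op, bh);
	wait_on_buffer(bh);
}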