aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-06-11 13:00:50 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-06-11 13:00:50 -0400
commite893123c7378192c094747dadec326b7c000c190 (patch)
tree3fac44dceaa87339819d078e2bd69ea9a01056c3 /fs
parent27951daa71f1c91300ae4de9441916d1ffe2b078 (diff)
parenta41f20716975910d9beb90b7efc61107901492b8 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (49 commits) ext4: Avoid corrupting the uninitialized bit in the extent during truncate ext4: Don't treat a truncation of a zero-length file as replace-via-truncate ext4: fix dx_map_entry to support 256k directory blocks ext4: truncate the file properly if we fail to copy data from userspace ext4: Avoid leaking blocks after a block allocation failure ext4: Change all super.c messages to print the device ext4: Get rid of EXTEND_DISKSIZE flag of ext4_get_blocks_handle() ext4: super.c whitespace cleanup jbd2: Fix minor typos in comments in fs/jbd2/journal.c ext4: Clean up calls to ext4_get_group_desc() ext4: remove unused function __ext4_write_dirty_metadata ext2: Fix memory leak in ext2_fill_super() in case of a failed mount ext3: Fix memory leak in ext3_fill_super() in case of a failed mount ext4: Fix memory leak in ext4_fill_super() in case of a failed mount ext4: down i_data_sem only for read when walking tree for fiemap ext4: Add a comprehensive block validity check to ext4_get_blocks() ext4: Clean up ext4_get_blocks() so it does not depend on bh_result->b_state ext4: Merge ext4_da_get_block_write() into mpage_da_map_blocks() ext4: Add BUG_ON debugging checks to noalloc_get_block_write() ext4: Add documentation to the ext4_*get_block* functions ...
Diffstat (limited to 'fs')
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/Makefile4
-rw-r--r--fs/ext4/balloc.c28
-rw-r--r--fs/ext4/block_validity.c244
-rw-r--r--fs/ext4/dir.c3
-rw-r--r--fs/ext4/ext4.h354
-rw-r--r--fs/ext4/ext4_i.h140
-rw-r--r--fs/ext4/ext4_sb.h161
-rw-r--r--fs/ext4/extents.c85
-rw-r--r--fs/ext4/group.h29
-rw-r--r--fs/ext4/ialloc.c73
-rw-r--r--fs/ext4/inode.c593
-rw-r--r--fs/ext4/mballoc.c166
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/namei.c27
-rw-r--r--fs/ext4/namei.h8
-rw-r--r--fs/ext4/resize.c36
-rw-r--r--fs/ext4/super.c831
-rw-r--r--fs/ioctl.c14
-rw-r--r--fs/jbd2/journal.c8
-rw-r--r--fs/mpage.c6
23 files changed, 1582 insertions, 1233 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 49106127a4aa..1864d0b63088 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh)
2935 BUG_ON(!buffer_locked(bh)); 2935 BUG_ON(!buffer_locked(bh));
2936 BUG_ON(!buffer_mapped(bh)); 2936 BUG_ON(!buffer_mapped(bh));
2937 BUG_ON(!bh->b_end_io); 2937 BUG_ON(!bh->b_end_io);
2938 BUG_ON(buffer_delay(bh));
2939 BUG_ON(buffer_unwritten(bh));
2938 2940
2939 /* 2941 /*
2940 * Mask in barrier bit for a write (could be either a WRITE or a 2942 * Mask in barrier bit for a write (could be either a WRITE or a
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 5c4afe652245..e3c748faf2db 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1093,6 +1093,7 @@ failed_mount:
1093 brelse(bh); 1093 brelse(bh);
1094failed_sbi: 1094failed_sbi:
1095 sb->s_fs_info = NULL; 1095 sb->s_fs_info = NULL;
1096 kfree(sbi->s_blockgroup_lock);
1096 kfree(sbi); 1097 kfree(sbi);
1097 return ret; 1098 return ret;
1098} 1099}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 599dbfe504c3..d8b73d4abe3e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2021,6 +2021,7 @@ failed_mount:
2021 brelse(bh); 2021 brelse(bh);
2022out_fail: 2022out_fail:
2023 sb->s_fs_info = NULL; 2023 sb->s_fs_info = NULL;
2024 kfree(sbi->s_blockgroup_lock);
2024 kfree(sbi); 2025 kfree(sbi);
2025 lock_kernel(); 2026 lock_kernel();
2026 return ret; 2027 return ret;
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a8ff003a00f7..8a34710ecf40 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,8 +5,8 @@
5obj-$(CONFIG_EXT4_FS) += ext4.o 5obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o
10 10
11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 53c72ad85877..e2126d70dff5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -19,7 +19,6 @@
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include "ext4.h" 20#include "ext4.h"
21#include "ext4_jbd2.h" 21#include "ext4_jbd2.h"
22#include "group.h"
23#include "mballoc.h" 22#include "mballoc.h"
24 23
25/* 24/*
@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
88 ext4_group_t block_group, struct ext4_group_desc *gdp) 87 ext4_group_t block_group, struct ext4_group_desc *gdp)
89{ 88{
90 int bit, bit_max; 89 int bit, bit_max;
90 ext4_group_t ngroups = ext4_get_groups_count(sb);
91 unsigned free_blocks, group_blocks; 91 unsigned free_blocks, group_blocks;
92 struct ext4_sb_info *sbi = EXT4_SB(sb); 92 struct ext4_sb_info *sbi = EXT4_SB(sb);
93 93
@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
123 bit_max += ext4_bg_num_gdb(sb, block_group); 123 bit_max += ext4_bg_num_gdb(sb, block_group);
124 } 124 }
125 125
126 if (block_group == sbi->s_groups_count - 1) { 126 if (block_group == ngroups - 1) {
127 /* 127 /*
128 * Even though mke2fs always initialize first and last group 128 * Even though mke2fs always initialize first and last group
129 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need 129 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
131 */ 131 */
132 group_blocks = ext4_blocks_count(sbi->s_es) - 132 group_blocks = ext4_blocks_count(sbi->s_es) -
133 le32_to_cpu(sbi->s_es->s_first_data_block) - 133 le32_to_cpu(sbi->s_es->s_first_data_block) -
134 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); 134 (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
135 } else { 135 } else {
136 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 136 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
137 } 137 }
@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
205{ 205{
206 unsigned int group_desc; 206 unsigned int group_desc;
207 unsigned int offset; 207 unsigned int offset;
208 ext4_group_t ngroups = ext4_get_groups_count(sb);
208 struct ext4_group_desc *desc; 209 struct ext4_group_desc *desc;
209 struct ext4_sb_info *sbi = EXT4_SB(sb); 210 struct ext4_sb_info *sbi = EXT4_SB(sb);
210 211
211 if (block_group >= sbi->s_groups_count) { 212 if (block_group >= ngroups) {
212 ext4_error(sb, "ext4_get_group_desc", 213 ext4_error(sb, "ext4_get_group_desc",
213 "block_group >= groups_count - " 214 "block_group >= groups_count - "
214 "block_group = %u, groups_count = %u", 215 "block_group = %u, groups_count = %u",
215 block_group, sbi->s_groups_count); 216 block_group, ngroups);
216 217
217 return NULL; 218 return NULL;
218 } 219 }
219 smp_rmb();
220 220
221 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 221 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
222 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 222 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
326 unlock_buffer(bh); 326 unlock_buffer(bh);
327 return bh; 327 return bh;
328 } 328 }
329 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 329 ext4_lock_group(sb, block_group);
330 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 330 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
331 ext4_init_block_bitmap(sb, bh, block_group, desc); 331 ext4_init_block_bitmap(sb, bh, block_group, desc);
332 set_bitmap_uptodate(bh); 332 set_bitmap_uptodate(bh);
333 set_buffer_uptodate(bh); 333 set_buffer_uptodate(bh);
334 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 334 ext4_unlock_group(sb, block_group);
335 unlock_buffer(bh); 335 unlock_buffer(bh);
336 return bh; 336 return bh;
337 } 337 }
338 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 338 ext4_unlock_group(sb, block_group);
339 if (buffer_uptodate(bh)) { 339 if (buffer_uptodate(bh)) {
340 /* 340 /*
341 * if not uninit if bh is uptodate, 341 * if not uninit if bh is uptodate,
@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
451 down_write(&grp->alloc_sem); 451 down_write(&grp->alloc_sem);
452 for (i = 0, blocks_freed = 0; i < count; i++) { 452 for (i = 0, blocks_freed = 0; i < count; i++) {
453 BUFFER_TRACE(bitmap_bh, "clear bit"); 453 BUFFER_TRACE(bitmap_bh, "clear bit");
454 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 454 if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
455 bit + i, bitmap_bh->b_data)) { 455 bit + i, bitmap_bh->b_data)) {
456 ext4_error(sb, __func__, 456 ext4_error(sb, __func__,
457 "bit already cleared for block %llu", 457 "bit already cleared for block %llu",
@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
461 blocks_freed++; 461 blocks_freed++;
462 } 462 }
463 } 463 }
464 spin_lock(sb_bgl_lock(sbi, block_group)); 464 ext4_lock_group(sb, block_group);
465 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); 465 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
466 ext4_free_blks_set(sb, desc, blk_free_count); 466 ext4_free_blks_set(sb, desc, blk_free_count);
467 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 467 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
468 spin_unlock(sb_bgl_lock(sbi, block_group)); 468 ext4_unlock_group(sb, block_group);
469 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); 469 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
470 470
471 if (sbi->s_log_groups_per_flex) { 471 if (sbi->s_log_groups_per_flex) {
@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
665 ext4_fsblk_t desc_count; 665 ext4_fsblk_t desc_count;
666 struct ext4_group_desc *gdp; 666 struct ext4_group_desc *gdp;
667 ext4_group_t i; 667 ext4_group_t i;
668 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 668 ext4_group_t ngroups = ext4_get_groups_count(sb);
669#ifdef EXT4FS_DEBUG 669#ifdef EXT4FS_DEBUG
670 struct ext4_super_block *es; 670 struct ext4_super_block *es;
671 ext4_fsblk_t bitmap_count; 671 ext4_fsblk_t bitmap_count;
@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
677 bitmap_count = 0; 677 bitmap_count = 0;
678 gdp = NULL; 678 gdp = NULL;
679 679
680 smp_rmb();
681 for (i = 0; i < ngroups; i++) { 680 for (i = 0; i < ngroups; i++) {
682 gdp = ext4_get_group_desc(sb, i, NULL); 681 gdp = ext4_get_group_desc(sb, i, NULL);
683 if (!gdp) 682 if (!gdp)
@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
700 return bitmap_count; 699 return bitmap_count;
701#else 700#else
702 desc_count = 0; 701 desc_count = 0;
703 smp_rmb();
704 for (i = 0; i < ngroups; i++) { 702 for (i = 0; i < ngroups; i++) {
705 gdp = ext4_get_group_desc(sb, i, NULL); 703 gdp = ext4_get_group_desc(sb, i, NULL);
706 if (!gdp) 704 if (!gdp)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
new file mode 100644
index 000000000000..50784ef07563
--- /dev/null
+++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
1/*
2 * linux/fs/ext4/block_validity.c
3 *
4 * Copyright (C) 2009
5 * Theodore Ts'o (tytso@mit.edu)
6 *
7 * Track which blocks in the filesystem are metadata blocks that
8 * should never be used as data blocks by files or directories.
9 */
10
11#include <linux/time.h>
12#include <linux/fs.h>
13#include <linux/namei.h>
14#include <linux/quotaops.h>
15#include <linux/buffer_head.h>
16#include <linux/module.h>
17#include <linux/swap.h>
18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h>
21#include <linux/mutex.h>
22#include "ext4.h"
23
24struct ext4_system_zone {
25 struct rb_node node;
26 ext4_fsblk_t start_blk;
27 unsigned int count;
28};
29
30static struct kmem_cache *ext4_system_zone_cachep;
31
32int __init init_ext4_system_zone(void)
33{
34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
35 SLAB_RECLAIM_ACCOUNT);
36 if (ext4_system_zone_cachep == NULL)
37 return -ENOMEM;
38 return 0;
39}
40
41void exit_ext4_system_zone(void)
42{
43 kmem_cache_destroy(ext4_system_zone_cachep);
44}
45
46static inline int can_merge(struct ext4_system_zone *entry1,
47 struct ext4_system_zone *entry2)
48{
49 if ((entry1->start_blk + entry1->count) == entry2->start_blk)
50 return 1;
51 return 0;
52}
53
54/*
55 * Mark a range of blocks as belonging to the "system zone" --- that
56 * is, filesystem metadata blocks which should never be used by
57 * inodes.
58 */
59static int add_system_zone(struct ext4_sb_info *sbi,
60 ext4_fsblk_t start_blk,
61 unsigned int count)
62{
63 struct ext4_system_zone *new_entry = NULL, *entry;
64 struct rb_node **n = &sbi->system_blks.rb_node, *node;
65 struct rb_node *parent = NULL, *new_node = NULL;
66
67 while (*n) {
68 parent = *n;
69 entry = rb_entry(parent, struct ext4_system_zone, node);
70 if (start_blk < entry->start_blk)
71 n = &(*n)->rb_left;
72 else if (start_blk >= (entry->start_blk + entry->count))
73 n = &(*n)->rb_right;
74 else {
75 if (start_blk + count > (entry->start_blk +
76 entry->count))
77 entry->count = (start_blk + count -
78 entry->start_blk);
79 new_node = *n;
80 new_entry = rb_entry(new_node, struct ext4_system_zone,
81 node);
82 break;
83 }
84 }
85
86 if (!new_entry) {
87 new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
88 GFP_KERNEL);
89 if (!new_entry)
90 return -ENOMEM;
91 new_entry->start_blk = start_blk;
92 new_entry->count = count;
93 new_node = &new_entry->node;
94
95 rb_link_node(new_node, parent, n);
96 rb_insert_color(new_node, &sbi->system_blks);
97 }
98
99 /* Can we merge to the left? */
100 node = rb_prev(new_node);
101 if (node) {
102 entry = rb_entry(node, struct ext4_system_zone, node);
103 if (can_merge(entry, new_entry)) {
104 new_entry->start_blk = entry->start_blk;
105 new_entry->count += entry->count;
106 rb_erase(node, &sbi->system_blks);
107 kmem_cache_free(ext4_system_zone_cachep, entry);
108 }
109 }
110
111 /* Can we merge to the right? */
112 node = rb_next(new_node);
113 if (node) {
114 entry = rb_entry(node, struct ext4_system_zone, node);
115 if (can_merge(new_entry, entry)) {
116 new_entry->count += entry->count;
117 rb_erase(node, &sbi->system_blks);
118 kmem_cache_free(ext4_system_zone_cachep, entry);
119 }
120 }
121 return 0;
122}
123
124static void debug_print_tree(struct ext4_sb_info *sbi)
125{
126 struct rb_node *node;
127 struct ext4_system_zone *entry;
128 int first = 1;
129
130 printk(KERN_INFO "System zones: ");
131 node = rb_first(&sbi->system_blks);
132 while (node) {
133 entry = rb_entry(node, struct ext4_system_zone, node);
134 printk("%s%llu-%llu", first ? "" : ", ",
135 entry->start_blk, entry->start_blk + entry->count - 1);
136 first = 0;
137 node = rb_next(node);
138 }
139 printk("\n");
140}
141
142int ext4_setup_system_zone(struct super_block *sb)
143{
144 ext4_group_t ngroups = ext4_get_groups_count(sb);
145 struct ext4_sb_info *sbi = EXT4_SB(sb);
146 struct ext4_group_desc *gdp;
147 ext4_group_t i;
148 int flex_size = ext4_flex_bg_size(sbi);
149 int ret;
150
151 if (!test_opt(sb, BLOCK_VALIDITY)) {
152 if (EXT4_SB(sb)->system_blks.rb_node)
153 ext4_release_system_zone(sb);
154 return 0;
155 }
156 if (EXT4_SB(sb)->system_blks.rb_node)
157 return 0;
158
159 for (i=0; i < ngroups; i++) {
160 if (ext4_bg_has_super(sb, i) &&
161 ((i < 5) || ((i % flex_size) == 0)))
162 add_system_zone(sbi, ext4_group_first_block_no(sb, i),
163 sbi->s_gdb_count + 1);
164 gdp = ext4_get_group_desc(sb, i, NULL);
165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
166 if (ret)
167 return ret;
168 ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
169 if (ret)
170 return ret;
171 ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
172 sbi->s_itb_per_group);
173 if (ret)
174 return ret;
175 }
176
177 if (test_opt(sb, DEBUG))
178 debug_print_tree(EXT4_SB(sb));
179 return 0;
180}
181
182/* Called when the filesystem is unmounted */
183void ext4_release_system_zone(struct super_block *sb)
184{
185 struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node;
186 struct rb_node *parent;
187 struct ext4_system_zone *entry;
188
189 while (n) {
190 /* Do the node's children first */
191 if (n->rb_left) {
192 n = n->rb_left;
193 continue;
194 }
195 if (n->rb_right) {
196 n = n->rb_right;
197 continue;
198 }
199 /*
200 * The node has no children; free it, and then zero
201 * out parent's link to it. Finally go to the
202 * beginning of the loop and try to free the parent
203 * node.
204 */
205 parent = rb_parent(n);
206 entry = rb_entry(n, struct ext4_system_zone, node);
207 kmem_cache_free(ext4_system_zone_cachep, entry);
208 if (!parent)
209 EXT4_SB(sb)->system_blks.rb_node = NULL;
210 else if (parent->rb_left == n)
211 parent->rb_left = NULL;
212 else if (parent->rb_right == n)
213 parent->rb_right = NULL;
214 n = parent;
215 }
216 EXT4_SB(sb)->system_blks.rb_node = NULL;
217}
218
219/*
220 * Returns 1 if the passed-in block region (start_blk,
221 * start_blk+count) is valid; 0 if some part of the block region
222 * overlaps with filesystem metadata blocks.
223 */
224int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
225 unsigned int count)
226{
227 struct ext4_system_zone *entry;
228 struct rb_node *n = sbi->system_blks.rb_node;
229
230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
231 (start_blk + count > ext4_blocks_count(sbi->s_es)))
232 return 0;
233 while (n) {
234 entry = rb_entry(n, struct ext4_system_zone, node);
235 if (start_blk + count - 1 < entry->start_blk)
236 n = n->rb_left;
237 else if (start_blk >= (entry->start_blk + entry->count))
238 n = n->rb_right;
239 else
240 return 0;
241 }
242 return 1;
243}
244
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b64789929a65..9dc93168e262 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp,
131 struct buffer_head *bh = NULL; 131 struct buffer_head *bh = NULL;
132 132
133 map_bh.b_state = 0; 133 map_bh.b_state = 0;
134 err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 134 err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
135 0, 0, 0);
136 if (err > 0) { 135 if (err > 0) {
137 pgoff_t index = map_bh.b_blocknr >> 136 pgoff_t index = map_bh.b_blocknr >>
138 (PAGE_CACHE_SHIFT - inode->i_blkbits); 137 (PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d0f15ef56de1..cc7d5edc38c9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -21,7 +21,14 @@
21#include <linux/magic.h> 21#include <linux/magic.h>
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/quota.h> 23#include <linux/quota.h>
24#include "ext4_i.h" 24#include <linux/rwsem.h>
25#include <linux/rbtree.h>
26#include <linux/seqlock.h>
27#include <linux/mutex.h>
28#include <linux/timer.h>
29#include <linux/wait.h>
30#include <linux/blockgroup_lock.h>
31#include <linux/percpu_counter.h>
25 32
26/* 33/*
27 * The fourth extended filesystem constants/structures 34 * The fourth extended filesystem constants/structures
@@ -46,6 +53,19 @@
46#define ext4_debug(f, a...) do {} while (0) 53#define ext4_debug(f, a...) do {} while (0)
47#endif 54#endif
48 55
56/* data type for block offset of block group */
57typedef int ext4_grpblk_t;
58
59/* data type for filesystem-wide blocks number */
60typedef unsigned long long ext4_fsblk_t;
61
62/* data type for file logical block number */
63typedef __u32 ext4_lblk_t;
64
65/* data type for block group number */
66typedef unsigned int ext4_group_t;
67
68
49/* prefer goal again. length */ 69/* prefer goal again. length */
50#define EXT4_MB_HINT_MERGE 1 70#define EXT4_MB_HINT_MERGE 1
51/* blocks already reserved */ 71/* blocks already reserved */
@@ -179,9 +199,6 @@ struct flex_groups {
179#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ 199#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
180#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ 200#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
181 201
182#ifdef __KERNEL__
183#include "ext4_sb.h"
184#endif
185/* 202/*
186 * Macro-instructions used to manage group descriptors 203 * Macro-instructions used to manage group descriptors
187 */ 204 */
@@ -297,10 +314,23 @@ struct ext4_new_group_data {
297}; 314};
298 315
299/* 316/*
300 * Following is used by preallocation code to tell get_blocks() that we 317 * Flags used by ext4_get_blocks()
301 * want uninitialzed extents.
302 */ 318 */
303#define EXT4_CREATE_UNINITIALIZED_EXT 2 319 /* Allocate any needed blocks and/or convert an unitialized
320 extent to be an initialized ext4 */
321#define EXT4_GET_BLOCKS_CREATE 0x0001
322 /* Request the creation of an unitialized extent */
323#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002
324#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
325 EXT4_GET_BLOCKS_CREATE)
326 /* Caller is from the delayed allocation writeout path,
327 so set the magic i_delalloc_reserve_flag after taking the
328 inode allocation semaphore for */
329#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
330 /* Call ext4_da_update_reserve_space() after successfully
331 allocating the blocks */
332#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
333
304 334
305/* 335/*
306 * ioctl commands 336 * ioctl commands
@@ -516,6 +546,110 @@ do { \
516#endif /* defined(__KERNEL__) || defined(__linux__) */ 546#endif /* defined(__KERNEL__) || defined(__linux__) */
517 547
518/* 548/*
549 * storage for cached extent
550 */
551struct ext4_ext_cache {
552 ext4_fsblk_t ec_start;
553 ext4_lblk_t ec_block;
554 __u32 ec_len; /* must be 32bit to return holes */
555 __u32 ec_type;
556};
557
558/*
559 * fourth extended file system inode data in memory
560 */
561struct ext4_inode_info {
562 __le32 i_data[15]; /* unconverted */
563 __u32 i_flags;
564 ext4_fsblk_t i_file_acl;
565 __u32 i_dtime;
566
567 /*
568 * i_block_group is the number of the block group which contains
569 * this file's inode. Constant across the lifetime of the inode,
570 * it is ued for making block allocation decisions - we try to
571 * place a file's data blocks near its inode block, and new inodes
572 * near to their parent directory's inode.
573 */
574 ext4_group_t i_block_group;
575 __u32 i_state; /* Dynamic state flags for ext4 */
576
577 ext4_lblk_t i_dir_start_lookup;
578#ifdef CONFIG_EXT4_FS_XATTR
579 /*
580 * Extended attributes can be read independently of the main file
581 * data. Taking i_mutex even when reading would cause contention
582 * between readers of EAs and writers of regular file data, so
583 * instead we synchronize on xattr_sem when reading or changing
584 * EAs.
585 */
586 struct rw_semaphore xattr_sem;
587#endif
588#ifdef CONFIG_EXT4_FS_POSIX_ACL
589 struct posix_acl *i_acl;
590 struct posix_acl *i_default_acl;
591#endif
592
593 struct list_head i_orphan; /* unlinked but open inodes */
594
595 /*
596 * i_disksize keeps track of what the inode size is ON DISK, not
597 * in memory. During truncate, i_size is set to the new size by
598 * the VFS prior to calling ext4_truncate(), but the filesystem won't
599 * set i_disksize to 0 until the truncate is actually under way.
600 *
601 * The intent is that i_disksize always represents the blocks which
602 * are used by this file. This allows recovery to restart truncate
603 * on orphans if we crash during truncate. We actually write i_disksize
604 * into the on-disk inode when writing inodes out, instead of i_size.
605 *
606 * The only time when i_disksize and i_size may be different is when
607 * a truncate is in progress. The only things which change i_disksize
608 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
609 */
610 loff_t i_disksize;
611
612 /*
613 * i_data_sem is for serialising ext4_truncate() against
614 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
615 * data tree are chopped off during truncate. We can't do that in
616 * ext4 because whenever we perform intermediate commits during
617 * truncate, the inode and all the metadata blocks *must* be in a
618 * consistent state which allows truncation of the orphans to restart
619 * during recovery. Hence we must fix the get_block-vs-truncate race
620 * by other means, so we have i_data_sem.
621 */
622 struct rw_semaphore i_data_sem;
623 struct inode vfs_inode;
624 struct jbd2_inode jinode;
625
626 struct ext4_ext_cache i_cached_extent;
627 /*
628 * File creation time. Its function is same as that of
629 * struct timespec i_{a,c,m}time in the generic inode.
630 */
631 struct timespec i_crtime;
632
633 /* mballoc */
634 struct list_head i_prealloc_list;
635 spinlock_t i_prealloc_lock;
636
637 /* ialloc */
638 ext4_group_t i_last_alloc_group;
639
640 /* allocation reservation info for delalloc */
641 unsigned int i_reserved_data_blocks;
642 unsigned int i_reserved_meta_blocks;
643 unsigned int i_allocated_meta_blocks;
644 unsigned short i_delalloc_reserved_flag;
645
646 /* on-disk additional length */
647 __u16 i_extra_isize;
648
649 spinlock_t i_block_reservation_lock;
650};
651
652/*
519 * File system states 653 * File system states
520 */ 654 */
521#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ 655#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
@@ -560,6 +694,7 @@ do { \
560#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 694#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
561#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 695#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
562#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 696#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
697#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
563 698
564/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ 699/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
565#ifndef _LINUX_EXT2_FS_H 700#ifndef _LINUX_EXT2_FS_H
@@ -689,6 +824,137 @@ struct ext4_super_block {
689}; 824};
690 825
691#ifdef __KERNEL__ 826#ifdef __KERNEL__
827/*
828 * fourth extended-fs super-block data in memory
829 */
830struct ext4_sb_info {
831 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
832 unsigned long s_inodes_per_block;/* Number of inodes per block */
833 unsigned long s_blocks_per_group;/* Number of blocks in a group */
834 unsigned long s_inodes_per_group;/* Number of inodes in a group */
835 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
836 unsigned long s_gdb_count; /* Number of group descriptor blocks */
837 unsigned long s_desc_per_block; /* Number of group descriptors per block */
838 ext4_group_t s_groups_count; /* Number of groups in the fs */
839 unsigned long s_overhead_last; /* Last calculated overhead */
840 unsigned long s_blocks_last; /* Last seen block count */
841 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
842 struct buffer_head * s_sbh; /* Buffer containing the super block */
843 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
844 struct buffer_head **s_group_desc;
845 unsigned long s_mount_opt;
846 ext4_fsblk_t s_sb_block;
847 uid_t s_resuid;
848 gid_t s_resgid;
849 unsigned short s_mount_state;
850 unsigned short s_pad;
851 int s_addr_per_block_bits;
852 int s_desc_per_block_bits;
853 int s_inode_size;
854 int s_first_ino;
855 unsigned int s_inode_readahead_blks;
856 spinlock_t s_next_gen_lock;
857 u32 s_next_generation;
858 u32 s_hash_seed[4];
859 int s_def_hash_version;
860 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
861 struct percpu_counter s_freeblocks_counter;
862 struct percpu_counter s_freeinodes_counter;
863 struct percpu_counter s_dirs_counter;
864 struct percpu_counter s_dirtyblocks_counter;
865 struct blockgroup_lock *s_blockgroup_lock;
866 struct proc_dir_entry *s_proc;
867 struct kobject s_kobj;
868 struct completion s_kobj_unregister;
869
870 /* Journaling */
871 struct inode *s_journal_inode;
872 struct journal_s *s_journal;
873 struct list_head s_orphan;
874 struct mutex s_orphan_lock;
875 struct mutex s_resize_lock;
876 unsigned long s_commit_interval;
877 u32 s_max_batch_time;
878 u32 s_min_batch_time;
879 struct block_device *journal_bdev;
880#ifdef CONFIG_JBD2_DEBUG
881 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
882 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
883#endif
884#ifdef CONFIG_QUOTA
885 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
886 int s_jquota_fmt; /* Format of quota to use */
887#endif
888 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
889 struct rb_root system_blks;
890
891#ifdef EXTENTS_STATS
892 /* ext4 extents stats */
893 unsigned long s_ext_min;
894 unsigned long s_ext_max;
895 unsigned long s_depth_max;
896 spinlock_t s_ext_stats_lock;
897 unsigned long s_ext_blocks;
898 unsigned long s_ext_extents;
899#endif
900
901 /* for buddy allocator */
902 struct ext4_group_info ***s_group_info;
903 struct inode *s_buddy_cache;
904 long s_blocks_reserved;
905 spinlock_t s_reserve_lock;
906 spinlock_t s_md_lock;
907 tid_t s_last_transaction;
908 unsigned short *s_mb_offsets;
909 unsigned int *s_mb_maxs;
910
911 /* tunables */
912 unsigned long s_stripe;
913 unsigned int s_mb_stream_request;
914 unsigned int s_mb_max_to_scan;
915 unsigned int s_mb_min_to_scan;
916 unsigned int s_mb_stats;
917 unsigned int s_mb_order2_reqs;
918 unsigned int s_mb_group_prealloc;
919 /* where last allocation was done - for stream allocation */
920 unsigned long s_mb_last_group;
921 unsigned long s_mb_last_start;
922
923 /* history to debug policy */
924 struct ext4_mb_history *s_mb_history;
925 int s_mb_history_cur;
926 int s_mb_history_max;
927 int s_mb_history_num;
928 spinlock_t s_mb_history_lock;
929 int s_mb_history_filter;
930
931 /* stats for buddy allocator */
932 spinlock_t s_mb_pa_lock;
933 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
934 atomic_t s_bal_success; /* we found long enough chunks */
935 atomic_t s_bal_allocated; /* in blocks */
936 atomic_t s_bal_ex_scanned; /* total extents scanned */
937 atomic_t s_bal_goals; /* goal hits */
938 atomic_t s_bal_breaks; /* too long searches */
939 atomic_t s_bal_2orders; /* 2^order hits */
940 spinlock_t s_bal_lock;
941 unsigned long s_mb_buddies_generated;
942 unsigned long long s_mb_generation_time;
943 atomic_t s_mb_lost_chunks;
944 atomic_t s_mb_preallocated;
945 atomic_t s_mb_discarded;
946
947 /* locality groups */
948 struct ext4_locality_group *s_locality_groups;
949
950 /* for write statistics */
951 unsigned long s_sectors_written_start;
952 u64 s_kbytes_written;
953
954 unsigned int s_log_groups_per_flex;
955 struct flex_groups *s_flex_groups;
956};
957
692static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 958static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
693{ 959{
694 return sb->s_fs_info; 960 return sb->s_fs_info;
@@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode)
704 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 970 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
705} 971}
706 972
707
708static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) 973static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
709{ 974{
710 return ino == EXT4_ROOT_INO || 975 return ino == EXT4_ROOT_INO ||
@@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1014 ext4_group_t block_group, 1279 ext4_group_t block_group,
1015 struct buffer_head ** bh); 1280 struct buffer_head ** bh);
1016extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1281extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1282struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
1283 ext4_group_t block_group);
1284extern unsigned ext4_init_block_bitmap(struct super_block *sb,
1285 struct buffer_head *bh,
1286 ext4_group_t group,
1287 struct ext4_group_desc *desc);
1288#define ext4_free_blocks_after_init(sb, group, desc) \
1289 ext4_init_block_bitmap(sb, NULL, group, desc)
1017 1290
1018/* dir.c */ 1291/* dir.c */
1019extern int ext4_check_dir_entry(const char *, struct inode *, 1292extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1038extern unsigned long ext4_count_free_inodes(struct super_block *); 1311extern unsigned long ext4_count_free_inodes(struct super_block *);
1039extern unsigned long ext4_count_dirs(struct super_block *); 1312extern unsigned long ext4_count_dirs(struct super_block *);
1040extern void ext4_check_inodes_bitmap(struct super_block *); 1313extern void ext4_check_inodes_bitmap(struct super_block *);
1314extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
1315 struct buffer_head *bh,
1316 ext4_group_t group,
1317 struct ext4_group_desc *desc);
1318extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1041 1319
1042/* mballoc.c */ 1320/* mballoc.c */
1043extern long ext4_mb_stats; 1321extern long ext4_mb_stats;
@@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1123 __attribute__ ((format (printf, 3, 4))); 1401 __attribute__ ((format (printf, 3, 4)));
1124extern void ext4_warning(struct super_block *, const char *, const char *, ...) 1402extern void ext4_warning(struct super_block *, const char *, const char *, ...)
1125 __attribute__ ((format (printf, 3, 4))); 1403 __attribute__ ((format (printf, 3, 4)));
1404extern void ext4_msg(struct super_block *, const char *, const char *, ...)
1405 __attribute__ ((format (printf, 3, 4)));
1126extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, 1406extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
1127 const char *, const char *, ...) 1407 const char *, const char *, ...)
1128 __attribute__ ((format (printf, 4, 5))); 1408 __attribute__ ((format (printf, 4, 5)));
@@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb,
1161 struct ext4_group_desc *bg, __u32 count); 1441 struct ext4_group_desc *bg, __u32 count);
1162extern void ext4_itable_unused_set(struct super_block *sb, 1442extern void ext4_itable_unused_set(struct super_block *sb,
1163 struct ext4_group_desc *bg, __u32 count); 1443 struct ext4_group_desc *bg, __u32 count);
1444extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
1445 struct ext4_group_desc *gdp);
1446extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
1447 struct ext4_group_desc *gdp);
1164 1448
1165static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 1449static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1166{ 1450{
@@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1228 return grp_info[indexv][indexh]; 1512 return grp_info[indexv][indexh];
1229} 1513}
1230 1514
1515/*
1516 * Reading s_groups_count requires using smp_rmb() afterwards. See
1517 * the locking protocol documented in the comments of ext4_group_add()
1518 * in resize.c
1519 */
1520static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
1521{
1522 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
1523
1524 smp_rmb();
1525 return ngroups;
1526}
1231 1527
1232static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, 1528static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
1233 ext4_group_t block_group) 1529 ext4_group_t block_group)
@@ -1283,33 +1579,25 @@ struct ext4_group_info {
1283}; 1579};
1284 1580
1285#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 1581#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
1286#define EXT4_GROUP_INFO_LOCKED_BIT 1
1287 1582
1288#define EXT4_MB_GRP_NEED_INIT(grp) \ 1583#define EXT4_MB_GRP_NEED_INIT(grp) \
1289 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) 1584 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
1290 1585
1291static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) 1586static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
1587 ext4_group_t group)
1292{ 1588{
1293 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1589 return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
1294
1295 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1296} 1590}
1297 1591
1298static inline void ext4_unlock_group(struct super_block *sb, 1592static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
1299 ext4_group_t group)
1300{ 1593{
1301 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1594 spin_lock(ext4_group_lock_ptr(sb, group));
1302
1303 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1304} 1595}
1305 1596
1306static inline int ext4_is_group_locked(struct super_block *sb, 1597static inline void ext4_unlock_group(struct super_block *sb,
1307 ext4_group_t group) 1598 ext4_group_t group)
1308{ 1599{
1309 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1600 spin_unlock(ext4_group_lock_ptr(sb, group));
1310
1311 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
1312 &(grinfo->bb_state));
1313} 1601}
1314 1602
1315/* 1603/*
@@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations;
1326/* namei.c */ 1614/* namei.c */
1327extern const struct inode_operations ext4_dir_inode_operations; 1615extern const struct inode_operations ext4_dir_inode_operations;
1328extern const struct inode_operations ext4_special_inode_operations; 1616extern const struct inode_operations ext4_special_inode_operations;
1617extern struct dentry *ext4_get_parent(struct dentry *child);
1329 1618
1330/* symlink.c */ 1619/* symlink.c */
1331extern const struct inode_operations ext4_symlink_inode_operations; 1620extern const struct inode_operations ext4_symlink_inode_operations;
1332extern const struct inode_operations ext4_fast_symlink_inode_operations; 1621extern const struct inode_operations ext4_fast_symlink_inode_operations;
1333 1622
1623/* block_validity */
1624extern void ext4_release_system_zone(struct super_block *sb);
1625extern int ext4_setup_system_zone(struct super_block *sb);
1626extern int __init init_ext4_system_zone(void);
1627extern void exit_ext4_system_zone(void);
1628extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
1629 ext4_fsblk_t start_blk,
1630 unsigned int count);
1631
1334/* extents.c */ 1632/* extents.c */
1335extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 1633extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1336extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 1634extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
@@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
1338 int chunk); 1636 int chunk);
1339extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 1637extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1340 ext4_lblk_t iblock, unsigned int max_blocks, 1638 ext4_lblk_t iblock, unsigned int max_blocks,
1341 struct buffer_head *bh_result, 1639 struct buffer_head *bh_result, int flags);
1342 int create, int extend_disksize);
1343extern void ext4_ext_truncate(struct inode *); 1640extern void ext4_ext_truncate(struct inode *);
1344extern void ext4_ext_init(struct super_block *); 1641extern void ext4_ext_init(struct super_block *);
1345extern void ext4_ext_release(struct super_block *); 1642extern void ext4_ext_release(struct super_block *);
1346extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 1643extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1347 loff_t len); 1644 loff_t len);
1348extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, 1645extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
1349 sector_t block, unsigned int max_blocks, 1646 sector_t block, unsigned int max_blocks,
1350 struct buffer_head *bh, int create, 1647 struct buffer_head *bh, int flags);
1351 int extend_disksize, int flag);
1352extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1648extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1353 __u64 start, __u64 len); 1649 __u64 start, __u64 len);
1354 1650
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
deleted file mode 100644
index 4ce2187123aa..000000000000
--- a/fs/ext4/ext4_i.h
+++ /dev/null
@@ -1,140 +0,0 @@
1/*
2 * ext4_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_I
17#define _EXT4_I
18
19#include <linux/rwsem.h>
20#include <linux/rbtree.h>
21#include <linux/seqlock.h>
22#include <linux/mutex.h>
23
24/* data type for block offset of block group */
25typedef int ext4_grpblk_t;
26
27/* data type for filesystem-wide blocks number */
28typedef unsigned long long ext4_fsblk_t;
29
30/* data type for file logical block number */
31typedef __u32 ext4_lblk_t;
32
33/* data type for block group number */
34typedef unsigned int ext4_group_t;
35
36/*
37 * storage for cached extent
38 */
39struct ext4_ext_cache {
40 ext4_fsblk_t ec_start;
41 ext4_lblk_t ec_block;
42 __u32 ec_len; /* must be 32bit to return holes */
43 __u32 ec_type;
44};
45
46/*
47 * fourth extended file system inode data in memory
48 */
49struct ext4_inode_info {
50 __le32 i_data[15]; /* unconverted */
51 __u32 i_flags;
52 ext4_fsblk_t i_file_acl;
53 __u32 i_dtime;
54
55 /*
56 * i_block_group is the number of the block group which contains
57 * this file's inode. Constant across the lifetime of the inode,
58 * it is ued for making block allocation decisions - we try to
59 * place a file's data blocks near its inode block, and new inodes
60 * near to their parent directory's inode.
61 */
62 ext4_group_t i_block_group;
63 __u32 i_state; /* Dynamic state flags for ext4 */
64
65 ext4_lblk_t i_dir_start_lookup;
66#ifdef CONFIG_EXT4_FS_XATTR
67 /*
68 * Extended attributes can be read independently of the main file
69 * data. Taking i_mutex even when reading would cause contention
70 * between readers of EAs and writers of regular file data, so
71 * instead we synchronize on xattr_sem when reading or changing
72 * EAs.
73 */
74 struct rw_semaphore xattr_sem;
75#endif
76#ifdef CONFIG_EXT4_FS_POSIX_ACL
77 struct posix_acl *i_acl;
78 struct posix_acl *i_default_acl;
79#endif
80
81 struct list_head i_orphan; /* unlinked but open inodes */
82
83 /*
84 * i_disksize keeps track of what the inode size is ON DISK, not
85 * in memory. During truncate, i_size is set to the new size by
86 * the VFS prior to calling ext4_truncate(), but the filesystem won't
87 * set i_disksize to 0 until the truncate is actually under way.
88 *
89 * The intent is that i_disksize always represents the blocks which
90 * are used by this file. This allows recovery to restart truncate
91 * on orphans if we crash during truncate. We actually write i_disksize
92 * into the on-disk inode when writing inodes out, instead of i_size.
93 *
94 * The only time when i_disksize and i_size may be different is when
95 * a truncate is in progress. The only things which change i_disksize
96 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
97 */
98 loff_t i_disksize;
99
100 /*
101 * i_data_sem is for serialising ext4_truncate() against
102 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
103 * data tree are chopped off during truncate. We can't do that in
104 * ext4 because whenever we perform intermediate commits during
105 * truncate, the inode and all the metadata blocks *must* be in a
106 * consistent state which allows truncation of the orphans to restart
107 * during recovery. Hence we must fix the get_block-vs-truncate race
108 * by other means, so we have i_data_sem.
109 */
110 struct rw_semaphore i_data_sem;
111 struct inode vfs_inode;
112 struct jbd2_inode jinode;
113
114 struct ext4_ext_cache i_cached_extent;
115 /*
116 * File creation time. Its function is same as that of
117 * struct timespec i_{a,c,m}time in the generic inode.
118 */
119 struct timespec i_crtime;
120
121 /* mballoc */
122 struct list_head i_prealloc_list;
123 spinlock_t i_prealloc_lock;
124
125 /* ialloc */
126 ext4_group_t i_last_alloc_group;
127
128 /* allocation reservation info for delalloc */
129 unsigned int i_reserved_data_blocks;
130 unsigned int i_reserved_meta_blocks;
131 unsigned int i_allocated_meta_blocks;
132 unsigned short i_delalloc_reserved_flag;
133
134 /* on-disk additional length */
135 __u16 i_extra_isize;
136
137 spinlock_t i_block_reservation_lock;
138};
139
140#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
deleted file mode 100644
index 57b71fefbccf..000000000000
--- a/fs/ext4/ext4_sb.h
+++ /dev/null
@@ -1,161 +0,0 @@
1/*
2 * ext4_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_SB
17#define _EXT4_SB
18
19#ifdef __KERNEL__
20#include <linux/timer.h>
21#include <linux/wait.h>
22#include <linux/blockgroup_lock.h>
23#include <linux/percpu_counter.h>
24#endif
25#include <linux/rbtree.h>
26
27/*
28 * fourth extended-fs super-block data in memory
29 */
30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
32 unsigned long s_inodes_per_block;/* Number of inodes per block */
33 unsigned long s_blocks_per_group;/* Number of blocks in a group */
34 unsigned long s_inodes_per_group;/* Number of inodes in a group */
35 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
36 unsigned long s_gdb_count; /* Number of group descriptor blocks */
37 unsigned long s_desc_per_block; /* Number of group descriptors per block */
38 ext4_group_t s_groups_count; /* Number of groups in the fs */
39 unsigned long s_overhead_last; /* Last calculated overhead */
40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head **s_group_desc;
45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid;
48 gid_t s_resgid;
49 unsigned short s_mount_state;
50 unsigned short s_pad;
51 int s_addr_per_block_bits;
52 int s_desc_per_block_bits;
53 int s_inode_size;
54 int s_first_ino;
55 unsigned int s_inode_readahead_blks;
56 spinlock_t s_next_gen_lock;
57 u32 s_next_generation;
58 u32 s_hash_seed[4];
59 int s_def_hash_version;
60 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
61 struct percpu_counter s_freeblocks_counter;
62 struct percpu_counter s_freeinodes_counter;
63 struct percpu_counter s_dirs_counter;
64 struct percpu_counter s_dirtyblocks_counter;
65 struct blockgroup_lock *s_blockgroup_lock;
66 struct proc_dir_entry *s_proc;
67 struct kobject s_kobj;
68 struct completion s_kobj_unregister;
69
70 /* Journaling */
71 struct inode *s_journal_inode;
72 struct journal_s *s_journal;
73 struct list_head s_orphan;
74 unsigned long s_commit_interval;
75 u32 s_max_batch_time;
76 u32 s_min_batch_time;
77 struct block_device *journal_bdev;
78#ifdef CONFIG_JBD2_DEBUG
79 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
80 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
81#endif
82#ifdef CONFIG_QUOTA
83 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
84 int s_jquota_fmt; /* Format of quota to use */
85#endif
86 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
87
88#ifdef EXTENTS_STATS
89 /* ext4 extents stats */
90 unsigned long s_ext_min;
91 unsigned long s_ext_max;
92 unsigned long s_depth_max;
93 spinlock_t s_ext_stats_lock;
94 unsigned long s_ext_blocks;
95 unsigned long s_ext_extents;
96#endif
97
98 /* for buddy allocator */
99 struct ext4_group_info ***s_group_info;
100 struct inode *s_buddy_cache;
101 long s_blocks_reserved;
102 spinlock_t s_reserve_lock;
103 spinlock_t s_md_lock;
104 tid_t s_last_transaction;
105 unsigned short *s_mb_offsets;
106 unsigned int *s_mb_maxs;
107
108 /* tunables */
109 unsigned long s_stripe;
110 unsigned int s_mb_stream_request;
111 unsigned int s_mb_max_to_scan;
112 unsigned int s_mb_min_to_scan;
113 unsigned int s_mb_stats;
114 unsigned int s_mb_order2_reqs;
115 unsigned int s_mb_group_prealloc;
116 /* where last allocation was done - for stream allocation */
117 unsigned long s_mb_last_group;
118 unsigned long s_mb_last_start;
119
120 /* history to debug policy */
121 struct ext4_mb_history *s_mb_history;
122 int s_mb_history_cur;
123 int s_mb_history_max;
124 int s_mb_history_num;
125 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter;
127
128 /* stats for buddy allocator */
129 spinlock_t s_mb_pa_lock;
130 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
131 atomic_t s_bal_success; /* we found long enough chunks */
132 atomic_t s_bal_allocated; /* in blocks */
133 atomic_t s_bal_ex_scanned; /* total extents scanned */
134 atomic_t s_bal_goals; /* goal hits */
135 atomic_t s_bal_breaks; /* too long searches */
136 atomic_t s_bal_2orders; /* 2^order hits */
137 spinlock_t s_bal_lock;
138 unsigned long s_mb_buddies_generated;
139 unsigned long long s_mb_generation_time;
140 atomic_t s_mb_lost_chunks;
141 atomic_t s_mb_preallocated;
142 atomic_t s_mb_discarded;
143
144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups;
146
147 /* for write statistics */
148 unsigned long s_sectors_written_start;
149 u64 s_kbytes_written;
150
151 unsigned int s_log_groups_per_flex;
152 struct flex_groups *s_flex_groups;
153};
154
155static inline spinlock_t *
156sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
157{
158 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
159}
160
161#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e3a55eb8b26a..2593f748c3a4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth)
326 326
327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
328{ 328{
329 ext4_fsblk_t block = ext_pblock(ext), valid_block; 329 ext4_fsblk_t block = ext_pblock(ext);
330 int len = ext4_ext_get_actual_len(ext); 330 int len = ext4_ext_get_actual_len(ext);
331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
332 331
333 valid_block = le32_to_cpu(es->s_first_data_block) + 332 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
334 EXT4_SB(inode->i_sb)->s_gdb_count;
335 if (unlikely(block <= valid_block ||
336 ((block + len) > ext4_blocks_count(es))))
337 return 0;
338 else
339 return 1;
340} 333}
341 334
342static int ext4_valid_extent_idx(struct inode *inode, 335static int ext4_valid_extent_idx(struct inode *inode,
343 struct ext4_extent_idx *ext_idx) 336 struct ext4_extent_idx *ext_idx)
344{ 337{
345 ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; 338 ext4_fsblk_t block = idx_pblock(ext_idx);
346 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
347 339
348 valid_block = le32_to_cpu(es->s_first_data_block) + 340 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
349 EXT4_SB(inode->i_sb)->s_gdb_count;
350 if (unlikely(block <= valid_block ||
351 (block >= ext4_blocks_count(es))))
352 return 0;
353 else
354 return 1;
355} 341}
356 342
357static int ext4_valid_extent_entries(struct inode *inode, 343static int ext4_valid_extent_entries(struct inode *inode,
@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2097 ex = EXT_LAST_EXTENT(eh); 2083 ex = EXT_LAST_EXTENT(eh);
2098 2084
2099 ex_ee_block = le32_to_cpu(ex->ee_block); 2085 ex_ee_block = le32_to_cpu(ex->ee_block);
2100 if (ext4_ext_is_uninitialized(ex))
2101 uninitialized = 1;
2102 ex_ee_len = ext4_ext_get_actual_len(ex); 2086 ex_ee_len = ext4_ext_get_actual_len(ex);
2103 2087
2104 while (ex >= EXT_FIRST_EXTENT(eh) && 2088 while (ex >= EXT_FIRST_EXTENT(eh) &&
2105 ex_ee_block + ex_ee_len > start) { 2089 ex_ee_block + ex_ee_len > start) {
2090
2091 if (ext4_ext_is_uninitialized(ex))
2092 uninitialized = 1;
2093 else
2094 uninitialized = 0;
2095
2106 ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); 2096 ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
2107 path[depth].p_ext = ex; 2097 path[depth].p_ext = ex;
2108 2098
@@ -2784,7 +2774,7 @@ fix_extent_len:
2784int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 2774int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2785 ext4_lblk_t iblock, 2775 ext4_lblk_t iblock,
2786 unsigned int max_blocks, struct buffer_head *bh_result, 2776 unsigned int max_blocks, struct buffer_head *bh_result,
2787 int create, int extend_disksize) 2777 int flags)
2788{ 2778{
2789 struct ext4_ext_path *path = NULL; 2779 struct ext4_ext_path *path = NULL;
2790 struct ext4_extent_header *eh; 2780 struct ext4_extent_header *eh;
@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2793 int err = 0, depth, ret, cache_type; 2783 int err = 0, depth, ret, cache_type;
2794 unsigned int allocated = 0; 2784 unsigned int allocated = 0;
2795 struct ext4_allocation_request ar; 2785 struct ext4_allocation_request ar;
2796 loff_t disksize;
2797 2786
2798 __clear_bit(BH_New, &bh_result->b_state); 2787 __clear_bit(BH_New, &bh_result->b_state);
2799 ext_debug("blocks %u/%u requested for inode %u\n", 2788 ext_debug("blocks %u/%u requested for inode %u\n",
@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2803 cache_type = ext4_ext_in_cache(inode, iblock, &newex); 2792 cache_type = ext4_ext_in_cache(inode, iblock, &newex);
2804 if (cache_type) { 2793 if (cache_type) {
2805 if (cache_type == EXT4_EXT_CACHE_GAP) { 2794 if (cache_type == EXT4_EXT_CACHE_GAP) {
2806 if (!create) { 2795 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2807 /* 2796 /*
2808 * block isn't allocated yet and 2797 * block isn't allocated yet and
2809 * user doesn't want to allocate it 2798 * user doesn't want to allocate it
@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2869 EXT4_EXT_CACHE_EXTENT); 2858 EXT4_EXT_CACHE_EXTENT);
2870 goto out; 2859 goto out;
2871 } 2860 }
2872 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2861 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
2873 goto out; 2862 goto out;
2874 if (!create) { 2863 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2864 if (allocated > max_blocks)
2865 allocated = max_blocks;
2875 /* 2866 /*
2876 * We have blocks reserved already. We 2867 * We have blocks reserved already. We
2877 * return allocated blocks so that delalloc 2868 * return allocated blocks so that delalloc
@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2879 * the buffer head will be unmapped so that 2870 * the buffer head will be unmapped so that
2880 * a read from the block returns 0s. 2871 * a read from the block returns 0s.
2881 */ 2872 */
2882 if (allocated > max_blocks)
2883 allocated = max_blocks;
2884 set_buffer_unwritten(bh_result); 2873 set_buffer_unwritten(bh_result);
2885 bh_result->b_bdev = inode->i_sb->s_bdev; 2874 bh_result->b_bdev = inode->i_sb->s_bdev;
2886 bh_result->b_blocknr = newblock; 2875 bh_result->b_blocknr = newblock;
@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2903 * requested block isn't allocated yet; 2892 * requested block isn't allocated yet;
2904 * we couldn't try to create block if create flag is zero 2893 * we couldn't try to create block if create flag is zero
2905 */ 2894 */
2906 if (!create) { 2895 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2907 /* 2896 /*
2908 * put just found gap into cache to speed up 2897 * put just found gap into cache to speed up
2909 * subsequent requests 2898 * subsequent requests
@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2932 * EXT_UNINIT_MAX_LEN. 2921 * EXT_UNINIT_MAX_LEN.
2933 */ 2922 */
2934 if (max_blocks > EXT_INIT_MAX_LEN && 2923 if (max_blocks > EXT_INIT_MAX_LEN &&
2935 create != EXT4_CREATE_UNINITIALIZED_EXT) 2924 !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
2936 max_blocks = EXT_INIT_MAX_LEN; 2925 max_blocks = EXT_INIT_MAX_LEN;
2937 else if (max_blocks > EXT_UNINIT_MAX_LEN && 2926 else if (max_blocks > EXT_UNINIT_MAX_LEN &&
2938 create == EXT4_CREATE_UNINITIALIZED_EXT) 2927 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
2939 max_blocks = EXT_UNINIT_MAX_LEN; 2928 max_blocks = EXT_UNINIT_MAX_LEN;
2940 2929
2941 /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ 2930 /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2966 /* try to insert new extent into found leaf and return */ 2955 /* try to insert new extent into found leaf and return */
2967 ext4_ext_store_pblock(&newex, newblock); 2956 ext4_ext_store_pblock(&newex, newblock);
2968 newex.ee_len = cpu_to_le16(ar.len); 2957 newex.ee_len = cpu_to_le16(ar.len);
2969 if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ 2958 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */
2970 ext4_ext_mark_uninitialized(&newex); 2959 ext4_ext_mark_uninitialized(&newex);
2971 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2960 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2972 if (err) { 2961 if (err) {
@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2983 newblock = ext_pblock(&newex); 2972 newblock = ext_pblock(&newex);
2984 allocated = ext4_ext_get_actual_len(&newex); 2973 allocated = ext4_ext_get_actual_len(&newex);
2985outnew: 2974outnew:
2986 if (extend_disksize) {
2987 disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
2988 if (disksize > i_size_read(inode))
2989 disksize = i_size_read(inode);
2990 if (disksize > EXT4_I(inode)->i_disksize)
2991 EXT4_I(inode)->i_disksize = disksize;
2992 }
2993
2994 set_buffer_new(bh_result); 2975 set_buffer_new(bh_result);
2995 2976
2996 /* Cache only when it is _not_ an uninitialized extent */ 2977 /* Cache only when it is _not_ an uninitialized extent */
2997 if (create != EXT4_CREATE_UNINITIALIZED_EXT) 2978 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
2998 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 2979 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
2999 EXT4_EXT_CACHE_EXTENT); 2980 EXT4_EXT_CACHE_EXTENT);
3000out: 2981out:
@@ -3150,9 +3131,10 @@ retry:
3150 ret = PTR_ERR(handle); 3131 ret = PTR_ERR(handle);
3151 break; 3132 break;
3152 } 3133 }
3153 ret = ext4_get_blocks_wrap(handle, inode, block, 3134 map_bh.b_state = 0;
3154 max_blocks, &map_bh, 3135 ret = ext4_get_blocks(handle, inode, block,
3155 EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); 3136 max_blocks, &map_bh,
3137 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
3156 if (ret <= 0) { 3138 if (ret <= 0) {
3157#ifdef EXT4FS_DEBUG 3139#ifdef EXT4FS_DEBUG
3158 WARN_ON(ret <= 0); 3140 WARN_ON(ret <= 0);
@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3195 void *data) 3177 void *data)
3196{ 3178{
3197 struct fiemap_extent_info *fieinfo = data; 3179 struct fiemap_extent_info *fieinfo = data;
3198 unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; 3180 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
3199 __u64 logical; 3181 __u64 logical;
3200 __u64 physical; 3182 __u64 physical;
3201 __u64 length; 3183 __u64 length;
@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3242 * 3224 *
3243 * XXX this might miss a single-block extent at EXT_MAX_BLOCK 3225 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3244 */ 3226 */
3245 if (logical + length - 1 == EXT_MAX_BLOCK || 3227 if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
3246 ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) 3228 newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
3229 loff_t size = i_size_read(inode);
3230 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
3231
3247 flags |= FIEMAP_EXTENT_LAST; 3232 flags |= FIEMAP_EXTENT_LAST;
3233 if ((flags & FIEMAP_EXTENT_DELALLOC) &&
3234 logical+length > size)
3235 length = (size - logical + bs - 1) & ~(bs-1);
3236 }
3248 3237
3249 error = fiemap_fill_next_extent(fieinfo, logical, physical, 3238 error = fiemap_fill_next_extent(fieinfo, logical, physical,
3250 length, flags); 3239 length, flags);
@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3318 * Walk the extent tree gathering extent information. 3307 * Walk the extent tree gathering extent information.
3319 * ext4_ext_fiemap_cb will push extents back to user. 3308 * ext4_ext_fiemap_cb will push extents back to user.
3320 */ 3309 */
3321 down_write(&EXT4_I(inode)->i_data_sem); 3310 down_read(&EXT4_I(inode)->i_data_sem);
3322 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3311 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3323 ext4_ext_fiemap_cb, fieinfo); 3312 ext4_ext_fiemap_cb, fieinfo);
3324 up_write(&EXT4_I(inode)->i_data_sem); 3313 up_read(&EXT4_I(inode)->i_data_sem);
3325 } 3314 }
3326 3315
3327 return error; 3316 return error;
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
deleted file mode 100644
index c2c0a8d06d0e..000000000000
--- a/fs/ext4/group.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * linux/fs/ext4/group.h
3 *
4 * Copyright (C) 2007 Cluster File Systems, Inc
5 *
6 * Author: Andreas Dilger <adilger@clusterfs.com>
7 */
8
9#ifndef _LINUX_EXT4_GROUP_H
10#define _LINUX_EXT4_GROUP_H
11
12extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
13 struct ext4_group_desc *gdp);
14extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
15 struct ext4_group_desc *gdp);
16struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
17 ext4_group_t block_group);
18extern unsigned ext4_init_block_bitmap(struct super_block *sb,
19 struct buffer_head *bh,
20 ext4_group_t group,
21 struct ext4_group_desc *desc);
22#define ext4_free_blocks_after_init(sb, group, desc) \
23 ext4_init_block_bitmap(sb, NULL, group, desc)
24extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
25 struct buffer_head *bh,
26 ext4_group_t group,
27 struct ext4_group_desc *desc);
28extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
29#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18e0a08a6b5..3743bd849bce 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -27,7 +27,6 @@
27#include "ext4_jbd2.h" 27#include "ext4_jbd2.h"
28#include "xattr.h" 28#include "xattr.h"
29#include "acl.h" 29#include "acl.h"
30#include "group.h"
31 30
32/* 31/*
33 * ialloc.c contains the inodes allocation and deallocation routines 32 * ialloc.c contains the inodes allocation and deallocation routines
@@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
123 unlock_buffer(bh); 122 unlock_buffer(bh);
124 return bh; 123 return bh;
125 } 124 }
126 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 125 ext4_lock_group(sb, block_group);
127 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 126 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
128 ext4_init_inode_bitmap(sb, bh, block_group, desc); 127 ext4_init_inode_bitmap(sb, bh, block_group, desc);
129 set_bitmap_uptodate(bh); 128 set_bitmap_uptodate(bh);
130 set_buffer_uptodate(bh); 129 set_buffer_uptodate(bh);
131 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 130 ext4_unlock_group(sb, block_group);
132 unlock_buffer(bh); 131 unlock_buffer(bh);
133 return bh; 132 return bh;
134 } 133 }
135 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 134 ext4_unlock_group(sb, block_group);
136 if (buffer_uptodate(bh)) { 135 if (buffer_uptodate(bh)) {
137 /* 136 /*
138 * if not uninit if bh is uptodate, 137 * if not uninit if bh is uptodate,
@@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
247 goto error_return; 246 goto error_return;
248 247
249 /* Ok, now we can actually update the inode bitmaps.. */ 248 /* Ok, now we can actually update the inode bitmaps.. */
250 spin_lock(sb_bgl_lock(sbi, block_group)); 249 cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
251 cleared = ext4_clear_bit(bit, bitmap_bh->b_data); 250 bit, bitmap_bh->b_data);
252 spin_unlock(sb_bgl_lock(sbi, block_group));
253 if (!cleared) 251 if (!cleared)
254 ext4_error(sb, "ext4_free_inode", 252 ext4_error(sb, "ext4_free_inode",
255 "bit already cleared for inode %lu", ino); 253 "bit already cleared for inode %lu", ino);
@@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
261 if (fatal) goto error_return; 259 if (fatal) goto error_return;
262 260
263 if (gdp) { 261 if (gdp) {
264 spin_lock(sb_bgl_lock(sbi, block_group)); 262 ext4_lock_group(sb, block_group);
265 count = ext4_free_inodes_count(sb, gdp) + 1; 263 count = ext4_free_inodes_count(sb, gdp) + 1;
266 ext4_free_inodes_set(sb, gdp, count); 264 ext4_free_inodes_set(sb, gdp, count);
267 if (is_directory) { 265 if (is_directory) {
@@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
277 } 275 }
278 gdp->bg_checksum = ext4_group_desc_csum(sbi, 276 gdp->bg_checksum = ext4_group_desc_csum(sbi,
279 block_group, gdp); 277 block_group, gdp);
280 spin_unlock(sb_bgl_lock(sbi, block_group)); 278 ext4_unlock_group(sb, block_group);
281 percpu_counter_inc(&sbi->s_freeinodes_counter); 279 percpu_counter_inc(&sbi->s_freeinodes_counter);
282 if (is_directory) 280 if (is_directory)
283 percpu_counter_dec(&sbi->s_dirs_counter); 281 percpu_counter_dec(&sbi->s_dirs_counter);
@@ -316,7 +314,7 @@ error_return:
316static int find_group_dir(struct super_block *sb, struct inode *parent, 314static int find_group_dir(struct super_block *sb, struct inode *parent,
317 ext4_group_t *best_group) 315 ext4_group_t *best_group)
318{ 316{
319 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 317 ext4_group_t ngroups = ext4_get_groups_count(sb);
320 unsigned int freei, avefreei; 318 unsigned int freei, avefreei;
321 struct ext4_group_desc *desc, *best_desc = NULL; 319 struct ext4_group_desc *desc, *best_desc = NULL;
322 ext4_group_t group; 320 ext4_group_t group;
@@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
349{ 347{
350 struct ext4_sb_info *sbi = EXT4_SB(sb); 348 struct ext4_sb_info *sbi = EXT4_SB(sb);
351 struct ext4_group_desc *desc; 349 struct ext4_group_desc *desc;
352 struct buffer_head *bh;
353 struct flex_groups *flex_group = sbi->s_flex_groups; 350 struct flex_groups *flex_group = sbi->s_flex_groups;
354 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 351 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
355 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); 352 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
356 ext4_group_t ngroups = sbi->s_groups_count; 353 ext4_group_t ngroups = ext4_get_groups_count(sb);
357 int flex_size = ext4_flex_bg_size(sbi); 354 int flex_size = ext4_flex_bg_size(sbi);
358 ext4_group_t best_flex = parent_fbg_group; 355 ext4_group_t best_flex = parent_fbg_group;
359 int blocks_per_flex = sbi->s_blocks_per_group * flex_size; 356 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
362 ext4_group_t n_fbg_groups; 359 ext4_group_t n_fbg_groups;
363 ext4_group_t i; 360 ext4_group_t i;
364 361
365 n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> 362 n_fbg_groups = (ngroups + flex_size - 1) >>
366 sbi->s_log_groups_per_flex; 363 sbi->s_log_groups_per_flex;
367 364
368find_close_to_parent: 365find_close_to_parent:
@@ -404,7 +401,7 @@ find_close_to_parent:
404found_flexbg: 401found_flexbg:
405 for (i = best_flex * flex_size; i < ngroups && 402 for (i = best_flex * flex_size; i < ngroups &&
406 i < (best_flex + 1) * flex_size; i++) { 403 i < (best_flex + 1) * flex_size; i++) {
407 desc = ext4_get_group_desc(sb, i, &bh); 404 desc = ext4_get_group_desc(sb, i, NULL);
408 if (ext4_free_inodes_count(sb, desc)) { 405 if (ext4_free_inodes_count(sb, desc)) {
409 *best_group = i; 406 *best_group = i;
410 goto out; 407 goto out;
@@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
478{ 475{
479 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 476 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
480 struct ext4_sb_info *sbi = EXT4_SB(sb); 477 struct ext4_sb_info *sbi = EXT4_SB(sb);
481 ext4_group_t ngroups = sbi->s_groups_count; 478 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
482 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 479 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
483 unsigned int freei, avefreei; 480 unsigned int freei, avefreei;
484 ext4_fsblk_t freeb, avefreeb; 481 ext4_fsblk_t freeb, avefreeb;
485 unsigned int ndirs; 482 unsigned int ndirs;
486 int max_dirs, min_inodes; 483 int max_dirs, min_inodes;
487 ext4_grpblk_t min_blocks; 484 ext4_grpblk_t min_blocks;
488 ext4_group_t i, grp, g; 485 ext4_group_t i, grp, g, ngroups;
489 struct ext4_group_desc *desc; 486 struct ext4_group_desc *desc;
490 struct orlov_stats stats; 487 struct orlov_stats stats;
491 int flex_size = ext4_flex_bg_size(sbi); 488 int flex_size = ext4_flex_bg_size(sbi);
492 489
490 ngroups = real_ngroups;
493 if (flex_size > 1) { 491 if (flex_size > 1) {
494 ngroups = (ngroups + flex_size - 1) >> 492 ngroups = (real_ngroups + flex_size - 1) >>
495 sbi->s_log_groups_per_flex; 493 sbi->s_log_groups_per_flex;
496 parent_group >>= sbi->s_log_groups_per_flex; 494 parent_group >>= sbi->s_log_groups_per_flex;
497 } 495 }
@@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
543 */ 541 */
544 grp *= flex_size; 542 grp *= flex_size;
545 for (i = 0; i < flex_size; i++) { 543 for (i = 0; i < flex_size; i++) {
546 if (grp+i >= sbi->s_groups_count) 544 if (grp+i >= real_ngroups)
547 break; 545 break;
548 desc = ext4_get_group_desc(sb, grp+i, NULL); 546 desc = ext4_get_group_desc(sb, grp+i, NULL);
549 if (desc && ext4_free_inodes_count(sb, desc)) { 547 if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
583 } 581 }
584 582
585fallback: 583fallback:
586 ngroups = sbi->s_groups_count; 584 ngroups = real_ngroups;
587 avefreei = freei / ngroups; 585 avefreei = freei / ngroups;
588fallback_retry: 586fallback_retry:
589 parent_group = EXT4_I(parent)->i_block_group; 587 parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
613 ext4_group_t *group, int mode) 611 ext4_group_t *group, int mode)
614{ 612{
615 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 613 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
616 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 614 ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
617 struct ext4_group_desc *desc; 615 struct ext4_group_desc *desc;
618 ext4_group_t i, last;
619 int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); 616 int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
620 617
621 /* 618 /*
@@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
708 705
709/* 706/*
710 * claim the inode from the inode bitmap. If the group 707 * claim the inode from the inode bitmap. If the group
711 * is uninit we need to take the groups's sb_bgl_lock 708 * is uninit we need to take the groups's ext4_group_lock
712 * and clear the uninit flag. The inode bitmap update 709 * and clear the uninit flag. The inode bitmap update
713 * and group desc uninit flag clear should be done 710 * and group desc uninit flag clear should be done
714 * after holding sb_bgl_lock so that ext4_read_inode_bitmap 711 * after holding ext4_group_lock so that ext4_read_inode_bitmap
715 * doesn't race with the ext4_claim_inode 712 * doesn't race with the ext4_claim_inode
716 */ 713 */
717static int ext4_claim_inode(struct super_block *sb, 714static int ext4_claim_inode(struct super_block *sb,
@@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb,
722 struct ext4_sb_info *sbi = EXT4_SB(sb); 719 struct ext4_sb_info *sbi = EXT4_SB(sb);
723 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 720 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
724 721
725 spin_lock(sb_bgl_lock(sbi, group)); 722 ext4_lock_group(sb, group);
726 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 723 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
727 /* not a free inode */ 724 /* not a free inode */
728 retval = 1; 725 retval = 1;
@@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb,
731 ino++; 728 ino++;
732 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 729 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
733 ino > EXT4_INODES_PER_GROUP(sb)) { 730 ino > EXT4_INODES_PER_GROUP(sb)) {
734 spin_unlock(sb_bgl_lock(sbi, group)); 731 ext4_unlock_group(sb, group);
735 ext4_error(sb, __func__, 732 ext4_error(sb, __func__,
736 "reserved inode or inode > inodes count - " 733 "reserved inode or inode > inodes count - "
737 "block_group = %u, inode=%lu", group, 734 "block_group = %u, inode=%lu", group,
@@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb,
780 } 777 }
781 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 778 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
782err_ret: 779err_ret:
783 spin_unlock(sb_bgl_lock(sbi, group)); 780 ext4_unlock_group(sb, group);
784 return retval; 781 return retval;
785} 782}
786 783
@@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
799 struct super_block *sb; 796 struct super_block *sb;
800 struct buffer_head *inode_bitmap_bh = NULL; 797 struct buffer_head *inode_bitmap_bh = NULL;
801 struct buffer_head *group_desc_bh; 798 struct buffer_head *group_desc_bh;
802 ext4_group_t group = 0; 799 ext4_group_t ngroups, group = 0;
803 unsigned long ino = 0; 800 unsigned long ino = 0;
804 struct inode *inode; 801 struct inode *inode;
805 struct ext4_group_desc *gdp = NULL; 802 struct ext4_group_desc *gdp = NULL;
806 struct ext4_super_block *es;
807 struct ext4_inode_info *ei; 803 struct ext4_inode_info *ei;
808 struct ext4_sb_info *sbi; 804 struct ext4_sb_info *sbi;
809 int ret2, err = 0; 805 int ret2, err = 0;
@@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
818 return ERR_PTR(-EPERM); 814 return ERR_PTR(-EPERM);
819 815
820 sb = dir->i_sb; 816 sb = dir->i_sb;
817 ngroups = ext4_get_groups_count(sb);
821 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, 818 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
822 dir->i_ino, mode); 819 dir->i_ino, mode);
823 inode = new_inode(sb); 820 inode = new_inode(sb);
824 if (!inode) 821 if (!inode)
825 return ERR_PTR(-ENOMEM); 822 return ERR_PTR(-ENOMEM);
826 ei = EXT4_I(inode); 823 ei = EXT4_I(inode);
827
828 sbi = EXT4_SB(sb); 824 sbi = EXT4_SB(sb);
829 es = sbi->s_es;
830 825
831 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 826 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
832 ret2 = find_group_flex(sb, dir, &group); 827 ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +851,7 @@ got_group:
856 if (ret2 == -1) 851 if (ret2 == -1)
857 goto out; 852 goto out;
858 853
859 for (i = 0; i < sbi->s_groups_count; i++) { 854 for (i = 0; i < ngroups; i++) {
860 err = -EIO; 855 err = -EIO;
861 856
862 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 857 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +912,7 @@ repeat_in_this_group:
917 * group descriptor metadata has not yet been updated. 912 * group descriptor metadata has not yet been updated.
918 * So we just go onto the next blockgroup. 913 * So we just go onto the next blockgroup.
919 */ 914 */
920 if (++group == sbi->s_groups_count) 915 if (++group == ngroups)
921 group = 0; 916 group = 0;
922 } 917 }
923 err = -ENOSPC; 918 err = -ENOSPC;
@@ -938,7 +933,7 @@ got:
938 } 933 }
939 934
940 free = 0; 935 free = 0;
941 spin_lock(sb_bgl_lock(sbi, group)); 936 ext4_lock_group(sb, group);
942 /* recheck and clear flag under lock if we still need to */ 937 /* recheck and clear flag under lock if we still need to */
943 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 938 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
944 free = ext4_free_blocks_after_init(sb, group, gdp); 939 free = ext4_free_blocks_after_init(sb, group, gdp);
@@ -947,7 +942,7 @@ got:
947 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, 942 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
948 gdp); 943 gdp);
949 } 944 }
950 spin_unlock(sb_bgl_lock(sbi, group)); 945 ext4_unlock_group(sb, group);
951 946
952 /* Don't need to dirty bitmap block if we didn't change it */ 947 /* Don't need to dirty bitmap block if we didn't change it */
953 if (free) { 948 if (free) {
@@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1158{ 1153{
1159 unsigned long desc_count; 1154 unsigned long desc_count;
1160 struct ext4_group_desc *gdp; 1155 struct ext4_group_desc *gdp;
1161 ext4_group_t i; 1156 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1162#ifdef EXT4FS_DEBUG 1157#ifdef EXT4FS_DEBUG
1163 struct ext4_super_block *es; 1158 struct ext4_super_block *es;
1164 unsigned long bitmap_count, x; 1159 unsigned long bitmap_count, x;
@@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1168 desc_count = 0; 1163 desc_count = 0;
1169 bitmap_count = 0; 1164 bitmap_count = 0;
1170 gdp = NULL; 1165 gdp = NULL;
1171 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1166 for (i = 0; i < ngroups; i++) {
1172 gdp = ext4_get_group_desc(sb, i, NULL); 1167 gdp = ext4_get_group_desc(sb, i, NULL);
1173 if (!gdp) 1168 if (!gdp)
1174 continue; 1169 continue;
@@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1190 return desc_count; 1185 return desc_count;
1191#else 1186#else
1192 desc_count = 0; 1187 desc_count = 0;
1193 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1188 for (i = 0; i < ngroups; i++) {
1194 gdp = ext4_get_group_desc(sb, i, NULL); 1189 gdp = ext4_get_group_desc(sb, i, NULL);
1195 if (!gdp) 1190 if (!gdp)
1196 continue; 1191 continue;
@@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1205unsigned long ext4_count_dirs(struct super_block * sb) 1200unsigned long ext4_count_dirs(struct super_block * sb)
1206{ 1201{
1207 unsigned long count = 0; 1202 unsigned long count = 0;
1208 ext4_group_t i; 1203 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1209 1204
1210 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1205 for (i = 0; i < ngroups; i++) {
1211 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1206 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1212 if (!gdp) 1207 if (!gdp)
1213 continue; 1208 continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a9ffd528dd1..875db944b22f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode,
372} 372}
373 373
374static int __ext4_check_blockref(const char *function, struct inode *inode, 374static int __ext4_check_blockref(const char *function, struct inode *inode,
375 __le32 *p, unsigned int max) { 375 __le32 *p, unsigned int max)
376 376{
377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
378 __le32 *bref = p; 377 __le32 *bref = p;
378 unsigned int blk;
379
379 while (bref < p+max) { 380 while (bref < p+max) {
380 if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { 381 blk = le32_to_cpu(*bref++);
382 if (blk &&
383 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
384 blk, 1))) {
381 ext4_error(inode->i_sb, function, 385 ext4_error(inode->i_sb, function,
382 "block reference %u >= max (%u) " 386 "invalid block reference %u "
383 "in inode #%lu, offset=%d", 387 "in inode #%lu", blk, inode->i_ino);
384 le32_to_cpu(*bref), maxblocks,
385 inode->i_ino, (int)(bref-p));
386 return -EIO; 388 return -EIO;
387 } 389 }
388 bref++;
389 } 390 }
390 return 0; 391 return 0;
391} 392}
@@ -892,6 +893,10 @@ err_out:
892} 893}
893 894
894/* 895/*
896 * The ext4_ind_get_blocks() function handles non-extents inodes
897 * (i.e., using the traditional indirect/double-indirect i_blocks
898 * scheme) for ext4_get_blocks().
899 *
895 * Allocation strategy is simple: if we have to allocate something, we will 900 * Allocation strategy is simple: if we have to allocate something, we will
896 * have to go the whole way to leaf. So let's do it before attaching anything 901 * have to go the whole way to leaf. So let's do it before attaching anything
897 * to tree, set linkage between the newborn blocks, write them if sync is 902 * to tree, set linkage between the newborn blocks, write them if sync is
@@ -909,15 +914,16 @@ err_out:
909 * return = 0, if plain lookup failed. 914 * return = 0, if plain lookup failed.
910 * return < 0, error case. 915 * return < 0, error case.
911 * 916 *
912 * 917 * The ext4_ind_get_blocks() function should be called with
913 * Need to be called with 918 * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
914 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block 919 * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
915 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) 920 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
921 * blocks.
916 */ 922 */
917static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, 923static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
918 ext4_lblk_t iblock, unsigned int maxblocks, 924 ext4_lblk_t iblock, unsigned int maxblocks,
919 struct buffer_head *bh_result, 925 struct buffer_head *bh_result,
920 int create, int extend_disksize) 926 int flags)
921{ 927{
922 int err = -EIO; 928 int err = -EIO;
923 ext4_lblk_t offsets[4]; 929 ext4_lblk_t offsets[4];
@@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
927 int indirect_blks; 933 int indirect_blks;
928 int blocks_to_boundary = 0; 934 int blocks_to_boundary = 0;
929 int depth; 935 int depth;
930 struct ext4_inode_info *ei = EXT4_I(inode);
931 int count = 0; 936 int count = 0;
932 ext4_fsblk_t first_block = 0; 937 ext4_fsblk_t first_block = 0;
933 loff_t disksize;
934
935 938
936 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); 939 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
937 J_ASSERT(handle != NULL || create == 0); 940 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
938 depth = ext4_block_to_path(inode, iblock, offsets, 941 depth = ext4_block_to_path(inode, iblock, offsets,
939 &blocks_to_boundary); 942 &blocks_to_boundary);
940 943
@@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
963 } 966 }
964 967
965 /* Next simple case - plain lookup or failed read of indirect block */ 968 /* Next simple case - plain lookup or failed read of indirect block */
966 if (!create || err == -EIO) 969 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
967 goto cleanup; 970 goto cleanup;
968 971
969 /* 972 /*
@@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
997 if (!err) 1000 if (!err)
998 err = ext4_splice_branch(handle, inode, iblock, 1001 err = ext4_splice_branch(handle, inode, iblock,
999 partial, indirect_blks, count); 1002 partial, indirect_blks, count);
1000 /* 1003 else
1001 * i_disksize growing is protected by i_data_sem. Don't forget to
1002 * protect it if you're about to implement concurrent
1003 * ext4_get_block() -bzzz
1004 */
1005 if (!err && extend_disksize) {
1006 disksize = ((loff_t) iblock + count) << inode->i_blkbits;
1007 if (disksize > i_size_read(inode))
1008 disksize = i_size_read(inode);
1009 if (disksize > ei->i_disksize)
1010 ei->i_disksize = disksize;
1011 }
1012 if (err)
1013 goto cleanup; 1004 goto cleanup;
1014 1005
1015 set_buffer_new(bh_result); 1006 set_buffer_new(bh_result);
@@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1120 ext4_discard_preallocations(inode); 1111 ext4_discard_preallocations(inode);
1121} 1112}
1122 1113
1114static int check_block_validity(struct inode *inode, sector_t logical,
1115 sector_t phys, int len)
1116{
1117 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1118 ext4_error(inode->i_sb, "check_block_validity",
1119 "inode #%lu logical block %llu mapped to %llu "
1120 "(size %d)", inode->i_ino,
1121 (unsigned long long) logical,
1122 (unsigned long long) phys, len);
1123 WARN_ON(1);
1124 return -EIO;
1125 }
1126 return 0;
1127}
1128
1123/* 1129/*
1124 * The ext4_get_blocks_wrap() function try to look up the requested blocks, 1130 * The ext4_get_blocks() function tries to look up the requested blocks,
1125 * and returns if the blocks are already mapped. 1131 * and returns if the blocks are already mapped.
1126 * 1132 *
1127 * Otherwise it takes the write lock of the i_data_sem and allocate blocks 1133 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
@@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1129 * mapped. 1135 * mapped.
1130 * 1136 *
1131 * If file type is extents based, it will call ext4_ext_get_blocks(), 1137 * If file type is extents based, it will call ext4_ext_get_blocks(),
1132 * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping 1138 * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
1133 * based files 1139 * based files
1134 * 1140 *
1135 * On success, it returns the number of blocks being mapped or allocate. 1141 * On success, it returns the number of blocks being mapped or allocate.
@@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1142 * 1148 *
1143 * It returns the error in case of allocation failure. 1149 * It returns the error in case of allocation failure.
1144 */ 1150 */
1145int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 1151int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1146 unsigned int max_blocks, struct buffer_head *bh, 1152 unsigned int max_blocks, struct buffer_head *bh,
1147 int create, int extend_disksize, int flag) 1153 int flags)
1148{ 1154{
1149 int retval; 1155 int retval;
1150 1156
@@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1152 clear_buffer_unwritten(bh); 1158 clear_buffer_unwritten(bh);
1153 1159
1154 /* 1160 /*
1155 * Try to see if we can get the block without requesting 1161 * Try to see if we can get the block without requesting a new
1156 * for new file system block. 1162 * file system block.
1157 */ 1163 */
1158 down_read((&EXT4_I(inode)->i_data_sem)); 1164 down_read((&EXT4_I(inode)->i_data_sem));
1159 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 1165 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
1160 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, 1166 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
1161 bh, 0, 0); 1167 bh, 0);
1162 } else { 1168 } else {
1163 retval = ext4_get_blocks_handle(handle, 1169 retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
1164 inode, block, max_blocks, bh, 0, 0); 1170 bh, 0);
1165 } 1171 }
1166 up_read((&EXT4_I(inode)->i_data_sem)); 1172 up_read((&EXT4_I(inode)->i_data_sem));
1167 1173
1174 if (retval > 0 && buffer_mapped(bh)) {
1175 int ret = check_block_validity(inode, block,
1176 bh->b_blocknr, retval);
1177 if (ret != 0)
1178 return ret;
1179 }
1180
1168 /* If it is only a block(s) look up */ 1181 /* If it is only a block(s) look up */
1169 if (!create) 1182 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
1170 return retval; 1183 return retval;
1171 1184
1172 /* 1185 /*
@@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1205 * let the underlying get_block() function know to 1218 * let the underlying get_block() function know to
1206 * avoid double accounting 1219 * avoid double accounting
1207 */ 1220 */
1208 if (flag) 1221 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1209 EXT4_I(inode)->i_delalloc_reserved_flag = 1; 1222 EXT4_I(inode)->i_delalloc_reserved_flag = 1;
1210 /* 1223 /*
1211 * We need to check for EXT4 here because migrate 1224 * We need to check for EXT4 here because migrate
@@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1213 */ 1226 */
1214 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 1227 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
1215 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, 1228 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
1216 bh, create, extend_disksize); 1229 bh, flags);
1217 } else { 1230 } else {
1218 retval = ext4_get_blocks_handle(handle, inode, block, 1231 retval = ext4_ind_get_blocks(handle, inode, block,
1219 max_blocks, bh, create, extend_disksize); 1232 max_blocks, bh, flags);
1220 1233
1221 if (retval > 0 && buffer_new(bh)) { 1234 if (retval > 0 && buffer_new(bh)) {
1222 /* 1235 /*
@@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1229 } 1242 }
1230 } 1243 }
1231 1244
1232 if (flag) { 1245 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1233 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1246 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1234 /* 1247
1235 * Update reserved blocks/metadata blocks 1248 /*
1236 * after successful block allocation 1249 * Update reserved blocks/metadata blocks after successful
1237 * which were deferred till now 1250 * block allocation which had been deferred till now.
1238 */ 1251 */
1239 if ((retval > 0) && buffer_delay(bh)) 1252 if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
1240 ext4_da_update_reserve_space(inode, retval); 1253 ext4_da_update_reserve_space(inode, retval);
1241 }
1242 1254
1243 up_write((&EXT4_I(inode)->i_data_sem)); 1255 up_write((&EXT4_I(inode)->i_data_sem));
1256 if (retval > 0 && buffer_mapped(bh)) {
1257 int ret = check_block_validity(inode, block,
1258 bh->b_blocknr, retval);
1259 if (ret != 0)
1260 return ret;
1261 }
1244 return retval; 1262 return retval;
1245} 1263}
1246 1264
@@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
1268 started = 1; 1286 started = 1;
1269 } 1287 }
1270 1288
1271 ret = ext4_get_blocks_wrap(handle, inode, iblock, 1289 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
1272 max_blocks, bh_result, create, 0, 0); 1290 create ? EXT4_GET_BLOCKS_CREATE : 0);
1273 if (ret > 0) { 1291 if (ret > 0) {
1274 bh_result->b_size = (ret << inode->i_blkbits); 1292 bh_result->b_size = (ret << inode->i_blkbits);
1275 ret = 0; 1293 ret = 0;
@@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1288{ 1306{
1289 struct buffer_head dummy; 1307 struct buffer_head dummy;
1290 int fatal = 0, err; 1308 int fatal = 0, err;
1309 int flags = 0;
1291 1310
1292 J_ASSERT(handle != NULL || create == 0); 1311 J_ASSERT(handle != NULL || create == 0);
1293 1312
1294 dummy.b_state = 0; 1313 dummy.b_state = 0;
1295 dummy.b_blocknr = -1000; 1314 dummy.b_blocknr = -1000;
1296 buffer_trace_init(&dummy.b_history); 1315 buffer_trace_init(&dummy.b_history);
1297 err = ext4_get_blocks_wrap(handle, inode, block, 1, 1316 if (create)
1298 &dummy, create, 1, 0); 1317 flags |= EXT4_GET_BLOCKS_CREATE;
1318 err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
1299 /* 1319 /*
1300 * ext4_get_blocks_handle() returns number of blocks 1320 * ext4_get_blocks() returns number of blocks mapped. 0 in
1301 * mapped. 0 in case of a HOLE. 1321 * case of a HOLE.
1302 */ 1322 */
1303 if (err > 0) { 1323 if (err > 0) {
1304 if (err > 1) 1324 if (err > 1)
@@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1439 struct page **pagep, void **fsdata) 1459 struct page **pagep, void **fsdata)
1440{ 1460{
1441 struct inode *inode = mapping->host; 1461 struct inode *inode = mapping->host;
1442 int ret, needed_blocks = ext4_writepage_trans_blocks(inode); 1462 int ret, needed_blocks;
1443 handle_t *handle; 1463 handle_t *handle;
1444 int retries = 0; 1464 int retries = 0;
1445 struct page *page; 1465 struct page *page;
@@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1450 "dev %s ino %lu pos %llu len %u flags %u", 1470 "dev %s ino %lu pos %llu len %u flags %u",
1451 inode->i_sb->s_id, inode->i_ino, 1471 inode->i_sb->s_id, inode->i_ino,
1452 (unsigned long long) pos, len, flags); 1472 (unsigned long long) pos, len, flags);
1473 /*
1474 * Reserve one block more for addition to orphan list in case
1475 * we allocate blocks but write fails for some reason
1476 */
1477 needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
1453 index = pos >> PAGE_CACHE_SHIFT; 1478 index = pos >> PAGE_CACHE_SHIFT;
1454 from = pos & (PAGE_CACHE_SIZE - 1); 1479 from = pos & (PAGE_CACHE_SIZE - 1);
1455 to = from + len; 1480 to = from + len;
@@ -1483,15 +1508,30 @@ retry:
1483 1508
1484 if (ret) { 1509 if (ret) {
1485 unlock_page(page); 1510 unlock_page(page);
1486 ext4_journal_stop(handle);
1487 page_cache_release(page); 1511 page_cache_release(page);
1488 /* 1512 /*
1489 * block_write_begin may have instantiated a few blocks 1513 * block_write_begin may have instantiated a few blocks
1490 * outside i_size. Trim these off again. Don't need 1514 * outside i_size. Trim these off again. Don't need
1491 * i_size_read because we hold i_mutex. 1515 * i_size_read because we hold i_mutex.
1516 *
1517 * Add inode to orphan list in case we crash before
1518 * truncate finishes
1492 */ 1519 */
1493 if (pos + len > inode->i_size) 1520 if (pos + len > inode->i_size)
1521 ext4_orphan_add(handle, inode);
1522
1523 ext4_journal_stop(handle);
1524 if (pos + len > inode->i_size) {
1494 vmtruncate(inode, inode->i_size); 1525 vmtruncate(inode, inode->i_size);
1526 /*
1527 * If vmtruncate failed early the inode might
1528 * still be on the orphan list; we need to
1529 * make sure the inode is removed from the
1530 * orphan list in that case.
1531 */
1532 if (inode->i_nlink)
1533 ext4_orphan_del(NULL, inode);
1534 }
1495 } 1535 }
1496 1536
1497 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 1537 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1509 return ext4_handle_dirty_metadata(handle, NULL, bh); 1549 return ext4_handle_dirty_metadata(handle, NULL, bh);
1510} 1550}
1511 1551
1552static int ext4_generic_write_end(struct file *file,
1553 struct address_space *mapping,
1554 loff_t pos, unsigned len, unsigned copied,
1555 struct page *page, void *fsdata)
1556{
1557 int i_size_changed = 0;
1558 struct inode *inode = mapping->host;
1559 handle_t *handle = ext4_journal_current_handle();
1560
1561 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1562
1563 /*
1564 * No need to use i_size_read() here, the i_size
1565 * cannot change under us because we hold i_mutex.
1566 *
1567 * But it's important to update i_size while still holding page lock:
1568 * page writeout could otherwise come in and zero beyond i_size.
1569 */
1570 if (pos + copied > inode->i_size) {
1571 i_size_write(inode, pos + copied);
1572 i_size_changed = 1;
1573 }
1574
1575 if (pos + copied > EXT4_I(inode)->i_disksize) {
1576 /* We need to mark inode dirty even if
1577 * new_i_size is less that inode->i_size
1578 * bu greater than i_disksize.(hint delalloc)
1579 */
1580 ext4_update_i_disksize(inode, (pos + copied));
1581 i_size_changed = 1;
1582 }
1583 unlock_page(page);
1584 page_cache_release(page);
1585
1586 /*
1587 * Don't mark the inode dirty under page lock. First, it unnecessarily
1588 * makes the holding time of page lock longer. Second, it forces lock
1589 * ordering of page lock and transaction start for journaling
1590 * filesystems.
1591 */
1592 if (i_size_changed)
1593 ext4_mark_inode_dirty(handle, inode);
1594
1595 return copied;
1596}
1597
1512/* 1598/*
1513 * We need to pick up the new inode size which generic_commit_write gave us 1599 * We need to pick up the new inode size which generic_commit_write gave us
1514 * `file' can be NULL - eg, when called from page_symlink(). 1600 * `file' can be NULL - eg, when called from page_symlink().
@@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file,
1532 ret = ext4_jbd2_file_inode(handle, inode); 1618 ret = ext4_jbd2_file_inode(handle, inode);
1533 1619
1534 if (ret == 0) { 1620 if (ret == 0) {
1535 loff_t new_i_size; 1621 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1536
1537 new_i_size = pos + copied;
1538 if (new_i_size > EXT4_I(inode)->i_disksize) {
1539 ext4_update_i_disksize(inode, new_i_size);
1540 /* We need to mark inode dirty even if
1541 * new_i_size is less that inode->i_size
1542 * bu greater than i_disksize.(hint delalloc)
1543 */
1544 ext4_mark_inode_dirty(handle, inode);
1545 }
1546
1547 ret2 = generic_write_end(file, mapping, pos, len, copied,
1548 page, fsdata); 1622 page, fsdata);
1549 copied = ret2; 1623 copied = ret2;
1624 if (pos + len > inode->i_size)
1625 /* if we have allocated more blocks and copied
1626 * less. We will have blocks allocated outside
1627 * inode->i_size. So truncate them
1628 */
1629 ext4_orphan_add(handle, inode);
1550 if (ret2 < 0) 1630 if (ret2 < 0)
1551 ret = ret2; 1631 ret = ret2;
1552 } 1632 }
@@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file,
1554 if (!ret) 1634 if (!ret)
1555 ret = ret2; 1635 ret = ret2;
1556 1636
1637 if (pos + len > inode->i_size) {
1638 vmtruncate(inode, inode->i_size);
1639 /*
1640 * If vmtruncate failed early the inode might still be
1641 * on the orphan list; we need to make sure the inode
1642 * is removed from the orphan list in that case.
1643 */
1644 if (inode->i_nlink)
1645 ext4_orphan_del(NULL, inode);
1646 }
1647
1648
1557 return ret ? ret : copied; 1649 return ret ? ret : copied;
1558} 1650}
1559 1651
@@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file,
1565 handle_t *handle = ext4_journal_current_handle(); 1657 handle_t *handle = ext4_journal_current_handle();
1566 struct inode *inode = mapping->host; 1658 struct inode *inode = mapping->host;
1567 int ret = 0, ret2; 1659 int ret = 0, ret2;
1568 loff_t new_i_size;
1569 1660
1570 trace_mark(ext4_writeback_write_end, 1661 trace_mark(ext4_writeback_write_end,
1571 "dev %s ino %lu pos %llu len %u copied %u", 1662 "dev %s ino %lu pos %llu len %u copied %u",
1572 inode->i_sb->s_id, inode->i_ino, 1663 inode->i_sb->s_id, inode->i_ino,
1573 (unsigned long long) pos, len, copied); 1664 (unsigned long long) pos, len, copied);
1574 new_i_size = pos + copied; 1665 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1575 if (new_i_size > EXT4_I(inode)->i_disksize) {
1576 ext4_update_i_disksize(inode, new_i_size);
1577 /* We need to mark inode dirty even if
1578 * new_i_size is less that inode->i_size
1579 * bu greater than i_disksize.(hint delalloc)
1580 */
1581 ext4_mark_inode_dirty(handle, inode);
1582 }
1583
1584 ret2 = generic_write_end(file, mapping, pos, len, copied,
1585 page, fsdata); 1666 page, fsdata);
1586 copied = ret2; 1667 copied = ret2;
1668 if (pos + len > inode->i_size)
1669 /* if we have allocated more blocks and copied
1670 * less. We will have blocks allocated outside
1671 * inode->i_size. So truncate them
1672 */
1673 ext4_orphan_add(handle, inode);
1674
1587 if (ret2 < 0) 1675 if (ret2 < 0)
1588 ret = ret2; 1676 ret = ret2;
1589 1677
@@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file,
1591 if (!ret) 1679 if (!ret)
1592 ret = ret2; 1680 ret = ret2;
1593 1681
1682 if (pos + len > inode->i_size) {
1683 vmtruncate(inode, inode->i_size);
1684 /*
1685 * If vmtruncate failed early the inode might still be
1686 * on the orphan list; we need to make sure the inode
1687 * is removed from the orphan list in that case.
1688 */
1689 if (inode->i_nlink)
1690 ext4_orphan_del(NULL, inode);
1691 }
1692
1594 return ret ? ret : copied; 1693 return ret ? ret : copied;
1595} 1694}
1596 1695
@@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file,
1635 } 1734 }
1636 1735
1637 unlock_page(page); 1736 unlock_page(page);
1737 page_cache_release(page);
1738 if (pos + len > inode->i_size)
1739 /* if we have allocated more blocks and copied
1740 * less. We will have blocks allocated outside
1741 * inode->i_size. So truncate them
1742 */
1743 ext4_orphan_add(handle, inode);
1744
1638 ret2 = ext4_journal_stop(handle); 1745 ret2 = ext4_journal_stop(handle);
1639 if (!ret) 1746 if (!ret)
1640 ret = ret2; 1747 ret = ret2;
1641 page_cache_release(page); 1748 if (pos + len > inode->i_size) {
1749 vmtruncate(inode, inode->i_size);
1750 /*
1751 * If vmtruncate failed early the inode might still be
1752 * on the orphan list; we need to make sure the inode
1753 * is removed from the orphan list in that case.
1754 */
1755 if (inode->i_nlink)
1756 ext4_orphan_del(NULL, inode);
1757 }
1642 1758
1643 return ret ? ret : copied; 1759 return ret ? ret : copied;
1644} 1760}
@@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1852 * @logical - first logical block to start assignment with 1968 * @logical - first logical block to start assignment with
1853 * 1969 *
1854 * the function goes through all passed space and put actual disk 1970 * the function goes through all passed space and put actual disk
1855 * block numbers into buffer heads, dropping BH_Delay 1971 * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
1856 */ 1972 */
1857static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, 1973static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1858 struct buffer_head *exbh) 1974 struct buffer_head *exbh)
@@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1902 do { 2018 do {
1903 if (cur_logical >= logical + blocks) 2019 if (cur_logical >= logical + blocks)
1904 break; 2020 break;
1905 if (buffer_delay(bh)) { 2021
1906 bh->b_blocknr = pblock; 2022 if (buffer_delay(bh) ||
1907 clear_buffer_delay(bh); 2023 buffer_unwritten(bh)) {
1908 bh->b_bdev = inode->i_sb->s_bdev; 2024
1909 } else if (buffer_unwritten(bh)) { 2025 BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
1910 bh->b_blocknr = pblock; 2026
1911 clear_buffer_unwritten(bh); 2027 if (buffer_delay(bh)) {
1912 set_buffer_mapped(bh); 2028 clear_buffer_delay(bh);
1913 set_buffer_new(bh); 2029 bh->b_blocknr = pblock;
1914 bh->b_bdev = inode->i_sb->s_bdev; 2030 } else {
2031 /*
2032 * unwritten already should have
2033 * blocknr assigned. Verify that
2034 */
2035 clear_buffer_unwritten(bh);
2036 BUG_ON(bh->b_blocknr != pblock);
2037 }
2038
1915 } else if (buffer_mapped(bh)) 2039 } else if (buffer_mapped(bh))
1916 BUG_ON(bh->b_blocknr != pblock); 2040 BUG_ON(bh->b_blocknr != pblock);
1917 2041
@@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode)
1990 return; 2114 return;
1991} 2115}
1992 2116
1993#define EXT4_DELALLOC_RSVED 1
1994static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
1995 struct buffer_head *bh_result, int create)
1996{
1997 int ret;
1998 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1999 loff_t disksize = EXT4_I(inode)->i_disksize;
2000 handle_t *handle = NULL;
2001
2002 handle = ext4_journal_current_handle();
2003 BUG_ON(!handle);
2004 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2005 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2006 if (ret <= 0)
2007 return ret;
2008
2009 bh_result->b_size = (ret << inode->i_blkbits);
2010
2011 if (ext4_should_order_data(inode)) {
2012 int retval;
2013 retval = ext4_jbd2_file_inode(handle, inode);
2014 if (retval)
2015 /*
2016 * Failed to add inode for ordered mode. Don't
2017 * update file size
2018 */
2019 return retval;
2020 }
2021
2022 /*
2023 * Update on-disk size along with block allocation we don't
2024 * use 'extend_disksize' as size may change within already
2025 * allocated block -bzzz
2026 */
2027 disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
2028 if (disksize > i_size_read(inode))
2029 disksize = i_size_read(inode);
2030 if (disksize > EXT4_I(inode)->i_disksize) {
2031 ext4_update_i_disksize(inode, disksize);
2032 ret = ext4_mark_inode_dirty(handle, inode);
2033 return ret;
2034 }
2035 return 0;
2036}
2037
2038/* 2117/*
2039 * mpage_da_map_blocks - go through given space 2118 * mpage_da_map_blocks - go through given space
2040 * 2119 *
@@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2045 */ 2124 */
2046static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2125static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2047{ 2126{
2048 int err = 0; 2127 int err, blks, get_blocks_flags;
2049 struct buffer_head new; 2128 struct buffer_head new;
2050 sector_t next; 2129 sector_t next = mpd->b_blocknr;
2130 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
2131 loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
2132 handle_t *handle = NULL;
2051 2133
2052 /* 2134 /*
2053 * We consider only non-mapped and non-allocated blocks 2135 * We consider only non-mapped and non-allocated blocks
2054 */ 2136 */
2055 if ((mpd->b_state & (1 << BH_Mapped)) && 2137 if ((mpd->b_state & (1 << BH_Mapped)) &&
2056 !(mpd->b_state & (1 << BH_Delay))) 2138 !(mpd->b_state & (1 << BH_Delay)) &&
2139 !(mpd->b_state & (1 << BH_Unwritten)))
2057 return 0; 2140 return 0;
2058 new.b_state = mpd->b_state; 2141
2059 new.b_blocknr = 0;
2060 new.b_size = mpd->b_size;
2061 next = mpd->b_blocknr;
2062 /* 2142 /*
2063 * If we didn't accumulate anything 2143 * If we didn't accumulate anything to write simply return
2064 * to write simply return
2065 */ 2144 */
2066 if (!new.b_size) 2145 if (!mpd->b_size)
2067 return 0; 2146 return 0;
2068 2147
2069 err = ext4_da_get_block_write(mpd->inode, next, &new, 1); 2148 handle = ext4_journal_current_handle();
2070 if (err) { 2149 BUG_ON(!handle);
2150
2151 /*
2152 * Call ext4_get_blocks() to allocate any delayed allocation
2153 * blocks, or to convert an uninitialized extent to be
2154 * initialized (in the case where we have written into
2155 * one or more preallocated blocks).
2156 *
2157 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
2158 * indicate that we are on the delayed allocation path. This
2159 * affects functions in many different parts of the allocation
2160 * call path. This flag exists primarily because we don't
2161 * want to change *many* call functions, so ext4_get_blocks()
2162 * will set the magic i_delalloc_reserved_flag once the
2163 * inode's allocation semaphore is taken.
2164 *
2165 * If the blocks in questions were delalloc blocks, set
2166 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
2167 * variables are updated after the blocks have been allocated.
2168 */
2169 new.b_state = 0;
2170 get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
2171 EXT4_GET_BLOCKS_DELALLOC_RESERVE);
2172 if (mpd->b_state & (1 << BH_Delay))
2173 get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
2174 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
2175 &new, get_blocks_flags);
2176 if (blks < 0) {
2177 err = blks;
2071 /* 2178 /*
2072 * If get block returns with error we simply 2179 * If get block returns with error we simply
2073 * return. Later writepage will redirty the page and 2180 * return. Later writepage will redirty the page and
@@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2100 if (err == -ENOSPC) { 2207 if (err == -ENOSPC) {
2101 ext4_print_free_blocks(mpd->inode); 2208 ext4_print_free_blocks(mpd->inode);
2102 } 2209 }
2103 /* invlaidate all the pages */ 2210 /* invalidate all the pages */
2104 ext4_da_block_invalidatepages(mpd, next, 2211 ext4_da_block_invalidatepages(mpd, next,
2105 mpd->b_size >> mpd->inode->i_blkbits); 2212 mpd->b_size >> mpd->inode->i_blkbits);
2106 return err; 2213 return err;
2107 } 2214 }
2108 BUG_ON(new.b_size == 0); 2215 BUG_ON(blks == 0);
2216
2217 new.b_size = (blks << mpd->inode->i_blkbits);
2109 2218
2110 if (buffer_new(&new)) 2219 if (buffer_new(&new))
2111 __unmap_underlying_blocks(mpd->inode, &new); 2220 __unmap_underlying_blocks(mpd->inode, &new);
@@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2118 (mpd->b_state & (1 << BH_Unwritten))) 2227 (mpd->b_state & (1 << BH_Unwritten)))
2119 mpage_put_bnr_to_bhs(mpd, next, &new); 2228 mpage_put_bnr_to_bhs(mpd, next, &new);
2120 2229
2230 if (ext4_should_order_data(mpd->inode)) {
2231 err = ext4_jbd2_file_inode(handle, mpd->inode);
2232 if (err)
2233 return err;
2234 }
2235
2236 /*
2237 * Update on-disk size along with block allocation.
2238 */
2239 disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
2240 if (disksize > i_size_read(mpd->inode))
2241 disksize = i_size_read(mpd->inode);
2242 if (disksize > EXT4_I(mpd->inode)->i_disksize) {
2243 ext4_update_i_disksize(mpd->inode, disksize);
2244 return ext4_mark_inode_dirty(handle, mpd->inode);
2245 }
2246
2121 return 0; 2247 return 0;
2122} 2248}
2123 2249
@@ -2192,6 +2318,17 @@ flush_it:
2192 return; 2318 return;
2193} 2319}
2194 2320
2321static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2322{
2323 /*
2324 * unmapped buffer is possible for holes.
2325 * delay buffer is possible with delayed allocation.
2326 * We also need to consider unwritten buffer as unmapped.
2327 */
2328 return (!buffer_mapped(bh) || buffer_delay(bh) ||
2329 buffer_unwritten(bh)) && buffer_dirty(bh);
2330}
2331
2195/* 2332/*
2196 * __mpage_da_writepage - finds extent of pages and blocks 2333 * __mpage_da_writepage - finds extent of pages and blocks
2197 * 2334 *
@@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page,
2276 * Otherwise we won't make progress 2413 * Otherwise we won't make progress
2277 * with the page in ext4_da_writepage 2414 * with the page in ext4_da_writepage
2278 */ 2415 */
2279 if (buffer_dirty(bh) && 2416 if (ext4_bh_unmapped_or_delay(NULL, bh)) {
2280 (!buffer_mapped(bh) || buffer_delay(bh))) {
2281 mpage_add_bh_to_extent(mpd, logical, 2417 mpage_add_bh_to_extent(mpd, logical,
2282 bh->b_size, 2418 bh->b_size,
2283 bh->b_state); 2419 bh->b_state);
@@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page,
2303} 2439}
2304 2440
2305/* 2441/*
2306 * this is a special callback for ->write_begin() only 2442 * This is a special get_blocks_t callback which is used by
2307 * it's intention is to return mapped block or reserve space 2443 * ext4_da_write_begin(). It will either return mapped block or
2444 * reserve space for a single block.
2445 *
2446 * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
2447 * We also have b_blocknr = -1 and b_bdev initialized properly
2448 *
2449 * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
2450 * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
2451 * initialized properly.
2308 */ 2452 */
2309static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 2453static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2310 struct buffer_head *bh_result, int create) 2454 struct buffer_head *bh_result, int create)
@@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2323 * preallocated blocks are unmapped but should treated 2467 * preallocated blocks are unmapped but should treated
2324 * the same as allocated blocks. 2468 * the same as allocated blocks.
2325 */ 2469 */
2326 ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); 2470 ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0);
2327 if ((ret == 0) && !buffer_delay(bh_result)) { 2471 if ((ret == 0) && !buffer_delay(bh_result)) {
2328 /* the block isn't (pre)allocated yet, let's reserve space */ 2472 /* the block isn't (pre)allocated yet, let's reserve space */
2329 /* 2473 /*
@@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2340 set_buffer_delay(bh_result); 2484 set_buffer_delay(bh_result);
2341 } else if (ret > 0) { 2485 } else if (ret > 0) {
2342 bh_result->b_size = (ret << inode->i_blkbits); 2486 bh_result->b_size = (ret << inode->i_blkbits);
2343 /* 2487 if (buffer_unwritten(bh_result)) {
2344 * With sub-block writes into unwritten extents 2488 /* A delayed write to unwritten bh should
2345 * we also need to mark the buffer as new so that 2489 * be marked new and mapped. Mapped ensures
2346 * the unwritten parts of the buffer gets correctly zeroed. 2490 * that we don't do get_block multiple times
2347 */ 2491 * when we write to the same offset and new
2348 if (buffer_unwritten(bh_result)) 2492 * ensures that we do proper zero out for
2493 * partial write.
2494 */
2349 set_buffer_new(bh_result); 2495 set_buffer_new(bh_result);
2496 set_buffer_mapped(bh_result);
2497 }
2350 ret = 0; 2498 ret = 0;
2351 } 2499 }
2352 2500
2353 return ret; 2501 return ret;
2354} 2502}
2355 2503
2356static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 2504/*
2357{ 2505 * This function is used as a standard get_block_t calback function
2358 /* 2506 * when there is no desire to allocate any blocks. It is used as a
2359 * unmapped buffer is possible for holes. 2507 * callback function for block_prepare_write(), nobh_writepage(), and
2360 * delay buffer is possible with delayed allocation 2508 * block_write_full_page(). These functions should only try to map a
2361 */ 2509 * single block at a time.
2362 return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); 2510 *
2363} 2511 * Since this function doesn't do block allocations even if the caller
2364 2512 * requests it by passing in create=1, it is critically important that
2365static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, 2513 * any caller checks to make sure that any buffer heads are returned
2514 * by this function are either all already mapped or marked for
2515 * delayed allocation before calling nobh_writepage() or
2516 * block_write_full_page(). Otherwise, b_blocknr could be left
2517 * unitialized, and the page write functions will be taken by
2518 * surprise.
2519 */
2520static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
2366 struct buffer_head *bh_result, int create) 2521 struct buffer_head *bh_result, int create)
2367{ 2522{
2368 int ret = 0; 2523 int ret = 0;
2369 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 2524 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
2370 2525
2526 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
2527
2371 /* 2528 /*
2372 * we don't want to do block allocation in writepage 2529 * we don't want to do block allocation in writepage
2373 * so call get_block_wrap with create = 0 2530 * so call get_block_wrap with create = 0
2374 */ 2531 */
2375 ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, 2532 ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
2376 bh_result, 0, 0, 0); 2533 BUG_ON(create && ret == 0);
2377 if (ret > 0) { 2534 if (ret > 0) {
2378 bh_result->b_size = (ret << inode->i_blkbits); 2535 bh_result->b_size = (ret << inode->i_blkbits);
2379 ret = 0; 2536 ret = 0;
@@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
2382} 2539}
2383 2540
2384/* 2541/*
2385 * get called vi ext4_da_writepages after taking page lock (have journal handle) 2542 * This function can get called via...
2386 * get called via journal_submit_inode_data_buffers (no journal handle) 2543 * - ext4_da_writepages after taking page lock (have journal handle)
2387 * get called via shrink_page_list via pdflush (no journal handle) 2544 * - journal_submit_inode_data_buffers (no journal handle)
2388 * or grab_page_cache when doing write_begin (have journal handle) 2545 * - shrink_page_list via pdflush (no journal handle)
2546 * - grab_page_cache when doing write_begin (have journal handle)
2389 */ 2547 */
2390static int ext4_da_writepage(struct page *page, 2548static int ext4_da_writepage(struct page *page,
2391 struct writeback_control *wbc) 2549 struct writeback_control *wbc)
@@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page,
2436 * do block allocation here. 2594 * do block allocation here.
2437 */ 2595 */
2438 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 2596 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
2439 ext4_normal_get_block_write); 2597 noalloc_get_block_write);
2440 if (!ret) { 2598 if (!ret) {
2441 page_bufs = page_buffers(page); 2599 page_bufs = page_buffers(page);
2442 /* check whether all are mapped and non delay */ 2600 /* check whether all are mapped and non delay */
@@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page,
2461 } 2619 }
2462 2620
2463 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2621 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
2464 ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); 2622 ret = nobh_writepage(page, noalloc_get_block_write, wbc);
2465 else 2623 else
2466 ret = block_write_full_page(page, 2624 ret = block_write_full_page(page, noalloc_get_block_write,
2467 ext4_normal_get_block_write, 2625 wbc);
2468 wbc);
2469 2626
2470 return ret; 2627 return ret;
2471} 2628}
@@ -2777,7 +2934,7 @@ retry:
2777 *pagep = page; 2934 *pagep = page;
2778 2935
2779 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2936 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
2780 ext4_da_get_block_prep); 2937 ext4_da_get_block_prep);
2781 if (ret < 0) { 2938 if (ret < 0) {
2782 unlock_page(page); 2939 unlock_page(page);
2783 ext4_journal_stop(handle); 2940 ext4_journal_stop(handle);
@@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2815 for (i = 0; i < idx; i++) 2972 for (i = 0; i < idx; i++)
2816 bh = bh->b_this_page; 2973 bh = bh->b_this_page;
2817 2974
2818 if (!buffer_mapped(bh) || (buffer_delay(bh))) 2975 if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
2819 return 0; 2976 return 0;
2820 return 1; 2977 return 1;
2821} 2978}
@@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page,
3085 struct inode *inode = page->mapping->host; 3242 struct inode *inode = page->mapping->host;
3086 3243
3087 if (test_opt(inode->i_sb, NOBH)) 3244 if (test_opt(inode->i_sb, NOBH))
3088 return nobh_writepage(page, 3245 return nobh_writepage(page, noalloc_get_block_write, wbc);
3089 ext4_normal_get_block_write, wbc);
3090 else 3246 else
3091 return block_write_full_page(page, 3247 return block_write_full_page(page, noalloc_get_block_write,
3092 ext4_normal_get_block_write, 3248 wbc);
3093 wbc);
3094} 3249}
3095 3250
3096static int ext4_normal_writepage(struct page *page, 3251static int ext4_normal_writepage(struct page *page,
@@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page,
3142 int err; 3297 int err;
3143 3298
3144 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 3299 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
3145 ext4_normal_get_block_write); 3300 noalloc_get_block_write);
3146 if (ret != 0) 3301 if (ret != 0)
3147 goto out_unlock; 3302 goto out_unlock;
3148 3303
@@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page,
3227 * really know unless we go poke around in the buffer_heads. 3382 * really know unless we go poke around in the buffer_heads.
3228 * But block_write_full_page will do the right thing. 3383 * But block_write_full_page will do the right thing.
3229 */ 3384 */
3230 return block_write_full_page(page, 3385 return block_write_full_page(page, noalloc_get_block_write,
3231 ext4_normal_get_block_write, 3386 wbc);
3232 wbc);
3233 } 3387 }
3234no_write: 3388no_write:
3235 redirty_page_for_writepage(wbc, page); 3389 redirty_page_for_writepage(wbc, page);
@@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode)
3973 if (!ext4_can_truncate(inode)) 4127 if (!ext4_can_truncate(inode))
3974 return; 4128 return;
3975 4129
3976 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 4130 if (ei->i_disksize && inode->i_size == 0 &&
4131 !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3977 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 4132 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
3978 4133
3979 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 4134 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait)
4715 return ext4_force_commit(inode->i_sb); 4870 return ext4_force_commit(inode->i_sb);
4716} 4871}
4717 4872
4718int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
4719{
4720 int err = 0;
4721
4722 mark_buffer_dirty(bh);
4723 if (inode && inode_needs_sync(inode)) {
4724 sync_dirty_buffer(bh);
4725 if (buffer_req(bh) && !buffer_uptodate(bh)) {
4726 ext4_error(inode->i_sb, __func__,
4727 "IO error syncing inode, "
4728 "inode=%lu, block=%llu",
4729 inode->i_ino,
4730 (unsigned long long)bh->b_blocknr);
4731 err = -EIO;
4732 }
4733 }
4734 return err;
4735}
4736
4737/* 4873/*
4738 * ext4_setattr() 4874 * ext4_setattr()
4739 * 4875 *
@@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4930 */ 5066 */
4931int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) 5067int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4932{ 5068{
4933 int groups, gdpblocks; 5069 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
5070 int gdpblocks;
4934 int idxblocks; 5071 int idxblocks;
4935 int ret = 0; 5072 int ret = 0;
4936 5073
@@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4957 groups += nrblocks; 5094 groups += nrblocks;
4958 5095
4959 gdpblocks = groups; 5096 gdpblocks = groups;
4960 if (groups > EXT4_SB(inode->i_sb)->s_groups_count) 5097 if (groups > ngroups)
4961 groups = EXT4_SB(inode->i_sb)->s_groups_count; 5098 groups = ngroups;
4962 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) 5099 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4963 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; 5100 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4964 5101
@@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
4998 * Calculate the journal credits for a chunk of data modification. 5135 * Calculate the journal credits for a chunk of data modification.
4999 * 5136 *
5000 * This is called from DIO, fallocate or whoever calling 5137 * This is called from DIO, fallocate or whoever calling
5001 * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. 5138 * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
5002 * 5139 *
5003 * journal buffers for data blocks are not included here, as DIO 5140 * journal buffers for data blocks are not included here, as DIO
5004 * and fallocate do no need to journal data buffers. 5141 * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677a7984..ed8482e22c0e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr)
372 ext4_set_bit(bit, addr); 372 ext4_set_bit(bit, addr);
373} 373}
374 374
375static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
376{
377 addr = mb_correct_addr_and_bit(&bit, addr);
378 ext4_set_bit_atomic(lock, bit, addr);
379}
380
381static inline void mb_clear_bit(int bit, void *addr) 375static inline void mb_clear_bit(int bit, void *addr)
382{ 376{
383 addr = mb_correct_addr_and_bit(&bit, addr); 377 addr = mb_correct_addr_and_bit(&bit, addr);
384 ext4_clear_bit(bit, addr); 378 ext4_clear_bit(bit, addr);
385} 379}
386 380
387static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
388{
389 addr = mb_correct_addr_and_bit(&bit, addr);
390 ext4_clear_bit_atomic(lock, bit, addr);
391}
392
393static inline int mb_find_next_zero_bit(void *addr, int max, int start) 381static inline int mb_find_next_zero_bit(void *addr, int max, int start)
394{ 382{
395 int fix = 0, ret, tmpmax; 383 int fix = 0, ret, tmpmax;
@@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
448 436
449 if (unlikely(e4b->bd_info->bb_bitmap == NULL)) 437 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
450 return; 438 return;
451 BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); 439 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
452 for (i = 0; i < count; i++) { 440 for (i = 0; i < count; i++) {
453 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { 441 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
454 ext4_fsblk_t blocknr; 442 ext4_fsblk_t blocknr;
@@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
472 460
473 if (unlikely(e4b->bd_info->bb_bitmap == NULL)) 461 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
474 return; 462 return;
475 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 463 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
476 for (i = 0; i < count; i++) { 464 for (i = 0; i < count; i++) {
477 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); 465 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
478 mb_set_bit(first + i, e4b->bd_info->bb_bitmap); 466 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
739 727
740static int ext4_mb_init_cache(struct page *page, char *incore) 728static int ext4_mb_init_cache(struct page *page, char *incore)
741{ 729{
730 ext4_group_t ngroups;
742 int blocksize; 731 int blocksize;
743 int blocks_per_page; 732 int blocks_per_page;
744 int groups_per_page; 733 int groups_per_page;
@@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
757 746
758 inode = page->mapping->host; 747 inode = page->mapping->host;
759 sb = inode->i_sb; 748 sb = inode->i_sb;
749 ngroups = ext4_get_groups_count(sb);
760 blocksize = 1 << inode->i_blkbits; 750 blocksize = 1 << inode->i_blkbits;
761 blocks_per_page = PAGE_CACHE_SIZE / blocksize; 751 blocks_per_page = PAGE_CACHE_SIZE / blocksize;
762 752
@@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
780 for (i = 0; i < groups_per_page; i++) { 770 for (i = 0; i < groups_per_page; i++) {
781 struct ext4_group_desc *desc; 771 struct ext4_group_desc *desc;
782 772
783 if (first_group + i >= EXT4_SB(sb)->s_groups_count) 773 if (first_group + i >= ngroups)
784 break; 774 break;
785 775
786 err = -EIO; 776 err = -EIO;
@@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
801 unlock_buffer(bh[i]); 791 unlock_buffer(bh[i]);
802 continue; 792 continue;
803 } 793 }
804 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 794 ext4_lock_group(sb, first_group + i);
805 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 795 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
806 ext4_init_block_bitmap(sb, bh[i], 796 ext4_init_block_bitmap(sb, bh[i],
807 first_group + i, desc); 797 first_group + i, desc);
808 set_bitmap_uptodate(bh[i]); 798 set_bitmap_uptodate(bh[i]);
809 set_buffer_uptodate(bh[i]); 799 set_buffer_uptodate(bh[i]);
810 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 800 ext4_unlock_group(sb, first_group + i);
811 unlock_buffer(bh[i]); 801 unlock_buffer(bh[i]);
812 continue; 802 continue;
813 } 803 }
814 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 804 ext4_unlock_group(sb, first_group + i);
815 if (buffer_uptodate(bh[i])) { 805 if (buffer_uptodate(bh[i])) {
816 /* 806 /*
817 * if not uninit if bh is uptodate, 807 * if not uninit if bh is uptodate,
@@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
852 struct ext4_group_info *grinfo; 842 struct ext4_group_info *grinfo;
853 843
854 group = (first_block + i) >> 1; 844 group = (first_block + i) >> 1;
855 if (group >= EXT4_SB(sb)->s_groups_count) 845 if (group >= ngroups)
856 break; 846 break;
857 847
858 /* 848 /*
@@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1078 return 0; 1068 return 0;
1079} 1069}
1080 1070
1081static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) 1071static void mb_clear_bits(void *bm, int cur, int len)
1082{ 1072{
1083 __u32 *addr; 1073 __u32 *addr;
1084 1074
@@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
1091 cur += 32; 1081 cur += 32;
1092 continue; 1082 continue;
1093 } 1083 }
1094 if (lock) 1084 mb_clear_bit(cur, bm);
1095 mb_clear_bit_atomic(lock, cur, bm);
1096 else
1097 mb_clear_bit(cur, bm);
1098 cur++; 1085 cur++;
1099 } 1086 }
1100} 1087}
1101 1088
1102static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) 1089static void mb_set_bits(void *bm, int cur, int len)
1103{ 1090{
1104 __u32 *addr; 1091 __u32 *addr;
1105 1092
@@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
1112 cur += 32; 1099 cur += 32;
1113 continue; 1100 continue;
1114 } 1101 }
1115 if (lock) 1102 mb_set_bit(cur, bm);
1116 mb_set_bit_atomic(lock, cur, bm);
1117 else
1118 mb_set_bit(cur, bm);
1119 cur++; 1103 cur++;
1120 } 1104 }
1121} 1105}
@@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1131 struct super_block *sb = e4b->bd_sb; 1115 struct super_block *sb = e4b->bd_sb;
1132 1116
1133 BUG_ON(first + count > (sb->s_blocksize << 3)); 1117 BUG_ON(first + count > (sb->s_blocksize << 3));
1134 BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); 1118 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1135 mb_check_buddy(e4b); 1119 mb_check_buddy(e4b);
1136 mb_free_blocks_double(inode, e4b, first, count); 1120 mb_free_blocks_double(inode, e4b, first, count);
1137 1121
@@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1212 int ord; 1196 int ord;
1213 void *buddy; 1197 void *buddy;
1214 1198
1215 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 1199 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1216 BUG_ON(ex == NULL); 1200 BUG_ON(ex == NULL);
1217 1201
1218 buddy = mb_find_buddy(e4b, order, &max); 1202 buddy = mb_find_buddy(e4b, order, &max);
@@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1276 1260
1277 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); 1261 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1278 BUG_ON(e4b->bd_group != ex->fe_group); 1262 BUG_ON(e4b->bd_group != ex->fe_group);
1279 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 1263 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1280 mb_check_buddy(e4b); 1264 mb_check_buddy(e4b);
1281 mb_mark_used_double(e4b, start, len); 1265 mb_mark_used_double(e4b, start, len);
1282 1266
@@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1330 e4b->bd_info->bb_counters[ord]++; 1314 e4b->bd_info->bb_counters[ord]++;
1331 } 1315 }
1332 1316
1333 mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), 1317 mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1334 EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1335 mb_check_buddy(e4b); 1318 mb_check_buddy(e4b);
1336 1319
1337 return ret; 1320 return ret;
@@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1726 unsigned free, fragments; 1709 unsigned free, fragments;
1727 unsigned i, bits; 1710 unsigned i, bits;
1728 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); 1711 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1729 struct ext4_group_desc *desc;
1730 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); 1712 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1731 1713
1732 BUG_ON(cr < 0 || cr >= 4); 1714 BUG_ON(cr < 0 || cr >= 4);
@@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1742 switch (cr) { 1724 switch (cr) {
1743 case 0: 1725 case 0:
1744 BUG_ON(ac->ac_2order == 0); 1726 BUG_ON(ac->ac_2order == 0);
1745 /* If this group is uninitialized, skip it initially */
1746 desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
1747 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1748 return 0;
1749 1727
1750 /* Avoid using the first bg of a flexgroup for data files */ 1728 /* Avoid using the first bg of a flexgroup for data files */
1751 if ((ac->ac_flags & EXT4_MB_HINT_DATA) && 1729 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1788 int block, pnum; 1766 int block, pnum;
1789 int blocks_per_page; 1767 int blocks_per_page;
1790 int groups_per_page; 1768 int groups_per_page;
1769 ext4_group_t ngroups = ext4_get_groups_count(sb);
1791 ext4_group_t first_group; 1770 ext4_group_t first_group;
1792 struct ext4_group_info *grp; 1771 struct ext4_group_info *grp;
1793 1772
@@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1807 /* read all groups the page covers into the cache */ 1786 /* read all groups the page covers into the cache */
1808 for (i = 0; i < groups_per_page; i++) { 1787 for (i = 0; i < groups_per_page; i++) {
1809 1788
1810 if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) 1789 if ((first_group + i) >= ngroups)
1811 break; 1790 break;
1812 grp = ext4_get_group_info(sb, first_group + i); 1791 grp = ext4_get_group_info(sb, first_group + i);
1813 /* take all groups write allocation 1792 /* take all groups write allocation
@@ -1945,8 +1924,7 @@ err:
1945static noinline_for_stack int 1924static noinline_for_stack int
1946ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1925ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1947{ 1926{
1948 ext4_group_t group; 1927 ext4_group_t ngroups, group, i;
1949 ext4_group_t i;
1950 int cr; 1928 int cr;
1951 int err = 0; 1929 int err = 0;
1952 int bsbits; 1930 int bsbits;
@@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1957 1935
1958 sb = ac->ac_sb; 1936 sb = ac->ac_sb;
1959 sbi = EXT4_SB(sb); 1937 sbi = EXT4_SB(sb);
1938 ngroups = ext4_get_groups_count(sb);
1960 BUG_ON(ac->ac_status == AC_STATUS_FOUND); 1939 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1961 1940
1962 /* first, try the goal */ 1941 /* first, try the goal */
@@ -2017,11 +1996,11 @@ repeat:
2017 */ 1996 */
2018 group = ac->ac_g_ex.fe_group; 1997 group = ac->ac_g_ex.fe_group;
2019 1998
2020 for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { 1999 for (i = 0; i < ngroups; group++, i++) {
2021 struct ext4_group_info *grp; 2000 struct ext4_group_info *grp;
2022 struct ext4_group_desc *desc; 2001 struct ext4_group_desc *desc;
2023 2002
2024 if (group == EXT4_SB(sb)->s_groups_count) 2003 if (group == ngroups)
2025 group = 0; 2004 group = 0;
2026 2005
2027 /* quick check to skip empty groups */ 2006 /* quick check to skip empty groups */
@@ -2064,9 +2043,7 @@ repeat:
2064 2043
2065 ac->ac_groups_scanned++; 2044 ac->ac_groups_scanned++;
2066 desc = ext4_get_group_desc(sb, group, NULL); 2045 desc = ext4_get_group_desc(sb, group, NULL);
2067 if (cr == 0 || (desc->bg_flags & 2046 if (cr == 0)
2068 cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
2069 ac->ac_2order != 0))
2070 ext4_mb_simple_scan_group(ac, &e4b); 2047 ext4_mb_simple_scan_group(ac, &e4b);
2071 else if (cr == 1 && 2048 else if (cr == 1 &&
2072 ac->ac_g_ex.fe_len == sbi->s_stripe) 2049 ac->ac_g_ex.fe_len == sbi->s_stripe)
@@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
2315static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) 2292static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2316{ 2293{
2317 struct super_block *sb = seq->private; 2294 struct super_block *sb = seq->private;
2318 struct ext4_sb_info *sbi = EXT4_SB(sb);
2319 ext4_group_t group; 2295 ext4_group_t group;
2320 2296
2321 if (*pos < 0 || *pos >= sbi->s_groups_count) 2297 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2322 return NULL; 2298 return NULL;
2323
2324 group = *pos + 1; 2299 group = *pos + 1;
2325 return (void *) ((unsigned long) group); 2300 return (void *) ((unsigned long) group);
2326} 2301}
@@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2328static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) 2303static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2329{ 2304{
2330 struct super_block *sb = seq->private; 2305 struct super_block *sb = seq->private;
2331 struct ext4_sb_info *sbi = EXT4_SB(sb);
2332 ext4_group_t group; 2306 ext4_group_t group;
2333 2307
2334 ++*pos; 2308 ++*pos;
2335 if (*pos < 0 || *pos >= sbi->s_groups_count) 2309 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2336 return NULL; 2310 return NULL;
2337 group = *pos + 1; 2311 group = *pos + 1;
2338 return (void *) ((unsigned long) group); 2312 return (void *) ((unsigned long) group);
@@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb)
2420 2394
2421 if (sbi->s_proc != NULL) { 2395 if (sbi->s_proc != NULL) {
2422 remove_proc_entry("mb_groups", sbi->s_proc); 2396 remove_proc_entry("mb_groups", sbi->s_proc);
2423 remove_proc_entry("mb_history", sbi->s_proc); 2397 if (sbi->s_mb_history_max)
2398 remove_proc_entry("mb_history", sbi->s_proc);
2424 } 2399 }
2425 kfree(sbi->s_mb_history); 2400 kfree(sbi->s_mb_history);
2426} 2401}
@@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb)
2431 int i; 2406 int i;
2432 2407
2433 if (sbi->s_proc != NULL) { 2408 if (sbi->s_proc != NULL) {
2434 proc_create_data("mb_history", S_IRUGO, sbi->s_proc, 2409 if (sbi->s_mb_history_max)
2435 &ext4_mb_seq_history_fops, sb); 2410 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2411 &ext4_mb_seq_history_fops, sb);
2436 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2412 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2437 &ext4_mb_seq_groups_fops, sb); 2413 &ext4_mb_seq_groups_fops, sb);
2438 } 2414 }
2439 2415
2440 sbi->s_mb_history_max = 1000;
2441 sbi->s_mb_history_cur = 0; 2416 sbi->s_mb_history_cur = 0;
2442 spin_lock_init(&sbi->s_mb_history_lock); 2417 spin_lock_init(&sbi->s_mb_history_lock);
2443 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); 2418 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
2444 sbi->s_mb_history = kzalloc(i, GFP_KERNEL); 2419 sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
2445 /* if we can't allocate history, then we simple won't use it */ 2420 /* if we can't allocate history, then we simple won't use it */
2446} 2421}
2447 2422
@@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
2451 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2426 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2452 struct ext4_mb_history h; 2427 struct ext4_mb_history h;
2453 2428
2454 if (unlikely(sbi->s_mb_history == NULL)) 2429 if (sbi->s_mb_history == NULL)
2455 return; 2430 return;
2456 2431
2457 if (!(ac->ac_op & sbi->s_mb_history_filter)) 2432 if (!(ac->ac_op & sbi->s_mb_history_filter))
@@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
2587 2562
2588static int ext4_mb_init_backend(struct super_block *sb) 2563static int ext4_mb_init_backend(struct super_block *sb)
2589{ 2564{
2565 ext4_group_t ngroups = ext4_get_groups_count(sb);
2590 ext4_group_t i; 2566 ext4_group_t i;
2591 int metalen; 2567 int metalen;
2592 struct ext4_sb_info *sbi = EXT4_SB(sb); 2568 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2598 struct ext4_group_desc *desc; 2574 struct ext4_group_desc *desc;
2599 2575
2600 /* This is the number of blocks used by GDT */ 2576 /* This is the number of blocks used by GDT */
2601 num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 2577 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
2602 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); 2578 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2603 2579
2604 /* 2580 /*
@@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2644 for (i = 0; i < num_meta_group_infos; i++) { 2620 for (i = 0; i < num_meta_group_infos; i++) {
2645 if ((i + 1) == num_meta_group_infos) 2621 if ((i + 1) == num_meta_group_infos)
2646 metalen = sizeof(*meta_group_info) * 2622 metalen = sizeof(*meta_group_info) *
2647 (sbi->s_groups_count - 2623 (ngroups -
2648 (i << EXT4_DESC_PER_BLOCK_BITS(sb))); 2624 (i << EXT4_DESC_PER_BLOCK_BITS(sb)));
2649 meta_group_info = kmalloc(metalen, GFP_KERNEL); 2625 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2650 if (meta_group_info == NULL) { 2626 if (meta_group_info == NULL) {
@@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2655 sbi->s_group_info[i] = meta_group_info; 2631 sbi->s_group_info[i] = meta_group_info;
2656 } 2632 }
2657 2633
2658 for (i = 0; i < sbi->s_groups_count; i++) { 2634 for (i = 0; i < ngroups; i++) {
2659 desc = ext4_get_group_desc(sb, i, NULL); 2635 desc = ext4_get_group_desc(sb, i, NULL);
2660 if (desc == NULL) { 2636 if (desc == NULL) {
2661 printk(KERN_ERR 2637 printk(KERN_ERR
@@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2761 return 0; 2737 return 0;
2762} 2738}
2763 2739
2764/* need to called with ext4 group lock (ext4_lock_group) */ 2740/* need to called with the ext4 group lock held */
2765static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) 2741static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2766{ 2742{
2767 struct ext4_prealloc_space *pa; 2743 struct ext4_prealloc_space *pa;
@@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2781 2757
2782int ext4_mb_release(struct super_block *sb) 2758int ext4_mb_release(struct super_block *sb)
2783{ 2759{
2760 ext4_group_t ngroups = ext4_get_groups_count(sb);
2784 ext4_group_t i; 2761 ext4_group_t i;
2785 int num_meta_group_infos; 2762 int num_meta_group_infos;
2786 struct ext4_group_info *grinfo; 2763 struct ext4_group_info *grinfo;
2787 struct ext4_sb_info *sbi = EXT4_SB(sb); 2764 struct ext4_sb_info *sbi = EXT4_SB(sb);
2788 2765
2789 if (sbi->s_group_info) { 2766 if (sbi->s_group_info) {
2790 for (i = 0; i < sbi->s_groups_count; i++) { 2767 for (i = 0; i < ngroups; i++) {
2791 grinfo = ext4_get_group_info(sb, i); 2768 grinfo = ext4_get_group_info(sb, i);
2792#ifdef DOUBLE_CHECK 2769#ifdef DOUBLE_CHECK
2793 kfree(grinfo->bb_bitmap); 2770 kfree(grinfo->bb_bitmap);
@@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb)
2797 ext4_unlock_group(sb, i); 2774 ext4_unlock_group(sb, i);
2798 kfree(grinfo); 2775 kfree(grinfo);
2799 } 2776 }
2800 num_meta_group_infos = (sbi->s_groups_count + 2777 num_meta_group_infos = (ngroups +
2801 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2778 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2802 EXT4_DESC_PER_BLOCK_BITS(sb); 2779 EXT4_DESC_PER_BLOCK_BITS(sb);
2803 for (i = 0; i < num_meta_group_infos; i++) 2780 for (i = 0; i < num_meta_group_infos; i++)
@@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2984 + le32_to_cpu(es->s_first_data_block); 2961 + le32_to_cpu(es->s_first_data_block);
2985 2962
2986 len = ac->ac_b_ex.fe_len; 2963 len = ac->ac_b_ex.fe_len;
2987 if (in_range(ext4_block_bitmap(sb, gdp), block, len) || 2964 if (!ext4_data_block_valid(sbi, block, len)) {
2988 in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
2989 in_range(block, ext4_inode_table(sb, gdp),
2990 EXT4_SB(sb)->s_itb_per_group) ||
2991 in_range(block + len - 1, ext4_inode_table(sb, gdp),
2992 EXT4_SB(sb)->s_itb_per_group)) {
2993 ext4_error(sb, __func__, 2965 ext4_error(sb, __func__,
2994 "Allocating block %llu in system zone of %d group\n", 2966 "Allocating blocks %llu-%llu which overlap "
2995 block, ac->ac_b_ex.fe_group); 2967 "fs metadata\n", block, block+len);
2996 /* File system mounted not to panic on error 2968 /* File system mounted not to panic on error
2997 * Fix the bitmap and repeat the block allocation 2969 * Fix the bitmap and repeat the block allocation
2998 * We leak some of the blocks here. 2970 * We leak some of the blocks here.
2999 */ 2971 */
3000 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), 2972 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3001 bitmap_bh->b_data, ac->ac_b_ex.fe_start, 2973 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3002 ac->ac_b_ex.fe_len); 2974 ac->ac_b_ex.fe_len);
2975 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3003 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 2976 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3004 if (!err) 2977 if (!err)
3005 err = -EAGAIN; 2978 err = -EAGAIN;
3006 goto out_err; 2979 goto out_err;
3007 } 2980 }
2981
2982 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3008#ifdef AGGRESSIVE_CHECK 2983#ifdef AGGRESSIVE_CHECK
3009 { 2984 {
3010 int i; 2985 int i;
@@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3014 } 2989 }
3015 } 2990 }
3016#endif 2991#endif
3017 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2992 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
3018 mb_set_bits(NULL, bitmap_bh->b_data,
3019 ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
3020 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2993 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3021 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 2994 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3022 ext4_free_blks_set(sb, gdp, 2995 ext4_free_blks_set(sb, gdp,
@@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3026 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; 2999 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
3027 ext4_free_blks_set(sb, gdp, len); 3000 ext4_free_blks_set(sb, gdp, len);
3028 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 3001 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3029 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 3002
3003 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3030 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 3004 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
3031 /* 3005 /*
3032 * Now reduce the dirty block count also. Should not go negative 3006 * Now reduce the dirty block count also. Should not go negative
@@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3459 * the function goes through all block freed in the group 3433 * the function goes through all block freed in the group
3460 * but not yet committed and marks them used in in-core bitmap. 3434 * but not yet committed and marks them used in in-core bitmap.
3461 * buddy must be generated from this bitmap 3435 * buddy must be generated from this bitmap
3462 * Need to be called with ext4 group lock (ext4_lock_group) 3436 * Need to be called with the ext4 group lock held
3463 */ 3437 */
3464static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 3438static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3465 ext4_group_t group) 3439 ext4_group_t group)
@@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3473 3447
3474 while (n) { 3448 while (n) {
3475 entry = rb_entry(n, struct ext4_free_data, node); 3449 entry = rb_entry(n, struct ext4_free_data, node);
3476 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), 3450 mb_set_bits(bitmap, entry->start_blk, entry->count);
3477 bitmap, entry->start_blk,
3478 entry->count);
3479 n = rb_next(n); 3451 n = rb_next(n);
3480 } 3452 }
3481 return; 3453 return;
@@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3484/* 3456/*
3485 * the function goes through all preallocation in this group and marks them 3457 * the function goes through all preallocation in this group and marks them
3486 * used in in-core bitmap. buddy must be generated from this bitmap 3458 * used in in-core bitmap. buddy must be generated from this bitmap
3487 * Need to be called with ext4 group lock (ext4_lock_group) 3459 * Need to be called with ext4 group lock held
3488 */ 3460 */
3489static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 3461static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3490 ext4_group_t group) 3462 ext4_group_t group)
@@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3516 if (unlikely(len == 0)) 3488 if (unlikely(len == 0))
3517 continue; 3489 continue;
3518 BUG_ON(groupnr != group); 3490 BUG_ON(groupnr != group);
3519 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), 3491 mb_set_bits(bitmap, start, len);
3520 bitmap, start, len);
3521 preallocated += len; 3492 preallocated += len;
3522 count++; 3493 count++;
3523 } 3494 }
@@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
4121static void ext4_mb_show_ac(struct ext4_allocation_context *ac) 4092static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4122{ 4093{
4123 struct super_block *sb = ac->ac_sb; 4094 struct super_block *sb = ac->ac_sb;
4124 ext4_group_t i; 4095 ext4_group_t ngroups, i;
4125 4096
4126 printk(KERN_ERR "EXT4-fs: Can't allocate:" 4097 printk(KERN_ERR "EXT4-fs: Can't allocate:"
4127 " Allocation context details:\n"); 4098 " Allocation context details:\n");
@@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4145 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, 4116 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
4146 ac->ac_found); 4117 ac->ac_found);
4147 printk(KERN_ERR "EXT4-fs: groups: \n"); 4118 printk(KERN_ERR "EXT4-fs: groups: \n");
4148 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 4119 ngroups = ext4_get_groups_count(sb);
4120 for (i = 0; i < ngroups; i++) {
4149 struct ext4_group_info *grp = ext4_get_group_info(sb, i); 4121 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4150 struct ext4_prealloc_space *pa; 4122 struct ext4_prealloc_space *pa;
4151 ext4_grpblk_t start; 4123 ext4_grpblk_t start;
@@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4469 4441
4470static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) 4442static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4471{ 4443{
4472 ext4_group_t i; 4444 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4473 int ret; 4445 int ret;
4474 int freed = 0; 4446 int freed = 0;
4475 4447
4476 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", 4448 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
4477 sb->s_id, needed); 4449 sb->s_id, needed);
4478 for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { 4450 for (i = 0; i < ngroups && needed > 0; i++) {
4479 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4451 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4480 freed += ret; 4452 freed += ret;
4481 needed -= ret; 4453 needed -= ret;
@@ -4859,29 +4831,25 @@ do_more:
4859 new_entry->group = block_group; 4831 new_entry->group = block_group;
4860 new_entry->count = count; 4832 new_entry->count = count;
4861 new_entry->t_tid = handle->h_transaction->t_tid; 4833 new_entry->t_tid = handle->h_transaction->t_tid;
4834
4862 ext4_lock_group(sb, block_group); 4835 ext4_lock_group(sb, block_group);
4863 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, 4836 mb_clear_bits(bitmap_bh->b_data, bit, count);
4864 bit, count);
4865 ext4_mb_free_metadata(handle, &e4b, new_entry); 4837 ext4_mb_free_metadata(handle, &e4b, new_entry);
4866 ext4_unlock_group(sb, block_group);
4867 } else { 4838 } else {
4868 ext4_lock_group(sb, block_group);
4869 /* need to update group_info->bb_free and bitmap 4839 /* need to update group_info->bb_free and bitmap
4870 * with group lock held. generate_buddy look at 4840 * with group lock held. generate_buddy look at
4871 * them with group lock_held 4841 * them with group lock_held
4872 */ 4842 */
4873 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, 4843 ext4_lock_group(sb, block_group);
4874 bit, count); 4844 mb_clear_bits(bitmap_bh->b_data, bit, count);
4875 mb_free_blocks(inode, &e4b, bit, count); 4845 mb_free_blocks(inode, &e4b, bit, count);
4876 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4846 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4877 ext4_unlock_group(sb, block_group);
4878 } 4847 }
4879 4848
4880 spin_lock(sb_bgl_lock(sbi, block_group));
4881 ret = ext4_free_blks_count(sb, gdp) + count; 4849 ret = ext4_free_blks_count(sb, gdp) + count;
4882 ext4_free_blks_set(sb, gdp, ret); 4850 ext4_free_blks_set(sb, gdp, ret);
4883 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4851 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4884 spin_unlock(sb_bgl_lock(sbi, block_group)); 4852 ext4_unlock_group(sb, block_group);
4885 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4853 percpu_counter_add(&sbi->s_freeblocks_counter, count);
4886 4854
4887 if (sbi->s_log_groups_per_flex) { 4855 if (sbi->s_log_groups_per_flex) {
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index dd9e6cd5f6cf..75e34f69215b 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,7 +23,6 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "ext4.h" 25#include "ext4.h"
26#include "group.h"
27 26
28/* 27/*
29 * with AGGRESSIVE_CHECK allocator runs consistency checks over 28 * with AGGRESSIVE_CHECK allocator runs consistency checks over
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 22098e1cd085..07eb6649e4fa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -37,7 +37,6 @@
37#include "ext4.h" 37#include "ext4.h"
38#include "ext4_jbd2.h" 38#include "ext4_jbd2.h"
39 39
40#include "namei.h"
41#include "xattr.h" 40#include "xattr.h"
42#include "acl.h" 41#include "acl.h"
43 42
@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
750 ext4fs_dirhash(de->name, de->name_len, &h); 749 ext4fs_dirhash(de->name, de->name_len, &h);
751 map_tail--; 750 map_tail--;
752 map_tail->hash = h.hash; 751 map_tail->hash = h.hash;
753 map_tail->offs = (u16) ((char *) de - base); 752 map_tail->offs = ((char *) de - base)>>2;
754 map_tail->size = le16_to_cpu(de->rec_len); 753 map_tail->size = le16_to_cpu(de->rec_len);
755 count++; 754 count++;
756 cond_resched(); 755 cond_resched();
@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1148 unsigned rec_len = 0; 1147 unsigned rec_len = 0;
1149 1148
1150 while (count--) { 1149 while (count--) {
1151 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); 1150 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1151 (from + (map->offs<<2));
1152 rec_len = EXT4_DIR_REC_LEN(de->name_len); 1152 rec_len = EXT4_DIR_REC_LEN(de->name_len);
1153 memcpy (to, de, rec_len); 1153 memcpy (to, de, rec_len);
1154 ((struct ext4_dir_entry_2 *) to)->rec_len = 1154 ((struct ext4_dir_entry_2 *) to)->rec_len =
@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
1997 if (!ext4_handle_valid(handle)) 1997 if (!ext4_handle_valid(handle))
1998 return 0; 1998 return 0;
1999 1999
2000 lock_super(sb); 2000 mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
2001 if (!list_empty(&EXT4_I(inode)->i_orphan)) 2001 if (!list_empty(&EXT4_I(inode)->i_orphan))
2002 goto out_unlock; 2002 goto out_unlock;
2003 2003
@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2006 2006
2007 /* @@@ FIXME: Observation from aviro: 2007 /* @@@ FIXME: Observation from aviro:
2008 * I think I can trigger J_ASSERT in ext4_orphan_add(). We block 2008 * I think I can trigger J_ASSERT in ext4_orphan_add(). We block
2009 * here (on lock_super()), so race with ext4_link() which might bump 2009 * here (on s_orphan_lock), so race with ext4_link() which might bump
2010 * ->i_nlink. For, say it, character device. Not a regular file, 2010 * ->i_nlink. For, say it, character device. Not a regular file,
2011 * not a directory, not a symlink and ->i_nlink > 0. 2011 * not a directory, not a symlink and ->i_nlink > 0.
2012 *
2013 * tytso, 4/25/2009: I'm not sure how that could happen;
2014 * shouldn't the fs core protect us from these sort of
2015 * unlink()/link() races?
2012 */ 2016 */
2013 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2017 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2014 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 2018 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2045 jbd_debug(4, "orphan inode %lu will point to %d\n", 2049 jbd_debug(4, "orphan inode %lu will point to %d\n",
2046 inode->i_ino, NEXT_ORPHAN(inode)); 2050 inode->i_ino, NEXT_ORPHAN(inode));
2047out_unlock: 2051out_unlock:
2048 unlock_super(sb); 2052 mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
2049 ext4_std_error(inode->i_sb, err); 2053 ext4_std_error(inode->i_sb, err);
2050 return err; 2054 return err;
2051} 2055}
@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2066 if (!ext4_handle_valid(handle)) 2070 if (!ext4_handle_valid(handle))
2067 return 0; 2071 return 0;
2068 2072
2069 lock_super(inode->i_sb); 2073 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2070 if (list_empty(&ei->i_orphan)) { 2074 if (list_empty(&ei->i_orphan))
2071 unlock_super(inode->i_sb); 2075 goto out;
2072 return 0;
2073 }
2074 2076
2075 ino_next = NEXT_ORPHAN(inode); 2077 ino_next = NEXT_ORPHAN(inode);
2076 prev = ei->i_orphan.prev; 2078 prev = ei->i_orphan.prev;
@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2120out_err: 2122out_err:
2121 ext4_std_error(inode->i_sb, err); 2123 ext4_std_error(inode->i_sb, err);
2122out: 2124out:
2123 unlock_super(inode->i_sb); 2125 mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2124 return err; 2126 return err;
2125 2127
2126out_brelse: 2128out_brelse:
@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2533 .removexattr = generic_removexattr, 2535 .removexattr = generic_removexattr,
2534#endif 2536#endif
2535 .permission = ext4_permission, 2537 .permission = ext4_permission,
2538 .fiemap = ext4_fiemap,
2536}; 2539};
2537 2540
2538const struct inode_operations ext4_special_inode_operations = { 2541const struct inode_operations ext4_special_inode_operations = {
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
deleted file mode 100644
index 5e4dfff36a00..000000000000
--- a/fs/ext4/namei.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/* linux/fs/ext4/namei.h
2 *
3 * Copyright (C) 2005 Simtec Electronics
4 * Ben Dooks <ben@simtec.co.uk>
5 *
6*/
7
8extern struct dentry *ext4_get_parent(struct dentry *child);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 546c7dd869e1..27eb289eea37 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
18#include "group.h"
19 18
20#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 19#define outside(b, first, last) ((b) < (first) || (b) >= (last))
21#define inside(b, first, last) ((b) >= (first) && (b) < (last)) 20#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb,
193 if (IS_ERR(handle)) 192 if (IS_ERR(handle))
194 return PTR_ERR(handle); 193 return PTR_ERR(handle);
195 194
196 lock_super(sb); 195 mutex_lock(&sbi->s_resize_lock);
197 if (input->group != sbi->s_groups_count) { 196 if (input->group != sbi->s_groups_count) {
198 err = -EBUSY; 197 err = -EBUSY;
199 goto exit_journal; 198 goto exit_journal;
@@ -302,7 +301,7 @@ exit_bh:
302 brelse(bh); 301 brelse(bh);
303 302
304exit_journal: 303exit_journal:
305 unlock_super(sb); 304 mutex_unlock(&sbi->s_resize_lock);
306 if ((err2 = ext4_journal_stop(handle)) && !err) 305 if ((err2 = ext4_journal_stop(handle)) && !err)
307 err = err2; 306 err = err2;
308 307
@@ -643,11 +642,12 @@ exit_free:
643 * important part is that the new block and inode counts are in the backup 642 * important part is that the new block and inode counts are in the backup
644 * superblocks, and the location of the new group metadata in the GDT backups. 643 * superblocks, and the location of the new group metadata in the GDT backups.
645 * 644 *
646 * We do not need lock_super() for this, because these blocks are not 645 * We do not need take the s_resize_lock for this, because these
647 * otherwise touched by the filesystem code when it is mounted. We don't 646 * blocks are not otherwise touched by the filesystem code when it is
648 * need to worry about last changing from sbi->s_groups_count, because the 647 * mounted. We don't need to worry about last changing from
649 * worst that can happen is that we do not copy the full number of backups 648 * sbi->s_groups_count, because the worst that can happen is that we
650 * at this time. The resize which changed s_groups_count will backup again. 649 * do not copy the full number of backups at this time. The resize
650 * which changed s_groups_count will backup again.
651 */ 651 */
652static void update_backups(struct super_block *sb, 652static void update_backups(struct super_block *sb,
653 int blk_off, char *data, int size) 653 int blk_off, char *data, int size)
@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
809 goto exit_put; 809 goto exit_put;
810 } 810 }
811 811
812 lock_super(sb); 812 mutex_lock(&sbi->s_resize_lock);
813 if (input->group != sbi->s_groups_count) { 813 if (input->group != sbi->s_groups_count) {
814 ext4_warning(sb, __func__, 814 ext4_warning(sb, __func__,
815 "multiple resizers run on filesystem!"); 815 "multiple resizers run on filesystem!");
@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
840 /* 840 /*
841 * OK, now we've set up the new group. Time to make it active. 841 * OK, now we've set up the new group. Time to make it active.
842 * 842 *
843 * Current kernels don't lock all allocations via lock_super(), 843 * We do not lock all allocations via s_resize_lock
844 * so we have to be safe wrt. concurrent accesses the group 844 * so we have to be safe wrt. concurrent accesses the group
845 * data. So we need to be careful to set all of the relevant 845 * data. So we need to be careful to set all of the relevant
846 * group descriptor data etc. *before* we enable the group. 846 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
900 * 900 *
901 * The precise rules we use are: 901 * The precise rules we use are:
902 * 902 *
903 * * Writers of s_groups_count *must* hold lock_super 903 * * Writers of s_groups_count *must* hold s_resize_lock
904 * AND 904 * AND
905 * * Writers must perform a smp_wmb() after updating all dependent 905 * * Writers must perform a smp_wmb() after updating all dependent
906 * data and before modifying the groups count 906 * data and before modifying the groups count
907 * 907 *
908 * * Readers must hold lock_super() over the access 908 * * Readers must hold s_resize_lock over the access
909 * OR 909 * OR
910 * * Readers must perform an smp_rmb() after reading the groups count 910 * * Readers must perform an smp_rmb() after reading the groups count
911 * and before reading any dependent data. 911 * and before reading any dependent data.
@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
948 sb->s_dirt = 1; 948 sb->s_dirt = 1;
949 949
950exit_journal: 950exit_journal:
951 unlock_super(sb); 951 mutex_unlock(&sbi->s_resize_lock);
952 if ((err2 = ext4_journal_stop(handle)) && !err) 952 if ((err2 = ext4_journal_stop(handle)) && !err)
953 err = err2; 953 err = err2;
954 if (!err) { 954 if (!err) {
@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
986 986
987 /* We don't need to worry about locking wrt other resizers just 987 /* We don't need to worry about locking wrt other resizers just
988 * yet: we're going to revalidate es->s_blocks_count after 988 * yet: we're going to revalidate es->s_blocks_count after
989 * taking lock_super() below. */ 989 * taking the s_resize_lock below. */
990 o_blocks_count = ext4_blocks_count(es); 990 o_blocks_count = ext4_blocks_count(es);
991 o_groups_count = EXT4_SB(sb)->s_groups_count; 991 o_groups_count = EXT4_SB(sb)->s_groups_count;
992 992
@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1056 goto exit_put; 1056 goto exit_put;
1057 } 1057 }
1058 1058
1059 lock_super(sb); 1059 mutex_lock(&EXT4_SB(sb)->s_resize_lock);
1060 if (o_blocks_count != ext4_blocks_count(es)) { 1060 if (o_blocks_count != ext4_blocks_count(es)) {
1061 ext4_warning(sb, __func__, 1061 ext4_warning(sb, __func__,
1062 "multiple resizers run on filesystem!"); 1062 "multiple resizers run on filesystem!");
1063 unlock_super(sb); 1063 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1064 ext4_journal_stop(handle); 1064 ext4_journal_stop(handle);
1065 err = -EBUSY; 1065 err = -EBUSY;
1066 goto exit_put; 1066 goto exit_put;
@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1070 EXT4_SB(sb)->s_sbh))) { 1070 EXT4_SB(sb)->s_sbh))) {
1071 ext4_warning(sb, __func__, 1071 ext4_warning(sb, __func__,
1072 "error %d on journal write access", err); 1072 "error %d on journal write access", err);
1073 unlock_super(sb); 1073 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1074 ext4_journal_stop(handle); 1074 ext4_journal_stop(handle);
1075 goto exit_put; 1075 goto exit_put;
1076 } 1076 }
1077 ext4_blocks_count_set(es, o_blocks_count + add); 1077 ext4_blocks_count_set(es, o_blocks_count + add);
1078 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 1078 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
1079 sb->s_dirt = 1; 1079 sb->s_dirt = 1;
1080 unlock_super(sb); 1080 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1081 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1081 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1082 o_blocks_count + add); 1082 o_blocks_count + add);
1083 /* We add the blocks to the bitmap and set the group need init bit */ 1083 /* We add the blocks to the bitmap and set the group need init bit */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e6f222..c191d0f65fed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -20,6 +20,7 @@
20#include <linux/string.h> 20#include <linux/string.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/vmalloc.h>
23#include <linux/jbd2.h> 24#include <linux/jbd2.h>
24#include <linux/slab.h> 25#include <linux/slab.h>
25#include <linux/init.h> 26#include <linux/init.h>
@@ -45,16 +46,20 @@
45#include "ext4_jbd2.h" 46#include "ext4_jbd2.h"
46#include "xattr.h" 47#include "xattr.h"
47#include "acl.h" 48#include "acl.h"
48#include "namei.h" 49
49#include "group.h" 50static int default_mb_history_length = 1000;
51
52module_param_named(default_mb_history_length, default_mb_history_length,
53 int, 0644);
54MODULE_PARM_DESC(default_mb_history_length,
55 "Default number of entries saved for mb_history");
50 56
51struct proc_dir_entry *ext4_proc_root; 57struct proc_dir_entry *ext4_proc_root;
52static struct kset *ext4_kset; 58static struct kset *ext4_kset;
53 59
54static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 60static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
55 unsigned long journal_devnum); 61 unsigned long journal_devnum);
56static int ext4_commit_super(struct super_block *sb, 62static int ext4_commit_super(struct super_block *sb, int sync);
57 struct ext4_super_block *es, int sync);
58static void ext4_mark_recovery_complete(struct super_block *sb, 63static void ext4_mark_recovery_complete(struct super_block *sb,
59 struct ext4_super_block *es); 64 struct ext4_super_block *es);
60static void ext4_clear_journal_err(struct super_block *sb, 65static void ext4_clear_journal_err(struct super_block *sb,
@@ -74,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
74{ 79{
75 return le32_to_cpu(bg->bg_block_bitmap_lo) | 80 return le32_to_cpu(bg->bg_block_bitmap_lo) |
76 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 81 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
77 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 82 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
78} 83}
79 84
80ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 85ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -82,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
82{ 87{
83 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 88 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
84 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 89 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
85 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 90 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
86} 91}
87 92
88ext4_fsblk_t ext4_inode_table(struct super_block *sb, 93ext4_fsblk_t ext4_inode_table(struct super_block *sb,
@@ -90,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
90{ 95{
91 return le32_to_cpu(bg->bg_inode_table_lo) | 96 return le32_to_cpu(bg->bg_inode_table_lo) |
92 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 97 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
93 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 98 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
94} 99}
95 100
96__u32 ext4_free_blks_count(struct super_block *sb, 101__u32 ext4_free_blks_count(struct super_block *sb,
@@ -98,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb,
98{ 103{
99 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 104 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
100 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 105 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
101 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 106 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
102} 107}
103 108
104__u32 ext4_free_inodes_count(struct super_block *sb, 109__u32 ext4_free_inodes_count(struct super_block *sb,
@@ -106,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb,
106{ 111{
107 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 112 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
108 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 113 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
109 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 114 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
110} 115}
111 116
112__u32 ext4_used_dirs_count(struct super_block *sb, 117__u32 ext4_used_dirs_count(struct super_block *sb,
@@ -114,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb,
114{ 119{
115 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 120 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
116 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 121 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
117 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 122 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
118} 123}
119 124
120__u32 ext4_itable_unused_count(struct super_block *sb, 125__u32 ext4_itable_unused_count(struct super_block *sb,
@@ -122,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb,
122{ 127{
123 return le16_to_cpu(bg->bg_itable_unused_lo) | 128 return le16_to_cpu(bg->bg_itable_unused_lo) |
124 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 129 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
125 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 130 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
126} 131}
127 132
128void ext4_block_bitmap_set(struct super_block *sb, 133void ext4_block_bitmap_set(struct super_block *sb,
@@ -202,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
202 journal = EXT4_SB(sb)->s_journal; 207 journal = EXT4_SB(sb)->s_journal;
203 if (journal) { 208 if (journal) {
204 if (is_journal_aborted(journal)) { 209 if (is_journal_aborted(journal)) {
205 ext4_abort(sb, __func__, 210 ext4_abort(sb, __func__, "Detected aborted journal");
206 "Detected aborted journal");
207 return ERR_PTR(-EROFS); 211 return ERR_PTR(-EROFS);
208 } 212 }
209 return jbd2_journal_start(journal, nblocks); 213 return jbd2_journal_start(journal, nblocks);
@@ -302,10 +306,10 @@ static void ext4_handle_error(struct super_block *sb)
302 jbd2_journal_abort(journal, -EIO); 306 jbd2_journal_abort(journal, -EIO);
303 } 307 }
304 if (test_opt(sb, ERRORS_RO)) { 308 if (test_opt(sb, ERRORS_RO)) {
305 printk(KERN_CRIT "Remounting filesystem read-only\n"); 309 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
306 sb->s_flags |= MS_RDONLY; 310 sb->s_flags |= MS_RDONLY;
307 } 311 }
308 ext4_commit_super(sb, es, 1); 312 ext4_commit_super(sb, 1);
309 if (test_opt(sb, ERRORS_PANIC)) 313 if (test_opt(sb, ERRORS_PANIC))
310 panic("EXT4-fs (device %s): panic forced after error\n", 314 panic("EXT4-fs (device %s): panic forced after error\n",
311 sb->s_id); 315 sb->s_id);
@@ -395,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function,
395{ 399{
396 va_list args; 400 va_list args;
397 401
398 printk(KERN_CRIT "ext4_abort called.\n");
399
400 va_start(args, fmt); 402 va_start(args, fmt);
401 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 403 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
402 vprintk(fmt, args); 404 vprintk(fmt, args);
@@ -409,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function,
409 if (sb->s_flags & MS_RDONLY) 411 if (sb->s_flags & MS_RDONLY)
410 return; 412 return;
411 413
412 printk(KERN_CRIT "Remounting filesystem read-only\n"); 414 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
413 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 415 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
414 sb->s_flags |= MS_RDONLY; 416 sb->s_flags |= MS_RDONLY;
415 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 417 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
@@ -417,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function,
417 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 419 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
418} 420}
419 421
422void ext4_msg (struct super_block * sb, const char *prefix,
423 const char *fmt, ...)
424{
425 va_list args;
426
427 va_start(args, fmt);
428 printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
429 vprintk(fmt, args);
430 printk("\n");
431 va_end(args);
432}
433
420void ext4_warning(struct super_block *sb, const char *function, 434void ext4_warning(struct super_block *sb, const char *function,
421 const char *fmt, ...) 435 const char *fmt, ...)
422{ 436{
@@ -431,7 +445,7 @@ void ext4_warning(struct super_block *sb, const char *function,
431} 445}
432 446
433void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 447void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
434 const char *function, const char *fmt, ...) 448 const char *function, const char *fmt, ...)
435__releases(bitlock) 449__releases(bitlock)
436__acquires(bitlock) 450__acquires(bitlock)
437{ 451{
@@ -447,7 +461,7 @@ __acquires(bitlock)
447 if (test_opt(sb, ERRORS_CONT)) { 461 if (test_opt(sb, ERRORS_CONT)) {
448 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 462 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
449 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 463 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
450 ext4_commit_super(sb, es, 0); 464 ext4_commit_super(sb, 0);
451 return; 465 return;
452 } 466 }
453 ext4_unlock_group(sb, grp); 467 ext4_unlock_group(sb, grp);
@@ -467,7 +481,6 @@ __acquires(bitlock)
467 return; 481 return;
468} 482}
469 483
470
471void ext4_update_dynamic_rev(struct super_block *sb) 484void ext4_update_dynamic_rev(struct super_block *sb)
472{ 485{
473 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 486 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -496,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
496/* 509/*
497 * Open the external journal device 510 * Open the external journal device
498 */ 511 */
499static struct block_device *ext4_blkdev_get(dev_t dev) 512static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
500{ 513{
501 struct block_device *bdev; 514 struct block_device *bdev;
502 char b[BDEVNAME_SIZE]; 515 char b[BDEVNAME_SIZE];
@@ -507,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev)
507 return bdev; 520 return bdev;
508 521
509fail: 522fail:
510 printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", 523 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
511 __bdevname(dev, b), PTR_ERR(bdev)); 524 __bdevname(dev, b), PTR_ERR(bdev));
512 return NULL; 525 return NULL;
513} 526}
@@ -543,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
543{ 556{
544 struct list_head *l; 557 struct list_head *l;
545 558
546 printk(KERN_ERR "sb orphan head is %d\n", 559 ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
547 le32_to_cpu(sbi->s_es->s_last_orphan)); 560 le32_to_cpu(sbi->s_es->s_last_orphan));
548 561
549 printk(KERN_ERR "sb_info orphan list:\n"); 562 printk(KERN_ERR "sb_info orphan list:\n");
550 list_for_each(l, &sbi->s_orphan) { 563 list_for_each(l, &sbi->s_orphan) {
@@ -563,6 +576,7 @@ static void ext4_put_super(struct super_block *sb)
563 struct ext4_super_block *es = sbi->s_es; 576 struct ext4_super_block *es = sbi->s_es;
564 int i, err; 577 int i, err;
565 578
579 ext4_release_system_zone(sb);
566 ext4_mb_release(sb); 580 ext4_mb_release(sb);
567 ext4_ext_release(sb); 581 ext4_ext_release(sb);
568 ext4_xattr_put_super(sb); 582 ext4_xattr_put_super(sb);
@@ -576,7 +590,7 @@ static void ext4_put_super(struct super_block *sb)
576 if (!(sb->s_flags & MS_RDONLY)) { 590 if (!(sb->s_flags & MS_RDONLY)) {
577 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 591 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
578 es->s_state = cpu_to_le16(sbi->s_mount_state); 592 es->s_state = cpu_to_le16(sbi->s_mount_state);
579 ext4_commit_super(sb, es, 1); 593 ext4_commit_super(sb, 1);
580 } 594 }
581 if (sbi->s_proc) { 595 if (sbi->s_proc) {
582 remove_proc_entry(sb->s_id, ext4_proc_root); 596 remove_proc_entry(sb->s_id, ext4_proc_root);
@@ -586,7 +600,10 @@ static void ext4_put_super(struct super_block *sb)
586 for (i = 0; i < sbi->s_gdb_count; i++) 600 for (i = 0; i < sbi->s_gdb_count; i++)
587 brelse(sbi->s_group_desc[i]); 601 brelse(sbi->s_group_desc[i]);
588 kfree(sbi->s_group_desc); 602 kfree(sbi->s_group_desc);
589 kfree(sbi->s_flex_groups); 603 if (is_vmalloc_addr(sbi->s_flex_groups))
604 vfree(sbi->s_flex_groups);
605 else
606 kfree(sbi->s_flex_groups);
590 percpu_counter_destroy(&sbi->s_freeblocks_counter); 607 percpu_counter_destroy(&sbi->s_freeblocks_counter);
591 percpu_counter_destroy(&sbi->s_freeinodes_counter); 608 percpu_counter_destroy(&sbi->s_freeinodes_counter);
592 percpu_counter_destroy(&sbi->s_dirs_counter); 609 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -629,7 +646,6 @@ static void ext4_put_super(struct super_block *sb)
629 lock_kernel(); 646 lock_kernel();
630 kfree(sbi->s_blockgroup_lock); 647 kfree(sbi->s_blockgroup_lock);
631 kfree(sbi); 648 kfree(sbi);
632 return;
633} 649}
634 650
635static struct kmem_cache *ext4_inode_cachep; 651static struct kmem_cache *ext4_inode_cachep;
@@ -644,6 +660,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
644 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 660 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
645 if (!ei) 661 if (!ei)
646 return NULL; 662 return NULL;
663
647#ifdef CONFIG_EXT4_FS_POSIX_ACL 664#ifdef CONFIG_EXT4_FS_POSIX_ACL
648 ei->i_acl = EXT4_ACL_NOT_CACHED; 665 ei->i_acl = EXT4_ACL_NOT_CACHED;
649 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 666 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -664,14 +681,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
664 ei->i_allocated_meta_blocks = 0; 681 ei->i_allocated_meta_blocks = 0;
665 ei->i_delalloc_reserved_flag = 0; 682 ei->i_delalloc_reserved_flag = 0;
666 spin_lock_init(&(ei->i_block_reservation_lock)); 683 spin_lock_init(&(ei->i_block_reservation_lock));
684
667 return &ei->vfs_inode; 685 return &ei->vfs_inode;
668} 686}
669 687
670static void ext4_destroy_inode(struct inode *inode) 688static void ext4_destroy_inode(struct inode *inode)
671{ 689{
672 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 690 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
673 printk("EXT4 Inode %p: orphan list check failed!\n", 691 ext4_msg(inode->i_sb, KERN_ERR,
674 EXT4_I(inode)); 692 "Inode %lu (%p): orphan list check failed!",
693 inode->i_ino, EXT4_I(inode));
675 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 694 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
676 EXT4_I(inode), sizeof(struct ext4_inode_info), 695 EXT4_I(inode), sizeof(struct ext4_inode_info),
677 true); 696 true);
@@ -870,12 +889,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
870 seq_puts(seq, ",noauto_da_alloc"); 889 seq_puts(seq, ",noauto_da_alloc");
871 890
872 ext4_show_quota_options(seq, sb); 891 ext4_show_quota_options(seq, sb);
892
873 return 0; 893 return 0;
874} 894}
875 895
876
877static struct inode *ext4_nfs_get_inode(struct super_block *sb, 896static struct inode *ext4_nfs_get_inode(struct super_block *sb,
878 u64 ino, u32 generation) 897 u64 ino, u32 generation)
879{ 898{
880 struct inode *inode; 899 struct inode *inode;
881 900
@@ -904,14 +923,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
904} 923}
905 924
906static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 925static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
907 int fh_len, int fh_type) 926 int fh_len, int fh_type)
908{ 927{
909 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 928 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
910 ext4_nfs_get_inode); 929 ext4_nfs_get_inode);
911} 930}
912 931
913static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 932static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
914 int fh_len, int fh_type) 933 int fh_len, int fh_type)
915{ 934{
916 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 935 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
917 ext4_nfs_get_inode); 936 ext4_nfs_get_inode);
@@ -923,7 +942,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
923 * which would prevent try_to_free_buffers() from freeing them, we must use 942 * which would prevent try_to_free_buffers() from freeing them, we must use
924 * jbd2 layer's try_to_free_buffers() function to release them. 943 * jbd2 layer's try_to_free_buffers() function to release them.
925 */ 944 */
926static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) 945static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
946 gfp_t wait)
927{ 947{
928 journal_t *journal = EXT4_SB(sb)->s_journal; 948 journal_t *journal = EXT4_SB(sb)->s_journal;
929 949
@@ -992,7 +1012,6 @@ static const struct super_operations ext4_sops = {
992 .dirty_inode = ext4_dirty_inode, 1012 .dirty_inode = ext4_dirty_inode,
993 .delete_inode = ext4_delete_inode, 1013 .delete_inode = ext4_delete_inode,
994 .put_super = ext4_put_super, 1014 .put_super = ext4_put_super,
995 .write_super = ext4_write_super,
996 .sync_fs = ext4_sync_fs, 1015 .sync_fs = ext4_sync_fs,
997 .freeze_fs = ext4_freeze, 1016 .freeze_fs = ext4_freeze,
998 .unfreeze_fs = ext4_unfreeze, 1017 .unfreeze_fs = ext4_unfreeze,
@@ -1007,6 +1026,25 @@ static const struct super_operations ext4_sops = {
1007 .bdev_try_to_free_page = bdev_try_to_free_page, 1026 .bdev_try_to_free_page = bdev_try_to_free_page,
1008}; 1027};
1009 1028
1029static const struct super_operations ext4_nojournal_sops = {
1030 .alloc_inode = ext4_alloc_inode,
1031 .destroy_inode = ext4_destroy_inode,
1032 .write_inode = ext4_write_inode,
1033 .dirty_inode = ext4_dirty_inode,
1034 .delete_inode = ext4_delete_inode,
1035 .write_super = ext4_write_super,
1036 .put_super = ext4_put_super,
1037 .statfs = ext4_statfs,
1038 .remount_fs = ext4_remount,
1039 .clear_inode = ext4_clear_inode,
1040 .show_options = ext4_show_options,
1041#ifdef CONFIG_QUOTA
1042 .quota_read = ext4_quota_read,
1043 .quota_write = ext4_quota_write,
1044#endif
1045 .bdev_try_to_free_page = bdev_try_to_free_page,
1046};
1047
1010static const struct export_operations ext4_export_ops = { 1048static const struct export_operations ext4_export_ops = {
1011 .fh_to_dentry = ext4_fh_to_dentry, 1049 .fh_to_dentry = ext4_fh_to_dentry,
1012 .fh_to_parent = ext4_fh_to_parent, 1050 .fh_to_parent = ext4_fh_to_parent,
@@ -1023,12 +1061,13 @@ enum {
1023 Opt_journal_update, Opt_journal_dev, 1061 Opt_journal_update, Opt_journal_dev,
1024 Opt_journal_checksum, Opt_journal_async_commit, 1062 Opt_journal_checksum, Opt_journal_async_commit,
1025 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1063 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1026 Opt_data_err_abort, Opt_data_err_ignore, 1064 Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length,
1027 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1065 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1028 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1066 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
1029 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1067 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
1030 Opt_usrquota, Opt_grpquota, Opt_i_version, 1068 Opt_usrquota, Opt_grpquota, Opt_i_version,
1031 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1069 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1070 Opt_block_validity, Opt_noblock_validity,
1032 Opt_inode_readahead_blks, Opt_journal_ioprio 1071 Opt_inode_readahead_blks, Opt_journal_ioprio
1033}; 1072};
1034 1073
@@ -1069,6 +1108,7 @@ static const match_table_t tokens = {
1069 {Opt_data_writeback, "data=writeback"}, 1108 {Opt_data_writeback, "data=writeback"},
1070 {Opt_data_err_abort, "data_err=abort"}, 1109 {Opt_data_err_abort, "data_err=abort"},
1071 {Opt_data_err_ignore, "data_err=ignore"}, 1110 {Opt_data_err_ignore, "data_err=ignore"},
1111 {Opt_mb_history_length, "mb_history_length=%u"},
1072 {Opt_offusrjquota, "usrjquota="}, 1112 {Opt_offusrjquota, "usrjquota="},
1073 {Opt_usrjquota, "usrjquota=%s"}, 1113 {Opt_usrjquota, "usrjquota=%s"},
1074 {Opt_offgrpjquota, "grpjquota="}, 1114 {Opt_offgrpjquota, "grpjquota="},
@@ -1087,6 +1127,8 @@ static const match_table_t tokens = {
1087 {Opt_resize, "resize"}, 1127 {Opt_resize, "resize"},
1088 {Opt_delalloc, "delalloc"}, 1128 {Opt_delalloc, "delalloc"},
1089 {Opt_nodelalloc, "nodelalloc"}, 1129 {Opt_nodelalloc, "nodelalloc"},
1130 {Opt_block_validity, "block_validity"},
1131 {Opt_noblock_validity, "noblock_validity"},
1090 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1132 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1091 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1133 {Opt_journal_ioprio, "journal_ioprio=%u"},
1092 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1134 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
@@ -1102,8 +1144,9 @@ static ext4_fsblk_t get_sb_block(void **data)
1102 1144
1103 if (!options || strncmp(options, "sb=", 3) != 0) 1145 if (!options || strncmp(options, "sb=", 3) != 0)
1104 return 1; /* Default location */ 1146 return 1; /* Default location */
1147
1105 options += 3; 1148 options += 3;
1106 /*todo: use simple_strtoll with >32bit ext4 */ 1149 /* TODO: use simple_strtoll with >32bit ext4 */
1107 sb_block = simple_strtoul(options, &options, 0); 1150 sb_block = simple_strtoul(options, &options, 0);
1108 if (*options && *options != ',') { 1151 if (*options && *options != ',') {
1109 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1152 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
@@ -1113,6 +1156,7 @@ static ext4_fsblk_t get_sb_block(void **data)
1113 if (*options == ',') 1156 if (*options == ',')
1114 options++; 1157 options++;
1115 *data = (void *) options; 1158 *data = (void *) options;
1159
1116 return sb_block; 1160 return sb_block;
1117} 1161}
1118 1162
@@ -1206,8 +1250,7 @@ static int parse_options(char *options, struct super_block *sb,
1206#else 1250#else
1207 case Opt_user_xattr: 1251 case Opt_user_xattr:
1208 case Opt_nouser_xattr: 1252 case Opt_nouser_xattr:
1209 printk(KERN_ERR "EXT4 (no)user_xattr options " 1253 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
1210 "not supported\n");
1211 break; 1254 break;
1212#endif 1255#endif
1213#ifdef CONFIG_EXT4_FS_POSIX_ACL 1256#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1220,8 +1263,7 @@ static int parse_options(char *options, struct super_block *sb,
1220#else 1263#else
1221 case Opt_acl: 1264 case Opt_acl:
1222 case Opt_noacl: 1265 case Opt_noacl:
1223 printk(KERN_ERR "EXT4 (no)acl options " 1266 ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
1224 "not supported\n");
1225 break; 1267 break;
1226#endif 1268#endif
1227 case Opt_journal_update: 1269 case Opt_journal_update:
@@ -1231,16 +1273,16 @@ static int parse_options(char *options, struct super_block *sb,
1231 user to specify an existing inode to be the 1273 user to specify an existing inode to be the
1232 journal file. */ 1274 journal file. */
1233 if (is_remount) { 1275 if (is_remount) {
1234 printk(KERN_ERR "EXT4-fs: cannot specify " 1276 ext4_msg(sb, KERN_ERR,
1235 "journal on remount\n"); 1277 "Cannot specify journal on remount");
1236 return 0; 1278 return 0;
1237 } 1279 }
1238 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1280 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1239 break; 1281 break;
1240 case Opt_journal_dev: 1282 case Opt_journal_dev:
1241 if (is_remount) { 1283 if (is_remount) {
1242 printk(KERN_ERR "EXT4-fs: cannot specify " 1284 ext4_msg(sb, KERN_ERR,
1243 "journal on remount\n"); 1285 "Cannot specify journal on remount");
1244 return 0; 1286 return 0;
1245 } 1287 }
1246 if (match_int(&args[0], &option)) 1288 if (match_int(&args[0], &option))
@@ -1294,9 +1336,8 @@ static int parse_options(char *options, struct super_block *sb,
1294 if (is_remount) { 1336 if (is_remount) {
1295 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1337 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1296 != data_opt) { 1338 != data_opt) {
1297 printk(KERN_ERR 1339 ext4_msg(sb, KERN_ERR,
1298 "EXT4-fs: cannot change data " 1340 "Cannot change data mode on remount");
1299 "mode on remount\n");
1300 return 0; 1341 return 0;
1301 } 1342 }
1302 } else { 1343 } else {
@@ -1310,6 +1351,13 @@ static int parse_options(char *options, struct super_block *sb,
1310 case Opt_data_err_ignore: 1351 case Opt_data_err_ignore:
1311 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1352 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1312 break; 1353 break;
1354 case Opt_mb_history_length:
1355 if (match_int(&args[0], &option))
1356 return 0;
1357 if (option < 0)
1358 return 0;
1359 sbi->s_mb_history_max = option;
1360 break;
1313#ifdef CONFIG_QUOTA 1361#ifdef CONFIG_QUOTA
1314 case Opt_usrjquota: 1362 case Opt_usrjquota:
1315 qtype = USRQUOTA; 1363 qtype = USRQUOTA;
@@ -1319,31 +1367,31 @@ static int parse_options(char *options, struct super_block *sb,
1319set_qf_name: 1367set_qf_name:
1320 if (sb_any_quota_loaded(sb) && 1368 if (sb_any_quota_loaded(sb) &&
1321 !sbi->s_qf_names[qtype]) { 1369 !sbi->s_qf_names[qtype]) {
1322 printk(KERN_ERR 1370 ext4_msg(sb, KERN_ERR,
1323 "EXT4-fs: Cannot change journaled " 1371 "Cannot change journaled "
1324 "quota options when quota turned on.\n"); 1372 "quota options when quota turned on");
1325 return 0; 1373 return 0;
1326 } 1374 }
1327 qname = match_strdup(&args[0]); 1375 qname = match_strdup(&args[0]);
1328 if (!qname) { 1376 if (!qname) {
1329 printk(KERN_ERR 1377 ext4_msg(sb, KERN_ERR,
1330 "EXT4-fs: not enough memory for " 1378 "Not enough memory for "
1331 "storing quotafile name.\n"); 1379 "storing quotafile name");
1332 return 0; 1380 return 0;
1333 } 1381 }
1334 if (sbi->s_qf_names[qtype] && 1382 if (sbi->s_qf_names[qtype] &&
1335 strcmp(sbi->s_qf_names[qtype], qname)) { 1383 strcmp(sbi->s_qf_names[qtype], qname)) {
1336 printk(KERN_ERR 1384 ext4_msg(sb, KERN_ERR,
1337 "EXT4-fs: %s quota file already " 1385 "%s quota file already "
1338 "specified.\n", QTYPE2NAME(qtype)); 1386 "specified", QTYPE2NAME(qtype));
1339 kfree(qname); 1387 kfree(qname);
1340 return 0; 1388 return 0;
1341 } 1389 }
1342 sbi->s_qf_names[qtype] = qname; 1390 sbi->s_qf_names[qtype] = qname;
1343 if (strchr(sbi->s_qf_names[qtype], '/')) { 1391 if (strchr(sbi->s_qf_names[qtype], '/')) {
1344 printk(KERN_ERR 1392 ext4_msg(sb, KERN_ERR,
1345 "EXT4-fs: quotafile must be on " 1393 "quotafile must be on "
1346 "filesystem root.\n"); 1394 "filesystem root");
1347 kfree(sbi->s_qf_names[qtype]); 1395 kfree(sbi->s_qf_names[qtype]);
1348 sbi->s_qf_names[qtype] = NULL; 1396 sbi->s_qf_names[qtype] = NULL;
1349 return 0; 1397 return 0;
@@ -1358,9 +1406,9 @@ set_qf_name:
1358clear_qf_name: 1406clear_qf_name:
1359 if (sb_any_quota_loaded(sb) && 1407 if (sb_any_quota_loaded(sb) &&
1360 sbi->s_qf_names[qtype]) { 1408 sbi->s_qf_names[qtype]) {
1361 printk(KERN_ERR "EXT4-fs: Cannot change " 1409 ext4_msg(sb, KERN_ERR, "Cannot change "
1362 "journaled quota options when " 1410 "journaled quota options when "
1363 "quota turned on.\n"); 1411 "quota turned on");
1364 return 0; 1412 return 0;
1365 } 1413 }
1366 /* 1414 /*
@@ -1377,9 +1425,9 @@ clear_qf_name:
1377set_qf_format: 1425set_qf_format:
1378 if (sb_any_quota_loaded(sb) && 1426 if (sb_any_quota_loaded(sb) &&
1379 sbi->s_jquota_fmt != qfmt) { 1427 sbi->s_jquota_fmt != qfmt) {
1380 printk(KERN_ERR "EXT4-fs: Cannot change " 1428 ext4_msg(sb, KERN_ERR, "Cannot change "
1381 "journaled quota options when " 1429 "journaled quota options when "
1382 "quota turned on.\n"); 1430 "quota turned on");
1383 return 0; 1431 return 0;
1384 } 1432 }
1385 sbi->s_jquota_fmt = qfmt; 1433 sbi->s_jquota_fmt = qfmt;
@@ -1395,8 +1443,8 @@ set_qf_format:
1395 break; 1443 break;
1396 case Opt_noquota: 1444 case Opt_noquota:
1397 if (sb_any_quota_loaded(sb)) { 1445 if (sb_any_quota_loaded(sb)) {
1398 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1446 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1399 "options when quota turned on.\n"); 1447 "options when quota turned on");
1400 return 0; 1448 return 0;
1401 } 1449 }
1402 clear_opt(sbi->s_mount_opt, QUOTA); 1450 clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1407,8 +1455,8 @@ set_qf_format:
1407 case Opt_quota: 1455 case Opt_quota:
1408 case Opt_usrquota: 1456 case Opt_usrquota:
1409 case Opt_grpquota: 1457 case Opt_grpquota:
1410 printk(KERN_ERR 1458 ext4_msg(sb, KERN_ERR,
1411 "EXT4-fs: quota options not supported.\n"); 1459 "quota options not supported");
1412 break; 1460 break;
1413 case Opt_usrjquota: 1461 case Opt_usrjquota:
1414 case Opt_grpjquota: 1462 case Opt_grpjquota:
@@ -1416,9 +1464,8 @@ set_qf_format:
1416 case Opt_offgrpjquota: 1464 case Opt_offgrpjquota:
1417 case Opt_jqfmt_vfsold: 1465 case Opt_jqfmt_vfsold:
1418 case Opt_jqfmt_vfsv0: 1466 case Opt_jqfmt_vfsv0:
1419 printk(KERN_ERR 1467 ext4_msg(sb, KERN_ERR,
1420 "EXT4-fs: journaled quota options not " 1468 "journaled quota options not supported");
1421 "supported.\n");
1422 break; 1469 break;
1423 case Opt_noquota: 1470 case Opt_noquota:
1424 break; 1471 break;
@@ -1443,8 +1490,9 @@ set_qf_format:
1443 break; 1490 break;
1444 case Opt_resize: 1491 case Opt_resize:
1445 if (!is_remount) { 1492 if (!is_remount) {
1446 printk("EXT4-fs: resize option only available " 1493 ext4_msg(sb, KERN_ERR,
1447 "for remount\n"); 1494 "resize option only available "
1495 "for remount");
1448 return 0; 1496 return 0;
1449 } 1497 }
1450 if (match_int(&args[0], &option) != 0) 1498 if (match_int(&args[0], &option) != 0)
@@ -1474,14 +1522,21 @@ set_qf_format:
1474 case Opt_delalloc: 1522 case Opt_delalloc:
1475 set_opt(sbi->s_mount_opt, DELALLOC); 1523 set_opt(sbi->s_mount_opt, DELALLOC);
1476 break; 1524 break;
1525 case Opt_block_validity:
1526 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1527 break;
1528 case Opt_noblock_validity:
1529 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1530 break;
1477 case Opt_inode_readahead_blks: 1531 case Opt_inode_readahead_blks:
1478 if (match_int(&args[0], &option)) 1532 if (match_int(&args[0], &option))
1479 return 0; 1533 return 0;
1480 if (option < 0 || option > (1 << 30)) 1534 if (option < 0 || option > (1 << 30))
1481 return 0; 1535 return 0;
1482 if (option & (option - 1)) { 1536 if (!is_power_of_2(option)) {
1483 printk(KERN_ERR "EXT4-fs: inode_readahead_blks" 1537 ext4_msg(sb, KERN_ERR,
1484 " must be a power of 2\n"); 1538 "EXT4-fs: inode_readahead_blks"
1539 " must be a power of 2");
1485 return 0; 1540 return 0;
1486 } 1541 }
1487 sbi->s_inode_readahead_blks = option; 1542 sbi->s_inode_readahead_blks = option;
@@ -1508,9 +1563,9 @@ set_qf_format:
1508 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1563 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1509 break; 1564 break;
1510 default: 1565 default:
1511 printk(KERN_ERR 1566 ext4_msg(sb, KERN_ERR,
1512 "EXT4-fs: Unrecognized mount option \"%s\" " 1567 "Unrecognized mount option \"%s\" "
1513 "or missing value\n", p); 1568 "or missing value", p);
1514 return 0; 1569 return 0;
1515 } 1570 }
1516 } 1571 }
@@ -1528,21 +1583,21 @@ set_qf_format:
1528 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1583 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1529 (sbi->s_qf_names[GRPQUOTA] && 1584 (sbi->s_qf_names[GRPQUOTA] &&
1530 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1585 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1531 printk(KERN_ERR "EXT4-fs: old and new quota " 1586 ext4_msg(sb, KERN_ERR, "old and new quota "
1532 "format mixing.\n"); 1587 "format mixing");
1533 return 0; 1588 return 0;
1534 } 1589 }
1535 1590
1536 if (!sbi->s_jquota_fmt) { 1591 if (!sbi->s_jquota_fmt) {
1537 printk(KERN_ERR "EXT4-fs: journaled quota format " 1592 ext4_msg(sb, KERN_ERR, "journaled quota format "
1538 "not specified.\n"); 1593 "not specified");
1539 return 0; 1594 return 0;
1540 } 1595 }
1541 } else { 1596 } else {
1542 if (sbi->s_jquota_fmt) { 1597 if (sbi->s_jquota_fmt) {
1543 printk(KERN_ERR "EXT4-fs: journaled quota format " 1598 ext4_msg(sb, KERN_ERR, "journaled quota format "
1544 "specified with no journaling " 1599 "specified with no journaling "
1545 "enabled.\n"); 1600 "enabled");
1546 return 0; 1601 return 0;
1547 } 1602 }
1548 } 1603 }
@@ -1557,32 +1612,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1557 int res = 0; 1612 int res = 0;
1558 1613
1559 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1614 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1560 printk(KERN_ERR "EXT4-fs warning: revision level too high, " 1615 ext4_msg(sb, KERN_ERR, "revision level too high, "
1561 "forcing read-only mode\n"); 1616 "forcing read-only mode");
1562 res = MS_RDONLY; 1617 res = MS_RDONLY;
1563 } 1618 }
1564 if (read_only) 1619 if (read_only)
1565 return res; 1620 return res;
1566 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1621 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1567 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1622 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1568 "running e2fsck is recommended\n"); 1623 "running e2fsck is recommended");
1569 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1624 else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1570 printk(KERN_WARNING 1625 ext4_msg(sb, KERN_WARNING,
1571 "EXT4-fs warning: mounting fs with errors, " 1626 "warning: mounting fs with errors, "
1572 "running e2fsck is recommended\n"); 1627 "running e2fsck is recommended");
1573 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1628 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1574 le16_to_cpu(es->s_mnt_count) >= 1629 le16_to_cpu(es->s_mnt_count) >=
1575 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1630 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1576 printk(KERN_WARNING 1631 ext4_msg(sb, KERN_WARNING,
1577 "EXT4-fs warning: maximal mount count reached, " 1632 "warning: maximal mount count reached, "
1578 "running e2fsck is recommended\n"); 1633 "running e2fsck is recommended");
1579 else if (le32_to_cpu(es->s_checkinterval) && 1634 else if (le32_to_cpu(es->s_checkinterval) &&
1580 (le32_to_cpu(es->s_lastcheck) + 1635 (le32_to_cpu(es->s_lastcheck) +
1581 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1636 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1582 printk(KERN_WARNING 1637 ext4_msg(sb, KERN_WARNING,
1583 "EXT4-fs warning: checktime reached, " 1638 "warning: checktime reached, "
1584 "running e2fsck is recommended\n"); 1639 "running e2fsck is recommended");
1585 if (!sbi->s_journal) 1640 if (!sbi->s_journal)
1586 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1641 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1587 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1642 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1588 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1643 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
@@ -1592,7 +1647,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1592 if (sbi->s_journal) 1647 if (sbi->s_journal)
1593 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1648 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1594 1649
1595 ext4_commit_super(sb, es, 1); 1650 ext4_commit_super(sb, 1);
1596 if (test_opt(sb, DEBUG)) 1651 if (test_opt(sb, DEBUG))
1597 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1652 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1598 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1653 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
@@ -1603,11 +1658,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1603 sbi->s_mount_opt); 1658 sbi->s_mount_opt);
1604 1659
1605 if (EXT4_SB(sb)->s_journal) { 1660 if (EXT4_SB(sb)->s_journal) {
1606 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", 1661 ext4_msg(sb, KERN_INFO, "%s journal on %s",
1607 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : 1662 EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1608 "external", EXT4_SB(sb)->s_journal->j_devname); 1663 "external", EXT4_SB(sb)->s_journal->j_devname);
1609 } else { 1664 } else {
1610 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); 1665 ext4_msg(sb, KERN_INFO, "no journal");
1611 } 1666 }
1612 return res; 1667 return res;
1613} 1668}
@@ -1616,10 +1671,10 @@ static int ext4_fill_flex_info(struct super_block *sb)
1616{ 1671{
1617 struct ext4_sb_info *sbi = EXT4_SB(sb); 1672 struct ext4_sb_info *sbi = EXT4_SB(sb);
1618 struct ext4_group_desc *gdp = NULL; 1673 struct ext4_group_desc *gdp = NULL;
1619 struct buffer_head *bh;
1620 ext4_group_t flex_group_count; 1674 ext4_group_t flex_group_count;
1621 ext4_group_t flex_group; 1675 ext4_group_t flex_group;
1622 int groups_per_flex = 0; 1676 int groups_per_flex = 0;
1677 size_t size;
1623 int i; 1678 int i;
1624 1679
1625 if (!sbi->s_es->s_log_groups_per_flex) { 1680 if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1634,16 +1689,21 @@ static int ext4_fill_flex_info(struct super_block *sb)
1634 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1689 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1635 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1690 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1636 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1691 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1637 sbi->s_flex_groups = kzalloc(flex_group_count * 1692 size = flex_group_count * sizeof(struct flex_groups);
1638 sizeof(struct flex_groups), GFP_KERNEL); 1693 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1694 if (sbi->s_flex_groups == NULL) {
1695 sbi->s_flex_groups = vmalloc(size);
1696 if (sbi->s_flex_groups)
1697 memset(sbi->s_flex_groups, 0, size);
1698 }
1639 if (sbi->s_flex_groups == NULL) { 1699 if (sbi->s_flex_groups == NULL) {
1640 printk(KERN_ERR "EXT4-fs: not enough memory for " 1700 ext4_msg(sb, KERN_ERR, "not enough memory for "
1641 "%u flex groups\n", flex_group_count); 1701 "%u flex groups", flex_group_count);
1642 goto failed; 1702 goto failed;
1643 } 1703 }
1644 1704
1645 for (i = 0; i < sbi->s_groups_count; i++) { 1705 for (i = 0; i < sbi->s_groups_count; i++) {
1646 gdp = ext4_get_group_desc(sb, i, &bh); 1706 gdp = ext4_get_group_desc(sb, i, NULL);
1647 1707
1648 flex_group = ext4_flex_group(sbi, i); 1708 flex_group = ext4_flex_group(sbi, i);
1649 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, 1709 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
@@ -1724,44 +1784,44 @@ static int ext4_check_descriptors(struct super_block *sb)
1724 1784
1725 block_bitmap = ext4_block_bitmap(sb, gdp); 1785 block_bitmap = ext4_block_bitmap(sb, gdp);
1726 if (block_bitmap < first_block || block_bitmap > last_block) { 1786 if (block_bitmap < first_block || block_bitmap > last_block) {
1727 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1787 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1728 "Block bitmap for group %u not in group " 1788 "Block bitmap for group %u not in group "
1729 "(block %llu)!\n", i, block_bitmap); 1789 "(block %llu)!", i, block_bitmap);
1730 return 0; 1790 return 0;
1731 } 1791 }
1732 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1792 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1733 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1793 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1734 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1794 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1735 "Inode bitmap for group %u not in group " 1795 "Inode bitmap for group %u not in group "
1736 "(block %llu)!\n", i, inode_bitmap); 1796 "(block %llu)!", i, inode_bitmap);
1737 return 0; 1797 return 0;
1738 } 1798 }
1739 inode_table = ext4_inode_table(sb, gdp); 1799 inode_table = ext4_inode_table(sb, gdp);
1740 if (inode_table < first_block || 1800 if (inode_table < first_block ||
1741 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1801 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1742 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1802 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1743 "Inode table for group %u not in group " 1803 "Inode table for group %u not in group "
1744 "(block %llu)!\n", i, inode_table); 1804 "(block %llu)!", i, inode_table);
1745 return 0; 1805 return 0;
1746 } 1806 }
1747 spin_lock(sb_bgl_lock(sbi, i)); 1807 ext4_lock_group(sb, i);
1748 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1808 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1749 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1809 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1750 "Checksum for group %u failed (%u!=%u)\n", 1810 "Checksum for group %u failed (%u!=%u)",
1751 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1811 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1752 gdp)), le16_to_cpu(gdp->bg_checksum)); 1812 gdp)), le16_to_cpu(gdp->bg_checksum));
1753 if (!(sb->s_flags & MS_RDONLY)) { 1813 if (!(sb->s_flags & MS_RDONLY)) {
1754 spin_unlock(sb_bgl_lock(sbi, i)); 1814 ext4_unlock_group(sb, i);
1755 return 0; 1815 return 0;
1756 } 1816 }
1757 } 1817 }
1758 spin_unlock(sb_bgl_lock(sbi, i)); 1818 ext4_unlock_group(sb, i);
1759 if (!flexbg_flag) 1819 if (!flexbg_flag)
1760 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1820 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1761 } 1821 }
1762 1822
1763 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1823 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1764 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 1824 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
1765 return 1; 1825 return 1;
1766} 1826}
1767 1827
@@ -1796,8 +1856,8 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1796 } 1856 }
1797 1857
1798 if (bdev_read_only(sb->s_bdev)) { 1858 if (bdev_read_only(sb->s_bdev)) {
1799 printk(KERN_ERR "EXT4-fs: write access " 1859 ext4_msg(sb, KERN_ERR, "write access "
1800 "unavailable, skipping orphan cleanup.\n"); 1860 "unavailable, skipping orphan cleanup");
1801 return; 1861 return;
1802 } 1862 }
1803 1863
@@ -1811,8 +1871,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1811 } 1871 }
1812 1872
1813 if (s_flags & MS_RDONLY) { 1873 if (s_flags & MS_RDONLY) {
1814 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", 1874 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1815 sb->s_id);
1816 sb->s_flags &= ~MS_RDONLY; 1875 sb->s_flags &= ~MS_RDONLY;
1817 } 1876 }
1818#ifdef CONFIG_QUOTA 1877#ifdef CONFIG_QUOTA
@@ -1823,9 +1882,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1823 if (EXT4_SB(sb)->s_qf_names[i]) { 1882 if (EXT4_SB(sb)->s_qf_names[i]) {
1824 int ret = ext4_quota_on_mount(sb, i); 1883 int ret = ext4_quota_on_mount(sb, i);
1825 if (ret < 0) 1884 if (ret < 0)
1826 printk(KERN_ERR 1885 ext4_msg(sb, KERN_ERR,
1827 "EXT4-fs: Cannot turn on journaled " 1886 "Cannot turn on journaled "
1828 "quota: error %d\n", ret); 1887 "quota: error %d", ret);
1829 } 1888 }
1830 } 1889 }
1831#endif 1890#endif
@@ -1842,16 +1901,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1842 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1901 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1843 vfs_dq_init(inode); 1902 vfs_dq_init(inode);
1844 if (inode->i_nlink) { 1903 if (inode->i_nlink) {
1845 printk(KERN_DEBUG 1904 ext4_msg(sb, KERN_DEBUG,
1846 "%s: truncating inode %lu to %lld bytes\n", 1905 "%s: truncating inode %lu to %lld bytes",
1847 __func__, inode->i_ino, inode->i_size); 1906 __func__, inode->i_ino, inode->i_size);
1848 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 1907 jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1849 inode->i_ino, inode->i_size); 1908 inode->i_ino, inode->i_size);
1850 ext4_truncate(inode); 1909 ext4_truncate(inode);
1851 nr_truncates++; 1910 nr_truncates++;
1852 } else { 1911 } else {
1853 printk(KERN_DEBUG 1912 ext4_msg(sb, KERN_DEBUG,
1854 "%s: deleting unreferenced inode %lu\n", 1913 "%s: deleting unreferenced inode %lu",
1855 __func__, inode->i_ino); 1914 __func__, inode->i_ino);
1856 jbd_debug(2, "deleting unreferenced inode %lu\n", 1915 jbd_debug(2, "deleting unreferenced inode %lu\n",
1857 inode->i_ino); 1916 inode->i_ino);
@@ -1863,11 +1922,11 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1863#define PLURAL(x) (x), ((x) == 1) ? "" : "s" 1922#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1864 1923
1865 if (nr_orphans) 1924 if (nr_orphans)
1866 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1925 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1867 sb->s_id, PLURAL(nr_orphans)); 1926 PLURAL(nr_orphans));
1868 if (nr_truncates) 1927 if (nr_truncates)
1869 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", 1928 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1870 sb->s_id, PLURAL(nr_truncates)); 1929 PLURAL(nr_truncates));
1871#ifdef CONFIG_QUOTA 1930#ifdef CONFIG_QUOTA
1872 /* Turn quotas off */ 1931 /* Turn quotas off */
1873 for (i = 0; i < MAXQUOTAS; i++) { 1932 for (i = 0; i < MAXQUOTAS; i++) {
@@ -1877,6 +1936,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1877#endif 1936#endif
1878 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1937 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1879} 1938}
1939
1880/* 1940/*
1881 * Maximal extent format file size. 1941 * Maximal extent format file size.
1882 * Resulting logical blkno at s_maxbytes must fit in our on-disk 1942 * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -1927,19 +1987,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1927 loff_t res = EXT4_NDIR_BLOCKS; 1987 loff_t res = EXT4_NDIR_BLOCKS;
1928 int meta_blocks; 1988 int meta_blocks;
1929 loff_t upper_limit; 1989 loff_t upper_limit;
1930 /* This is calculated to be the largest file size for a 1990 /* This is calculated to be the largest file size for a dense, block
1931 * dense, bitmapped file such that the total number of 1991 * mapped file such that the file's total number of 512-byte sectors,
1932 * sectors in the file, including data and all indirect blocks, 1992 * including data and all indirect blocks, does not exceed (2^48 - 1).
1933 * does not exceed 2^48 -1 1993 *
1934 * __u32 i_blocks_lo and _u16 i_blocks_high representing the 1994 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
1935 * total number of 512 bytes blocks of the file 1995 * number of 512-byte sectors of the file.
1936 */ 1996 */
1937 1997
1938 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1998 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1939 /* 1999 /*
1940 * !has_huge_files or CONFIG_LBD is not enabled 2000 * !has_huge_files or CONFIG_LBD not enabled implies that
1941 * implies the inode i_block represent total blocks in 2001 * the inode i_block field represents total file blocks in
1942 * 512 bytes 32 == size of vfs inode i_blocks * 8 2002 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
1943 */ 2003 */
1944 upper_limit = (1LL << 32) - 1; 2004 upper_limit = (1LL << 32) - 1;
1945 2005
@@ -1981,7 +2041,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1981} 2041}
1982 2042
1983static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2043static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1984 ext4_fsblk_t logical_sb_block, int nr) 2044 ext4_fsblk_t logical_sb_block, int nr)
1985{ 2045{
1986 struct ext4_sb_info *sbi = EXT4_SB(sb); 2046 struct ext4_sb_info *sbi = EXT4_SB(sb);
1987 ext4_group_t bg, first_meta_bg; 2047 ext4_group_t bg, first_meta_bg;
@@ -1995,6 +2055,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1995 bg = sbi->s_desc_per_block * nr; 2055 bg = sbi->s_desc_per_block * nr;
1996 if (ext4_bg_has_super(sb, bg)) 2056 if (ext4_bg_has_super(sb, bg))
1997 has_super = 1; 2057 has_super = 1;
2058
1998 return (has_super + ext4_group_first_block_no(sb, bg)); 2059 return (has_super + ext4_group_first_block_no(sb, bg));
1999} 2060}
2000 2061
@@ -2091,8 +2152,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2091 if (parse_strtoul(buf, 0x40000000, &t)) 2152 if (parse_strtoul(buf, 0x40000000, &t))
2092 return -EINVAL; 2153 return -EINVAL;
2093 2154
2094 /* inode_readahead_blks must be a power of 2 */ 2155 if (!is_power_of_2(t))
2095 if (t & (t-1))
2096 return -EINVAL; 2156 return -EINVAL;
2097 2157
2098 sbi->s_inode_readahead_blks = t; 2158 sbi->s_inode_readahead_blks = t;
@@ -2100,7 +2160,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2100} 2160}
2101 2161
2102static ssize_t sbi_ui_show(struct ext4_attr *a, 2162static ssize_t sbi_ui_show(struct ext4_attr *a,
2103 struct ext4_sb_info *sbi, char *buf) 2163 struct ext4_sb_info *sbi, char *buf)
2104{ 2164{
2105 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2165 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2106 2166
@@ -2205,7 +2265,6 @@ static struct kobj_type ext4_ktype = {
2205static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2265static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2206 __releases(kernel_lock) 2266 __releases(kernel_lock)
2207 __acquires(kernel_lock) 2267 __acquires(kernel_lock)
2208
2209{ 2268{
2210 struct buffer_head *bh; 2269 struct buffer_head *bh;
2211 struct ext4_super_block *es = NULL; 2270 struct ext4_super_block *es = NULL;
@@ -2256,7 +2315,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2256 2315
2257 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2316 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2258 if (!blocksize) { 2317 if (!blocksize) {
2259 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 2318 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
2260 goto out_fail; 2319 goto out_fail;
2261 } 2320 }
2262 2321
@@ -2272,7 +2331,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2272 } 2331 }
2273 2332
2274 if (!(bh = sb_bread(sb, logical_sb_block))) { 2333 if (!(bh = sb_bread(sb, logical_sb_block))) {
2275 printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); 2334 ext4_msg(sb, KERN_ERR, "unable to read superblock");
2276 goto out_fail; 2335 goto out_fail;
2277 } 2336 }
2278 /* 2337 /*
@@ -2321,6 +2380,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2321 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2380 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2322 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2381 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2323 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2382 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2383 sbi->s_mb_history_max = default_mb_history_length;
2324 2384
2325 set_opt(sbi->s_mount_opt, BARRIER); 2385 set_opt(sbi->s_mount_opt, BARRIER);
2326 2386
@@ -2330,7 +2390,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2330 */ 2390 */
2331 set_opt(sbi->s_mount_opt, DELALLOC); 2391 set_opt(sbi->s_mount_opt, DELALLOC);
2332 2392
2333
2334 if (!parse_options((char *) data, sb, &journal_devnum, 2393 if (!parse_options((char *) data, sb, &journal_devnum,
2335 &journal_ioprio, NULL, 0)) 2394 &journal_ioprio, NULL, 0))
2336 goto failed_mount; 2395 goto failed_mount;
@@ -2342,9 +2401,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2342 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2401 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2343 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2402 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2344 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2403 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2345 printk(KERN_WARNING 2404 ext4_msg(sb, KERN_WARNING,
2346 "EXT4-fs warning: feature flags set on rev 0 fs, " 2405 "feature flags set on rev 0 fs, "
2347 "running e2fsck is recommended\n"); 2406 "running e2fsck is recommended");
2348 2407
2349 /* 2408 /*
2350 * Check feature flags regardless of the revision level, since we 2409 * Check feature flags regardless of the revision level, since we
@@ -2353,16 +2412,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2353 */ 2412 */
2354 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 2413 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2355 if (features) { 2414 if (features) {
2356 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 2415 ext4_msg(sb, KERN_ERR,
2357 "unsupported optional features (%x).\n", sb->s_id, 2416 "Couldn't mount because of "
2417 "unsupported optional features (%x)",
2358 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2418 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2359 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2419 ~EXT4_FEATURE_INCOMPAT_SUPP));
2360 goto failed_mount; 2420 goto failed_mount;
2361 } 2421 }
2362 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 2422 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2363 if (!(sb->s_flags & MS_RDONLY) && features) { 2423 if (!(sb->s_flags & MS_RDONLY) && features) {
2364 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 2424 ext4_msg(sb, KERN_ERR,
2365 "unsupported optional features (%x).\n", sb->s_id, 2425 "Couldn't mount RDWR because of "
2426 "unsupported optional features (%x)",
2366 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2427 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2367 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2428 ~EXT4_FEATURE_RO_COMPAT_SUPP));
2368 goto failed_mount; 2429 goto failed_mount;
@@ -2376,9 +2437,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2376 */ 2437 */
2377 if (sizeof(root->i_blocks) < sizeof(u64) && 2438 if (sizeof(root->i_blocks) < sizeof(u64) &&
2378 !(sb->s_flags & MS_RDONLY)) { 2439 !(sb->s_flags & MS_RDONLY)) {
2379 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2440 ext4_msg(sb, KERN_ERR, "Filesystem with huge "
2380 "files cannot be mounted read-write " 2441 "files cannot be mounted read-write "
2381 "without CONFIG_LBD.\n", sb->s_id); 2442 "without CONFIG_LBD");
2382 goto failed_mount; 2443 goto failed_mount;
2383 } 2444 }
2384 } 2445 }
@@ -2386,17 +2447,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2386 2447
2387 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2448 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2388 blocksize > EXT4_MAX_BLOCK_SIZE) { 2449 blocksize > EXT4_MAX_BLOCK_SIZE) {
2389 printk(KERN_ERR 2450 ext4_msg(sb, KERN_ERR,
2390 "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", 2451 "Unsupported filesystem blocksize %d", blocksize);
2391 blocksize, sb->s_id);
2392 goto failed_mount; 2452 goto failed_mount;
2393 } 2453 }
2394 2454
2395 if (sb->s_blocksize != blocksize) { 2455 if (sb->s_blocksize != blocksize) {
2396
2397 /* Validate the filesystem blocksize */ 2456 /* Validate the filesystem blocksize */
2398 if (!sb_set_blocksize(sb, blocksize)) { 2457 if (!sb_set_blocksize(sb, blocksize)) {
2399 printk(KERN_ERR "EXT4-fs: bad block size %d.\n", 2458 ext4_msg(sb, KERN_ERR, "bad block size %d",
2400 blocksize); 2459 blocksize);
2401 goto failed_mount; 2460 goto failed_mount;
2402 } 2461 }
@@ -2406,15 +2465,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2406 offset = do_div(logical_sb_block, blocksize); 2465 offset = do_div(logical_sb_block, blocksize);
2407 bh = sb_bread(sb, logical_sb_block); 2466 bh = sb_bread(sb, logical_sb_block);
2408 if (!bh) { 2467 if (!bh) {
2409 printk(KERN_ERR 2468 ext4_msg(sb, KERN_ERR,
2410 "EXT4-fs: Can't read superblock on 2nd try.\n"); 2469 "Can't read superblock on 2nd try");
2411 goto failed_mount; 2470 goto failed_mount;
2412 } 2471 }
2413 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2472 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2414 sbi->s_es = es; 2473 sbi->s_es = es;
2415 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2474 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2416 printk(KERN_ERR 2475 ext4_msg(sb, KERN_ERR,
2417 "EXT4-fs: Magic mismatch, very weird !\n"); 2476 "Magic mismatch, very weird!");
2418 goto failed_mount; 2477 goto failed_mount;
2419 } 2478 }
2420 } 2479 }
@@ -2432,30 +2491,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2432 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2491 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2433 (!is_power_of_2(sbi->s_inode_size)) || 2492 (!is_power_of_2(sbi->s_inode_size)) ||
2434 (sbi->s_inode_size > blocksize)) { 2493 (sbi->s_inode_size > blocksize)) {
2435 printk(KERN_ERR 2494 ext4_msg(sb, KERN_ERR,
2436 "EXT4-fs: unsupported inode size: %d\n", 2495 "unsupported inode size: %d",
2437 sbi->s_inode_size); 2496 sbi->s_inode_size);
2438 goto failed_mount; 2497 goto failed_mount;
2439 } 2498 }
2440 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2499 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2441 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2500 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2442 } 2501 }
2502
2443 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2503 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2444 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2504 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2445 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2505 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2446 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2506 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2447 !is_power_of_2(sbi->s_desc_size)) { 2507 !is_power_of_2(sbi->s_desc_size)) {
2448 printk(KERN_ERR 2508 ext4_msg(sb, KERN_ERR,
2449 "EXT4-fs: unsupported descriptor size %lu\n", 2509 "unsupported descriptor size %lu",
2450 sbi->s_desc_size); 2510 sbi->s_desc_size);
2451 goto failed_mount; 2511 goto failed_mount;
2452 } 2512 }
2453 } else 2513 } else
2454 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2514 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2515
2455 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2516 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2456 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2517 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2457 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2518 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2458 goto cantfind_ext4; 2519 goto cantfind_ext4;
2520
2459 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2521 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2460 if (sbi->s_inodes_per_block == 0) 2522 if (sbi->s_inodes_per_block == 0)
2461 goto cantfind_ext4; 2523 goto cantfind_ext4;
@@ -2466,6 +2528,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2466 sbi->s_mount_state = le16_to_cpu(es->s_state); 2528 sbi->s_mount_state = le16_to_cpu(es->s_state);
2467 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2529 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2468 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2530 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2531
2469 for (i = 0; i < 4; i++) 2532 for (i = 0; i < 4; i++)
2470 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2533 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2471 sbi->s_def_hash_version = es->s_def_hash_version; 2534 sbi->s_def_hash_version = es->s_def_hash_version;
@@ -2483,25 +2546,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2483 } 2546 }
2484 2547
2485 if (sbi->s_blocks_per_group > blocksize * 8) { 2548 if (sbi->s_blocks_per_group > blocksize * 8) {
2486 printk(KERN_ERR 2549 ext4_msg(sb, KERN_ERR,
2487 "EXT4-fs: #blocks per group too big: %lu\n", 2550 "#blocks per group too big: %lu",
2488 sbi->s_blocks_per_group); 2551 sbi->s_blocks_per_group);
2489 goto failed_mount; 2552 goto failed_mount;
2490 } 2553 }
2491 if (sbi->s_inodes_per_group > blocksize * 8) { 2554 if (sbi->s_inodes_per_group > blocksize * 8) {
2492 printk(KERN_ERR 2555 ext4_msg(sb, KERN_ERR,
2493 "EXT4-fs: #inodes per group too big: %lu\n", 2556 "#inodes per group too big: %lu",
2494 sbi->s_inodes_per_group); 2557 sbi->s_inodes_per_group);
2495 goto failed_mount; 2558 goto failed_mount;
2496 } 2559 }
2497 2560
2498 if (ext4_blocks_count(es) > 2561 if (ext4_blocks_count(es) >
2499 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 2562 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2500 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 2563 ext4_msg(sb, KERN_ERR, "filesystem"
2501 " too large to mount safely\n", sb->s_id); 2564 " too large to mount safely");
2502 if (sizeof(sector_t) < 8) 2565 if (sizeof(sector_t) < 8)
2503 printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " 2566 ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
2504 "enabled\n");
2505 goto failed_mount; 2567 goto failed_mount;
2506 } 2568 }
2507 2569
@@ -2511,21 +2573,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2511 /* check blocks count against device size */ 2573 /* check blocks count against device size */
2512 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 2574 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2513 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 2575 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2514 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " 2576 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
2515 "exceeds size of device (%llu blocks)\n", 2577 "exceeds size of device (%llu blocks)",
2516 ext4_blocks_count(es), blocks_count); 2578 ext4_blocks_count(es), blocks_count);
2517 goto failed_mount; 2579 goto failed_mount;
2518 } 2580 }
2519 2581
2520 /* 2582 /*
2521 * It makes no sense for the first data block to be beyond the end 2583 * It makes no sense for the first data block to be beyond the end
2522 * of the filesystem. 2584 * of the filesystem.
2523 */ 2585 */
2524 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2586 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2525 printk(KERN_WARNING "EXT4-fs: bad geometry: first data" 2587 ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
2526 "block %u is beyond end of filesystem (%llu)\n", 2588 "block %u is beyond end of filesystem (%llu)",
2527 le32_to_cpu(es->s_first_data_block), 2589 le32_to_cpu(es->s_first_data_block),
2528 ext4_blocks_count(es)); 2590 ext4_blocks_count(es));
2529 goto failed_mount; 2591 goto failed_mount;
2530 } 2592 }
2531 blocks_count = (ext4_blocks_count(es) - 2593 blocks_count = (ext4_blocks_count(es) -
@@ -2533,9 +2595,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2533 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2595 EXT4_BLOCKS_PER_GROUP(sb) - 1);
2534 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2596 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2535 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2597 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2536 printk(KERN_WARNING "EXT4-fs: groups count too large: %u " 2598 ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
2537 "(block count %llu, first data block %u, " 2599 "(block count %llu, first data block %u, "
2538 "blocks per group %lu)\n", sbi->s_groups_count, 2600 "blocks per group %lu)", sbi->s_groups_count,
2539 ext4_blocks_count(es), 2601 ext4_blocks_count(es),
2540 le32_to_cpu(es->s_first_data_block), 2602 le32_to_cpu(es->s_first_data_block),
2541 EXT4_BLOCKS_PER_GROUP(sb)); 2603 EXT4_BLOCKS_PER_GROUP(sb));
@@ -2547,7 +2609,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2547 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2609 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2548 GFP_KERNEL); 2610 GFP_KERNEL);
2549 if (sbi->s_group_desc == NULL) { 2611 if (sbi->s_group_desc == NULL) {
2550 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 2612 ext4_msg(sb, KERN_ERR, "not enough memory");
2551 goto failed_mount; 2613 goto failed_mount;
2552 } 2614 }
2553 2615
@@ -2562,21 +2624,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2562 block = descriptor_loc(sb, logical_sb_block, i); 2624 block = descriptor_loc(sb, logical_sb_block, i);
2563 sbi->s_group_desc[i] = sb_bread(sb, block); 2625 sbi->s_group_desc[i] = sb_bread(sb, block);
2564 if (!sbi->s_group_desc[i]) { 2626 if (!sbi->s_group_desc[i]) {
2565 printk(KERN_ERR "EXT4-fs: " 2627 ext4_msg(sb, KERN_ERR,
2566 "can't read group descriptor %d\n", i); 2628 "can't read group descriptor %d", i);
2567 db_count = i; 2629 db_count = i;
2568 goto failed_mount2; 2630 goto failed_mount2;
2569 } 2631 }
2570 } 2632 }
2571 if (!ext4_check_descriptors(sb)) { 2633 if (!ext4_check_descriptors(sb)) {
2572 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2634 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
2573 goto failed_mount2; 2635 goto failed_mount2;
2574 } 2636 }
2575 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2637 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2576 if (!ext4_fill_flex_info(sb)) { 2638 if (!ext4_fill_flex_info(sb)) {
2577 printk(KERN_ERR 2639 ext4_msg(sb, KERN_ERR,
2578 "EXT4-fs: unable to initialize " 2640 "unable to initialize "
2579 "flex_bg meta info!\n"); 2641 "flex_bg meta info!");
2580 goto failed_mount2; 2642 goto failed_mount2;
2581 } 2643 }
2582 2644
@@ -2598,7 +2660,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2598 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2660 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2599 } 2661 }
2600 if (err) { 2662 if (err) {
2601 printk(KERN_ERR "EXT4-fs: insufficient memory\n"); 2663 ext4_msg(sb, KERN_ERR, "insufficient memory");
2602 goto failed_mount3; 2664 goto failed_mount3;
2603 } 2665 }
2604 2666
@@ -2607,7 +2669,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2607 /* 2669 /*
2608 * set up enough so that it can read an inode 2670 * set up enough so that it can read an inode
2609 */ 2671 */
2610 sb->s_op = &ext4_sops; 2672 if (!test_opt(sb, NOLOAD) &&
2673 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2674 sb->s_op = &ext4_sops;
2675 else
2676 sb->s_op = &ext4_nojournal_sops;
2611 sb->s_export_op = &ext4_export_ops; 2677 sb->s_export_op = &ext4_export_ops;
2612 sb->s_xattr = ext4_xattr_handlers; 2678 sb->s_xattr = ext4_xattr_handlers;
2613#ifdef CONFIG_QUOTA 2679#ifdef CONFIG_QUOTA
@@ -2615,6 +2681,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2615 sb->dq_op = &ext4_quota_operations; 2681 sb->dq_op = &ext4_quota_operations;
2616#endif 2682#endif
2617 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2683 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2684 mutex_init(&sbi->s_orphan_lock);
2685 mutex_init(&sbi->s_resize_lock);
2618 2686
2619 sb->s_root = NULL; 2687 sb->s_root = NULL;
2620 2688
@@ -2632,13 +2700,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2632 goto failed_mount3; 2700 goto failed_mount3;
2633 if (!(sb->s_flags & MS_RDONLY) && 2701 if (!(sb->s_flags & MS_RDONLY) &&
2634 EXT4_SB(sb)->s_journal->j_failed_commit) { 2702 EXT4_SB(sb)->s_journal->j_failed_commit) {
2635 printk(KERN_CRIT "EXT4-fs error (device %s): " 2703 ext4_msg(sb, KERN_CRIT, "error: "
2636 "ext4_fill_super: Journal transaction " 2704 "ext4_fill_super: Journal transaction "
2637 "%u is corrupt\n", sb->s_id, 2705 "%u is corrupt",
2638 EXT4_SB(sb)->s_journal->j_failed_commit); 2706 EXT4_SB(sb)->s_journal->j_failed_commit);
2639 if (test_opt(sb, ERRORS_RO)) { 2707 if (test_opt(sb, ERRORS_RO)) {
2640 printk(KERN_CRIT 2708 ext4_msg(sb, KERN_CRIT,
2641 "Mounting filesystem read-only\n"); 2709 "Mounting filesystem read-only");
2642 sb->s_flags |= MS_RDONLY; 2710 sb->s_flags |= MS_RDONLY;
2643 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2711 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2644 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2712 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2646,14 +2714,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2646 if (test_opt(sb, ERRORS_PANIC)) { 2714 if (test_opt(sb, ERRORS_PANIC)) {
2647 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2715 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2648 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2716 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2649 ext4_commit_super(sb, es, 1); 2717 ext4_commit_super(sb, 1);
2650 goto failed_mount4; 2718 goto failed_mount4;
2651 } 2719 }
2652 } 2720 }
2653 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2721 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2654 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2722 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2655 printk(KERN_ERR "EXT4-fs: required journal recovery " 2723 ext4_msg(sb, KERN_ERR, "required journal recovery "
2656 "suppressed and not mounted read-only\n"); 2724 "suppressed and not mounted read-only");
2657 goto failed_mount4; 2725 goto failed_mount4;
2658 } else { 2726 } else {
2659 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2727 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
@@ -2666,7 +2734,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2666 if (ext4_blocks_count(es) > 0xffffffffULL && 2734 if (ext4_blocks_count(es) > 0xffffffffULL &&
2667 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2735 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2668 JBD2_FEATURE_INCOMPAT_64BIT)) { 2736 JBD2_FEATURE_INCOMPAT_64BIT)) {
2669 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); 2737 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
2670 goto failed_mount4; 2738 goto failed_mount4;
2671 } 2739 }
2672 2740
@@ -2704,8 +2772,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2704 case EXT4_MOUNT_WRITEBACK_DATA: 2772 case EXT4_MOUNT_WRITEBACK_DATA:
2705 if (!jbd2_journal_check_available_features 2773 if (!jbd2_journal_check_available_features
2706 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2774 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2707 printk(KERN_ERR "EXT4-fs: Journal does not support " 2775 ext4_msg(sb, KERN_ERR, "Journal does not support "
2708 "requested data journaling mode\n"); 2776 "requested data journaling mode");
2709 goto failed_mount4; 2777 goto failed_mount4;
2710 } 2778 }
2711 default: 2779 default:
@@ -2717,8 +2785,8 @@ no_journal:
2717 2785
2718 if (test_opt(sb, NOBH)) { 2786 if (test_opt(sb, NOBH)) {
2719 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2787 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2720 printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " 2788 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
2721 "its supported only with writeback mode\n"); 2789 "its supported only with writeback mode");
2722 clear_opt(sbi->s_mount_opt, NOBH); 2790 clear_opt(sbi->s_mount_opt, NOBH);
2723 } 2791 }
2724 } 2792 }
@@ -2729,18 +2797,18 @@ no_journal:
2729 2797
2730 root = ext4_iget(sb, EXT4_ROOT_INO); 2798 root = ext4_iget(sb, EXT4_ROOT_INO);
2731 if (IS_ERR(root)) { 2799 if (IS_ERR(root)) {
2732 printk(KERN_ERR "EXT4-fs: get root inode failed\n"); 2800 ext4_msg(sb, KERN_ERR, "get root inode failed");
2733 ret = PTR_ERR(root); 2801 ret = PTR_ERR(root);
2734 goto failed_mount4; 2802 goto failed_mount4;
2735 } 2803 }
2736 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2804 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2737 iput(root); 2805 iput(root);
2738 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); 2806 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
2739 goto failed_mount4; 2807 goto failed_mount4;
2740 } 2808 }
2741 sb->s_root = d_alloc_root(root); 2809 sb->s_root = d_alloc_root(root);
2742 if (!sb->s_root) { 2810 if (!sb->s_root) {
2743 printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); 2811 ext4_msg(sb, KERN_ERR, "get root dentry failed");
2744 iput(root); 2812 iput(root);
2745 ret = -ENOMEM; 2813 ret = -ENOMEM;
2746 goto failed_mount4; 2814 goto failed_mount4;
@@ -2769,22 +2837,29 @@ no_journal:
2769 sbi->s_inode_size) { 2837 sbi->s_inode_size) {
2770 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2838 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2771 EXT4_GOOD_OLD_INODE_SIZE; 2839 EXT4_GOOD_OLD_INODE_SIZE;
2772 printk(KERN_INFO "EXT4-fs: required extra inode space not" 2840 ext4_msg(sb, KERN_INFO, "required extra inode space not"
2773 "available.\n"); 2841 "available");
2774 } 2842 }
2775 2843
2776 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2844 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2777 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2845 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
2778 "requested data journaling mode\n"); 2846 "requested data journaling mode");
2779 clear_opt(sbi->s_mount_opt, DELALLOC); 2847 clear_opt(sbi->s_mount_opt, DELALLOC);
2780 } else if (test_opt(sb, DELALLOC)) 2848 } else if (test_opt(sb, DELALLOC))
2781 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); 2849 ext4_msg(sb, KERN_INFO, "delayed allocation enabled");
2850
2851 err = ext4_setup_system_zone(sb);
2852 if (err) {
2853 ext4_msg(sb, KERN_ERR, "failed to initialize system "
2854 "zone (%d)\n", err);
2855 goto failed_mount4;
2856 }
2782 2857
2783 ext4_ext_init(sb); 2858 ext4_ext_init(sb);
2784 err = ext4_mb_init(sb, needs_recovery); 2859 err = ext4_mb_init(sb, needs_recovery);
2785 if (err) { 2860 if (err) {
2786 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", 2861 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
2787 err); 2862 err);
2788 goto failed_mount4; 2863 goto failed_mount4;
2789 } 2864 }
2790 2865
@@ -2798,19 +2873,11 @@ no_journal:
2798 goto failed_mount4; 2873 goto failed_mount4;
2799 }; 2874 };
2800 2875
2801 /*
2802 * akpm: core read_super() calls in here with the superblock locked.
2803 * That deadlocks, because orphan cleanup needs to lock the superblock
2804 * in numerous places. Here we just pop the lock - it's relatively
2805 * harmless, because we are now ready to accept write_super() requests,
2806 * and aviro says that's the only reason for hanging onto the
2807 * superblock lock.
2808 */
2809 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2876 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2810 ext4_orphan_cleanup(sb, es); 2877 ext4_orphan_cleanup(sb, es);
2811 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2878 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2812 if (needs_recovery) { 2879 if (needs_recovery) {
2813 printk(KERN_INFO "EXT4-fs: recovery complete.\n"); 2880 ext4_msg(sb, KERN_INFO, "recovery complete");
2814 ext4_mark_recovery_complete(sb, es); 2881 ext4_mark_recovery_complete(sb, es);
2815 } 2882 }
2816 if (EXT4_SB(sb)->s_journal) { 2883 if (EXT4_SB(sb)->s_journal) {
@@ -2823,25 +2890,30 @@ no_journal:
2823 } else 2890 } else
2824 descr = "out journal"; 2891 descr = "out journal";
2825 2892
2826 printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", 2893 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
2827 sb->s_id, descr);
2828 2894
2829 lock_kernel(); 2895 lock_kernel();
2830 return 0; 2896 return 0;
2831 2897
2832cantfind_ext4: 2898cantfind_ext4:
2833 if (!silent) 2899 if (!silent)
2834 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", 2900 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
2835 sb->s_id);
2836 goto failed_mount; 2901 goto failed_mount;
2837 2902
2838failed_mount4: 2903failed_mount4:
2839 printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); 2904 ext4_msg(sb, KERN_ERR, "mount failed");
2905 ext4_release_system_zone(sb);
2840 if (sbi->s_journal) { 2906 if (sbi->s_journal) {
2841 jbd2_journal_destroy(sbi->s_journal); 2907 jbd2_journal_destroy(sbi->s_journal);
2842 sbi->s_journal = NULL; 2908 sbi->s_journal = NULL;
2843 } 2909 }
2844failed_mount3: 2910failed_mount3:
2911 if (sbi->s_flex_groups) {
2912 if (is_vmalloc_addr(sbi->s_flex_groups))
2913 vfree(sbi->s_flex_groups);
2914 else
2915 kfree(sbi->s_flex_groups);
2916 }
2845 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2917 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2846 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2918 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2847 percpu_counter_destroy(&sbi->s_dirs_counter); 2919 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -2862,6 +2934,7 @@ failed_mount:
2862 brelse(bh); 2934 brelse(bh);
2863out_fail: 2935out_fail:
2864 sb->s_fs_info = NULL; 2936 sb->s_fs_info = NULL;
2937 kfree(sbi->s_blockgroup_lock);
2865 kfree(sbi); 2938 kfree(sbi);
2866 lock_kernel(); 2939 lock_kernel();
2867 return ret; 2940 return ret;
@@ -2906,27 +2979,27 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2906 2979
2907 journal_inode = ext4_iget(sb, journal_inum); 2980 journal_inode = ext4_iget(sb, journal_inum);
2908 if (IS_ERR(journal_inode)) { 2981 if (IS_ERR(journal_inode)) {
2909 printk(KERN_ERR "EXT4-fs: no journal found.\n"); 2982 ext4_msg(sb, KERN_ERR, "no journal found");
2910 return NULL; 2983 return NULL;
2911 } 2984 }
2912 if (!journal_inode->i_nlink) { 2985 if (!journal_inode->i_nlink) {
2913 make_bad_inode(journal_inode); 2986 make_bad_inode(journal_inode);
2914 iput(journal_inode); 2987 iput(journal_inode);
2915 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); 2988 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
2916 return NULL; 2989 return NULL;
2917 } 2990 }
2918 2991
2919 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 2992 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2920 journal_inode, journal_inode->i_size); 2993 journal_inode, journal_inode->i_size);
2921 if (!S_ISREG(journal_inode->i_mode)) { 2994 if (!S_ISREG(journal_inode->i_mode)) {
2922 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 2995 ext4_msg(sb, KERN_ERR, "invalid journal inode");
2923 iput(journal_inode); 2996 iput(journal_inode);
2924 return NULL; 2997 return NULL;
2925 } 2998 }
2926 2999
2927 journal = jbd2_journal_init_inode(journal_inode); 3000 journal = jbd2_journal_init_inode(journal_inode);
2928 if (!journal) { 3001 if (!journal) {
2929 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); 3002 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
2930 iput(journal_inode); 3003 iput(journal_inode);
2931 return NULL; 3004 return NULL;
2932 } 3005 }
@@ -2950,13 +3023,13 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2950 3023
2951 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3024 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2952 3025
2953 bdev = ext4_blkdev_get(j_dev); 3026 bdev = ext4_blkdev_get(j_dev, sb);
2954 if (bdev == NULL) 3027 if (bdev == NULL)
2955 return NULL; 3028 return NULL;
2956 3029
2957 if (bd_claim(bdev, sb)) { 3030 if (bd_claim(bdev, sb)) {
2958 printk(KERN_ERR 3031 ext4_msg(sb, KERN_ERR,
2959 "EXT4-fs: failed to claim external journal device.\n"); 3032 "failed to claim external journal device");
2960 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 3033 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2961 return NULL; 3034 return NULL;
2962 } 3035 }
@@ -2964,8 +3037,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2964 blocksize = sb->s_blocksize; 3037 blocksize = sb->s_blocksize;
2965 hblock = bdev_hardsect_size(bdev); 3038 hblock = bdev_hardsect_size(bdev);
2966 if (blocksize < hblock) { 3039 if (blocksize < hblock) {
2967 printk(KERN_ERR 3040 ext4_msg(sb, KERN_ERR,
2968 "EXT4-fs: blocksize too small for journal device.\n"); 3041 "blocksize too small for journal device");
2969 goto out_bdev; 3042 goto out_bdev;
2970 } 3043 }
2971 3044
@@ -2973,8 +3046,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2973 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3046 offset = EXT4_MIN_BLOCK_SIZE % blocksize;
2974 set_blocksize(bdev, blocksize); 3047 set_blocksize(bdev, blocksize);
2975 if (!(bh = __bread(bdev, sb_block, blocksize))) { 3048 if (!(bh = __bread(bdev, sb_block, blocksize))) {
2976 printk(KERN_ERR "EXT4-fs: couldn't read superblock of " 3049 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
2977 "external journal\n"); 3050 "external journal");
2978 goto out_bdev; 3051 goto out_bdev;
2979 } 3052 }
2980 3053
@@ -2982,14 +3055,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2982 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3055 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
2983 !(le32_to_cpu(es->s_feature_incompat) & 3056 !(le32_to_cpu(es->s_feature_incompat) &
2984 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3057 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2985 printk(KERN_ERR "EXT4-fs: external journal has " 3058 ext4_msg(sb, KERN_ERR, "external journal has "
2986 "bad superblock\n"); 3059 "bad superblock");
2987 brelse(bh); 3060 brelse(bh);
2988 goto out_bdev; 3061 goto out_bdev;
2989 } 3062 }
2990 3063
2991 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3064 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2992 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); 3065 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
2993 brelse(bh); 3066 brelse(bh);
2994 goto out_bdev; 3067 goto out_bdev;
2995 } 3068 }
@@ -3001,25 +3074,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
3001 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3074 journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3002 start, len, blocksize); 3075 start, len, blocksize);
3003 if (!journal) { 3076 if (!journal) {
3004 printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); 3077 ext4_msg(sb, KERN_ERR, "failed to create device journal");
3005 goto out_bdev; 3078 goto out_bdev;
3006 } 3079 }
3007 journal->j_private = sb; 3080 journal->j_private = sb;
3008 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3081 ll_rw_block(READ, 1, &journal->j_sb_buffer);
3009 wait_on_buffer(journal->j_sb_buffer); 3082 wait_on_buffer(journal->j_sb_buffer);
3010 if (!buffer_uptodate(journal->j_sb_buffer)) { 3083 if (!buffer_uptodate(journal->j_sb_buffer)) {
3011 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); 3084 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
3012 goto out_journal; 3085 goto out_journal;
3013 } 3086 }
3014 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3087 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3015 printk(KERN_ERR "EXT4-fs: External journal has more than one " 3088 ext4_msg(sb, KERN_ERR, "External journal has more than one "
3016 "user (unsupported) - %d\n", 3089 "user (unsupported) - %d",
3017 be32_to_cpu(journal->j_superblock->s_nr_users)); 3090 be32_to_cpu(journal->j_superblock->s_nr_users));
3018 goto out_journal; 3091 goto out_journal;
3019 } 3092 }
3020 EXT4_SB(sb)->journal_bdev = bdev; 3093 EXT4_SB(sb)->journal_bdev = bdev;
3021 ext4_init_journal_params(sb, journal); 3094 ext4_init_journal_params(sb, journal);
3022 return journal; 3095 return journal;
3096
3023out_journal: 3097out_journal:
3024 jbd2_journal_destroy(journal); 3098 jbd2_journal_destroy(journal);
3025out_bdev: 3099out_bdev:
@@ -3041,8 +3115,8 @@ static int ext4_load_journal(struct super_block *sb,
3041 3115
3042 if (journal_devnum && 3116 if (journal_devnum &&
3043 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3117 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3044 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 3118 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
3045 "numbers have changed\n"); 3119 "numbers have changed");
3046 journal_dev = new_decode_dev(journal_devnum); 3120 journal_dev = new_decode_dev(journal_devnum);
3047 } else 3121 } else
3048 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3122 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -3054,24 +3128,23 @@ static int ext4_load_journal(struct super_block *sb,
3054 * crash? For recovery, we need to check in advance whether we 3128 * crash? For recovery, we need to check in advance whether we
3055 * can get read-write access to the device. 3129 * can get read-write access to the device.
3056 */ 3130 */
3057
3058 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3131 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3059 if (sb->s_flags & MS_RDONLY) { 3132 if (sb->s_flags & MS_RDONLY) {
3060 printk(KERN_INFO "EXT4-fs: INFO: recovery " 3133 ext4_msg(sb, KERN_INFO, "INFO: recovery "
3061 "required on readonly filesystem.\n"); 3134 "required on readonly filesystem");
3062 if (really_read_only) { 3135 if (really_read_only) {
3063 printk(KERN_ERR "EXT4-fs: write access " 3136 ext4_msg(sb, KERN_ERR, "write access "
3064 "unavailable, cannot proceed.\n"); 3137 "unavailable, cannot proceed");
3065 return -EROFS; 3138 return -EROFS;
3066 } 3139 }
3067 printk(KERN_INFO "EXT4-fs: write access will " 3140 ext4_msg(sb, KERN_INFO, "write access will "
3068 "be enabled during recovery.\n"); 3141 "be enabled during recovery");
3069 } 3142 }
3070 } 3143 }
3071 3144
3072 if (journal_inum && journal_dev) { 3145 if (journal_inum && journal_dev) {
3073 printk(KERN_ERR "EXT4-fs: filesystem has both journal " 3146 ext4_msg(sb, KERN_ERR, "filesystem has both journal "
3074 "and inode journals!\n"); 3147 "and inode journals!");
3075 return -EINVAL; 3148 return -EINVAL;
3076 } 3149 }
3077 3150
@@ -3084,14 +3157,14 @@ static int ext4_load_journal(struct super_block *sb,
3084 } 3157 }
3085 3158
3086 if (journal->j_flags & JBD2_BARRIER) 3159 if (journal->j_flags & JBD2_BARRIER)
3087 printk(KERN_INFO "EXT4-fs: barriers enabled\n"); 3160 ext4_msg(sb, KERN_INFO, "barriers enabled");
3088 else 3161 else
3089 printk(KERN_INFO "EXT4-fs: barriers disabled\n"); 3162 ext4_msg(sb, KERN_INFO, "barriers disabled");
3090 3163
3091 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3164 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3092 err = jbd2_journal_update_format(journal); 3165 err = jbd2_journal_update_format(journal);
3093 if (err) { 3166 if (err) {
3094 printk(KERN_ERR "EXT4-fs: error updating journal.\n"); 3167 ext4_msg(sb, KERN_ERR, "error updating journal");
3095 jbd2_journal_destroy(journal); 3168 jbd2_journal_destroy(journal);
3096 return err; 3169 return err;
3097 } 3170 }
@@ -3103,7 +3176,7 @@ static int ext4_load_journal(struct super_block *sb,
3103 err = jbd2_journal_load(journal); 3176 err = jbd2_journal_load(journal);
3104 3177
3105 if (err) { 3178 if (err) {
3106 printk(KERN_ERR "EXT4-fs: error loading journal.\n"); 3179 ext4_msg(sb, KERN_ERR, "error loading journal");
3107 jbd2_journal_destroy(journal); 3180 jbd2_journal_destroy(journal);
3108 return err; 3181 return err;
3109 } 3182 }
@@ -3114,18 +3187,17 @@ static int ext4_load_journal(struct super_block *sb,
3114 if (journal_devnum && 3187 if (journal_devnum &&
3115 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3188 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3116 es->s_journal_dev = cpu_to_le32(journal_devnum); 3189 es->s_journal_dev = cpu_to_le32(journal_devnum);
3117 sb->s_dirt = 1;
3118 3190
3119 /* Make sure we flush the recovery flag to disk. */ 3191 /* Make sure we flush the recovery flag to disk. */
3120 ext4_commit_super(sb, es, 1); 3192 ext4_commit_super(sb, 1);
3121 } 3193 }
3122 3194
3123 return 0; 3195 return 0;
3124} 3196}
3125 3197
3126static int ext4_commit_super(struct super_block *sb, 3198static int ext4_commit_super(struct super_block *sb, int sync)
3127 struct ext4_super_block *es, int sync)
3128{ 3199{
3200 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3129 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3201 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3130 int error = 0; 3202 int error = 0;
3131 3203
@@ -3140,8 +3212,8 @@ static int ext4_commit_super(struct super_block *sb,
3140 * be remapped. Nothing we can do but to retry the 3212 * be remapped. Nothing we can do but to retry the
3141 * write and hope for the best. 3213 * write and hope for the best.
3142 */ 3214 */
3143 printk(KERN_ERR "EXT4-fs: previous I/O error to " 3215 ext4_msg(sb, KERN_ERR, "previous I/O error to "
3144 "superblock detected for %s.\n", sb->s_id); 3216 "superblock detected");
3145 clear_buffer_write_io_error(sbh); 3217 clear_buffer_write_io_error(sbh);
3146 set_buffer_uptodate(sbh); 3218 set_buffer_uptodate(sbh);
3147 } 3219 }
@@ -3154,7 +3226,7 @@ static int ext4_commit_super(struct super_block *sb,
3154 &EXT4_SB(sb)->s_freeblocks_counter)); 3226 &EXT4_SB(sb)->s_freeblocks_counter));
3155 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3227 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3156 &EXT4_SB(sb)->s_freeinodes_counter)); 3228 &EXT4_SB(sb)->s_freeinodes_counter));
3157 3229 sb->s_dirt = 0;
3158 BUFFER_TRACE(sbh, "marking dirty"); 3230 BUFFER_TRACE(sbh, "marking dirty");
3159 mark_buffer_dirty(sbh); 3231 mark_buffer_dirty(sbh);
3160 if (sync) { 3232 if (sync) {
@@ -3164,8 +3236,8 @@ static int ext4_commit_super(struct super_block *sb,
3164 3236
3165 error = buffer_write_io_error(sbh); 3237 error = buffer_write_io_error(sbh);
3166 if (error) { 3238 if (error) {
3167 printk(KERN_ERR "EXT4-fs: I/O error while writing " 3239 ext4_msg(sb, KERN_ERR, "I/O error while writing "
3168 "superblock for %s.\n", sb->s_id); 3240 "superblock");
3169 clear_buffer_write_io_error(sbh); 3241 clear_buffer_write_io_error(sbh);
3170 set_buffer_uptodate(sbh); 3242 set_buffer_uptodate(sbh);
3171 } 3243 }
@@ -3173,7 +3245,6 @@ static int ext4_commit_super(struct super_block *sb,
3173 return error; 3245 return error;
3174} 3246}
3175 3247
3176
3177/* 3248/*
3178 * Have we just finished recovery? If so, and if we are mounting (or 3249 * Have we just finished recovery? If so, and if we are mounting (or
3179 * remounting) the filesystem readonly, then we will end up with a 3250 * remounting) the filesystem readonly, then we will end up with a
@@ -3192,14 +3263,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
3192 if (jbd2_journal_flush(journal) < 0) 3263 if (jbd2_journal_flush(journal) < 0)
3193 goto out; 3264 goto out;
3194 3265
3195 lock_super(sb);
3196 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 3266 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3197 sb->s_flags & MS_RDONLY) { 3267 sb->s_flags & MS_RDONLY) {
3198 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3268 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3199 sb->s_dirt = 0; 3269 ext4_commit_super(sb, 1);
3200 ext4_commit_super(sb, es, 1);
3201 } 3270 }
3202 unlock_super(sb);
3203 3271
3204out: 3272out:
3205 jbd2_journal_unlock_updates(journal); 3273 jbd2_journal_unlock_updates(journal);
@@ -3238,7 +3306,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
3238 3306
3239 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3307 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3240 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3308 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3241 ext4_commit_super(sb, es, 1); 3309 ext4_commit_super(sb, 1);
3242 3310
3243 jbd2_journal_clear_err(journal); 3311 jbd2_journal_clear_err(journal);
3244 } 3312 }
@@ -3257,29 +3325,15 @@ int ext4_force_commit(struct super_block *sb)
3257 return 0; 3325 return 0;
3258 3326
3259 journal = EXT4_SB(sb)->s_journal; 3327 journal = EXT4_SB(sb)->s_journal;
3260 if (journal) { 3328 if (journal)
3261 sb->s_dirt = 0;
3262 ret = ext4_journal_force_commit(journal); 3329 ret = ext4_journal_force_commit(journal);
3263 }
3264 3330
3265 return ret; 3331 return ret;
3266} 3332}
3267 3333
3268/*
3269 * Ext4 always journals updates to the superblock itself, so we don't
3270 * have to propagate any other updates to the superblock on disk at this
3271 * point. (We can probably nuke this function altogether, and remove
3272 * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
3273 */
3274static void ext4_write_super(struct super_block *sb) 3334static void ext4_write_super(struct super_block *sb)
3275{ 3335{
3276 if (EXT4_SB(sb)->s_journal) { 3336 ext4_commit_super(sb, 1);
3277 if (mutex_trylock(&sb->s_lock) != 0)
3278 BUG();
3279 sb->s_dirt = 0;
3280 } else {
3281 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3282 }
3283} 3337}
3284 3338
3285static int ext4_sync_fs(struct super_block *sb, int wait) 3339static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3288,16 +3342,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
3288 tid_t target; 3342 tid_t target;
3289 3343
3290 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3344 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3291 sb->s_dirt = 0; 3345 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3292 if (EXT4_SB(sb)->s_journal) { 3346 if (wait)
3293 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, 3347 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
3294 &target)) {
3295 if (wait)
3296 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
3297 target);
3298 }
3299 } else {
3300 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3301 } 3348 }
3302 return ret; 3349 return ret;
3303} 3350}
@@ -3310,34 +3357,32 @@ static int ext4_freeze(struct super_block *sb)
3310{ 3357{
3311 int error = 0; 3358 int error = 0;
3312 journal_t *journal; 3359 journal_t *journal;
3313 sb->s_dirt = 0;
3314 3360
3315 if (!(sb->s_flags & MS_RDONLY)) { 3361 if (sb->s_flags & MS_RDONLY)
3316 journal = EXT4_SB(sb)->s_journal; 3362 return 0;
3317 3363
3318 if (journal) { 3364 journal = EXT4_SB(sb)->s_journal;
3319 /* Now we set up the journal barrier. */
3320 jbd2_journal_lock_updates(journal);
3321 3365
3322 /* 3366 /* Now we set up the journal barrier. */
3323 * We don't want to clear needs_recovery flag when we 3367 jbd2_journal_lock_updates(journal);
3324 * failed to flush the journal.
3325 */
3326 error = jbd2_journal_flush(journal);
3327 if (error < 0)
3328 goto out;
3329 }
3330 3368
3331 /* Journal blocked and flushed, clear needs_recovery flag. */ 3369 /*
3332 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3370 * Don't clear the needs_recovery flag if we failed to flush
3333 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3371 * the journal.
3334 if (error) 3372 */
3335 goto out; 3373 error = jbd2_journal_flush(journal);
3374 if (error < 0) {
3375 out:
3376 jbd2_journal_unlock_updates(journal);
3377 return error;
3336 } 3378 }
3379
3380 /* Journal blocked and flushed, clear needs_recovery flag. */
3381 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3382 error = ext4_commit_super(sb, 1);
3383 if (error)
3384 goto out;
3337 return 0; 3385 return 0;
3338out:
3339 jbd2_journal_unlock_updates(journal);
3340 return error;
3341} 3386}
3342 3387
3343/* 3388/*
@@ -3346,14 +3391,15 @@ out:
3346 */ 3391 */
3347static int ext4_unfreeze(struct super_block *sb) 3392static int ext4_unfreeze(struct super_block *sb)
3348{ 3393{
3349 if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { 3394 if (sb->s_flags & MS_RDONLY)
3350 lock_super(sb); 3395 return 0;
3351 /* Reser the needs_recovery flag before the fs is unlocked. */ 3396
3352 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3397 lock_super(sb);
3353 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3398 /* Reset the needs_recovery flag before the fs is unlocked. */
3354 unlock_super(sb); 3399 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3355 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3400 ext4_commit_super(sb, 1);
3356 } 3401 unlock_super(sb);
3402 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3357 return 0; 3403 return 0;
3358} 3404}
3359 3405
@@ -3432,22 +3478,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3432 (sbi->s_mount_state & EXT4_VALID_FS)) 3478 (sbi->s_mount_state & EXT4_VALID_FS))
3433 es->s_state = cpu_to_le16(sbi->s_mount_state); 3479 es->s_state = cpu_to_le16(sbi->s_mount_state);
3434 3480
3435 /* 3481 if (sbi->s_journal)
3436 * We have to unlock super so that we can wait for
3437 * transactions.
3438 */
3439 if (sbi->s_journal) {
3440 unlock_super(sb);
3441 ext4_mark_recovery_complete(sb, es); 3482 ext4_mark_recovery_complete(sb, es);
3442 lock_super(sb);
3443 }
3444 } else { 3483 } else {
3445 int ret; 3484 int ret;
3446 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3485 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3447 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 3486 ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3448 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3487 ext4_msg(sb, KERN_WARNING, "couldn't "
3449 "remount RDWR because of unsupported " 3488 "remount RDWR because of unsupported "
3450 "optional features (%x).\n", sb->s_id, 3489 "optional features (%x)",
3451 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & 3490 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3452 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 3491 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3453 err = -EROFS; 3492 err = -EROFS;
@@ -3456,17 +3495,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3456 3495
3457 /* 3496 /*
3458 * Make sure the group descriptor checksums 3497 * Make sure the group descriptor checksums
3459 * are sane. If they aren't, refuse to 3498 * are sane. If they aren't, refuse to remount r/w.
3460 * remount r/w.
3461 */ 3499 */
3462 for (g = 0; g < sbi->s_groups_count; g++) { 3500 for (g = 0; g < sbi->s_groups_count; g++) {
3463 struct ext4_group_desc *gdp = 3501 struct ext4_group_desc *gdp =
3464 ext4_get_group_desc(sb, g, NULL); 3502 ext4_get_group_desc(sb, g, NULL);
3465 3503
3466 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3504 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3467 printk(KERN_ERR 3505 ext4_msg(sb, KERN_ERR,
3468 "EXT4-fs: ext4_remount: " 3506 "ext4_remount: Checksum for group %u failed (%u!=%u)",
3469 "Checksum for group %u failed (%u!=%u)\n",
3470 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3507 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3471 le16_to_cpu(gdp->bg_checksum)); 3508 le16_to_cpu(gdp->bg_checksum));
3472 err = -EINVAL; 3509 err = -EINVAL;
@@ -3480,11 +3517,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3480 * require a full umount/remount for now. 3517 * require a full umount/remount for now.
3481 */ 3518 */
3482 if (es->s_last_orphan) { 3519 if (es->s_last_orphan) {
3483 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3520 ext4_msg(sb, KERN_WARNING, "Couldn't "
3484 "remount RDWR because of unprocessed " 3521 "remount RDWR because of unprocessed "
3485 "orphan inode list. Please " 3522 "orphan inode list. Please "
3486 "umount/remount instead.\n", 3523 "umount/remount instead");
3487 sb->s_id);
3488 err = -EINVAL; 3524 err = -EINVAL;
3489 goto restore_opts; 3525 goto restore_opts;
3490 } 3526 }
@@ -3504,8 +3540,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3504 sb->s_flags &= ~MS_RDONLY; 3540 sb->s_flags &= ~MS_RDONLY;
3505 } 3541 }
3506 } 3542 }
3543 ext4_setup_system_zone(sb);
3507 if (sbi->s_journal == NULL) 3544 if (sbi->s_journal == NULL)
3508 ext4_commit_super(sb, es, 1); 3545 ext4_commit_super(sb, 1);
3509 3546
3510#ifdef CONFIG_QUOTA 3547#ifdef CONFIG_QUOTA
3511 /* Release old quota file names */ 3548 /* Release old quota file names */
@@ -3515,6 +3552,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3515 kfree(old_opts.s_qf_names[i]); 3552 kfree(old_opts.s_qf_names[i]);
3516#endif 3553#endif
3517 return 0; 3554 return 0;
3555
3518restore_opts: 3556restore_opts:
3519 sb->s_flags = old_sb_flags; 3557 sb->s_flags = old_sb_flags;
3520 sbi->s_mount_opt = old_opts.s_mount_opt; 3558 sbi->s_mount_opt = old_opts.s_mount_opt;
@@ -3545,9 +3583,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3545 if (test_opt(sb, MINIX_DF)) { 3583 if (test_opt(sb, MINIX_DF)) {
3546 sbi->s_overhead_last = 0; 3584 sbi->s_overhead_last = 0;
3547 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3585 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3548 ext4_group_t ngroups = sbi->s_groups_count, i; 3586 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3549 ext4_fsblk_t overhead = 0; 3587 ext4_fsblk_t overhead = 0;
3550 smp_rmb();
3551 3588
3552 /* 3589 /*
3553 * Compute the overhead (FS structures). This is constant 3590 * Compute the overhead (FS structures). This is constant
@@ -3599,11 +3636,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3599 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3636 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3600 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3637 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3601 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3638 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3639
3602 return 0; 3640 return 0;
3603} 3641}
3604 3642
3605/* Helper function for writing quotas on sync - we need to start transaction before quota file 3643/* Helper function for writing quotas on sync - we need to start transaction
3606 * is locked for write. Otherwise the are possible deadlocks: 3644 * before quota file is locked for write. Otherwise the are possible deadlocks:
3607 * Process 1 Process 2 3645 * Process 1 Process 2
3608 * ext4_create() quota_sync() 3646 * ext4_create() quota_sync()
3609 * jbd2_journal_start() write_dquot() 3647 * jbd2_journal_start() write_dquot()
@@ -3627,7 +3665,7 @@ static int ext4_write_dquot(struct dquot *dquot)
3627 3665
3628 inode = dquot_to_inode(dquot); 3666 inode = dquot_to_inode(dquot);
3629 handle = ext4_journal_start(inode, 3667 handle = ext4_journal_start(inode,
3630 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3668 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3631 if (IS_ERR(handle)) 3669 if (IS_ERR(handle))
3632 return PTR_ERR(handle); 3670 return PTR_ERR(handle);
3633 ret = dquot_commit(dquot); 3671 ret = dquot_commit(dquot);
@@ -3643,7 +3681,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)
3643 handle_t *handle; 3681 handle_t *handle;
3644 3682
3645 handle = ext4_journal_start(dquot_to_inode(dquot), 3683 handle = ext4_journal_start(dquot_to_inode(dquot),
3646 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3684 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3647 if (IS_ERR(handle)) 3685 if (IS_ERR(handle))
3648 return PTR_ERR(handle); 3686 return PTR_ERR(handle);
3649 ret = dquot_acquire(dquot); 3687 ret = dquot_acquire(dquot);
@@ -3659,7 +3697,7 @@ static int ext4_release_dquot(struct dquot *dquot)
3659 handle_t *handle; 3697 handle_t *handle;
3660 3698
3661 handle = ext4_journal_start(dquot_to_inode(dquot), 3699 handle = ext4_journal_start(dquot_to_inode(dquot),
3662 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3700 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3663 if (IS_ERR(handle)) { 3701 if (IS_ERR(handle)) {
3664 /* Release dquot anyway to avoid endless cycle in dqput() */ 3702 /* Release dquot anyway to avoid endless cycle in dqput() */
3665 dquot_release(dquot); 3703 dquot_release(dquot);
@@ -3707,7 +3745,7 @@ static int ext4_write_info(struct super_block *sb, int type)
3707static int ext4_quota_on_mount(struct super_block *sb, int type) 3745static int ext4_quota_on_mount(struct super_block *sb, int type)
3708{ 3746{
3709 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3747 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3710 EXT4_SB(sb)->s_jquota_fmt, type); 3748 EXT4_SB(sb)->s_jquota_fmt, type);
3711} 3749}
3712 3750
3713/* 3751/*
@@ -3738,9 +3776,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3738 if (EXT4_SB(sb)->s_qf_names[type]) { 3776 if (EXT4_SB(sb)->s_qf_names[type]) {
3739 /* Quotafile not in fs root? */ 3777 /* Quotafile not in fs root? */
3740 if (path.dentry->d_parent != sb->s_root) 3778 if (path.dentry->d_parent != sb->s_root)
3741 printk(KERN_WARNING 3779 ext4_msg(sb, KERN_WARNING,
3742 "EXT4-fs: Quota file not on filesystem root. " 3780 "Quota file not on filesystem root. "
3743 "Journaled quota will not work.\n"); 3781 "Journaled quota will not work");
3744 } 3782 }
3745 3783
3746 /* 3784 /*
@@ -3823,8 +3861,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3823 handle_t *handle = journal_current_handle(); 3861 handle_t *handle = journal_current_handle();
3824 3862
3825 if (EXT4_SB(sb)->s_journal && !handle) { 3863 if (EXT4_SB(sb)->s_journal && !handle) {
3826 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" 3864 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3827 " cancelled because transaction is not started.\n", 3865 " cancelled because transaction is not started",
3828 (unsigned long long)off, (unsigned long long)len); 3866 (unsigned long long)off, (unsigned long long)len);
3829 return -EIO; 3867 return -EIO;
3830 } 3868 }
@@ -3878,10 +3916,10 @@ out:
3878 3916
3879#endif 3917#endif
3880 3918
3881static int ext4_get_sb(struct file_system_type *fs_type, 3919static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3882 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3920 const char *dev_name, void *data, struct vfsmount *mnt)
3883{ 3921{
3884 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3922 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3885} 3923}
3886 3924
3887static struct file_system_type ext4_fs_type = { 3925static struct file_system_type ext4_fs_type = {
@@ -3893,14 +3931,14 @@ static struct file_system_type ext4_fs_type = {
3893}; 3931};
3894 3932
3895#ifdef CONFIG_EXT4DEV_COMPAT 3933#ifdef CONFIG_EXT4DEV_COMPAT
3896static int ext4dev_get_sb(struct file_system_type *fs_type, 3934static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
3897 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3935 const char *dev_name, void *data,struct vfsmount *mnt)
3898{ 3936{
3899 printk(KERN_WARNING "EXT4-fs: Update your userspace programs " 3937 printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs "
3900 "to mount using ext4\n"); 3938 "to mount using ext4\n", dev_name);
3901 printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " 3939 printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility "
3902 "will go away by 2.6.31\n"); 3940 "will go away by 2.6.31\n", dev_name);
3903 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3941 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3904} 3942}
3905 3943
3906static struct file_system_type ext4dev_fs_type = { 3944static struct file_system_type ext4dev_fs_type = {
@@ -3917,13 +3955,16 @@ static int __init init_ext4_fs(void)
3917{ 3955{
3918 int err; 3956 int err;
3919 3957
3958 err = init_ext4_system_zone();
3959 if (err)
3960 return err;
3920 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 3961 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3921 if (!ext4_kset) 3962 if (!ext4_kset)
3922 return -ENOMEM; 3963 goto out4;
3923 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3964 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3924 err = init_ext4_mballoc(); 3965 err = init_ext4_mballoc();
3925 if (err) 3966 if (err)
3926 return err; 3967 goto out3;
3927 3968
3928 err = init_ext4_xattr(); 3969 err = init_ext4_xattr();
3929 if (err) 3970 if (err)
@@ -3948,6 +3989,11 @@ out1:
3948 exit_ext4_xattr(); 3989 exit_ext4_xattr();
3949out2: 3990out2:
3950 exit_ext4_mballoc(); 3991 exit_ext4_mballoc();
3992out3:
3993 remove_proc_entry("fs/ext4", NULL);
3994 kset_unregister(ext4_kset);
3995out4:
3996 exit_ext4_system_zone();
3951 return err; 3997 return err;
3952} 3998}
3953 3999
@@ -3962,6 +4008,7 @@ static void __exit exit_ext4_fs(void)
3962 exit_ext4_mballoc(); 4008 exit_ext4_mballoc();
3963 remove_proc_entry("fs/ext4", NULL); 4009 remove_proc_entry("fs/ext4", NULL);
3964 kset_unregister(ext4_kset); 4010 kset_unregister(ext4_kset);
4011 exit_ext4_system_zone();
3965} 4012}
3966 4013
3967MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4014MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 82d9c42b8bac..286f38dfc6c0 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
414 switch (cmd) { 414 switch (cmd) {
415 case FIBMAP: 415 case FIBMAP:
416 return ioctl_fibmap(filp, p); 416 return ioctl_fibmap(filp, p);
417 case FS_IOC_FIEMAP:
418 return ioctl_fiemap(filp, arg);
419 case FIGETBSZ:
420 return put_user(inode->i_sb->s_blocksize, p);
421 case FIONREAD: 417 case FIONREAD:
422 return put_user(i_size_read(inode) - filp->f_pos, p); 418 return put_user(i_size_read(inode) - filp->f_pos, p);
423 } 419 }
@@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
557 error = ioctl_fsthaw(filp); 553 error = ioctl_fsthaw(filp);
558 break; 554 break;
559 555
556 case FS_IOC_FIEMAP:
557 return ioctl_fiemap(filp, arg);
558
559 case FIGETBSZ:
560 {
561 struct inode *inode = filp->f_path.dentry->d_inode;
562 int __user *p = (int __user *)arg;
563 return put_user(inode->i_sb->s_blocksize, p);
564 }
565
560 default: 566 default:
561 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 567 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
562 error = file_ioctl(filp, cmd, arg); 568 error = file_ioctl(filp, cmd, arg);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 58144102bf25..62be7d294ec2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
1781 * Journal abort has very specific semantics, which we describe 1781 * Journal abort has very specific semantics, which we describe
1782 * for journal abort. 1782 * for journal abort.
1783 * 1783 *
1784 * Two internal function, which provide abort to te jbd layer 1784 * Two internal functions, which provide abort to the jbd layer
1785 * itself are here. 1785 * itself are here.
1786 */ 1786 */
1787 1787
@@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
1879 * int jbd2_journal_errno () - returns the journal's error state. 1879 * int jbd2_journal_errno () - returns the journal's error state.
1880 * @journal: journal to examine. 1880 * @journal: journal to examine.
1881 * 1881 *
1882 * This is the errno numbet set with jbd2_journal_abort(), the last 1882 * This is the errno number set with jbd2_journal_abort(), the last
1883 * time the journal was mounted - if the journal was stopped 1883 * time the journal was mounted - if the journal was stopped
1884 * without calling abort this will be 0. 1884 * without calling abort this will be 0.
1885 * 1885 *
@@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal)
1903 * int jbd2_journal_clear_err () - clears the journal's error state 1903 * int jbd2_journal_clear_err () - clears the journal's error state
1904 * @journal: journal to act on. 1904 * @journal: journal to act on.
1905 * 1905 *
1906 * An error must be cleared or Acked to take a FS out of readonly 1906 * An error must be cleared or acked to take a FS out of readonly
1907 * mode. 1907 * mode.
1908 */ 1908 */
1909int jbd2_journal_clear_err(journal_t *journal) 1909int jbd2_journal_clear_err(journal_t *journal)
@@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal)
1923 * void jbd2_journal_ack_err() - Ack journal err. 1923 * void jbd2_journal_ack_err() - Ack journal err.
1924 * @journal: journal to act on. 1924 * @journal: journal to act on.
1925 * 1925 *
1926 * An error must be cleared or Acked to take a FS out of readonly 1926 * An error must be cleared or acked to take a FS out of readonly
1927 * mode. 1927 * mode.
1928 */ 1928 */
1929void jbd2_journal_ack_err(journal_t *journal) 1929void jbd2_journal_ack_err(journal_t *journal)
diff --git a/fs/mpage.c b/fs/mpage.c
index 680ba60863ff..42381bd6543b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
379 struct buffer_head map_bh; 379 struct buffer_head map_bh;
380 unsigned long first_logical_block = 0; 380 unsigned long first_logical_block = 0;
381 381
382 clear_buffer_mapped(&map_bh); 382 map_bh.b_state = 0;
383 map_bh.b_size = 0;
383 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 384 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
384 struct page *page = list_entry(pages->prev, struct page, lru); 385 struct page *page = list_entry(pages->prev, struct page, lru);
385 386
@@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block)
412 struct buffer_head map_bh; 413 struct buffer_head map_bh;
413 unsigned long first_logical_block = 0; 414 unsigned long first_logical_block = 0;
414 415
415 clear_buffer_mapped(&map_bh); 416 map_bh.b_state = 0;
417 map_bh.b_size = 0;
416 bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, 418 bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
417 &map_bh, &first_logical_block, get_block); 419 &map_bh, &first_logical_block, get_block);
418 if (bio) 420 if (bio)