aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Makefile4
-rw-r--r--fs/ext4/acl.c13
-rw-r--r--fs/ext4/balloc.c28
-rw-r--r--fs/ext4/block_validity.c244
-rw-r--r--fs/ext4/dir.c3
-rw-r--r--fs/ext4/ext4.h354
-rw-r--r--fs/ext4/ext4_i.h140
-rw-r--r--fs/ext4/ext4_sb.h161
-rw-r--r--fs/ext4/extents.c85
-rw-r--r--fs/ext4/group.h29
-rw-r--r--fs/ext4/ialloc.c73
-rw-r--r--fs/ext4/inode.c593
-rw-r--r--fs/ext4/mballoc.c166
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/namei.c27
-rw-r--r--fs/ext4/namei.h8
-rw-r--r--fs/ext4/resize.c36
-rw-r--r--fs/ext4/super.c849
18 files changed, 1583 insertions, 1231 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a8ff003a00f7..8a34710ecf40 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,8 +5,8 @@
5obj-$(CONFIG_EXT4_FS) += ext4.o 5obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o
10 10
11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 647e0d65a284..605aeed96d68 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -129,12 +129,15 @@ fail:
129static inline struct posix_acl * 129static inline struct posix_acl *
130ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) 130ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
131{ 131{
132 struct posix_acl *acl = EXT4_ACL_NOT_CACHED; 132 struct posix_acl *acl = ACCESS_ONCE(*i_acl);
133 133
134 spin_lock(&inode->i_lock); 134 if (acl) {
135 if (*i_acl != EXT4_ACL_NOT_CACHED) 135 spin_lock(&inode->i_lock);
136 acl = posix_acl_dup(*i_acl); 136 acl = *i_acl;
137 spin_unlock(&inode->i_lock); 137 if (acl != EXT4_ACL_NOT_CACHED)
138 acl = posix_acl_dup(acl);
139 spin_unlock(&inode->i_lock);
140 }
138 141
139 return acl; 142 return acl;
140} 143}
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 53c72ad85877..e2126d70dff5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -19,7 +19,6 @@
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include "ext4.h" 20#include "ext4.h"
21#include "ext4_jbd2.h" 21#include "ext4_jbd2.h"
22#include "group.h"
23#include "mballoc.h" 22#include "mballoc.h"
24 23
25/* 24/*
@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
88 ext4_group_t block_group, struct ext4_group_desc *gdp) 87 ext4_group_t block_group, struct ext4_group_desc *gdp)
89{ 88{
90 int bit, bit_max; 89 int bit, bit_max;
90 ext4_group_t ngroups = ext4_get_groups_count(sb);
91 unsigned free_blocks, group_blocks; 91 unsigned free_blocks, group_blocks;
92 struct ext4_sb_info *sbi = EXT4_SB(sb); 92 struct ext4_sb_info *sbi = EXT4_SB(sb);
93 93
@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
123 bit_max += ext4_bg_num_gdb(sb, block_group); 123 bit_max += ext4_bg_num_gdb(sb, block_group);
124 } 124 }
125 125
126 if (block_group == sbi->s_groups_count - 1) { 126 if (block_group == ngroups - 1) {
127 /* 127 /*
128 * Even though mke2fs always initialize first and last group 128 * Even though mke2fs always initialize first and last group
129 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need 129 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
131 */ 131 */
132 group_blocks = ext4_blocks_count(sbi->s_es) - 132 group_blocks = ext4_blocks_count(sbi->s_es) -
133 le32_to_cpu(sbi->s_es->s_first_data_block) - 133 le32_to_cpu(sbi->s_es->s_first_data_block) -
134 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); 134 (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
135 } else { 135 } else {
136 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 136 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
137 } 137 }
@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
205{ 205{
206 unsigned int group_desc; 206 unsigned int group_desc;
207 unsigned int offset; 207 unsigned int offset;
208 ext4_group_t ngroups = ext4_get_groups_count(sb);
208 struct ext4_group_desc *desc; 209 struct ext4_group_desc *desc;
209 struct ext4_sb_info *sbi = EXT4_SB(sb); 210 struct ext4_sb_info *sbi = EXT4_SB(sb);
210 211
211 if (block_group >= sbi->s_groups_count) { 212 if (block_group >= ngroups) {
212 ext4_error(sb, "ext4_get_group_desc", 213 ext4_error(sb, "ext4_get_group_desc",
213 "block_group >= groups_count - " 214 "block_group >= groups_count - "
214 "block_group = %u, groups_count = %u", 215 "block_group = %u, groups_count = %u",
215 block_group, sbi->s_groups_count); 216 block_group, ngroups);
216 217
217 return NULL; 218 return NULL;
218 } 219 }
219 smp_rmb();
220 220
221 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 221 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
222 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 222 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
326 unlock_buffer(bh); 326 unlock_buffer(bh);
327 return bh; 327 return bh;
328 } 328 }
329 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 329 ext4_lock_group(sb, block_group);
330 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 330 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
331 ext4_init_block_bitmap(sb, bh, block_group, desc); 331 ext4_init_block_bitmap(sb, bh, block_group, desc);
332 set_bitmap_uptodate(bh); 332 set_bitmap_uptodate(bh);
333 set_buffer_uptodate(bh); 333 set_buffer_uptodate(bh);
334 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 334 ext4_unlock_group(sb, block_group);
335 unlock_buffer(bh); 335 unlock_buffer(bh);
336 return bh; 336 return bh;
337 } 337 }
338 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 338 ext4_unlock_group(sb, block_group);
339 if (buffer_uptodate(bh)) { 339 if (buffer_uptodate(bh)) {
340 /* 340 /*
341 * if not uninit if bh is uptodate, 341 * if not uninit if bh is uptodate,
@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
451 down_write(&grp->alloc_sem); 451 down_write(&grp->alloc_sem);
452 for (i = 0, blocks_freed = 0; i < count; i++) { 452 for (i = 0, blocks_freed = 0; i < count; i++) {
453 BUFFER_TRACE(bitmap_bh, "clear bit"); 453 BUFFER_TRACE(bitmap_bh, "clear bit");
454 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 454 if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
455 bit + i, bitmap_bh->b_data)) { 455 bit + i, bitmap_bh->b_data)) {
456 ext4_error(sb, __func__, 456 ext4_error(sb, __func__,
457 "bit already cleared for block %llu", 457 "bit already cleared for block %llu",
@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
461 blocks_freed++; 461 blocks_freed++;
462 } 462 }
463 } 463 }
464 spin_lock(sb_bgl_lock(sbi, block_group)); 464 ext4_lock_group(sb, block_group);
465 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); 465 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
466 ext4_free_blks_set(sb, desc, blk_free_count); 466 ext4_free_blks_set(sb, desc, blk_free_count);
467 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 467 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
468 spin_unlock(sb_bgl_lock(sbi, block_group)); 468 ext4_unlock_group(sb, block_group);
469 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); 469 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
470 470
471 if (sbi->s_log_groups_per_flex) { 471 if (sbi->s_log_groups_per_flex) {
@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
665 ext4_fsblk_t desc_count; 665 ext4_fsblk_t desc_count;
666 struct ext4_group_desc *gdp; 666 struct ext4_group_desc *gdp;
667 ext4_group_t i; 667 ext4_group_t i;
668 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 668 ext4_group_t ngroups = ext4_get_groups_count(sb);
669#ifdef EXT4FS_DEBUG 669#ifdef EXT4FS_DEBUG
670 struct ext4_super_block *es; 670 struct ext4_super_block *es;
671 ext4_fsblk_t bitmap_count; 671 ext4_fsblk_t bitmap_count;
@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
677 bitmap_count = 0; 677 bitmap_count = 0;
678 gdp = NULL; 678 gdp = NULL;
679 679
680 smp_rmb();
681 for (i = 0; i < ngroups; i++) { 680 for (i = 0; i < ngroups; i++) {
682 gdp = ext4_get_group_desc(sb, i, NULL); 681 gdp = ext4_get_group_desc(sb, i, NULL);
683 if (!gdp) 682 if (!gdp)
@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
700 return bitmap_count; 699 return bitmap_count;
701#else 700#else
702 desc_count = 0; 701 desc_count = 0;
703 smp_rmb();
704 for (i = 0; i < ngroups; i++) { 702 for (i = 0; i < ngroups; i++) {
705 gdp = ext4_get_group_desc(sb, i, NULL); 703 gdp = ext4_get_group_desc(sb, i, NULL);
706 if (!gdp) 704 if (!gdp)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
new file mode 100644
index 000000000000..50784ef07563
--- /dev/null
+++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
1/*
2 * linux/fs/ext4/block_validity.c
3 *
4 * Copyright (C) 2009
5 * Theodore Ts'o (tytso@mit.edu)
6 *
7 * Track which blocks in the filesystem are metadata blocks that
8 * should never be used as data blocks by files or directories.
9 */
10
11#include <linux/time.h>
12#include <linux/fs.h>
13#include <linux/namei.h>
14#include <linux/quotaops.h>
15#include <linux/buffer_head.h>
16#include <linux/module.h>
17#include <linux/swap.h>
18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h>
21#include <linux/mutex.h>
22#include "ext4.h"
23
24struct ext4_system_zone {
25 struct rb_node node;
26 ext4_fsblk_t start_blk;
27 unsigned int count;
28};
29
30static struct kmem_cache *ext4_system_zone_cachep;
31
32int __init init_ext4_system_zone(void)
33{
34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
35 SLAB_RECLAIM_ACCOUNT);
36 if (ext4_system_zone_cachep == NULL)
37 return -ENOMEM;
38 return 0;
39}
40
41void exit_ext4_system_zone(void)
42{
43 kmem_cache_destroy(ext4_system_zone_cachep);
44}
45
46static inline int can_merge(struct ext4_system_zone *entry1,
47 struct ext4_system_zone *entry2)
48{
49 if ((entry1->start_blk + entry1->count) == entry2->start_blk)
50 return 1;
51 return 0;
52}
53
54/*
55 * Mark a range of blocks as belonging to the "system zone" --- that
56 * is, filesystem metadata blocks which should never be used by
57 * inodes.
58 */
59static int add_system_zone(struct ext4_sb_info *sbi,
60 ext4_fsblk_t start_blk,
61 unsigned int count)
62{
63 struct ext4_system_zone *new_entry = NULL, *entry;
64 struct rb_node **n = &sbi->system_blks.rb_node, *node;
65 struct rb_node *parent = NULL, *new_node = NULL;
66
67 while (*n) {
68 parent = *n;
69 entry = rb_entry(parent, struct ext4_system_zone, node);
70 if (start_blk < entry->start_blk)
71 n = &(*n)->rb_left;
72 else if (start_blk >= (entry->start_blk + entry->count))
73 n = &(*n)->rb_right;
74 else {
75 if (start_blk + count > (entry->start_blk +
76 entry->count))
77 entry->count = (start_blk + count -
78 entry->start_blk);
79 new_node = *n;
80 new_entry = rb_entry(new_node, struct ext4_system_zone,
81 node);
82 break;
83 }
84 }
85
86 if (!new_entry) {
87 new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
88 GFP_KERNEL);
89 if (!new_entry)
90 return -ENOMEM;
91 new_entry->start_blk = start_blk;
92 new_entry->count = count;
93 new_node = &new_entry->node;
94
95 rb_link_node(new_node, parent, n);
96 rb_insert_color(new_node, &sbi->system_blks);
97 }
98
99 /* Can we merge to the left? */
100 node = rb_prev(new_node);
101 if (node) {
102 entry = rb_entry(node, struct ext4_system_zone, node);
103 if (can_merge(entry, new_entry)) {
104 new_entry->start_blk = entry->start_blk;
105 new_entry->count += entry->count;
106 rb_erase(node, &sbi->system_blks);
107 kmem_cache_free(ext4_system_zone_cachep, entry);
108 }
109 }
110
111 /* Can we merge to the right? */
112 node = rb_next(new_node);
113 if (node) {
114 entry = rb_entry(node, struct ext4_system_zone, node);
115 if (can_merge(new_entry, entry)) {
116 new_entry->count += entry->count;
117 rb_erase(node, &sbi->system_blks);
118 kmem_cache_free(ext4_system_zone_cachep, entry);
119 }
120 }
121 return 0;
122}
123
124static void debug_print_tree(struct ext4_sb_info *sbi)
125{
126 struct rb_node *node;
127 struct ext4_system_zone *entry;
128 int first = 1;
129
130 printk(KERN_INFO "System zones: ");
131 node = rb_first(&sbi->system_blks);
132 while (node) {
133 entry = rb_entry(node, struct ext4_system_zone, node);
134 printk("%s%llu-%llu", first ? "" : ", ",
135 entry->start_blk, entry->start_blk + entry->count - 1);
136 first = 0;
137 node = rb_next(node);
138 }
139 printk("\n");
140}
141
142int ext4_setup_system_zone(struct super_block *sb)
143{
144 ext4_group_t ngroups = ext4_get_groups_count(sb);
145 struct ext4_sb_info *sbi = EXT4_SB(sb);
146 struct ext4_group_desc *gdp;
147 ext4_group_t i;
148 int flex_size = ext4_flex_bg_size(sbi);
149 int ret;
150
151 if (!test_opt(sb, BLOCK_VALIDITY)) {
152 if (EXT4_SB(sb)->system_blks.rb_node)
153 ext4_release_system_zone(sb);
154 return 0;
155 }
156 if (EXT4_SB(sb)->system_blks.rb_node)
157 return 0;
158
159 for (i=0; i < ngroups; i++) {
160 if (ext4_bg_has_super(sb, i) &&
161 ((i < 5) || ((i % flex_size) == 0)))
162 add_system_zone(sbi, ext4_group_first_block_no(sb, i),
163 sbi->s_gdb_count + 1);
164 gdp = ext4_get_group_desc(sb, i, NULL);
165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
166 if (ret)
167 return ret;
168 ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
169 if (ret)
170 return ret;
171 ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
172 sbi->s_itb_per_group);
173 if (ret)
174 return ret;
175 }
176
177 if (test_opt(sb, DEBUG))
178 debug_print_tree(EXT4_SB(sb));
179 return 0;
180}
181
182/* Called when the filesystem is unmounted */
183void ext4_release_system_zone(struct super_block *sb)
184{
185 struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node;
186 struct rb_node *parent;
187 struct ext4_system_zone *entry;
188
189 while (n) {
190 /* Do the node's children first */
191 if (n->rb_left) {
192 n = n->rb_left;
193 continue;
194 }
195 if (n->rb_right) {
196 n = n->rb_right;
197 continue;
198 }
199 /*
200 * The node has no children; free it, and then zero
201 * out parent's link to it. Finally go to the
202 * beginning of the loop and try to free the parent
203 * node.
204 */
205 parent = rb_parent(n);
206 entry = rb_entry(n, struct ext4_system_zone, node);
207 kmem_cache_free(ext4_system_zone_cachep, entry);
208 if (!parent)
209 EXT4_SB(sb)->system_blks.rb_node = NULL;
210 else if (parent->rb_left == n)
211 parent->rb_left = NULL;
212 else if (parent->rb_right == n)
213 parent->rb_right = NULL;
214 n = parent;
215 }
216 EXT4_SB(sb)->system_blks.rb_node = NULL;
217}
218
219/*
220 * Returns 1 if the passed-in block region (start_blk,
221 * start_blk+count) is valid; 0 if some part of the block region
222 * overlaps with filesystem metadata blocks.
223 */
224int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
225 unsigned int count)
226{
227 struct ext4_system_zone *entry;
228 struct rb_node *n = sbi->system_blks.rb_node;
229
230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
231 (start_blk + count > ext4_blocks_count(sbi->s_es)))
232 return 0;
233 while (n) {
234 entry = rb_entry(n, struct ext4_system_zone, node);
235 if (start_blk + count - 1 < entry->start_blk)
236 n = n->rb_left;
237 else if (start_blk >= (entry->start_blk + entry->count))
238 n = n->rb_right;
239 else
240 return 0;
241 }
242 return 1;
243}
244
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b64789929a65..9dc93168e262 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp,
131 struct buffer_head *bh = NULL; 131 struct buffer_head *bh = NULL;
132 132
133 map_bh.b_state = 0; 133 map_bh.b_state = 0;
134 err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 134 err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
135 0, 0, 0);
136 if (err > 0) { 135 if (err > 0) {
137 pgoff_t index = map_bh.b_blocknr >> 136 pgoff_t index = map_bh.b_blocknr >>
138 (PAGE_CACHE_SHIFT - inode->i_blkbits); 137 (PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d0f15ef56de1..cc7d5edc38c9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -21,7 +21,14 @@
21#include <linux/magic.h> 21#include <linux/magic.h>
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/quota.h> 23#include <linux/quota.h>
24#include "ext4_i.h" 24#include <linux/rwsem.h>
25#include <linux/rbtree.h>
26#include <linux/seqlock.h>
27#include <linux/mutex.h>
28#include <linux/timer.h>
29#include <linux/wait.h>
30#include <linux/blockgroup_lock.h>
31#include <linux/percpu_counter.h>
25 32
26/* 33/*
27 * The fourth extended filesystem constants/structures 34 * The fourth extended filesystem constants/structures
@@ -46,6 +53,19 @@
46#define ext4_debug(f, a...) do {} while (0) 53#define ext4_debug(f, a...) do {} while (0)
47#endif 54#endif
48 55
56/* data type for block offset of block group */
57typedef int ext4_grpblk_t;
58
59/* data type for filesystem-wide blocks number */
60typedef unsigned long long ext4_fsblk_t;
61
62/* data type for file logical block number */
63typedef __u32 ext4_lblk_t;
64
65/* data type for block group number */
66typedef unsigned int ext4_group_t;
67
68
49/* prefer goal again. length */ 69/* prefer goal again. length */
50#define EXT4_MB_HINT_MERGE 1 70#define EXT4_MB_HINT_MERGE 1
51/* blocks already reserved */ 71/* blocks already reserved */
@@ -179,9 +199,6 @@ struct flex_groups {
179#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ 199#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
180#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ 200#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
181 201
182#ifdef __KERNEL__
183#include "ext4_sb.h"
184#endif
185/* 202/*
186 * Macro-instructions used to manage group descriptors 203 * Macro-instructions used to manage group descriptors
187 */ 204 */
@@ -297,10 +314,23 @@ struct ext4_new_group_data {
297}; 314};
298 315
299/* 316/*
300 * Following is used by preallocation code to tell get_blocks() that we 317 * Flags used by ext4_get_blocks()
301 * want uninitialzed extents.
302 */ 318 */
303#define EXT4_CREATE_UNINITIALIZED_EXT 2 319 /* Allocate any needed blocks and/or convert an unitialized
320 extent to be an initialized ext4 */
321#define EXT4_GET_BLOCKS_CREATE 0x0001
322 /* Request the creation of an unitialized extent */
323#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002
324#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
325 EXT4_GET_BLOCKS_CREATE)
326 /* Caller is from the delayed allocation writeout path,
327 so set the magic i_delalloc_reserve_flag after taking the
328 inode allocation semaphore for */
329#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
330 /* Call ext4_da_update_reserve_space() after successfully
331 allocating the blocks */
332#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
333
304 334
305/* 335/*
306 * ioctl commands 336 * ioctl commands
@@ -516,6 +546,110 @@ do { \
516#endif /* defined(__KERNEL__) || defined(__linux__) */ 546#endif /* defined(__KERNEL__) || defined(__linux__) */
517 547
518/* 548/*
549 * storage for cached extent
550 */
551struct ext4_ext_cache {
552 ext4_fsblk_t ec_start;
553 ext4_lblk_t ec_block;
554 __u32 ec_len; /* must be 32bit to return holes */
555 __u32 ec_type;
556};
557
558/*
559 * fourth extended file system inode data in memory
560 */
561struct ext4_inode_info {
562 __le32 i_data[15]; /* unconverted */
563 __u32 i_flags;
564 ext4_fsblk_t i_file_acl;
565 __u32 i_dtime;
566
567 /*
568 * i_block_group is the number of the block group which contains
569 * this file's inode. Constant across the lifetime of the inode,
570 * it is ued for making block allocation decisions - we try to
571 * place a file's data blocks near its inode block, and new inodes
572 * near to their parent directory's inode.
573 */
574 ext4_group_t i_block_group;
575 __u32 i_state; /* Dynamic state flags for ext4 */
576
577 ext4_lblk_t i_dir_start_lookup;
578#ifdef CONFIG_EXT4_FS_XATTR
579 /*
580 * Extended attributes can be read independently of the main file
581 * data. Taking i_mutex even when reading would cause contention
582 * between readers of EAs and writers of regular file data, so
583 * instead we synchronize on xattr_sem when reading or changing
584 * EAs.
585 */
586 struct rw_semaphore xattr_sem;
587#endif
588#ifdef CONFIG_EXT4_FS_POSIX_ACL
589 struct posix_acl *i_acl;
590 struct posix_acl *i_default_acl;
591#endif
592
593 struct list_head i_orphan; /* unlinked but open inodes */
594
595 /*
596 * i_disksize keeps track of what the inode size is ON DISK, not
597 * in memory. During truncate, i_size is set to the new size by
598 * the VFS prior to calling ext4_truncate(), but the filesystem won't
599 * set i_disksize to 0 until the truncate is actually under way.
600 *
601 * The intent is that i_disksize always represents the blocks which
602 * are used by this file. This allows recovery to restart truncate
603 * on orphans if we crash during truncate. We actually write i_disksize
604 * into the on-disk inode when writing inodes out, instead of i_size.
605 *
606 * The only time when i_disksize and i_size may be different is when
607 * a truncate is in progress. The only things which change i_disksize
608 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
609 */
610 loff_t i_disksize;
611
612 /*
613 * i_data_sem is for serialising ext4_truncate() against
614 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
615 * data tree are chopped off during truncate. We can't do that in
616 * ext4 because whenever we perform intermediate commits during
617 * truncate, the inode and all the metadata blocks *must* be in a
618 * consistent state which allows truncation of the orphans to restart
619 * during recovery. Hence we must fix the get_block-vs-truncate race
620 * by other means, so we have i_data_sem.
621 */
622 struct rw_semaphore i_data_sem;
623 struct inode vfs_inode;
624 struct jbd2_inode jinode;
625
626 struct ext4_ext_cache i_cached_extent;
627 /*
628 * File creation time. Its function is same as that of
629 * struct timespec i_{a,c,m}time in the generic inode.
630 */
631 struct timespec i_crtime;
632
633 /* mballoc */
634 struct list_head i_prealloc_list;
635 spinlock_t i_prealloc_lock;
636
637 /* ialloc */
638 ext4_group_t i_last_alloc_group;
639
640 /* allocation reservation info for delalloc */
641 unsigned int i_reserved_data_blocks;
642 unsigned int i_reserved_meta_blocks;
643 unsigned int i_allocated_meta_blocks;
644 unsigned short i_delalloc_reserved_flag;
645
646 /* on-disk additional length */
647 __u16 i_extra_isize;
648
649 spinlock_t i_block_reservation_lock;
650};
651
652/*
519 * File system states 653 * File system states
520 */ 654 */
521#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ 655#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
@@ -560,6 +694,7 @@ do { \
560#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 694#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
561#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 695#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
562#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 696#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
697#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
563 698
564/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ 699/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
565#ifndef _LINUX_EXT2_FS_H 700#ifndef _LINUX_EXT2_FS_H
@@ -689,6 +824,137 @@ struct ext4_super_block {
689}; 824};
690 825
691#ifdef __KERNEL__ 826#ifdef __KERNEL__
827/*
828 * fourth extended-fs super-block data in memory
829 */
830struct ext4_sb_info {
831 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
832 unsigned long s_inodes_per_block;/* Number of inodes per block */
833 unsigned long s_blocks_per_group;/* Number of blocks in a group */
834 unsigned long s_inodes_per_group;/* Number of inodes in a group */
835 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
836 unsigned long s_gdb_count; /* Number of group descriptor blocks */
837 unsigned long s_desc_per_block; /* Number of group descriptors per block */
838 ext4_group_t s_groups_count; /* Number of groups in the fs */
839 unsigned long s_overhead_last; /* Last calculated overhead */
840 unsigned long s_blocks_last; /* Last seen block count */
841 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
842 struct buffer_head * s_sbh; /* Buffer containing the super block */
843 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
844 struct buffer_head **s_group_desc;
845 unsigned long s_mount_opt;
846 ext4_fsblk_t s_sb_block;
847 uid_t s_resuid;
848 gid_t s_resgid;
849 unsigned short s_mount_state;
850 unsigned short s_pad;
851 int s_addr_per_block_bits;
852 int s_desc_per_block_bits;
853 int s_inode_size;
854 int s_first_ino;
855 unsigned int s_inode_readahead_blks;
856 spinlock_t s_next_gen_lock;
857 u32 s_next_generation;
858 u32 s_hash_seed[4];
859 int s_def_hash_version;
860 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
861 struct percpu_counter s_freeblocks_counter;
862 struct percpu_counter s_freeinodes_counter;
863 struct percpu_counter s_dirs_counter;
864 struct percpu_counter s_dirtyblocks_counter;
865 struct blockgroup_lock *s_blockgroup_lock;
866 struct proc_dir_entry *s_proc;
867 struct kobject s_kobj;
868 struct completion s_kobj_unregister;
869
870 /* Journaling */
871 struct inode *s_journal_inode;
872 struct journal_s *s_journal;
873 struct list_head s_orphan;
874 struct mutex s_orphan_lock;
875 struct mutex s_resize_lock;
876 unsigned long s_commit_interval;
877 u32 s_max_batch_time;
878 u32 s_min_batch_time;
879 struct block_device *journal_bdev;
880#ifdef CONFIG_JBD2_DEBUG
881 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
882 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
883#endif
884#ifdef CONFIG_QUOTA
885 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
886 int s_jquota_fmt; /* Format of quota to use */
887#endif
888 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
889 struct rb_root system_blks;
890
891#ifdef EXTENTS_STATS
892 /* ext4 extents stats */
893 unsigned long s_ext_min;
894 unsigned long s_ext_max;
895 unsigned long s_depth_max;
896 spinlock_t s_ext_stats_lock;
897 unsigned long s_ext_blocks;
898 unsigned long s_ext_extents;
899#endif
900
901 /* for buddy allocator */
902 struct ext4_group_info ***s_group_info;
903 struct inode *s_buddy_cache;
904 long s_blocks_reserved;
905 spinlock_t s_reserve_lock;
906 spinlock_t s_md_lock;
907 tid_t s_last_transaction;
908 unsigned short *s_mb_offsets;
909 unsigned int *s_mb_maxs;
910
911 /* tunables */
912 unsigned long s_stripe;
913 unsigned int s_mb_stream_request;
914 unsigned int s_mb_max_to_scan;
915 unsigned int s_mb_min_to_scan;
916 unsigned int s_mb_stats;
917 unsigned int s_mb_order2_reqs;
918 unsigned int s_mb_group_prealloc;
919 /* where last allocation was done - for stream allocation */
920 unsigned long s_mb_last_group;
921 unsigned long s_mb_last_start;
922
923 /* history to debug policy */
924 struct ext4_mb_history *s_mb_history;
925 int s_mb_history_cur;
926 int s_mb_history_max;
927 int s_mb_history_num;
928 spinlock_t s_mb_history_lock;
929 int s_mb_history_filter;
930
931 /* stats for buddy allocator */
932 spinlock_t s_mb_pa_lock;
933 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
934 atomic_t s_bal_success; /* we found long enough chunks */
935 atomic_t s_bal_allocated; /* in blocks */
936 atomic_t s_bal_ex_scanned; /* total extents scanned */
937 atomic_t s_bal_goals; /* goal hits */
938 atomic_t s_bal_breaks; /* too long searches */
939 atomic_t s_bal_2orders; /* 2^order hits */
940 spinlock_t s_bal_lock;
941 unsigned long s_mb_buddies_generated;
942 unsigned long long s_mb_generation_time;
943 atomic_t s_mb_lost_chunks;
944 atomic_t s_mb_preallocated;
945 atomic_t s_mb_discarded;
946
947 /* locality groups */
948 struct ext4_locality_group *s_locality_groups;
949
950 /* for write statistics */
951 unsigned long s_sectors_written_start;
952 u64 s_kbytes_written;
953
954 unsigned int s_log_groups_per_flex;
955 struct flex_groups *s_flex_groups;
956};
957
692static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 958static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
693{ 959{
694 return sb->s_fs_info; 960 return sb->s_fs_info;
@@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode)
704 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 970 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
705} 971}
706 972
707
708static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) 973static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
709{ 974{
710 return ino == EXT4_ROOT_INO || 975 return ino == EXT4_ROOT_INO ||
@@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1014 ext4_group_t block_group, 1279 ext4_group_t block_group,
1015 struct buffer_head ** bh); 1280 struct buffer_head ** bh);
1016extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1281extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1282struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
1283 ext4_group_t block_group);
1284extern unsigned ext4_init_block_bitmap(struct super_block *sb,
1285 struct buffer_head *bh,
1286 ext4_group_t group,
1287 struct ext4_group_desc *desc);
1288#define ext4_free_blocks_after_init(sb, group, desc) \
1289 ext4_init_block_bitmap(sb, NULL, group, desc)
1017 1290
1018/* dir.c */ 1291/* dir.c */
1019extern int ext4_check_dir_entry(const char *, struct inode *, 1292extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1038extern unsigned long ext4_count_free_inodes(struct super_block *); 1311extern unsigned long ext4_count_free_inodes(struct super_block *);
1039extern unsigned long ext4_count_dirs(struct super_block *); 1312extern unsigned long ext4_count_dirs(struct super_block *);
1040extern void ext4_check_inodes_bitmap(struct super_block *); 1313extern void ext4_check_inodes_bitmap(struct super_block *);
1314extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
1315 struct buffer_head *bh,
1316 ext4_group_t group,
1317 struct ext4_group_desc *desc);
1318extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1041 1319
1042/* mballoc.c */ 1320/* mballoc.c */
1043extern long ext4_mb_stats; 1321extern long ext4_mb_stats;
@@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1123 __attribute__ ((format (printf, 3, 4))); 1401 __attribute__ ((format (printf, 3, 4)));
1124extern void ext4_warning(struct super_block *, const char *, const char *, ...) 1402extern void ext4_warning(struct super_block *, const char *, const char *, ...)
1125 __attribute__ ((format (printf, 3, 4))); 1403 __attribute__ ((format (printf, 3, 4)));
1404extern void ext4_msg(struct super_block *, const char *, const char *, ...)
1405 __attribute__ ((format (printf, 3, 4)));
1126extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, 1406extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
1127 const char *, const char *, ...) 1407 const char *, const char *, ...)
1128 __attribute__ ((format (printf, 4, 5))); 1408 __attribute__ ((format (printf, 4, 5)));
@@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb,
1161 struct ext4_group_desc *bg, __u32 count); 1441 struct ext4_group_desc *bg, __u32 count);
1162extern void ext4_itable_unused_set(struct super_block *sb, 1442extern void ext4_itable_unused_set(struct super_block *sb,
1163 struct ext4_group_desc *bg, __u32 count); 1443 struct ext4_group_desc *bg, __u32 count);
1444extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
1445 struct ext4_group_desc *gdp);
1446extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
1447 struct ext4_group_desc *gdp);
1164 1448
1165static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 1449static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1166{ 1450{
@@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1228 return grp_info[indexv][indexh]; 1512 return grp_info[indexv][indexh];
1229} 1513}
1230 1514
1515/*
1516 * Reading s_groups_count requires using smp_rmb() afterwards. See
1517 * the locking protocol documented in the comments of ext4_group_add()
1518 * in resize.c
1519 */
1520static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
1521{
1522 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
1523
1524 smp_rmb();
1525 return ngroups;
1526}
1231 1527
1232static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, 1528static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
1233 ext4_group_t block_group) 1529 ext4_group_t block_group)
@@ -1283,33 +1579,25 @@ struct ext4_group_info {
1283}; 1579};
1284 1580
1285#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 1581#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
1286#define EXT4_GROUP_INFO_LOCKED_BIT 1
1287 1582
1288#define EXT4_MB_GRP_NEED_INIT(grp) \ 1583#define EXT4_MB_GRP_NEED_INIT(grp) \
1289 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) 1584 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
1290 1585
1291static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) 1586static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
1587 ext4_group_t group)
1292{ 1588{
1293 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1589 return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
1294
1295 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1296} 1590}
1297 1591
1298static inline void ext4_unlock_group(struct super_block *sb, 1592static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
1299 ext4_group_t group)
1300{ 1593{
1301 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1594 spin_lock(ext4_group_lock_ptr(sb, group));
1302
1303 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1304} 1595}
1305 1596
1306static inline int ext4_is_group_locked(struct super_block *sb, 1597static inline void ext4_unlock_group(struct super_block *sb,
1307 ext4_group_t group) 1598 ext4_group_t group)
1308{ 1599{
1309 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); 1600 spin_unlock(ext4_group_lock_ptr(sb, group));
1310
1311 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
1312 &(grinfo->bb_state));
1313} 1601}
1314 1602
1315/* 1603/*
@@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations;
1326/* namei.c */ 1614/* namei.c */
1327extern const struct inode_operations ext4_dir_inode_operations; 1615extern const struct inode_operations ext4_dir_inode_operations;
1328extern const struct inode_operations ext4_special_inode_operations; 1616extern const struct inode_operations ext4_special_inode_operations;
1617extern struct dentry *ext4_get_parent(struct dentry *child);
1329 1618
1330/* symlink.c */ 1619/* symlink.c */
1331extern const struct inode_operations ext4_symlink_inode_operations; 1620extern const struct inode_operations ext4_symlink_inode_operations;
1332extern const struct inode_operations ext4_fast_symlink_inode_operations; 1621extern const struct inode_operations ext4_fast_symlink_inode_operations;
1333 1622
1623/* block_validity */
1624extern void ext4_release_system_zone(struct super_block *sb);
1625extern int ext4_setup_system_zone(struct super_block *sb);
1626extern int __init init_ext4_system_zone(void);
1627extern void exit_ext4_system_zone(void);
1628extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
1629 ext4_fsblk_t start_blk,
1630 unsigned int count);
1631
1334/* extents.c */ 1632/* extents.c */
1335extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 1633extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1336extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 1634extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
@@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
1338 int chunk); 1636 int chunk);
1339extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 1637extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1340 ext4_lblk_t iblock, unsigned int max_blocks, 1638 ext4_lblk_t iblock, unsigned int max_blocks,
1341 struct buffer_head *bh_result, 1639 struct buffer_head *bh_result, int flags);
1342 int create, int extend_disksize);
1343extern void ext4_ext_truncate(struct inode *); 1640extern void ext4_ext_truncate(struct inode *);
1344extern void ext4_ext_init(struct super_block *); 1641extern void ext4_ext_init(struct super_block *);
1345extern void ext4_ext_release(struct super_block *); 1642extern void ext4_ext_release(struct super_block *);
1346extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 1643extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1347 loff_t len); 1644 loff_t len);
1348extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, 1645extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
1349 sector_t block, unsigned int max_blocks, 1646 sector_t block, unsigned int max_blocks,
1350 struct buffer_head *bh, int create, 1647 struct buffer_head *bh, int flags);
1351 int extend_disksize, int flag);
1352extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1648extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1353 __u64 start, __u64 len); 1649 __u64 start, __u64 len);
1354 1650
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
deleted file mode 100644
index 4ce2187123aa..000000000000
--- a/fs/ext4/ext4_i.h
+++ /dev/null
@@ -1,140 +0,0 @@
1/*
2 * ext4_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_I
17#define _EXT4_I
18
19#include <linux/rwsem.h>
20#include <linux/rbtree.h>
21#include <linux/seqlock.h>
22#include <linux/mutex.h>
23
24/* data type for block offset of block group */
25typedef int ext4_grpblk_t;
26
27/* data type for filesystem-wide blocks number */
28typedef unsigned long long ext4_fsblk_t;
29
30/* data type for file logical block number */
31typedef __u32 ext4_lblk_t;
32
33/* data type for block group number */
34typedef unsigned int ext4_group_t;
35
36/*
37 * storage for cached extent
38 */
39struct ext4_ext_cache {
40 ext4_fsblk_t ec_start;
41 ext4_lblk_t ec_block;
42 __u32 ec_len; /* must be 32bit to return holes */
43 __u32 ec_type;
44};
45
46/*
47 * fourth extended file system inode data in memory
48 */
49struct ext4_inode_info {
50 __le32 i_data[15]; /* unconverted */
51 __u32 i_flags;
52 ext4_fsblk_t i_file_acl;
53 __u32 i_dtime;
54
55 /*
56 * i_block_group is the number of the block group which contains
57 * this file's inode. Constant across the lifetime of the inode,
58 * it is ued for making block allocation decisions - we try to
59 * place a file's data blocks near its inode block, and new inodes
60 * near to their parent directory's inode.
61 */
62 ext4_group_t i_block_group;
63 __u32 i_state; /* Dynamic state flags for ext4 */
64
65 ext4_lblk_t i_dir_start_lookup;
66#ifdef CONFIG_EXT4_FS_XATTR
67 /*
68 * Extended attributes can be read independently of the main file
69 * data. Taking i_mutex even when reading would cause contention
70 * between readers of EAs and writers of regular file data, so
71 * instead we synchronize on xattr_sem when reading or changing
72 * EAs.
73 */
74 struct rw_semaphore xattr_sem;
75#endif
76#ifdef CONFIG_EXT4_FS_POSIX_ACL
77 struct posix_acl *i_acl;
78 struct posix_acl *i_default_acl;
79#endif
80
81 struct list_head i_orphan; /* unlinked but open inodes */
82
83 /*
84 * i_disksize keeps track of what the inode size is ON DISK, not
85 * in memory. During truncate, i_size is set to the new size by
86 * the VFS prior to calling ext4_truncate(), but the filesystem won't
87 * set i_disksize to 0 until the truncate is actually under way.
88 *
89 * The intent is that i_disksize always represents the blocks which
90 * are used by this file. This allows recovery to restart truncate
91 * on orphans if we crash during truncate. We actually write i_disksize
92 * into the on-disk inode when writing inodes out, instead of i_size.
93 *
94 * The only time when i_disksize and i_size may be different is when
95 * a truncate is in progress. The only things which change i_disksize
96 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
97 */
98 loff_t i_disksize;
99
100 /*
101 * i_data_sem is for serialising ext4_truncate() against
102 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
103 * data tree are chopped off during truncate. We can't do that in
104 * ext4 because whenever we perform intermediate commits during
105 * truncate, the inode and all the metadata blocks *must* be in a
106 * consistent state which allows truncation of the orphans to restart
107 * during recovery. Hence we must fix the get_block-vs-truncate race
108 * by other means, so we have i_data_sem.
109 */
110 struct rw_semaphore i_data_sem;
111 struct inode vfs_inode;
112 struct jbd2_inode jinode;
113
114 struct ext4_ext_cache i_cached_extent;
115 /*
116 * File creation time. Its function is same as that of
117 * struct timespec i_{a,c,m}time in the generic inode.
118 */
119 struct timespec i_crtime;
120
121 /* mballoc */
122 struct list_head i_prealloc_list;
123 spinlock_t i_prealloc_lock;
124
125 /* ialloc */
126 ext4_group_t i_last_alloc_group;
127
128 /* allocation reservation info for delalloc */
129 unsigned int i_reserved_data_blocks;
130 unsigned int i_reserved_meta_blocks;
131 unsigned int i_allocated_meta_blocks;
132 unsigned short i_delalloc_reserved_flag;
133
134 /* on-disk additional length */
135 __u16 i_extra_isize;
136
137 spinlock_t i_block_reservation_lock;
138};
139
140#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
deleted file mode 100644
index 57b71fefbccf..000000000000
--- a/fs/ext4/ext4_sb.h
+++ /dev/null
@@ -1,161 +0,0 @@
1/*
2 * ext4_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_SB
17#define _EXT4_SB
18
19#ifdef __KERNEL__
20#include <linux/timer.h>
21#include <linux/wait.h>
22#include <linux/blockgroup_lock.h>
23#include <linux/percpu_counter.h>
24#endif
25#include <linux/rbtree.h>
26
27/*
28 * fourth extended-fs super-block data in memory
29 */
30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
32 unsigned long s_inodes_per_block;/* Number of inodes per block */
33 unsigned long s_blocks_per_group;/* Number of blocks in a group */
34 unsigned long s_inodes_per_group;/* Number of inodes in a group */
35 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
36 unsigned long s_gdb_count; /* Number of group descriptor blocks */
37 unsigned long s_desc_per_block; /* Number of group descriptors per block */
38 ext4_group_t s_groups_count; /* Number of groups in the fs */
39 unsigned long s_overhead_last; /* Last calculated overhead */
40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head **s_group_desc;
45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid;
48 gid_t s_resgid;
49 unsigned short s_mount_state;
50 unsigned short s_pad;
51 int s_addr_per_block_bits;
52 int s_desc_per_block_bits;
53 int s_inode_size;
54 int s_first_ino;
55 unsigned int s_inode_readahead_blks;
56 spinlock_t s_next_gen_lock;
57 u32 s_next_generation;
58 u32 s_hash_seed[4];
59 int s_def_hash_version;
60 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
61 struct percpu_counter s_freeblocks_counter;
62 struct percpu_counter s_freeinodes_counter;
63 struct percpu_counter s_dirs_counter;
64 struct percpu_counter s_dirtyblocks_counter;
65 struct blockgroup_lock *s_blockgroup_lock;
66 struct proc_dir_entry *s_proc;
67 struct kobject s_kobj;
68 struct completion s_kobj_unregister;
69
70 /* Journaling */
71 struct inode *s_journal_inode;
72 struct journal_s *s_journal;
73 struct list_head s_orphan;
74 unsigned long s_commit_interval;
75 u32 s_max_batch_time;
76 u32 s_min_batch_time;
77 struct block_device *journal_bdev;
78#ifdef CONFIG_JBD2_DEBUG
79 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
80 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
81#endif
82#ifdef CONFIG_QUOTA
83 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
84 int s_jquota_fmt; /* Format of quota to use */
85#endif
86 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
87
88#ifdef EXTENTS_STATS
89 /* ext4 extents stats */
90 unsigned long s_ext_min;
91 unsigned long s_ext_max;
92 unsigned long s_depth_max;
93 spinlock_t s_ext_stats_lock;
94 unsigned long s_ext_blocks;
95 unsigned long s_ext_extents;
96#endif
97
98 /* for buddy allocator */
99 struct ext4_group_info ***s_group_info;
100 struct inode *s_buddy_cache;
101 long s_blocks_reserved;
102 spinlock_t s_reserve_lock;
103 spinlock_t s_md_lock;
104 tid_t s_last_transaction;
105 unsigned short *s_mb_offsets;
106 unsigned int *s_mb_maxs;
107
108 /* tunables */
109 unsigned long s_stripe;
110 unsigned int s_mb_stream_request;
111 unsigned int s_mb_max_to_scan;
112 unsigned int s_mb_min_to_scan;
113 unsigned int s_mb_stats;
114 unsigned int s_mb_order2_reqs;
115 unsigned int s_mb_group_prealloc;
116 /* where last allocation was done - for stream allocation */
117 unsigned long s_mb_last_group;
118 unsigned long s_mb_last_start;
119
120 /* history to debug policy */
121 struct ext4_mb_history *s_mb_history;
122 int s_mb_history_cur;
123 int s_mb_history_max;
124 int s_mb_history_num;
125 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter;
127
128 /* stats for buddy allocator */
129 spinlock_t s_mb_pa_lock;
130 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
131 atomic_t s_bal_success; /* we found long enough chunks */
132 atomic_t s_bal_allocated; /* in blocks */
133 atomic_t s_bal_ex_scanned; /* total extents scanned */
134 atomic_t s_bal_goals; /* goal hits */
135 atomic_t s_bal_breaks; /* too long searches */
136 atomic_t s_bal_2orders; /* 2^order hits */
137 spinlock_t s_bal_lock;
138 unsigned long s_mb_buddies_generated;
139 unsigned long long s_mb_generation_time;
140 atomic_t s_mb_lost_chunks;
141 atomic_t s_mb_preallocated;
142 atomic_t s_mb_discarded;
143
144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups;
146
147 /* for write statistics */
148 unsigned long s_sectors_written_start;
149 u64 s_kbytes_written;
150
151 unsigned int s_log_groups_per_flex;
152 struct flex_groups *s_flex_groups;
153};
154
155static inline spinlock_t *
156sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
157{
158 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
159}
160
161#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e3a55eb8b26a..2593f748c3a4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth)
326 326
327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
328{ 328{
329 ext4_fsblk_t block = ext_pblock(ext), valid_block; 329 ext4_fsblk_t block = ext_pblock(ext);
330 int len = ext4_ext_get_actual_len(ext); 330 int len = ext4_ext_get_actual_len(ext);
331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
332 331
333 valid_block = le32_to_cpu(es->s_first_data_block) + 332 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
334 EXT4_SB(inode->i_sb)->s_gdb_count;
335 if (unlikely(block <= valid_block ||
336 ((block + len) > ext4_blocks_count(es))))
337 return 0;
338 else
339 return 1;
340} 333}
341 334
342static int ext4_valid_extent_idx(struct inode *inode, 335static int ext4_valid_extent_idx(struct inode *inode,
343 struct ext4_extent_idx *ext_idx) 336 struct ext4_extent_idx *ext_idx)
344{ 337{
345 ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; 338 ext4_fsblk_t block = idx_pblock(ext_idx);
346 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
347 339
348 valid_block = le32_to_cpu(es->s_first_data_block) + 340 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
349 EXT4_SB(inode->i_sb)->s_gdb_count;
350 if (unlikely(block <= valid_block ||
351 (block >= ext4_blocks_count(es))))
352 return 0;
353 else
354 return 1;
355} 341}
356 342
357static int ext4_valid_extent_entries(struct inode *inode, 343static int ext4_valid_extent_entries(struct inode *inode,
@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2097 ex = EXT_LAST_EXTENT(eh); 2083 ex = EXT_LAST_EXTENT(eh);
2098 2084
2099 ex_ee_block = le32_to_cpu(ex->ee_block); 2085 ex_ee_block = le32_to_cpu(ex->ee_block);
2100 if (ext4_ext_is_uninitialized(ex))
2101 uninitialized = 1;
2102 ex_ee_len = ext4_ext_get_actual_len(ex); 2086 ex_ee_len = ext4_ext_get_actual_len(ex);
2103 2087
2104 while (ex >= EXT_FIRST_EXTENT(eh) && 2088 while (ex >= EXT_FIRST_EXTENT(eh) &&
2105 ex_ee_block + ex_ee_len > start) { 2089 ex_ee_block + ex_ee_len > start) {
2090
2091 if (ext4_ext_is_uninitialized(ex))
2092 uninitialized = 1;
2093 else
2094 uninitialized = 0;
2095
2106 ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); 2096 ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
2107 path[depth].p_ext = ex; 2097 path[depth].p_ext = ex;
2108 2098
@@ -2784,7 +2774,7 @@ fix_extent_len:
2784int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 2774int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2785 ext4_lblk_t iblock, 2775 ext4_lblk_t iblock,
2786 unsigned int max_blocks, struct buffer_head *bh_result, 2776 unsigned int max_blocks, struct buffer_head *bh_result,
2787 int create, int extend_disksize) 2777 int flags)
2788{ 2778{
2789 struct ext4_ext_path *path = NULL; 2779 struct ext4_ext_path *path = NULL;
2790 struct ext4_extent_header *eh; 2780 struct ext4_extent_header *eh;
@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2793 int err = 0, depth, ret, cache_type; 2783 int err = 0, depth, ret, cache_type;
2794 unsigned int allocated = 0; 2784 unsigned int allocated = 0;
2795 struct ext4_allocation_request ar; 2785 struct ext4_allocation_request ar;
2796 loff_t disksize;
2797 2786
2798 __clear_bit(BH_New, &bh_result->b_state); 2787 __clear_bit(BH_New, &bh_result->b_state);
2799 ext_debug("blocks %u/%u requested for inode %u\n", 2788 ext_debug("blocks %u/%u requested for inode %u\n",
@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2803 cache_type = ext4_ext_in_cache(inode, iblock, &newex); 2792 cache_type = ext4_ext_in_cache(inode, iblock, &newex);
2804 if (cache_type) { 2793 if (cache_type) {
2805 if (cache_type == EXT4_EXT_CACHE_GAP) { 2794 if (cache_type == EXT4_EXT_CACHE_GAP) {
2806 if (!create) { 2795 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2807 /* 2796 /*
2808 * block isn't allocated yet and 2797 * block isn't allocated yet and
2809 * user doesn't want to allocate it 2798 * user doesn't want to allocate it
@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2869 EXT4_EXT_CACHE_EXTENT); 2858 EXT4_EXT_CACHE_EXTENT);
2870 goto out; 2859 goto out;
2871 } 2860 }
2872 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2861 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
2873 goto out; 2862 goto out;
2874 if (!create) { 2863 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2864 if (allocated > max_blocks)
2865 allocated = max_blocks;
2875 /* 2866 /*
2876 * We have blocks reserved already. We 2867 * We have blocks reserved already. We
2877 * return allocated blocks so that delalloc 2868 * return allocated blocks so that delalloc
@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2879 * the buffer head will be unmapped so that 2870 * the buffer head will be unmapped so that
2880 * a read from the block returns 0s. 2871 * a read from the block returns 0s.
2881 */ 2872 */
2882 if (allocated > max_blocks)
2883 allocated = max_blocks;
2884 set_buffer_unwritten(bh_result); 2873 set_buffer_unwritten(bh_result);
2885 bh_result->b_bdev = inode->i_sb->s_bdev; 2874 bh_result->b_bdev = inode->i_sb->s_bdev;
2886 bh_result->b_blocknr = newblock; 2875 bh_result->b_blocknr = newblock;
@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2903 * requested block isn't allocated yet; 2892 * requested block isn't allocated yet;
2904 * we couldn't try to create block if create flag is zero 2893 * we couldn't try to create block if create flag is zero
2905 */ 2894 */
2906 if (!create) { 2895 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2907 /* 2896 /*
2908 * put just found gap into cache to speed up 2897 * put just found gap into cache to speed up
2909 * subsequent requests 2898 * subsequent requests
@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2932 * EXT_UNINIT_MAX_LEN. 2921 * EXT_UNINIT_MAX_LEN.
2933 */ 2922 */
2934 if (max_blocks > EXT_INIT_MAX_LEN && 2923 if (max_blocks > EXT_INIT_MAX_LEN &&
2935 create != EXT4_CREATE_UNINITIALIZED_EXT) 2924 !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
2936 max_blocks = EXT_INIT_MAX_LEN; 2925 max_blocks = EXT_INIT_MAX_LEN;
2937 else if (max_blocks > EXT_UNINIT_MAX_LEN && 2926 else if (max_blocks > EXT_UNINIT_MAX_LEN &&
2938 create == EXT4_CREATE_UNINITIALIZED_EXT) 2927 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
2939 max_blocks = EXT_UNINIT_MAX_LEN; 2928 max_blocks = EXT_UNINIT_MAX_LEN;
2940 2929
2941 /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ 2930 /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2966 /* try to insert new extent into found leaf and return */ 2955 /* try to insert new extent into found leaf and return */
2967 ext4_ext_store_pblock(&newex, newblock); 2956 ext4_ext_store_pblock(&newex, newblock);
2968 newex.ee_len = cpu_to_le16(ar.len); 2957 newex.ee_len = cpu_to_le16(ar.len);
2969 if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ 2958 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */
2970 ext4_ext_mark_uninitialized(&newex); 2959 ext4_ext_mark_uninitialized(&newex);
2971 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2960 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2972 if (err) { 2961 if (err) {
@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2983 newblock = ext_pblock(&newex); 2972 newblock = ext_pblock(&newex);
2984 allocated = ext4_ext_get_actual_len(&newex); 2973 allocated = ext4_ext_get_actual_len(&newex);
2985outnew: 2974outnew:
2986 if (extend_disksize) {
2987 disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
2988 if (disksize > i_size_read(inode))
2989 disksize = i_size_read(inode);
2990 if (disksize > EXT4_I(inode)->i_disksize)
2991 EXT4_I(inode)->i_disksize = disksize;
2992 }
2993
2994 set_buffer_new(bh_result); 2975 set_buffer_new(bh_result);
2995 2976
2996 /* Cache only when it is _not_ an uninitialized extent */ 2977 /* Cache only when it is _not_ an uninitialized extent */
2997 if (create != EXT4_CREATE_UNINITIALIZED_EXT) 2978 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
2998 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 2979 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
2999 EXT4_EXT_CACHE_EXTENT); 2980 EXT4_EXT_CACHE_EXTENT);
3000out: 2981out:
@@ -3150,9 +3131,10 @@ retry:
3150 ret = PTR_ERR(handle); 3131 ret = PTR_ERR(handle);
3151 break; 3132 break;
3152 } 3133 }
3153 ret = ext4_get_blocks_wrap(handle, inode, block, 3134 map_bh.b_state = 0;
3154 max_blocks, &map_bh, 3135 ret = ext4_get_blocks(handle, inode, block,
3155 EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); 3136 max_blocks, &map_bh,
3137 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
3156 if (ret <= 0) { 3138 if (ret <= 0) {
3157#ifdef EXT4FS_DEBUG 3139#ifdef EXT4FS_DEBUG
3158 WARN_ON(ret <= 0); 3140 WARN_ON(ret <= 0);
@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3195 void *data) 3177 void *data)
3196{ 3178{
3197 struct fiemap_extent_info *fieinfo = data; 3179 struct fiemap_extent_info *fieinfo = data;
3198 unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; 3180 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
3199 __u64 logical; 3181 __u64 logical;
3200 __u64 physical; 3182 __u64 physical;
3201 __u64 length; 3183 __u64 length;
@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3242 * 3224 *
3243 * XXX this might miss a single-block extent at EXT_MAX_BLOCK 3225 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3244 */ 3226 */
3245 if (logical + length - 1 == EXT_MAX_BLOCK || 3227 if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
3246 ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) 3228 newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
3229 loff_t size = i_size_read(inode);
3230 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
3231
3247 flags |= FIEMAP_EXTENT_LAST; 3232 flags |= FIEMAP_EXTENT_LAST;
3233 if ((flags & FIEMAP_EXTENT_DELALLOC) &&
3234 logical+length > size)
3235 length = (size - logical + bs - 1) & ~(bs-1);
3236 }
3248 3237
3249 error = fiemap_fill_next_extent(fieinfo, logical, physical, 3238 error = fiemap_fill_next_extent(fieinfo, logical, physical,
3250 length, flags); 3239 length, flags);
@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3318 * Walk the extent tree gathering extent information. 3307 * Walk the extent tree gathering extent information.
3319 * ext4_ext_fiemap_cb will push extents back to user. 3308 * ext4_ext_fiemap_cb will push extents back to user.
3320 */ 3309 */
3321 down_write(&EXT4_I(inode)->i_data_sem); 3310 down_read(&EXT4_I(inode)->i_data_sem);
3322 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3311 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3323 ext4_ext_fiemap_cb, fieinfo); 3312 ext4_ext_fiemap_cb, fieinfo);
3324 up_write(&EXT4_I(inode)->i_data_sem); 3313 up_read(&EXT4_I(inode)->i_data_sem);
3325 } 3314 }
3326 3315
3327 return error; 3316 return error;
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
deleted file mode 100644
index c2c0a8d06d0e..000000000000
--- a/fs/ext4/group.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * linux/fs/ext4/group.h
3 *
4 * Copyright (C) 2007 Cluster File Systems, Inc
5 *
6 * Author: Andreas Dilger <adilger@clusterfs.com>
7 */
8
9#ifndef _LINUX_EXT4_GROUP_H
10#define _LINUX_EXT4_GROUP_H
11
12extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
13 struct ext4_group_desc *gdp);
14extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
15 struct ext4_group_desc *gdp);
16struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
17 ext4_group_t block_group);
18extern unsigned ext4_init_block_bitmap(struct super_block *sb,
19 struct buffer_head *bh,
20 ext4_group_t group,
21 struct ext4_group_desc *desc);
22#define ext4_free_blocks_after_init(sb, group, desc) \
23 ext4_init_block_bitmap(sb, NULL, group, desc)
24extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
25 struct buffer_head *bh,
26 ext4_group_t group,
27 struct ext4_group_desc *desc);
28extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
29#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18e0a08a6b5..3743bd849bce 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -27,7 +27,6 @@
27#include "ext4_jbd2.h" 27#include "ext4_jbd2.h"
28#include "xattr.h" 28#include "xattr.h"
29#include "acl.h" 29#include "acl.h"
30#include "group.h"
31 30
32/* 31/*
33 * ialloc.c contains the inodes allocation and deallocation routines 32 * ialloc.c contains the inodes allocation and deallocation routines
@@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
123 unlock_buffer(bh); 122 unlock_buffer(bh);
124 return bh; 123 return bh;
125 } 124 }
126 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 125 ext4_lock_group(sb, block_group);
127 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 126 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
128 ext4_init_inode_bitmap(sb, bh, block_group, desc); 127 ext4_init_inode_bitmap(sb, bh, block_group, desc);
129 set_bitmap_uptodate(bh); 128 set_bitmap_uptodate(bh);
130 set_buffer_uptodate(bh); 129 set_buffer_uptodate(bh);
131 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 130 ext4_unlock_group(sb, block_group);
132 unlock_buffer(bh); 131 unlock_buffer(bh);
133 return bh; 132 return bh;
134 } 133 }
135 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 134 ext4_unlock_group(sb, block_group);
136 if (buffer_uptodate(bh)) { 135 if (buffer_uptodate(bh)) {
137 /* 136 /*
138 * if not uninit if bh is uptodate, 137 * if not uninit if bh is uptodate,
@@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
247 goto error_return; 246 goto error_return;
248 247
249 /* Ok, now we can actually update the inode bitmaps.. */ 248 /* Ok, now we can actually update the inode bitmaps.. */
250 spin_lock(sb_bgl_lock(sbi, block_group)); 249 cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
251 cleared = ext4_clear_bit(bit, bitmap_bh->b_data); 250 bit, bitmap_bh->b_data);
252 spin_unlock(sb_bgl_lock(sbi, block_group));
253 if (!cleared) 251 if (!cleared)
254 ext4_error(sb, "ext4_free_inode", 252 ext4_error(sb, "ext4_free_inode",
255 "bit already cleared for inode %lu", ino); 253 "bit already cleared for inode %lu", ino);
@@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
261 if (fatal) goto error_return; 259 if (fatal) goto error_return;
262 260
263 if (gdp) { 261 if (gdp) {
264 spin_lock(sb_bgl_lock(sbi, block_group)); 262 ext4_lock_group(sb, block_group);
265 count = ext4_free_inodes_count(sb, gdp) + 1; 263 count = ext4_free_inodes_count(sb, gdp) + 1;
266 ext4_free_inodes_set(sb, gdp, count); 264 ext4_free_inodes_set(sb, gdp, count);
267 if (is_directory) { 265 if (is_directory) {
@@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
277 } 275 }
278 gdp->bg_checksum = ext4_group_desc_csum(sbi, 276 gdp->bg_checksum = ext4_group_desc_csum(sbi,
279 block_group, gdp); 277 block_group, gdp);
280 spin_unlock(sb_bgl_lock(sbi, block_group)); 278 ext4_unlock_group(sb, block_group);
281 percpu_counter_inc(&sbi->s_freeinodes_counter); 279 percpu_counter_inc(&sbi->s_freeinodes_counter);
282 if (is_directory) 280 if (is_directory)
283 percpu_counter_dec(&sbi->s_dirs_counter); 281 percpu_counter_dec(&sbi->s_dirs_counter);
@@ -316,7 +314,7 @@ error_return:
316static int find_group_dir(struct super_block *sb, struct inode *parent, 314static int find_group_dir(struct super_block *sb, struct inode *parent,
317 ext4_group_t *best_group) 315 ext4_group_t *best_group)
318{ 316{
319 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 317 ext4_group_t ngroups = ext4_get_groups_count(sb);
320 unsigned int freei, avefreei; 318 unsigned int freei, avefreei;
321 struct ext4_group_desc *desc, *best_desc = NULL; 319 struct ext4_group_desc *desc, *best_desc = NULL;
322 ext4_group_t group; 320 ext4_group_t group;
@@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
349{ 347{
350 struct ext4_sb_info *sbi = EXT4_SB(sb); 348 struct ext4_sb_info *sbi = EXT4_SB(sb);
351 struct ext4_group_desc *desc; 349 struct ext4_group_desc *desc;
352 struct buffer_head *bh;
353 struct flex_groups *flex_group = sbi->s_flex_groups; 350 struct flex_groups *flex_group = sbi->s_flex_groups;
354 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 351 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
355 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); 352 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
356 ext4_group_t ngroups = sbi->s_groups_count; 353 ext4_group_t ngroups = ext4_get_groups_count(sb);
357 int flex_size = ext4_flex_bg_size(sbi); 354 int flex_size = ext4_flex_bg_size(sbi);
358 ext4_group_t best_flex = parent_fbg_group; 355 ext4_group_t best_flex = parent_fbg_group;
359 int blocks_per_flex = sbi->s_blocks_per_group * flex_size; 356 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
362 ext4_group_t n_fbg_groups; 359 ext4_group_t n_fbg_groups;
363 ext4_group_t i; 360 ext4_group_t i;
364 361
365 n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> 362 n_fbg_groups = (ngroups + flex_size - 1) >>
366 sbi->s_log_groups_per_flex; 363 sbi->s_log_groups_per_flex;
367 364
368find_close_to_parent: 365find_close_to_parent:
@@ -404,7 +401,7 @@ find_close_to_parent:
404found_flexbg: 401found_flexbg:
405 for (i = best_flex * flex_size; i < ngroups && 402 for (i = best_flex * flex_size; i < ngroups &&
406 i < (best_flex + 1) * flex_size; i++) { 403 i < (best_flex + 1) * flex_size; i++) {
407 desc = ext4_get_group_desc(sb, i, &bh); 404 desc = ext4_get_group_desc(sb, i, NULL);
408 if (ext4_free_inodes_count(sb, desc)) { 405 if (ext4_free_inodes_count(sb, desc)) {
409 *best_group = i; 406 *best_group = i;
410 goto out; 407 goto out;
@@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
478{ 475{
479 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 476 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
480 struct ext4_sb_info *sbi = EXT4_SB(sb); 477 struct ext4_sb_info *sbi = EXT4_SB(sb);
481 ext4_group_t ngroups = sbi->s_groups_count; 478 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
482 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 479 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
483 unsigned int freei, avefreei; 480 unsigned int freei, avefreei;
484 ext4_fsblk_t freeb, avefreeb; 481 ext4_fsblk_t freeb, avefreeb;
485 unsigned int ndirs; 482 unsigned int ndirs;
486 int max_dirs, min_inodes; 483 int max_dirs, min_inodes;
487 ext4_grpblk_t min_blocks; 484 ext4_grpblk_t min_blocks;
488 ext4_group_t i, grp, g; 485 ext4_group_t i, grp, g, ngroups;
489 struct ext4_group_desc *desc; 486 struct ext4_group_desc *desc;
490 struct orlov_stats stats; 487 struct orlov_stats stats;
491 int flex_size = ext4_flex_bg_size(sbi); 488 int flex_size = ext4_flex_bg_size(sbi);
492 489
490 ngroups = real_ngroups;
493 if (flex_size > 1) { 491 if (flex_size > 1) {
494 ngroups = (ngroups + flex_size - 1) >> 492 ngroups = (real_ngroups + flex_size - 1) >>
495 sbi->s_log_groups_per_flex; 493 sbi->s_log_groups_per_flex;
496 parent_group >>= sbi->s_log_groups_per_flex; 494 parent_group >>= sbi->s_log_groups_per_flex;
497 } 495 }
@@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
543 */ 541 */
544 grp *= flex_size; 542 grp *= flex_size;
545 for (i = 0; i < flex_size; i++) { 543 for (i = 0; i < flex_size; i++) {
546 if (grp+i >= sbi->s_groups_count) 544 if (grp+i >= real_ngroups)
547 break; 545 break;
548 desc = ext4_get_group_desc(sb, grp+i, NULL); 546 desc = ext4_get_group_desc(sb, grp+i, NULL);
549 if (desc && ext4_free_inodes_count(sb, desc)) { 547 if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
583 } 581 }
584 582
585fallback: 583fallback:
586 ngroups = sbi->s_groups_count; 584 ngroups = real_ngroups;
587 avefreei = freei / ngroups; 585 avefreei = freei / ngroups;
588fallback_retry: 586fallback_retry:
589 parent_group = EXT4_I(parent)->i_block_group; 587 parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
613 ext4_group_t *group, int mode) 611 ext4_group_t *group, int mode)
614{ 612{
615 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 613 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
616 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 614 ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
617 struct ext4_group_desc *desc; 615 struct ext4_group_desc *desc;
618 ext4_group_t i, last;
619 int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); 616 int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
620 617
621 /* 618 /*
@@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
708 705
709/* 706/*
710 * claim the inode from the inode bitmap. If the group 707 * claim the inode from the inode bitmap. If the group
711 * is uninit we need to take the groups's sb_bgl_lock 708 * is uninit we need to take the groups's ext4_group_lock
712 * and clear the uninit flag. The inode bitmap update 709 * and clear the uninit flag. The inode bitmap update
713 * and group desc uninit flag clear should be done 710 * and group desc uninit flag clear should be done
714 * after holding sb_bgl_lock so that ext4_read_inode_bitmap 711 * after holding ext4_group_lock so that ext4_read_inode_bitmap
715 * doesn't race with the ext4_claim_inode 712 * doesn't race with the ext4_claim_inode
716 */ 713 */
717static int ext4_claim_inode(struct super_block *sb, 714static int ext4_claim_inode(struct super_block *sb,
@@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb,
722 struct ext4_sb_info *sbi = EXT4_SB(sb); 719 struct ext4_sb_info *sbi = EXT4_SB(sb);
723 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 720 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
724 721
725 spin_lock(sb_bgl_lock(sbi, group)); 722 ext4_lock_group(sb, group);
726 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 723 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
727 /* not a free inode */ 724 /* not a free inode */
728 retval = 1; 725 retval = 1;
@@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb,
731 ino++; 728 ino++;
732 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 729 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
733 ino > EXT4_INODES_PER_GROUP(sb)) { 730 ino > EXT4_INODES_PER_GROUP(sb)) {
734 spin_unlock(sb_bgl_lock(sbi, group)); 731 ext4_unlock_group(sb, group);
735 ext4_error(sb, __func__, 732 ext4_error(sb, __func__,
736 "reserved inode or inode > inodes count - " 733 "reserved inode or inode > inodes count - "
737 "block_group = %u, inode=%lu", group, 734 "block_group = %u, inode=%lu", group,
@@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb,
780 } 777 }
781 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 778 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
782err_ret: 779err_ret:
783 spin_unlock(sb_bgl_lock(sbi, group)); 780 ext4_unlock_group(sb, group);
784 return retval; 781 return retval;
785} 782}
786 783
@@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
799 struct super_block *sb; 796 struct super_block *sb;
800 struct buffer_head *inode_bitmap_bh = NULL; 797 struct buffer_head *inode_bitmap_bh = NULL;
801 struct buffer_head *group_desc_bh; 798 struct buffer_head *group_desc_bh;
802 ext4_group_t group = 0; 799 ext4_group_t ngroups, group = 0;
803 unsigned long ino = 0; 800 unsigned long ino = 0;
804 struct inode *inode; 801 struct inode *inode;
805 struct ext4_group_desc *gdp = NULL; 802 struct ext4_group_desc *gdp = NULL;
806 struct ext4_super_block *es;
807 struct ext4_inode_info *ei; 803 struct ext4_inode_info *ei;
808 struct ext4_sb_info *sbi; 804 struct ext4_sb_info *sbi;
809 int ret2, err = 0; 805 int ret2, err = 0;
@@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
818 return ERR_PTR(-EPERM); 814 return ERR_PTR(-EPERM);
819 815
820 sb = dir->i_sb; 816 sb = dir->i_sb;
817 ngroups = ext4_get_groups_count(sb);
821 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, 818 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
822 dir->i_ino, mode); 819 dir->i_ino, mode);
823 inode = new_inode(sb); 820 inode = new_inode(sb);
824 if (!inode) 821 if (!inode)
825 return ERR_PTR(-ENOMEM); 822 return ERR_PTR(-ENOMEM);
826 ei = EXT4_I(inode); 823 ei = EXT4_I(inode);
827
828 sbi = EXT4_SB(sb); 824 sbi = EXT4_SB(sb);
829 es = sbi->s_es;
830 825
831 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 826 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
832 ret2 = find_group_flex(sb, dir, &group); 827 ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +851,7 @@ got_group:
856 if (ret2 == -1) 851 if (ret2 == -1)
857 goto out; 852 goto out;
858 853
859 for (i = 0; i < sbi->s_groups_count; i++) { 854 for (i = 0; i < ngroups; i++) {
860 err = -EIO; 855 err = -EIO;
861 856
862 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 857 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +912,7 @@ repeat_in_this_group:
917 * group descriptor metadata has not yet been updated. 912 * group descriptor metadata has not yet been updated.
918 * So we just go onto the next blockgroup. 913 * So we just go onto the next blockgroup.
919 */ 914 */
920 if (++group == sbi->s_groups_count) 915 if (++group == ngroups)
921 group = 0; 916 group = 0;
922 } 917 }
923 err = -ENOSPC; 918 err = -ENOSPC;
@@ -938,7 +933,7 @@ got:
938 } 933 }
939 934
940 free = 0; 935 free = 0;
941 spin_lock(sb_bgl_lock(sbi, group)); 936 ext4_lock_group(sb, group);
942 /* recheck and clear flag under lock if we still need to */ 937 /* recheck and clear flag under lock if we still need to */
943 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 938 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
944 free = ext4_free_blocks_after_init(sb, group, gdp); 939 free = ext4_free_blocks_after_init(sb, group, gdp);
@@ -947,7 +942,7 @@ got:
947 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, 942 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
948 gdp); 943 gdp);
949 } 944 }
950 spin_unlock(sb_bgl_lock(sbi, group)); 945 ext4_unlock_group(sb, group);
951 946
952 /* Don't need to dirty bitmap block if we didn't change it */ 947 /* Don't need to dirty bitmap block if we didn't change it */
953 if (free) { 948 if (free) {
@@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1158{ 1153{
1159 unsigned long desc_count; 1154 unsigned long desc_count;
1160 struct ext4_group_desc *gdp; 1155 struct ext4_group_desc *gdp;
1161 ext4_group_t i; 1156 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1162#ifdef EXT4FS_DEBUG 1157#ifdef EXT4FS_DEBUG
1163 struct ext4_super_block *es; 1158 struct ext4_super_block *es;
1164 unsigned long bitmap_count, x; 1159 unsigned long bitmap_count, x;
@@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1168 desc_count = 0; 1163 desc_count = 0;
1169 bitmap_count = 0; 1164 bitmap_count = 0;
1170 gdp = NULL; 1165 gdp = NULL;
1171 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1166 for (i = 0; i < ngroups; i++) {
1172 gdp = ext4_get_group_desc(sb, i, NULL); 1167 gdp = ext4_get_group_desc(sb, i, NULL);
1173 if (!gdp) 1168 if (!gdp)
1174 continue; 1169 continue;
@@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1190 return desc_count; 1185 return desc_count;
1191#else 1186#else
1192 desc_count = 0; 1187 desc_count = 0;
1193 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1188 for (i = 0; i < ngroups; i++) {
1194 gdp = ext4_get_group_desc(sb, i, NULL); 1189 gdp = ext4_get_group_desc(sb, i, NULL);
1195 if (!gdp) 1190 if (!gdp)
1196 continue; 1191 continue;
@@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1205unsigned long ext4_count_dirs(struct super_block * sb) 1200unsigned long ext4_count_dirs(struct super_block * sb)
1206{ 1201{
1207 unsigned long count = 0; 1202 unsigned long count = 0;
1208 ext4_group_t i; 1203 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1209 1204
1210 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1205 for (i = 0; i < ngroups; i++) {
1211 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1206 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1212 if (!gdp) 1207 if (!gdp)
1213 continue; 1208 continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a9ffd528dd1..875db944b22f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode,
372} 372}
373 373
374static int __ext4_check_blockref(const char *function, struct inode *inode, 374static int __ext4_check_blockref(const char *function, struct inode *inode,
375 __le32 *p, unsigned int max) { 375 __le32 *p, unsigned int max)
376 376{
377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
378 __le32 *bref = p; 377 __le32 *bref = p;
378 unsigned int blk;
379
379 while (bref < p+max) { 380 while (bref < p+max) {
380 if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { 381 blk = le32_to_cpu(*bref++);
382 if (blk &&
383 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
384 blk, 1))) {
381 ext4_error(inode->i_sb, function, 385 ext4_error(inode->i_sb, function,
382 "block reference %u >= max (%u) " 386 "invalid block reference %u "
383 "in inode #%lu, offset=%d", 387 "in inode #%lu", blk, inode->i_ino);
384 le32_to_cpu(*bref), maxblocks,
385 inode->i_ino, (int)(bref-p));
386 return -EIO; 388 return -EIO;
387 } 389 }
388 bref++;
389 } 390 }
390 return 0; 391 return 0;
391} 392}
@@ -892,6 +893,10 @@ err_out:
892} 893}
893 894
894/* 895/*
896 * The ext4_ind_get_blocks() function handles non-extents inodes
897 * (i.e., using the traditional indirect/double-indirect i_blocks
898 * scheme) for ext4_get_blocks().
899 *
895 * Allocation strategy is simple: if we have to allocate something, we will 900 * Allocation strategy is simple: if we have to allocate something, we will
896 * have to go the whole way to leaf. So let's do it before attaching anything 901 * have to go the whole way to leaf. So let's do it before attaching anything
897 * to tree, set linkage between the newborn blocks, write them if sync is 902 * to tree, set linkage between the newborn blocks, write them if sync is
@@ -909,15 +914,16 @@ err_out:
909 * return = 0, if plain lookup failed. 914 * return = 0, if plain lookup failed.
910 * return < 0, error case. 915 * return < 0, error case.
911 * 916 *
912 * 917 * The ext4_ind_get_blocks() function should be called with
913 * Need to be called with 918 * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
914 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block 919 * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
915 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) 920 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
921 * blocks.
916 */ 922 */
917static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, 923static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
918 ext4_lblk_t iblock, unsigned int maxblocks, 924 ext4_lblk_t iblock, unsigned int maxblocks,
919 struct buffer_head *bh_result, 925 struct buffer_head *bh_result,
920 int create, int extend_disksize) 926 int flags)
921{ 927{
922 int err = -EIO; 928 int err = -EIO;
923 ext4_lblk_t offsets[4]; 929 ext4_lblk_t offsets[4];
@@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
927 int indirect_blks; 933 int indirect_blks;
928 int blocks_to_boundary = 0; 934 int blocks_to_boundary = 0;
929 int depth; 935 int depth;
930 struct ext4_inode_info *ei = EXT4_I(inode);
931 int count = 0; 936 int count = 0;
932 ext4_fsblk_t first_block = 0; 937 ext4_fsblk_t first_block = 0;
933 loff_t disksize;
934
935 938
936 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); 939 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
937 J_ASSERT(handle != NULL || create == 0); 940 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
938 depth = ext4_block_to_path(inode, iblock, offsets, 941 depth = ext4_block_to_path(inode, iblock, offsets,
939 &blocks_to_boundary); 942 &blocks_to_boundary);
940 943
@@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
963 } 966 }
964 967
965 /* Next simple case - plain lookup or failed read of indirect block */ 968 /* Next simple case - plain lookup or failed read of indirect block */
966 if (!create || err == -EIO) 969 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
967 goto cleanup; 970 goto cleanup;
968 971
969 /* 972 /*
@@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
997 if (!err) 1000 if (!err)
998 err = ext4_splice_branch(handle, inode, iblock, 1001 err = ext4_splice_branch(handle, inode, iblock,
999 partial, indirect_blks, count); 1002 partial, indirect_blks, count);
1000 /* 1003 else
1001 * i_disksize growing is protected by i_data_sem. Don't forget to
1002 * protect it if you're about to implement concurrent
1003 * ext4_get_block() -bzzz
1004 */
1005 if (!err && extend_disksize) {
1006 disksize = ((loff_t) iblock + count) << inode->i_blkbits;
1007 if (disksize > i_size_read(inode))
1008 disksize = i_size_read(inode);
1009 if (disksize > ei->i_disksize)
1010 ei->i_disksize = disksize;
1011 }
1012 if (err)
1013 goto cleanup; 1004 goto cleanup;
1014 1005
1015 set_buffer_new(bh_result); 1006 set_buffer_new(bh_result);
@@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1120 ext4_discard_preallocations(inode); 1111 ext4_discard_preallocations(inode);
1121} 1112}
1122 1113
1114static int check_block_validity(struct inode *inode, sector_t logical,
1115 sector_t phys, int len)
1116{
1117 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1118 ext4_error(inode->i_sb, "check_block_validity",
1119 "inode #%lu logical block %llu mapped to %llu "
1120 "(size %d)", inode->i_ino,
1121 (unsigned long long) logical,
1122 (unsigned long long) phys, len);
1123 WARN_ON(1);
1124 return -EIO;
1125 }
1126 return 0;
1127}
1128
1123/* 1129/*
1124 * The ext4_get_blocks_wrap() function try to look up the requested blocks, 1130 * The ext4_get_blocks() function tries to look up the requested blocks,
1125 * and returns if the blocks are already mapped. 1131 * and returns if the blocks are already mapped.
1126 * 1132 *
1127 * Otherwise it takes the write lock of the i_data_sem and allocate blocks 1133 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
@@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1129 * mapped. 1135 * mapped.
1130 * 1136 *
1131 * If file type is extents based, it will call ext4_ext_get_blocks(), 1137 * If file type is extents based, it will call ext4_ext_get_blocks(),
1132 * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping 1138 * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
1133 * based files 1139 * based files
1134 * 1140 *
1135 * On success, it returns the number of blocks being mapped or allocate. 1141 * On success, it returns the number of blocks being mapped or allocate.
@@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1142 * 1148 *
1143 * It returns the error in case of allocation failure. 1149 * It returns the error in case of allocation failure.
1144 */ 1150 */
1145int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 1151int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1146 unsigned int max_blocks, struct buffer_head *bh, 1152 unsigned int max_blocks, struct buffer_head *bh,
1147 int create, int extend_disksize, int flag) 1153 int flags)
1148{ 1154{
1149 int retval; 1155 int retval;
1150 1156
@@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1152 clear_buffer_unwritten(bh); 1158 clear_buffer_unwritten(bh);
1153 1159
1154 /* 1160 /*
1155 * Try to see if we can get the block without requesting 1161 * Try to see if we can get the block without requesting a new
1156 * for new file system block. 1162 * file system block.
1157 */ 1163 */
1158 down_read((&EXT4_I(inode)->i_data_sem)); 1164 down_read((&EXT4_I(inode)->i_data_sem));
1159 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 1165 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
1160 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, 1166 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
1161 bh, 0, 0); 1167 bh, 0);
1162 } else { 1168 } else {
1163 retval = ext4_get_blocks_handle(handle, 1169 retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
1164 inode, block, max_blocks, bh, 0, 0); 1170 bh, 0);
1165 } 1171 }
1166 up_read((&EXT4_I(inode)->i_data_sem)); 1172 up_read((&EXT4_I(inode)->i_data_sem));
1167 1173
1174 if (retval > 0 && buffer_mapped(bh)) {
1175 int ret = check_block_validity(inode, block,
1176 bh->b_blocknr, retval);
1177 if (ret != 0)
1178 return ret;
1179 }
1180
1168 /* If it is only a block(s) look up */ 1181 /* If it is only a block(s) look up */
1169 if (!create) 1182 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
1170 return retval; 1183 return retval;
1171 1184
1172 /* 1185 /*
@@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1205 * let the underlying get_block() function know to 1218 * let the underlying get_block() function know to
1206 * avoid double accounting 1219 * avoid double accounting
1207 */ 1220 */
1208 if (flag) 1221 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1209 EXT4_I(inode)->i_delalloc_reserved_flag = 1; 1222 EXT4_I(inode)->i_delalloc_reserved_flag = 1;
1210 /* 1223 /*
1211 * We need to check for EXT4 here because migrate 1224 * We need to check for EXT4 here because migrate
@@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1213 */ 1226 */
1214 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 1227 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
1215 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, 1228 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
1216 bh, create, extend_disksize); 1229 bh, flags);
1217 } else { 1230 } else {
1218 retval = ext4_get_blocks_handle(handle, inode, block, 1231 retval = ext4_ind_get_blocks(handle, inode, block,
1219 max_blocks, bh, create, extend_disksize); 1232 max_blocks, bh, flags);
1220 1233
1221 if (retval > 0 && buffer_new(bh)) { 1234 if (retval > 0 && buffer_new(bh)) {
1222 /* 1235 /*
@@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1229 } 1242 }
1230 } 1243 }
1231 1244
1232 if (flag) { 1245 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1233 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1246 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1234 /* 1247
1235 * Update reserved blocks/metadata blocks 1248 /*
1236 * after successful block allocation 1249 * Update reserved blocks/metadata blocks after successful
1237 * which were deferred till now 1250 * block allocation which had been deferred till now.
1238 */ 1251 */
1239 if ((retval > 0) && buffer_delay(bh)) 1252 if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
1240 ext4_da_update_reserve_space(inode, retval); 1253 ext4_da_update_reserve_space(inode, retval);
1241 }
1242 1254
1243 up_write((&EXT4_I(inode)->i_data_sem)); 1255 up_write((&EXT4_I(inode)->i_data_sem));
1256 if (retval > 0 && buffer_mapped(bh)) {
1257 int ret = check_block_validity(inode, block,
1258 bh->b_blocknr, retval);
1259 if (ret != 0)
1260 return ret;
1261 }
1244 return retval; 1262 return retval;
1245} 1263}
1246 1264
@@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
1268 started = 1; 1286 started = 1;
1269 } 1287 }
1270 1288
1271 ret = ext4_get_blocks_wrap(handle, inode, iblock, 1289 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
1272 max_blocks, bh_result, create, 0, 0); 1290 create ? EXT4_GET_BLOCKS_CREATE : 0);
1273 if (ret > 0) { 1291 if (ret > 0) {
1274 bh_result->b_size = (ret << inode->i_blkbits); 1292 bh_result->b_size = (ret << inode->i_blkbits);
1275 ret = 0; 1293 ret = 0;
@@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1288{ 1306{
1289 struct buffer_head dummy; 1307 struct buffer_head dummy;
1290 int fatal = 0, err; 1308 int fatal = 0, err;
1309 int flags = 0;
1291 1310
1292 J_ASSERT(handle != NULL || create == 0); 1311 J_ASSERT(handle != NULL || create == 0);
1293 1312
1294 dummy.b_state = 0; 1313 dummy.b_state = 0;
1295 dummy.b_blocknr = -1000; 1314 dummy.b_blocknr = -1000;
1296 buffer_trace_init(&dummy.b_history); 1315 buffer_trace_init(&dummy.b_history);
1297 err = ext4_get_blocks_wrap(handle, inode, block, 1, 1316 if (create)
1298 &dummy, create, 1, 0); 1317 flags |= EXT4_GET_BLOCKS_CREATE;
1318 err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
1299 /* 1319 /*
1300 * ext4_get_blocks_handle() returns number of blocks 1320 * ext4_get_blocks() returns number of blocks mapped. 0 in
1301 * mapped. 0 in case of a HOLE. 1321 * case of a HOLE.
1302 */ 1322 */
1303 if (err > 0) { 1323 if (err > 0) {
1304 if (err > 1) 1324 if (err > 1)
@@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1439 struct page **pagep, void **fsdata) 1459 struct page **pagep, void **fsdata)
1440{ 1460{
1441 struct inode *inode = mapping->host; 1461 struct inode *inode = mapping->host;
1442 int ret, needed_blocks = ext4_writepage_trans_blocks(inode); 1462 int ret, needed_blocks;
1443 handle_t *handle; 1463 handle_t *handle;
1444 int retries = 0; 1464 int retries = 0;
1445 struct page *page; 1465 struct page *page;
@@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1450 "dev %s ino %lu pos %llu len %u flags %u", 1470 "dev %s ino %lu pos %llu len %u flags %u",
1451 inode->i_sb->s_id, inode->i_ino, 1471 inode->i_sb->s_id, inode->i_ino,
1452 (unsigned long long) pos, len, flags); 1472 (unsigned long long) pos, len, flags);
1473 /*
1474 * Reserve one block more for addition to orphan list in case
1475 * we allocate blocks but write fails for some reason
1476 */
1477 needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
1453 index = pos >> PAGE_CACHE_SHIFT; 1478 index = pos >> PAGE_CACHE_SHIFT;
1454 from = pos & (PAGE_CACHE_SIZE - 1); 1479 from = pos & (PAGE_CACHE_SIZE - 1);
1455 to = from + len; 1480 to = from + len;
@@ -1483,15 +1508,30 @@ retry:
1483 1508
1484 if (ret) { 1509 if (ret) {
1485 unlock_page(page); 1510 unlock_page(page);
1486 ext4_journal_stop(handle);
1487 page_cache_release(page); 1511 page_cache_release(page);
1488 /* 1512 /*
1489 * block_write_begin may have instantiated a few blocks 1513 * block_write_begin may have instantiated a few blocks
1490 * outside i_size. Trim these off again. Don't need 1514 * outside i_size. Trim these off again. Don't need
1491 * i_size_read because we hold i_mutex. 1515 * i_size_read because we hold i_mutex.
1516 *
1517 * Add inode to orphan list in case we crash before
1518 * truncate finishes
1492 */ 1519 */
1493 if (pos + len > inode->i_size) 1520 if (pos + len > inode->i_size)
1521 ext4_orphan_add(handle, inode);
1522
1523 ext4_journal_stop(handle);
1524 if (pos + len > inode->i_size) {
1494 vmtruncate(inode, inode->i_size); 1525 vmtruncate(inode, inode->i_size);
1526 /*
1527 * If vmtruncate failed early the inode might
1528 * still be on the orphan list; we need to
1529 * make sure the inode is removed from the
1530 * orphan list in that case.
1531 */
1532 if (inode->i_nlink)
1533 ext4_orphan_del(NULL, inode);
1534 }
1495 } 1535 }
1496 1536
1497 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 1537 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1509 return ext4_handle_dirty_metadata(handle, NULL, bh); 1549 return ext4_handle_dirty_metadata(handle, NULL, bh);
1510} 1550}
1511 1551
1552static int ext4_generic_write_end(struct file *file,
1553 struct address_space *mapping,
1554 loff_t pos, unsigned len, unsigned copied,
1555 struct page *page, void *fsdata)
1556{
1557 int i_size_changed = 0;
1558 struct inode *inode = mapping->host;
1559 handle_t *handle = ext4_journal_current_handle();
1560
1561 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1562
1563 /*
1564 * No need to use i_size_read() here, the i_size
1565 * cannot change under us because we hold i_mutex.
1566 *
1567 * But it's important to update i_size while still holding page lock:
1568 * page writeout could otherwise come in and zero beyond i_size.
1569 */
1570 if (pos + copied > inode->i_size) {
1571 i_size_write(inode, pos + copied);
1572 i_size_changed = 1;
1573 }
1574
1575 if (pos + copied > EXT4_I(inode)->i_disksize) {
1576 /* We need to mark inode dirty even if
1577 * new_i_size is less that inode->i_size
1578 * bu greater than i_disksize.(hint delalloc)
1579 */
1580 ext4_update_i_disksize(inode, (pos + copied));
1581 i_size_changed = 1;
1582 }
1583 unlock_page(page);
1584 page_cache_release(page);
1585
1586 /*
1587 * Don't mark the inode dirty under page lock. First, it unnecessarily
1588 * makes the holding time of page lock longer. Second, it forces lock
1589 * ordering of page lock and transaction start for journaling
1590 * filesystems.
1591 */
1592 if (i_size_changed)
1593 ext4_mark_inode_dirty(handle, inode);
1594
1595 return copied;
1596}
1597
1512/* 1598/*
1513 * We need to pick up the new inode size which generic_commit_write gave us 1599 * We need to pick up the new inode size which generic_commit_write gave us
1514 * `file' can be NULL - eg, when called from page_symlink(). 1600 * `file' can be NULL - eg, when called from page_symlink().
@@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file,
1532 ret = ext4_jbd2_file_inode(handle, inode); 1618 ret = ext4_jbd2_file_inode(handle, inode);
1533 1619
1534 if (ret == 0) { 1620 if (ret == 0) {
1535 loff_t new_i_size; 1621 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1536
1537 new_i_size = pos + copied;
1538 if (new_i_size > EXT4_I(inode)->i_disksize) {
1539 ext4_update_i_disksize(inode, new_i_size);
1540 /* We need to mark inode dirty even if
1541 * new_i_size is less that inode->i_size
1542 * bu greater than i_disksize.(hint delalloc)
1543 */
1544 ext4_mark_inode_dirty(handle, inode);
1545 }
1546
1547 ret2 = generic_write_end(file, mapping, pos, len, copied,
1548 page, fsdata); 1622 page, fsdata);
1549 copied = ret2; 1623 copied = ret2;
1624 if (pos + len > inode->i_size)
1625 /* if we have allocated more blocks and copied
1626 * less. We will have blocks allocated outside
1627 * inode->i_size. So truncate them
1628 */
1629 ext4_orphan_add(handle, inode);
1550 if (ret2 < 0) 1630 if (ret2 < 0)
1551 ret = ret2; 1631 ret = ret2;
1552 } 1632 }
@@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file,
1554 if (!ret) 1634 if (!ret)
1555 ret = ret2; 1635 ret = ret2;
1556 1636
1637 if (pos + len > inode->i_size) {
1638 vmtruncate(inode, inode->i_size);
1639 /*
1640 * If vmtruncate failed early the inode might still be
1641 * on the orphan list; we need to make sure the inode
1642 * is removed from the orphan list in that case.
1643 */
1644 if (inode->i_nlink)
1645 ext4_orphan_del(NULL, inode);
1646 }
1647
1648
1557 return ret ? ret : copied; 1649 return ret ? ret : copied;
1558} 1650}
1559 1651
@@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file,
1565 handle_t *handle = ext4_journal_current_handle(); 1657 handle_t *handle = ext4_journal_current_handle();
1566 struct inode *inode = mapping->host; 1658 struct inode *inode = mapping->host;
1567 int ret = 0, ret2; 1659 int ret = 0, ret2;
1568 loff_t new_i_size;
1569 1660
1570 trace_mark(ext4_writeback_write_end, 1661 trace_mark(ext4_writeback_write_end,
1571 "dev %s ino %lu pos %llu len %u copied %u", 1662 "dev %s ino %lu pos %llu len %u copied %u",
1572 inode->i_sb->s_id, inode->i_ino, 1663 inode->i_sb->s_id, inode->i_ino,
1573 (unsigned long long) pos, len, copied); 1664 (unsigned long long) pos, len, copied);
1574 new_i_size = pos + copied; 1665 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1575 if (new_i_size > EXT4_I(inode)->i_disksize) {
1576 ext4_update_i_disksize(inode, new_i_size);
1577 /* We need to mark inode dirty even if
1578 * new_i_size is less that inode->i_size
1579 * bu greater than i_disksize.(hint delalloc)
1580 */
1581 ext4_mark_inode_dirty(handle, inode);
1582 }
1583
1584 ret2 = generic_write_end(file, mapping, pos, len, copied,
1585 page, fsdata); 1666 page, fsdata);
1586 copied = ret2; 1667 copied = ret2;
1668 if (pos + len > inode->i_size)
1669 /* if we have allocated more blocks and copied
1670 * less. We will have blocks allocated outside
1671 * inode->i_size. So truncate them
1672 */
1673 ext4_orphan_add(handle, inode);
1674
1587 if (ret2 < 0) 1675 if (ret2 < 0)
1588 ret = ret2; 1676 ret = ret2;
1589 1677
@@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file,
1591 if (!ret) 1679 if (!ret)
1592 ret = ret2; 1680 ret = ret2;
1593 1681
1682 if (pos + len > inode->i_size) {
1683 vmtruncate(inode, inode->i_size);
1684 /*
1685 * If vmtruncate failed early the inode might still be
1686 * on the orphan list; we need to make sure the inode
1687 * is removed from the orphan list in that case.
1688 */
1689 if (inode->i_nlink)
1690 ext4_orphan_del(NULL, inode);
1691 }
1692
1594 return ret ? ret : copied; 1693 return ret ? ret : copied;
1595} 1694}
1596 1695
@@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file,
1635 } 1734 }
1636 1735
1637 unlock_page(page); 1736 unlock_page(page);
1737 page_cache_release(page);
1738 if (pos + len > inode->i_size)
1739 /* if we have allocated more blocks and copied
1740 * less. We will have blocks allocated outside
1741 * inode->i_size. So truncate them
1742 */
1743 ext4_orphan_add(handle, inode);
1744
1638 ret2 = ext4_journal_stop(handle); 1745 ret2 = ext4_journal_stop(handle);
1639 if (!ret) 1746 if (!ret)
1640 ret = ret2; 1747 ret = ret2;
1641 page_cache_release(page); 1748 if (pos + len > inode->i_size) {
1749 vmtruncate(inode, inode->i_size);
1750 /*
1751 * If vmtruncate failed early the inode might still be
1752 * on the orphan list; we need to make sure the inode
1753 * is removed from the orphan list in that case.
1754 */
1755 if (inode->i_nlink)
1756 ext4_orphan_del(NULL, inode);
1757 }
1642 1758
1643 return ret ? ret : copied; 1759 return ret ? ret : copied;
1644} 1760}
@@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1852 * @logical - first logical block to start assignment with 1968 * @logical - first logical block to start assignment with
1853 * 1969 *
1854 * the function goes through all passed space and put actual disk 1970 * the function goes through all passed space and put actual disk
1855 * block numbers into buffer heads, dropping BH_Delay 1971 * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
1856 */ 1972 */
1857static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, 1973static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1858 struct buffer_head *exbh) 1974 struct buffer_head *exbh)
@@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1902 do { 2018 do {
1903 if (cur_logical >= logical + blocks) 2019 if (cur_logical >= logical + blocks)
1904 break; 2020 break;
1905 if (buffer_delay(bh)) { 2021
1906 bh->b_blocknr = pblock; 2022 if (buffer_delay(bh) ||
1907 clear_buffer_delay(bh); 2023 buffer_unwritten(bh)) {
1908 bh->b_bdev = inode->i_sb->s_bdev; 2024
1909 } else if (buffer_unwritten(bh)) { 2025 BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
1910 bh->b_blocknr = pblock; 2026
1911 clear_buffer_unwritten(bh); 2027 if (buffer_delay(bh)) {
1912 set_buffer_mapped(bh); 2028 clear_buffer_delay(bh);
1913 set_buffer_new(bh); 2029 bh->b_blocknr = pblock;
1914 bh->b_bdev = inode->i_sb->s_bdev; 2030 } else {
2031 /*
2032 * unwritten already should have
2033 * blocknr assigned. Verify that
2034 */
2035 clear_buffer_unwritten(bh);
2036 BUG_ON(bh->b_blocknr != pblock);
2037 }
2038
1915 } else if (buffer_mapped(bh)) 2039 } else if (buffer_mapped(bh))
1916 BUG_ON(bh->b_blocknr != pblock); 2040 BUG_ON(bh->b_blocknr != pblock);
1917 2041
@@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode)
1990 return; 2114 return;
1991} 2115}
1992 2116
1993#define EXT4_DELALLOC_RSVED 1
1994static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
1995 struct buffer_head *bh_result, int create)
1996{
1997 int ret;
1998 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1999 loff_t disksize = EXT4_I(inode)->i_disksize;
2000 handle_t *handle = NULL;
2001
2002 handle = ext4_journal_current_handle();
2003 BUG_ON(!handle);
2004 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2005 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2006 if (ret <= 0)
2007 return ret;
2008
2009 bh_result->b_size = (ret << inode->i_blkbits);
2010
2011 if (ext4_should_order_data(inode)) {
2012 int retval;
2013 retval = ext4_jbd2_file_inode(handle, inode);
2014 if (retval)
2015 /*
2016 * Failed to add inode for ordered mode. Don't
2017 * update file size
2018 */
2019 return retval;
2020 }
2021
2022 /*
2023 * Update on-disk size along with block allocation we don't
2024 * use 'extend_disksize' as size may change within already
2025 * allocated block -bzzz
2026 */
2027 disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
2028 if (disksize > i_size_read(inode))
2029 disksize = i_size_read(inode);
2030 if (disksize > EXT4_I(inode)->i_disksize) {
2031 ext4_update_i_disksize(inode, disksize);
2032 ret = ext4_mark_inode_dirty(handle, inode);
2033 return ret;
2034 }
2035 return 0;
2036}
2037
2038/* 2117/*
2039 * mpage_da_map_blocks - go through given space 2118 * mpage_da_map_blocks - go through given space
2040 * 2119 *
@@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2045 */ 2124 */
2046static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2125static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2047{ 2126{
2048 int err = 0; 2127 int err, blks, get_blocks_flags;
2049 struct buffer_head new; 2128 struct buffer_head new;
2050 sector_t next; 2129 sector_t next = mpd->b_blocknr;
2130 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
2131 loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
2132 handle_t *handle = NULL;
2051 2133
2052 /* 2134 /*
2053 * We consider only non-mapped and non-allocated blocks 2135 * We consider only non-mapped and non-allocated blocks
2054 */ 2136 */
2055 if ((mpd->b_state & (1 << BH_Mapped)) && 2137 if ((mpd->b_state & (1 << BH_Mapped)) &&
2056 !(mpd->b_state & (1 << BH_Delay))) 2138 !(mpd->b_state & (1 << BH_Delay)) &&
2139 !(mpd->b_state & (1 << BH_Unwritten)))
2057 return 0; 2140 return 0;
2058 new.b_state = mpd->b_state; 2141
2059 new.b_blocknr = 0;
2060 new.b_size = mpd->b_size;
2061 next = mpd->b_blocknr;
2062 /* 2142 /*
2063 * If we didn't accumulate anything 2143 * If we didn't accumulate anything to write simply return
2064 * to write simply return
2065 */ 2144 */
2066 if (!new.b_size) 2145 if (!mpd->b_size)
2067 return 0; 2146 return 0;
2068 2147
2069 err = ext4_da_get_block_write(mpd->inode, next, &new, 1); 2148 handle = ext4_journal_current_handle();
2070 if (err) { 2149 BUG_ON(!handle);
2150
2151 /*
2152 * Call ext4_get_blocks() to allocate any delayed allocation
2153 * blocks, or to convert an uninitialized extent to be
2154 * initialized (in the case where we have written into
2155 * one or more preallocated blocks).
2156 *
2157 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
2158 * indicate that we are on the delayed allocation path. This
2159 * affects functions in many different parts of the allocation
2160 * call path. This flag exists primarily because we don't
2161 * want to change *many* call functions, so ext4_get_blocks()
2162 * will set the magic i_delalloc_reserved_flag once the
2163 * inode's allocation semaphore is taken.
2164 *
2165 * If the blocks in questions were delalloc blocks, set
2166 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
2167 * variables are updated after the blocks have been allocated.
2168 */
2169 new.b_state = 0;
2170 get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
2171 EXT4_GET_BLOCKS_DELALLOC_RESERVE);
2172 if (mpd->b_state & (1 << BH_Delay))
2173 get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
2174 blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
2175 &new, get_blocks_flags);
2176 if (blks < 0) {
2177 err = blks;
2071 /* 2178 /*
2072 * If get block returns with error we simply 2179 * If get block returns with error we simply
2073 * return. Later writepage will redirty the page and 2180 * return. Later writepage will redirty the page and
@@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2100 if (err == -ENOSPC) { 2207 if (err == -ENOSPC) {
2101 ext4_print_free_blocks(mpd->inode); 2208 ext4_print_free_blocks(mpd->inode);
2102 } 2209 }
2103 /* invlaidate all the pages */ 2210 /* invalidate all the pages */
2104 ext4_da_block_invalidatepages(mpd, next, 2211 ext4_da_block_invalidatepages(mpd, next,
2105 mpd->b_size >> mpd->inode->i_blkbits); 2212 mpd->b_size >> mpd->inode->i_blkbits);
2106 return err; 2213 return err;
2107 } 2214 }
2108 BUG_ON(new.b_size == 0); 2215 BUG_ON(blks == 0);
2216
2217 new.b_size = (blks << mpd->inode->i_blkbits);
2109 2218
2110 if (buffer_new(&new)) 2219 if (buffer_new(&new))
2111 __unmap_underlying_blocks(mpd->inode, &new); 2220 __unmap_underlying_blocks(mpd->inode, &new);
@@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2118 (mpd->b_state & (1 << BH_Unwritten))) 2227 (mpd->b_state & (1 << BH_Unwritten)))
2119 mpage_put_bnr_to_bhs(mpd, next, &new); 2228 mpage_put_bnr_to_bhs(mpd, next, &new);
2120 2229
2230 if (ext4_should_order_data(mpd->inode)) {
2231 err = ext4_jbd2_file_inode(handle, mpd->inode);
2232 if (err)
2233 return err;
2234 }
2235
2236 /*
2237 * Update on-disk size along with block allocation.
2238 */
2239 disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
2240 if (disksize > i_size_read(mpd->inode))
2241 disksize = i_size_read(mpd->inode);
2242 if (disksize > EXT4_I(mpd->inode)->i_disksize) {
2243 ext4_update_i_disksize(mpd->inode, disksize);
2244 return ext4_mark_inode_dirty(handle, mpd->inode);
2245 }
2246
2121 return 0; 2247 return 0;
2122} 2248}
2123 2249
@@ -2192,6 +2318,17 @@ flush_it:
2192 return; 2318 return;
2193} 2319}
2194 2320
2321static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2322{
2323 /*
2324 * unmapped buffer is possible for holes.
2325 * delay buffer is possible with delayed allocation.
2326 * We also need to consider unwritten buffer as unmapped.
2327 */
2328 return (!buffer_mapped(bh) || buffer_delay(bh) ||
2329 buffer_unwritten(bh)) && buffer_dirty(bh);
2330}
2331
2195/* 2332/*
2196 * __mpage_da_writepage - finds extent of pages and blocks 2333 * __mpage_da_writepage - finds extent of pages and blocks
2197 * 2334 *
@@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page,
2276 * Otherwise we won't make progress 2413 * Otherwise we won't make progress
2277 * with the page in ext4_da_writepage 2414 * with the page in ext4_da_writepage
2278 */ 2415 */
2279 if (buffer_dirty(bh) && 2416 if (ext4_bh_unmapped_or_delay(NULL, bh)) {
2280 (!buffer_mapped(bh) || buffer_delay(bh))) {
2281 mpage_add_bh_to_extent(mpd, logical, 2417 mpage_add_bh_to_extent(mpd, logical,
2282 bh->b_size, 2418 bh->b_size,
2283 bh->b_state); 2419 bh->b_state);
@@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page,
2303} 2439}
2304 2440
2305/* 2441/*
2306 * this is a special callback for ->write_begin() only 2442 * This is a special get_blocks_t callback which is used by
2307 * it's intention is to return mapped block or reserve space 2443 * ext4_da_write_begin(). It will either return mapped block or
2444 * reserve space for a single block.
2445 *
2446 * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
2447 * We also have b_blocknr = -1 and b_bdev initialized properly
2448 *
2449 * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
2450 * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
2451 * initialized properly.
2308 */ 2452 */
2309static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 2453static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2310 struct buffer_head *bh_result, int create) 2454 struct buffer_head *bh_result, int create)
@@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2323 * preallocated blocks are unmapped but should treated 2467 * preallocated blocks are unmapped but should treated
2324 * the same as allocated blocks. 2468 * the same as allocated blocks.
2325 */ 2469 */
2326 ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); 2470 ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0);
2327 if ((ret == 0) && !buffer_delay(bh_result)) { 2471 if ((ret == 0) && !buffer_delay(bh_result)) {
2328 /* the block isn't (pre)allocated yet, let's reserve space */ 2472 /* the block isn't (pre)allocated yet, let's reserve space */
2329 /* 2473 /*
@@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2340 set_buffer_delay(bh_result); 2484 set_buffer_delay(bh_result);
2341 } else if (ret > 0) { 2485 } else if (ret > 0) {
2342 bh_result->b_size = (ret << inode->i_blkbits); 2486 bh_result->b_size = (ret << inode->i_blkbits);
2343 /* 2487 if (buffer_unwritten(bh_result)) {
2344 * With sub-block writes into unwritten extents 2488 /* A delayed write to unwritten bh should
2345 * we also need to mark the buffer as new so that 2489 * be marked new and mapped. Mapped ensures
2346 * the unwritten parts of the buffer gets correctly zeroed. 2490 * that we don't do get_block multiple times
2347 */ 2491 * when we write to the same offset and new
2348 if (buffer_unwritten(bh_result)) 2492 * ensures that we do proper zero out for
2493 * partial write.
2494 */
2349 set_buffer_new(bh_result); 2495 set_buffer_new(bh_result);
2496 set_buffer_mapped(bh_result);
2497 }
2350 ret = 0; 2498 ret = 0;
2351 } 2499 }
2352 2500
2353 return ret; 2501 return ret;
2354} 2502}
2355 2503
2356static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 2504/*
2357{ 2505 * This function is used as a standard get_block_t calback function
2358 /* 2506 * when there is no desire to allocate any blocks. It is used as a
2359 * unmapped buffer is possible for holes. 2507 * callback function for block_prepare_write(), nobh_writepage(), and
2360 * delay buffer is possible with delayed allocation 2508 * block_write_full_page(). These functions should only try to map a
2361 */ 2509 * single block at a time.
2362 return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); 2510 *
2363} 2511 * Since this function doesn't do block allocations even if the caller
2364 2512 * requests it by passing in create=1, it is critically important that
2365static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, 2513 * any caller checks to make sure that any buffer heads are returned
2514 * by this function are either all already mapped or marked for
2515 * delayed allocation before calling nobh_writepage() or
2516 * block_write_full_page(). Otherwise, b_blocknr could be left
2517 * unitialized, and the page write functions will be taken by
2518 * surprise.
2519 */
2520static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
2366 struct buffer_head *bh_result, int create) 2521 struct buffer_head *bh_result, int create)
2367{ 2522{
2368 int ret = 0; 2523 int ret = 0;
2369 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 2524 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
2370 2525
2526 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
2527
2371 /* 2528 /*
2372 * we don't want to do block allocation in writepage 2529 * we don't want to do block allocation in writepage
2373 * so call get_block_wrap with create = 0 2530 * so call get_block_wrap with create = 0
2374 */ 2531 */
2375 ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, 2532 ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
2376 bh_result, 0, 0, 0); 2533 BUG_ON(create && ret == 0);
2377 if (ret > 0) { 2534 if (ret > 0) {
2378 bh_result->b_size = (ret << inode->i_blkbits); 2535 bh_result->b_size = (ret << inode->i_blkbits);
2379 ret = 0; 2536 ret = 0;
@@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
2382} 2539}
2383 2540
2384/* 2541/*
2385 * get called vi ext4_da_writepages after taking page lock (have journal handle) 2542 * This function can get called via...
2386 * get called via journal_submit_inode_data_buffers (no journal handle) 2543 * - ext4_da_writepages after taking page lock (have journal handle)
2387 * get called via shrink_page_list via pdflush (no journal handle) 2544 * - journal_submit_inode_data_buffers (no journal handle)
2388 * or grab_page_cache when doing write_begin (have journal handle) 2545 * - shrink_page_list via pdflush (no journal handle)
2546 * - grab_page_cache when doing write_begin (have journal handle)
2389 */ 2547 */
2390static int ext4_da_writepage(struct page *page, 2548static int ext4_da_writepage(struct page *page,
2391 struct writeback_control *wbc) 2549 struct writeback_control *wbc)
@@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page,
2436 * do block allocation here. 2594 * do block allocation here.
2437 */ 2595 */
2438 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 2596 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
2439 ext4_normal_get_block_write); 2597 noalloc_get_block_write);
2440 if (!ret) { 2598 if (!ret) {
2441 page_bufs = page_buffers(page); 2599 page_bufs = page_buffers(page);
2442 /* check whether all are mapped and non delay */ 2600 /* check whether all are mapped and non delay */
@@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page,
2461 } 2619 }
2462 2620
2463 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2621 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
2464 ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); 2622 ret = nobh_writepage(page, noalloc_get_block_write, wbc);
2465 else 2623 else
2466 ret = block_write_full_page(page, 2624 ret = block_write_full_page(page, noalloc_get_block_write,
2467 ext4_normal_get_block_write, 2625 wbc);
2468 wbc);
2469 2626
2470 return ret; 2627 return ret;
2471} 2628}
@@ -2777,7 +2934,7 @@ retry:
2777 *pagep = page; 2934 *pagep = page;
2778 2935
2779 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2936 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
2780 ext4_da_get_block_prep); 2937 ext4_da_get_block_prep);
2781 if (ret < 0) { 2938 if (ret < 0) {
2782 unlock_page(page); 2939 unlock_page(page);
2783 ext4_journal_stop(handle); 2940 ext4_journal_stop(handle);
@@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2815 for (i = 0; i < idx; i++) 2972 for (i = 0; i < idx; i++)
2816 bh = bh->b_this_page; 2973 bh = bh->b_this_page;
2817 2974
2818 if (!buffer_mapped(bh) || (buffer_delay(bh))) 2975 if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
2819 return 0; 2976 return 0;
2820 return 1; 2977 return 1;
2821} 2978}
@@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page,
3085 struct inode *inode = page->mapping->host; 3242 struct inode *inode = page->mapping->host;
3086 3243
3087 if (test_opt(inode->i_sb, NOBH)) 3244 if (test_opt(inode->i_sb, NOBH))
3088 return nobh_writepage(page, 3245 return nobh_writepage(page, noalloc_get_block_write, wbc);
3089 ext4_normal_get_block_write, wbc);
3090 else 3246 else
3091 return block_write_full_page(page, 3247 return block_write_full_page(page, noalloc_get_block_write,
3092 ext4_normal_get_block_write, 3248 wbc);
3093 wbc);
3094} 3249}
3095 3250
3096static int ext4_normal_writepage(struct page *page, 3251static int ext4_normal_writepage(struct page *page,
@@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page,
3142 int err; 3297 int err;
3143 3298
3144 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 3299 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
3145 ext4_normal_get_block_write); 3300 noalloc_get_block_write);
3146 if (ret != 0) 3301 if (ret != 0)
3147 goto out_unlock; 3302 goto out_unlock;
3148 3303
@@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page,
3227 * really know unless we go poke around in the buffer_heads. 3382 * really know unless we go poke around in the buffer_heads.
3228 * But block_write_full_page will do the right thing. 3383 * But block_write_full_page will do the right thing.
3229 */ 3384 */
3230 return block_write_full_page(page, 3385 return block_write_full_page(page, noalloc_get_block_write,
3231 ext4_normal_get_block_write, 3386 wbc);
3232 wbc);
3233 } 3387 }
3234no_write: 3388no_write:
3235 redirty_page_for_writepage(wbc, page); 3389 redirty_page_for_writepage(wbc, page);
@@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode)
3973 if (!ext4_can_truncate(inode)) 4127 if (!ext4_can_truncate(inode))
3974 return; 4128 return;
3975 4129
3976 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 4130 if (ei->i_disksize && inode->i_size == 0 &&
4131 !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3977 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 4132 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
3978 4133
3979 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 4134 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait)
4715 return ext4_force_commit(inode->i_sb); 4870 return ext4_force_commit(inode->i_sb);
4716} 4871}
4717 4872
4718int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
4719{
4720 int err = 0;
4721
4722 mark_buffer_dirty(bh);
4723 if (inode && inode_needs_sync(inode)) {
4724 sync_dirty_buffer(bh);
4725 if (buffer_req(bh) && !buffer_uptodate(bh)) {
4726 ext4_error(inode->i_sb, __func__,
4727 "IO error syncing inode, "
4728 "inode=%lu, block=%llu",
4729 inode->i_ino,
4730 (unsigned long long)bh->b_blocknr);
4731 err = -EIO;
4732 }
4733 }
4734 return err;
4735}
4736
4737/* 4873/*
4738 * ext4_setattr() 4874 * ext4_setattr()
4739 * 4875 *
@@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4930 */ 5066 */
4931int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) 5067int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4932{ 5068{
4933 int groups, gdpblocks; 5069 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
5070 int gdpblocks;
4934 int idxblocks; 5071 int idxblocks;
4935 int ret = 0; 5072 int ret = 0;
4936 5073
@@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4957 groups += nrblocks; 5094 groups += nrblocks;
4958 5095
4959 gdpblocks = groups; 5096 gdpblocks = groups;
4960 if (groups > EXT4_SB(inode->i_sb)->s_groups_count) 5097 if (groups > ngroups)
4961 groups = EXT4_SB(inode->i_sb)->s_groups_count; 5098 groups = ngroups;
4962 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) 5099 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4963 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; 5100 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4964 5101
@@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
4998 * Calculate the journal credits for a chunk of data modification. 5135 * Calculate the journal credits for a chunk of data modification.
4999 * 5136 *
5000 * This is called from DIO, fallocate or whoever calling 5137 * This is called from DIO, fallocate or whoever calling
5001 * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. 5138 * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
5002 * 5139 *
5003 * journal buffers for data blocks are not included here, as DIO 5140 * journal buffers for data blocks are not included here, as DIO
5004 * and fallocate do no need to journal data buffers. 5141 * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677a7984..ed8482e22c0e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr)
372 ext4_set_bit(bit, addr); 372 ext4_set_bit(bit, addr);
373} 373}
374 374
375static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
376{
377 addr = mb_correct_addr_and_bit(&bit, addr);
378 ext4_set_bit_atomic(lock, bit, addr);
379}
380
381static inline void mb_clear_bit(int bit, void *addr) 375static inline void mb_clear_bit(int bit, void *addr)
382{ 376{
383 addr = mb_correct_addr_and_bit(&bit, addr); 377 addr = mb_correct_addr_and_bit(&bit, addr);
384 ext4_clear_bit(bit, addr); 378 ext4_clear_bit(bit, addr);
385} 379}
386 380
387static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
388{
389 addr = mb_correct_addr_and_bit(&bit, addr);
390 ext4_clear_bit_atomic(lock, bit, addr);
391}
392
393static inline int mb_find_next_zero_bit(void *addr, int max, int start) 381static inline int mb_find_next_zero_bit(void *addr, int max, int start)
394{ 382{
395 int fix = 0, ret, tmpmax; 383 int fix = 0, ret, tmpmax;
@@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
448 436
449 if (unlikely(e4b->bd_info->bb_bitmap == NULL)) 437 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
450 return; 438 return;
451 BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); 439 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
452 for (i = 0; i < count; i++) { 440 for (i = 0; i < count; i++) {
453 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { 441 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
454 ext4_fsblk_t blocknr; 442 ext4_fsblk_t blocknr;
@@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
472 460
473 if (unlikely(e4b->bd_info->bb_bitmap == NULL)) 461 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
474 return; 462 return;
475 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 463 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
476 for (i = 0; i < count; i++) { 464 for (i = 0; i < count; i++) {
477 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); 465 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
478 mb_set_bit(first + i, e4b->bd_info->bb_bitmap); 466 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
739 727
740static int ext4_mb_init_cache(struct page *page, char *incore) 728static int ext4_mb_init_cache(struct page *page, char *incore)
741{ 729{
730 ext4_group_t ngroups;
742 int blocksize; 731 int blocksize;
743 int blocks_per_page; 732 int blocks_per_page;
744 int groups_per_page; 733 int groups_per_page;
@@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
757 746
758 inode = page->mapping->host; 747 inode = page->mapping->host;
759 sb = inode->i_sb; 748 sb = inode->i_sb;
749 ngroups = ext4_get_groups_count(sb);
760 blocksize = 1 << inode->i_blkbits; 750 blocksize = 1 << inode->i_blkbits;
761 blocks_per_page = PAGE_CACHE_SIZE / blocksize; 751 blocks_per_page = PAGE_CACHE_SIZE / blocksize;
762 752
@@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
780 for (i = 0; i < groups_per_page; i++) { 770 for (i = 0; i < groups_per_page; i++) {
781 struct ext4_group_desc *desc; 771 struct ext4_group_desc *desc;
782 772
783 if (first_group + i >= EXT4_SB(sb)->s_groups_count) 773 if (first_group + i >= ngroups)
784 break; 774 break;
785 775
786 err = -EIO; 776 err = -EIO;
@@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
801 unlock_buffer(bh[i]); 791 unlock_buffer(bh[i]);
802 continue; 792 continue;
803 } 793 }
804 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 794 ext4_lock_group(sb, first_group + i);
805 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 795 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
806 ext4_init_block_bitmap(sb, bh[i], 796 ext4_init_block_bitmap(sb, bh[i],
807 first_group + i, desc); 797 first_group + i, desc);
808 set_bitmap_uptodate(bh[i]); 798 set_bitmap_uptodate(bh[i]);
809 set_buffer_uptodate(bh[i]); 799 set_buffer_uptodate(bh[i]);
810 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 800 ext4_unlock_group(sb, first_group + i);
811 unlock_buffer(bh[i]); 801 unlock_buffer(bh[i]);
812 continue; 802 continue;
813 } 803 }
814 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 804 ext4_unlock_group(sb, first_group + i);
815 if (buffer_uptodate(bh[i])) { 805 if (buffer_uptodate(bh[i])) {
816 /* 806 /*
817 * if not uninit if bh is uptodate, 807 * if not uninit if bh is uptodate,
@@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
852 struct ext4_group_info *grinfo; 842 struct ext4_group_info *grinfo;
853 843
854 group = (first_block + i) >> 1; 844 group = (first_block + i) >> 1;
855 if (group >= EXT4_SB(sb)->s_groups_count) 845 if (group >= ngroups)
856 break; 846 break;
857 847
858 /* 848 /*
@@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1078 return 0; 1068 return 0;
1079} 1069}
1080 1070
1081static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) 1071static void mb_clear_bits(void *bm, int cur, int len)
1082{ 1072{
1083 __u32 *addr; 1073 __u32 *addr;
1084 1074
@@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
1091 cur += 32; 1081 cur += 32;
1092 continue; 1082 continue;
1093 } 1083 }
1094 if (lock) 1084 mb_clear_bit(cur, bm);
1095 mb_clear_bit_atomic(lock, cur, bm);
1096 else
1097 mb_clear_bit(cur, bm);
1098 cur++; 1085 cur++;
1099 } 1086 }
1100} 1087}
1101 1088
1102static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) 1089static void mb_set_bits(void *bm, int cur, int len)
1103{ 1090{
1104 __u32 *addr; 1091 __u32 *addr;
1105 1092
@@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
1112 cur += 32; 1099 cur += 32;
1113 continue; 1100 continue;
1114 } 1101 }
1115 if (lock) 1102 mb_set_bit(cur, bm);
1116 mb_set_bit_atomic(lock, cur, bm);
1117 else
1118 mb_set_bit(cur, bm);
1119 cur++; 1103 cur++;
1120 } 1104 }
1121} 1105}
@@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1131 struct super_block *sb = e4b->bd_sb; 1115 struct super_block *sb = e4b->bd_sb;
1132 1116
1133 BUG_ON(first + count > (sb->s_blocksize << 3)); 1117 BUG_ON(first + count > (sb->s_blocksize << 3));
1134 BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); 1118 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1135 mb_check_buddy(e4b); 1119 mb_check_buddy(e4b);
1136 mb_free_blocks_double(inode, e4b, first, count); 1120 mb_free_blocks_double(inode, e4b, first, count);
1137 1121
@@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1212 int ord; 1196 int ord;
1213 void *buddy; 1197 void *buddy;
1214 1198
1215 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 1199 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1216 BUG_ON(ex == NULL); 1200 BUG_ON(ex == NULL);
1217 1201
1218 buddy = mb_find_buddy(e4b, order, &max); 1202 buddy = mb_find_buddy(e4b, order, &max);
@@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1276 1260
1277 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); 1261 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1278 BUG_ON(e4b->bd_group != ex->fe_group); 1262 BUG_ON(e4b->bd_group != ex->fe_group);
1279 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); 1263 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1280 mb_check_buddy(e4b); 1264 mb_check_buddy(e4b);
1281 mb_mark_used_double(e4b, start, len); 1265 mb_mark_used_double(e4b, start, len);
1282 1266
@@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1330 e4b->bd_info->bb_counters[ord]++; 1314 e4b->bd_info->bb_counters[ord]++;
1331 } 1315 }
1332 1316
1333 mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), 1317 mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1334 EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1335 mb_check_buddy(e4b); 1318 mb_check_buddy(e4b);
1336 1319
1337 return ret; 1320 return ret;
@@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1726 unsigned free, fragments; 1709 unsigned free, fragments;
1727 unsigned i, bits; 1710 unsigned i, bits;
1728 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); 1711 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1729 struct ext4_group_desc *desc;
1730 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); 1712 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1731 1713
1732 BUG_ON(cr < 0 || cr >= 4); 1714 BUG_ON(cr < 0 || cr >= 4);
@@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1742 switch (cr) { 1724 switch (cr) {
1743 case 0: 1725 case 0:
1744 BUG_ON(ac->ac_2order == 0); 1726 BUG_ON(ac->ac_2order == 0);
1745 /* If this group is uninitialized, skip it initially */
1746 desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
1747 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1748 return 0;
1749 1727
1750 /* Avoid using the first bg of a flexgroup for data files */ 1728 /* Avoid using the first bg of a flexgroup for data files */
1751 if ((ac->ac_flags & EXT4_MB_HINT_DATA) && 1729 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1788 int block, pnum; 1766 int block, pnum;
1789 int blocks_per_page; 1767 int blocks_per_page;
1790 int groups_per_page; 1768 int groups_per_page;
1769 ext4_group_t ngroups = ext4_get_groups_count(sb);
1791 ext4_group_t first_group; 1770 ext4_group_t first_group;
1792 struct ext4_group_info *grp; 1771 struct ext4_group_info *grp;
1793 1772
@@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1807 /* read all groups the page covers into the cache */ 1786 /* read all groups the page covers into the cache */
1808 for (i = 0; i < groups_per_page; i++) { 1787 for (i = 0; i < groups_per_page; i++) {
1809 1788
1810 if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) 1789 if ((first_group + i) >= ngroups)
1811 break; 1790 break;
1812 grp = ext4_get_group_info(sb, first_group + i); 1791 grp = ext4_get_group_info(sb, first_group + i);
1813 /* take all groups write allocation 1792 /* take all groups write allocation
@@ -1945,8 +1924,7 @@ err:
1945static noinline_for_stack int 1924static noinline_for_stack int
1946ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1925ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1947{ 1926{
1948 ext4_group_t group; 1927 ext4_group_t ngroups, group, i;
1949 ext4_group_t i;
1950 int cr; 1928 int cr;
1951 int err = 0; 1929 int err = 0;
1952 int bsbits; 1930 int bsbits;
@@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1957 1935
1958 sb = ac->ac_sb; 1936 sb = ac->ac_sb;
1959 sbi = EXT4_SB(sb); 1937 sbi = EXT4_SB(sb);
1938 ngroups = ext4_get_groups_count(sb);
1960 BUG_ON(ac->ac_status == AC_STATUS_FOUND); 1939 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1961 1940
1962 /* first, try the goal */ 1941 /* first, try the goal */
@@ -2017,11 +1996,11 @@ repeat:
2017 */ 1996 */
2018 group = ac->ac_g_ex.fe_group; 1997 group = ac->ac_g_ex.fe_group;
2019 1998
2020 for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { 1999 for (i = 0; i < ngroups; group++, i++) {
2021 struct ext4_group_info *grp; 2000 struct ext4_group_info *grp;
2022 struct ext4_group_desc *desc; 2001 struct ext4_group_desc *desc;
2023 2002
2024 if (group == EXT4_SB(sb)->s_groups_count) 2003 if (group == ngroups)
2025 group = 0; 2004 group = 0;
2026 2005
2027 /* quick check to skip empty groups */ 2006 /* quick check to skip empty groups */
@@ -2064,9 +2043,7 @@ repeat:
2064 2043
2065 ac->ac_groups_scanned++; 2044 ac->ac_groups_scanned++;
2066 desc = ext4_get_group_desc(sb, group, NULL); 2045 desc = ext4_get_group_desc(sb, group, NULL);
2067 if (cr == 0 || (desc->bg_flags & 2046 if (cr == 0)
2068 cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
2069 ac->ac_2order != 0))
2070 ext4_mb_simple_scan_group(ac, &e4b); 2047 ext4_mb_simple_scan_group(ac, &e4b);
2071 else if (cr == 1 && 2048 else if (cr == 1 &&
2072 ac->ac_g_ex.fe_len == sbi->s_stripe) 2049 ac->ac_g_ex.fe_len == sbi->s_stripe)
@@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
2315static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) 2292static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2316{ 2293{
2317 struct super_block *sb = seq->private; 2294 struct super_block *sb = seq->private;
2318 struct ext4_sb_info *sbi = EXT4_SB(sb);
2319 ext4_group_t group; 2295 ext4_group_t group;
2320 2296
2321 if (*pos < 0 || *pos >= sbi->s_groups_count) 2297 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2322 return NULL; 2298 return NULL;
2323
2324 group = *pos + 1; 2299 group = *pos + 1;
2325 return (void *) ((unsigned long) group); 2300 return (void *) ((unsigned long) group);
2326} 2301}
@@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2328static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) 2303static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2329{ 2304{
2330 struct super_block *sb = seq->private; 2305 struct super_block *sb = seq->private;
2331 struct ext4_sb_info *sbi = EXT4_SB(sb);
2332 ext4_group_t group; 2306 ext4_group_t group;
2333 2307
2334 ++*pos; 2308 ++*pos;
2335 if (*pos < 0 || *pos >= sbi->s_groups_count) 2309 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2336 return NULL; 2310 return NULL;
2337 group = *pos + 1; 2311 group = *pos + 1;
2338 return (void *) ((unsigned long) group); 2312 return (void *) ((unsigned long) group);
@@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb)
2420 2394
2421 if (sbi->s_proc != NULL) { 2395 if (sbi->s_proc != NULL) {
2422 remove_proc_entry("mb_groups", sbi->s_proc); 2396 remove_proc_entry("mb_groups", sbi->s_proc);
2423 remove_proc_entry("mb_history", sbi->s_proc); 2397 if (sbi->s_mb_history_max)
2398 remove_proc_entry("mb_history", sbi->s_proc);
2424 } 2399 }
2425 kfree(sbi->s_mb_history); 2400 kfree(sbi->s_mb_history);
2426} 2401}
@@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb)
2431 int i; 2406 int i;
2432 2407
2433 if (sbi->s_proc != NULL) { 2408 if (sbi->s_proc != NULL) {
2434 proc_create_data("mb_history", S_IRUGO, sbi->s_proc, 2409 if (sbi->s_mb_history_max)
2435 &ext4_mb_seq_history_fops, sb); 2410 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2411 &ext4_mb_seq_history_fops, sb);
2436 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2412 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2437 &ext4_mb_seq_groups_fops, sb); 2413 &ext4_mb_seq_groups_fops, sb);
2438 } 2414 }
2439 2415
2440 sbi->s_mb_history_max = 1000;
2441 sbi->s_mb_history_cur = 0; 2416 sbi->s_mb_history_cur = 0;
2442 spin_lock_init(&sbi->s_mb_history_lock); 2417 spin_lock_init(&sbi->s_mb_history_lock);
2443 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); 2418 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
2444 sbi->s_mb_history = kzalloc(i, GFP_KERNEL); 2419 sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
2445 /* if we can't allocate history, then we simple won't use it */ 2420 /* if we can't allocate history, then we simple won't use it */
2446} 2421}
2447 2422
@@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
2451 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2426 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2452 struct ext4_mb_history h; 2427 struct ext4_mb_history h;
2453 2428
2454 if (unlikely(sbi->s_mb_history == NULL)) 2429 if (sbi->s_mb_history == NULL)
2455 return; 2430 return;
2456 2431
2457 if (!(ac->ac_op & sbi->s_mb_history_filter)) 2432 if (!(ac->ac_op & sbi->s_mb_history_filter))
@@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
2587 2562
2588static int ext4_mb_init_backend(struct super_block *sb) 2563static int ext4_mb_init_backend(struct super_block *sb)
2589{ 2564{
2565 ext4_group_t ngroups = ext4_get_groups_count(sb);
2590 ext4_group_t i; 2566 ext4_group_t i;
2591 int metalen; 2567 int metalen;
2592 struct ext4_sb_info *sbi = EXT4_SB(sb); 2568 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2598 struct ext4_group_desc *desc; 2574 struct ext4_group_desc *desc;
2599 2575
2600 /* This is the number of blocks used by GDT */ 2576 /* This is the number of blocks used by GDT */
2601 num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 2577 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
2602 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); 2578 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2603 2579
2604 /* 2580 /*
@@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2644 for (i = 0; i < num_meta_group_infos; i++) { 2620 for (i = 0; i < num_meta_group_infos; i++) {
2645 if ((i + 1) == num_meta_group_infos) 2621 if ((i + 1) == num_meta_group_infos)
2646 metalen = sizeof(*meta_group_info) * 2622 metalen = sizeof(*meta_group_info) *
2647 (sbi->s_groups_count - 2623 (ngroups -
2648 (i << EXT4_DESC_PER_BLOCK_BITS(sb))); 2624 (i << EXT4_DESC_PER_BLOCK_BITS(sb)));
2649 meta_group_info = kmalloc(metalen, GFP_KERNEL); 2625 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2650 if (meta_group_info == NULL) { 2626 if (meta_group_info == NULL) {
@@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2655 sbi->s_group_info[i] = meta_group_info; 2631 sbi->s_group_info[i] = meta_group_info;
2656 } 2632 }
2657 2633
2658 for (i = 0; i < sbi->s_groups_count; i++) { 2634 for (i = 0; i < ngroups; i++) {
2659 desc = ext4_get_group_desc(sb, i, NULL); 2635 desc = ext4_get_group_desc(sb, i, NULL);
2660 if (desc == NULL) { 2636 if (desc == NULL) {
2661 printk(KERN_ERR 2637 printk(KERN_ERR
@@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2761 return 0; 2737 return 0;
2762} 2738}
2763 2739
2764/* need to called with ext4 group lock (ext4_lock_group) */ 2740/* need to called with the ext4 group lock held */
2765static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) 2741static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2766{ 2742{
2767 struct ext4_prealloc_space *pa; 2743 struct ext4_prealloc_space *pa;
@@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2781 2757
2782int ext4_mb_release(struct super_block *sb) 2758int ext4_mb_release(struct super_block *sb)
2783{ 2759{
2760 ext4_group_t ngroups = ext4_get_groups_count(sb);
2784 ext4_group_t i; 2761 ext4_group_t i;
2785 int num_meta_group_infos; 2762 int num_meta_group_infos;
2786 struct ext4_group_info *grinfo; 2763 struct ext4_group_info *grinfo;
2787 struct ext4_sb_info *sbi = EXT4_SB(sb); 2764 struct ext4_sb_info *sbi = EXT4_SB(sb);
2788 2765
2789 if (sbi->s_group_info) { 2766 if (sbi->s_group_info) {
2790 for (i = 0; i < sbi->s_groups_count; i++) { 2767 for (i = 0; i < ngroups; i++) {
2791 grinfo = ext4_get_group_info(sb, i); 2768 grinfo = ext4_get_group_info(sb, i);
2792#ifdef DOUBLE_CHECK 2769#ifdef DOUBLE_CHECK
2793 kfree(grinfo->bb_bitmap); 2770 kfree(grinfo->bb_bitmap);
@@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb)
2797 ext4_unlock_group(sb, i); 2774 ext4_unlock_group(sb, i);
2798 kfree(grinfo); 2775 kfree(grinfo);
2799 } 2776 }
2800 num_meta_group_infos = (sbi->s_groups_count + 2777 num_meta_group_infos = (ngroups +
2801 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2778 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2802 EXT4_DESC_PER_BLOCK_BITS(sb); 2779 EXT4_DESC_PER_BLOCK_BITS(sb);
2803 for (i = 0; i < num_meta_group_infos; i++) 2780 for (i = 0; i < num_meta_group_infos; i++)
@@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2984 + le32_to_cpu(es->s_first_data_block); 2961 + le32_to_cpu(es->s_first_data_block);
2985 2962
2986 len = ac->ac_b_ex.fe_len; 2963 len = ac->ac_b_ex.fe_len;
2987 if (in_range(ext4_block_bitmap(sb, gdp), block, len) || 2964 if (!ext4_data_block_valid(sbi, block, len)) {
2988 in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
2989 in_range(block, ext4_inode_table(sb, gdp),
2990 EXT4_SB(sb)->s_itb_per_group) ||
2991 in_range(block + len - 1, ext4_inode_table(sb, gdp),
2992 EXT4_SB(sb)->s_itb_per_group)) {
2993 ext4_error(sb, __func__, 2965 ext4_error(sb, __func__,
2994 "Allocating block %llu in system zone of %d group\n", 2966 "Allocating blocks %llu-%llu which overlap "
2995 block, ac->ac_b_ex.fe_group); 2967 "fs metadata\n", block, block+len);
2996 /* File system mounted not to panic on error 2968 /* File system mounted not to panic on error
2997 * Fix the bitmap and repeat the block allocation 2969 * Fix the bitmap and repeat the block allocation
2998 * We leak some of the blocks here. 2970 * We leak some of the blocks here.
2999 */ 2971 */
3000 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), 2972 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3001 bitmap_bh->b_data, ac->ac_b_ex.fe_start, 2973 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3002 ac->ac_b_ex.fe_len); 2974 ac->ac_b_ex.fe_len);
2975 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3003 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 2976 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3004 if (!err) 2977 if (!err)
3005 err = -EAGAIN; 2978 err = -EAGAIN;
3006 goto out_err; 2979 goto out_err;
3007 } 2980 }
2981
2982 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3008#ifdef AGGRESSIVE_CHECK 2983#ifdef AGGRESSIVE_CHECK
3009 { 2984 {
3010 int i; 2985 int i;
@@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3014 } 2989 }
3015 } 2990 }
3016#endif 2991#endif
3017 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2992 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
3018 mb_set_bits(NULL, bitmap_bh->b_data,
3019 ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
3020 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2993 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3021 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 2994 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3022 ext4_free_blks_set(sb, gdp, 2995 ext4_free_blks_set(sb, gdp,
@@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3026 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; 2999 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
3027 ext4_free_blks_set(sb, gdp, len); 3000 ext4_free_blks_set(sb, gdp, len);
3028 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 3001 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3029 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 3002
3003 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3030 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 3004 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
3031 /* 3005 /*
3032 * Now reduce the dirty block count also. Should not go negative 3006 * Now reduce the dirty block count also. Should not go negative
@@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3459 * the function goes through all block freed in the group 3433 * the function goes through all block freed in the group
3460 * but not yet committed and marks them used in in-core bitmap. 3434 * but not yet committed and marks them used in in-core bitmap.
3461 * buddy must be generated from this bitmap 3435 * buddy must be generated from this bitmap
3462 * Need to be called with ext4 group lock (ext4_lock_group) 3436 * Need to be called with the ext4 group lock held
3463 */ 3437 */
3464static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 3438static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3465 ext4_group_t group) 3439 ext4_group_t group)
@@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3473 3447
3474 while (n) { 3448 while (n) {
3475 entry = rb_entry(n, struct ext4_free_data, node); 3449 entry = rb_entry(n, struct ext4_free_data, node);
3476 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), 3450 mb_set_bits(bitmap, entry->start_blk, entry->count);
3477 bitmap, entry->start_blk,
3478 entry->count);
3479 n = rb_next(n); 3451 n = rb_next(n);
3480 } 3452 }
3481 return; 3453 return;
@@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3484/* 3456/*
3485 * the function goes through all preallocation in this group and marks them 3457 * the function goes through all preallocation in this group and marks them
3486 * used in in-core bitmap. buddy must be generated from this bitmap 3458 * used in in-core bitmap. buddy must be generated from this bitmap
3487 * Need to be called with ext4 group lock (ext4_lock_group) 3459 * Need to be called with ext4 group lock held
3488 */ 3460 */
3489static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 3461static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3490 ext4_group_t group) 3462 ext4_group_t group)
@@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3516 if (unlikely(len == 0)) 3488 if (unlikely(len == 0))
3517 continue; 3489 continue;
3518 BUG_ON(groupnr != group); 3490 BUG_ON(groupnr != group);
3519 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), 3491 mb_set_bits(bitmap, start, len);
3520 bitmap, start, len);
3521 preallocated += len; 3492 preallocated += len;
3522 count++; 3493 count++;
3523 } 3494 }
@@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
4121static void ext4_mb_show_ac(struct ext4_allocation_context *ac) 4092static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4122{ 4093{
4123 struct super_block *sb = ac->ac_sb; 4094 struct super_block *sb = ac->ac_sb;
4124 ext4_group_t i; 4095 ext4_group_t ngroups, i;
4125 4096
4126 printk(KERN_ERR "EXT4-fs: Can't allocate:" 4097 printk(KERN_ERR "EXT4-fs: Can't allocate:"
4127 " Allocation context details:\n"); 4098 " Allocation context details:\n");
@@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4145 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, 4116 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
4146 ac->ac_found); 4117 ac->ac_found);
4147 printk(KERN_ERR "EXT4-fs: groups: \n"); 4118 printk(KERN_ERR "EXT4-fs: groups: \n");
4148 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 4119 ngroups = ext4_get_groups_count(sb);
4120 for (i = 0; i < ngroups; i++) {
4149 struct ext4_group_info *grp = ext4_get_group_info(sb, i); 4121 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4150 struct ext4_prealloc_space *pa; 4122 struct ext4_prealloc_space *pa;
4151 ext4_grpblk_t start; 4123 ext4_grpblk_t start;
@@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4469 4441
4470static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) 4442static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4471{ 4443{
4472 ext4_group_t i; 4444 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4473 int ret; 4445 int ret;
4474 int freed = 0; 4446 int freed = 0;
4475 4447
4476 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", 4448 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
4477 sb->s_id, needed); 4449 sb->s_id, needed);
4478 for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { 4450 for (i = 0; i < ngroups && needed > 0; i++) {
4479 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4451 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4480 freed += ret; 4452 freed += ret;
4481 needed -= ret; 4453 needed -= ret;
@@ -4859,29 +4831,25 @@ do_more:
4859 new_entry->group = block_group; 4831 new_entry->group = block_group;
4860 new_entry->count = count; 4832 new_entry->count = count;
4861 new_entry->t_tid = handle->h_transaction->t_tid; 4833 new_entry->t_tid = handle->h_transaction->t_tid;
4834
4862 ext4_lock_group(sb, block_group); 4835 ext4_lock_group(sb, block_group);
4863 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, 4836 mb_clear_bits(bitmap_bh->b_data, bit, count);
4864 bit, count);
4865 ext4_mb_free_metadata(handle, &e4b, new_entry); 4837 ext4_mb_free_metadata(handle, &e4b, new_entry);
4866 ext4_unlock_group(sb, block_group);
4867 } else { 4838 } else {
4868 ext4_lock_group(sb, block_group);
4869 /* need to update group_info->bb_free and bitmap 4839 /* need to update group_info->bb_free and bitmap
4870 * with group lock held. generate_buddy look at 4840 * with group lock held. generate_buddy look at
4871 * them with group lock_held 4841 * them with group lock_held
4872 */ 4842 */
4873 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, 4843 ext4_lock_group(sb, block_group);
4874 bit, count); 4844 mb_clear_bits(bitmap_bh->b_data, bit, count);
4875 mb_free_blocks(inode, &e4b, bit, count); 4845 mb_free_blocks(inode, &e4b, bit, count);
4876 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4846 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4877 ext4_unlock_group(sb, block_group);
4878 } 4847 }
4879 4848
4880 spin_lock(sb_bgl_lock(sbi, block_group));
4881 ret = ext4_free_blks_count(sb, gdp) + count; 4849 ret = ext4_free_blks_count(sb, gdp) + count;
4882 ext4_free_blks_set(sb, gdp, ret); 4850 ext4_free_blks_set(sb, gdp, ret);
4883 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4851 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4884 spin_unlock(sb_bgl_lock(sbi, block_group)); 4852 ext4_unlock_group(sb, block_group);
4885 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4853 percpu_counter_add(&sbi->s_freeblocks_counter, count);
4886 4854
4887 if (sbi->s_log_groups_per_flex) { 4855 if (sbi->s_log_groups_per_flex) {
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index dd9e6cd5f6cf..75e34f69215b 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,7 +23,6 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "ext4.h" 25#include "ext4.h"
26#include "group.h"
27 26
28/* 27/*
29 * with AGGRESSIVE_CHECK allocator runs consistency checks over 28 * with AGGRESSIVE_CHECK allocator runs consistency checks over
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 22098e1cd085..07eb6649e4fa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -37,7 +37,6 @@
37#include "ext4.h" 37#include "ext4.h"
38#include "ext4_jbd2.h" 38#include "ext4_jbd2.h"
39 39
40#include "namei.h"
41#include "xattr.h" 40#include "xattr.h"
42#include "acl.h" 41#include "acl.h"
43 42
@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
750 ext4fs_dirhash(de->name, de->name_len, &h); 749 ext4fs_dirhash(de->name, de->name_len, &h);
751 map_tail--; 750 map_tail--;
752 map_tail->hash = h.hash; 751 map_tail->hash = h.hash;
753 map_tail->offs = (u16) ((char *) de - base); 752 map_tail->offs = ((char *) de - base)>>2;
754 map_tail->size = le16_to_cpu(de->rec_len); 753 map_tail->size = le16_to_cpu(de->rec_len);
755 count++; 754 count++;
756 cond_resched(); 755 cond_resched();
@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1148 unsigned rec_len = 0; 1147 unsigned rec_len = 0;
1149 1148
1150 while (count--) { 1149 while (count--) {
1151 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); 1150 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1151 (from + (map->offs<<2));
1152 rec_len = EXT4_DIR_REC_LEN(de->name_len); 1152 rec_len = EXT4_DIR_REC_LEN(de->name_len);
1153 memcpy (to, de, rec_len); 1153 memcpy (to, de, rec_len);
1154 ((struct ext4_dir_entry_2 *) to)->rec_len = 1154 ((struct ext4_dir_entry_2 *) to)->rec_len =
@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
1997 if (!ext4_handle_valid(handle)) 1997 if (!ext4_handle_valid(handle))
1998 return 0; 1998 return 0;
1999 1999
2000 lock_super(sb); 2000 mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
2001 if (!list_empty(&EXT4_I(inode)->i_orphan)) 2001 if (!list_empty(&EXT4_I(inode)->i_orphan))
2002 goto out_unlock; 2002 goto out_unlock;
2003 2003
@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2006 2006
2007 /* @@@ FIXME: Observation from aviro: 2007 /* @@@ FIXME: Observation from aviro:
2008 * I think I can trigger J_ASSERT in ext4_orphan_add(). We block 2008 * I think I can trigger J_ASSERT in ext4_orphan_add(). We block
2009 * here (on lock_super()), so race with ext4_link() which might bump 2009 * here (on s_orphan_lock), so race with ext4_link() which might bump
2010 * ->i_nlink. For, say it, character device. Not a regular file, 2010 * ->i_nlink. For, say it, character device. Not a regular file,
2011 * not a directory, not a symlink and ->i_nlink > 0. 2011 * not a directory, not a symlink and ->i_nlink > 0.
2012 *
2013 * tytso, 4/25/2009: I'm not sure how that could happen;
2014 * shouldn't the fs core protect us from these sort of
2015 * unlink()/link() races?
2012 */ 2016 */
2013 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2017 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2014 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 2018 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2045 jbd_debug(4, "orphan inode %lu will point to %d\n", 2049 jbd_debug(4, "orphan inode %lu will point to %d\n",
2046 inode->i_ino, NEXT_ORPHAN(inode)); 2050 inode->i_ino, NEXT_ORPHAN(inode));
2047out_unlock: 2051out_unlock:
2048 unlock_super(sb); 2052 mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
2049 ext4_std_error(inode->i_sb, err); 2053 ext4_std_error(inode->i_sb, err);
2050 return err; 2054 return err;
2051} 2055}
@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2066 if (!ext4_handle_valid(handle)) 2070 if (!ext4_handle_valid(handle))
2067 return 0; 2071 return 0;
2068 2072
2069 lock_super(inode->i_sb); 2073 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2070 if (list_empty(&ei->i_orphan)) { 2074 if (list_empty(&ei->i_orphan))
2071 unlock_super(inode->i_sb); 2075 goto out;
2072 return 0;
2073 }
2074 2076
2075 ino_next = NEXT_ORPHAN(inode); 2077 ino_next = NEXT_ORPHAN(inode);
2076 prev = ei->i_orphan.prev; 2078 prev = ei->i_orphan.prev;
@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2120out_err: 2122out_err:
2121 ext4_std_error(inode->i_sb, err); 2123 ext4_std_error(inode->i_sb, err);
2122out: 2124out:
2123 unlock_super(inode->i_sb); 2125 mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2124 return err; 2126 return err;
2125 2127
2126out_brelse: 2128out_brelse:
@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2533 .removexattr = generic_removexattr, 2535 .removexattr = generic_removexattr,
2534#endif 2536#endif
2535 .permission = ext4_permission, 2537 .permission = ext4_permission,
2538 .fiemap = ext4_fiemap,
2536}; 2539};
2537 2540
2538const struct inode_operations ext4_special_inode_operations = { 2541const struct inode_operations ext4_special_inode_operations = {
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
deleted file mode 100644
index 5e4dfff36a00..000000000000
--- a/fs/ext4/namei.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/* linux/fs/ext4/namei.h
2 *
3 * Copyright (C) 2005 Simtec Electronics
4 * Ben Dooks <ben@simtec.co.uk>
5 *
6*/
7
8extern struct dentry *ext4_get_parent(struct dentry *child);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 546c7dd869e1..27eb289eea37 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -15,7 +15,6 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
18#include "group.h"
19 18
20#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 19#define outside(b, first, last) ((b) < (first) || (b) >= (last))
21#define inside(b, first, last) ((b) >= (first) && (b) < (last)) 20#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb,
193 if (IS_ERR(handle)) 192 if (IS_ERR(handle))
194 return PTR_ERR(handle); 193 return PTR_ERR(handle);
195 194
196 lock_super(sb); 195 mutex_lock(&sbi->s_resize_lock);
197 if (input->group != sbi->s_groups_count) { 196 if (input->group != sbi->s_groups_count) {
198 err = -EBUSY; 197 err = -EBUSY;
199 goto exit_journal; 198 goto exit_journal;
@@ -302,7 +301,7 @@ exit_bh:
302 brelse(bh); 301 brelse(bh);
303 302
304exit_journal: 303exit_journal:
305 unlock_super(sb); 304 mutex_unlock(&sbi->s_resize_lock);
306 if ((err2 = ext4_journal_stop(handle)) && !err) 305 if ((err2 = ext4_journal_stop(handle)) && !err)
307 err = err2; 306 err = err2;
308 307
@@ -643,11 +642,12 @@ exit_free:
643 * important part is that the new block and inode counts are in the backup 642 * important part is that the new block and inode counts are in the backup
644 * superblocks, and the location of the new group metadata in the GDT backups. 643 * superblocks, and the location of the new group metadata in the GDT backups.
645 * 644 *
646 * We do not need lock_super() for this, because these blocks are not 645 * We do not need take the s_resize_lock for this, because these
647 * otherwise touched by the filesystem code when it is mounted. We don't 646 * blocks are not otherwise touched by the filesystem code when it is
648 * need to worry about last changing from sbi->s_groups_count, because the 647 * mounted. We don't need to worry about last changing from
649 * worst that can happen is that we do not copy the full number of backups 648 * sbi->s_groups_count, because the worst that can happen is that we
650 * at this time. The resize which changed s_groups_count will backup again. 649 * do not copy the full number of backups at this time. The resize
650 * which changed s_groups_count will backup again.
651 */ 651 */
652static void update_backups(struct super_block *sb, 652static void update_backups(struct super_block *sb,
653 int blk_off, char *data, int size) 653 int blk_off, char *data, int size)
@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
809 goto exit_put; 809 goto exit_put;
810 } 810 }
811 811
812 lock_super(sb); 812 mutex_lock(&sbi->s_resize_lock);
813 if (input->group != sbi->s_groups_count) { 813 if (input->group != sbi->s_groups_count) {
814 ext4_warning(sb, __func__, 814 ext4_warning(sb, __func__,
815 "multiple resizers run on filesystem!"); 815 "multiple resizers run on filesystem!");
@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
840 /* 840 /*
841 * OK, now we've set up the new group. Time to make it active. 841 * OK, now we've set up the new group. Time to make it active.
842 * 842 *
843 * Current kernels don't lock all allocations via lock_super(), 843 * We do not lock all allocations via s_resize_lock
844 * so we have to be safe wrt. concurrent accesses the group 844 * so we have to be safe wrt. concurrent accesses the group
845 * data. So we need to be careful to set all of the relevant 845 * data. So we need to be careful to set all of the relevant
846 * group descriptor data etc. *before* we enable the group. 846 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
900 * 900 *
901 * The precise rules we use are: 901 * The precise rules we use are:
902 * 902 *
903 * * Writers of s_groups_count *must* hold lock_super 903 * * Writers of s_groups_count *must* hold s_resize_lock
904 * AND 904 * AND
905 * * Writers must perform a smp_wmb() after updating all dependent 905 * * Writers must perform a smp_wmb() after updating all dependent
906 * data and before modifying the groups count 906 * data and before modifying the groups count
907 * 907 *
908 * * Readers must hold lock_super() over the access 908 * * Readers must hold s_resize_lock over the access
909 * OR 909 * OR
910 * * Readers must perform an smp_rmb() after reading the groups count 910 * * Readers must perform an smp_rmb() after reading the groups count
911 * and before reading any dependent data. 911 * and before reading any dependent data.
@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
948 sb->s_dirt = 1; 948 sb->s_dirt = 1;
949 949
950exit_journal: 950exit_journal:
951 unlock_super(sb); 951 mutex_unlock(&sbi->s_resize_lock);
952 if ((err2 = ext4_journal_stop(handle)) && !err) 952 if ((err2 = ext4_journal_stop(handle)) && !err)
953 err = err2; 953 err = err2;
954 if (!err) { 954 if (!err) {
@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
986 986
987 /* We don't need to worry about locking wrt other resizers just 987 /* We don't need to worry about locking wrt other resizers just
988 * yet: we're going to revalidate es->s_blocks_count after 988 * yet: we're going to revalidate es->s_blocks_count after
989 * taking lock_super() below. */ 989 * taking the s_resize_lock below. */
990 o_blocks_count = ext4_blocks_count(es); 990 o_blocks_count = ext4_blocks_count(es);
991 o_groups_count = EXT4_SB(sb)->s_groups_count; 991 o_groups_count = EXT4_SB(sb)->s_groups_count;
992 992
@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1056 goto exit_put; 1056 goto exit_put;
1057 } 1057 }
1058 1058
1059 lock_super(sb); 1059 mutex_lock(&EXT4_SB(sb)->s_resize_lock);
1060 if (o_blocks_count != ext4_blocks_count(es)) { 1060 if (o_blocks_count != ext4_blocks_count(es)) {
1061 ext4_warning(sb, __func__, 1061 ext4_warning(sb, __func__,
1062 "multiple resizers run on filesystem!"); 1062 "multiple resizers run on filesystem!");
1063 unlock_super(sb); 1063 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1064 ext4_journal_stop(handle); 1064 ext4_journal_stop(handle);
1065 err = -EBUSY; 1065 err = -EBUSY;
1066 goto exit_put; 1066 goto exit_put;
@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1070 EXT4_SB(sb)->s_sbh))) { 1070 EXT4_SB(sb)->s_sbh))) {
1071 ext4_warning(sb, __func__, 1071 ext4_warning(sb, __func__,
1072 "error %d on journal write access", err); 1072 "error %d on journal write access", err);
1073 unlock_super(sb); 1073 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1074 ext4_journal_stop(handle); 1074 ext4_journal_stop(handle);
1075 goto exit_put; 1075 goto exit_put;
1076 } 1076 }
1077 ext4_blocks_count_set(es, o_blocks_count + add); 1077 ext4_blocks_count_set(es, o_blocks_count + add);
1078 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 1078 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
1079 sb->s_dirt = 1; 1079 sb->s_dirt = 1;
1080 unlock_super(sb); 1080 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1081 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1081 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1082 o_blocks_count + add); 1082 o_blocks_count + add);
1083 /* We add the blocks to the bitmap and set the group need init bit */ 1083 /* We add the blocks to the bitmap and set the group need init bit */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e6f222..012c4251397e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -20,6 +20,7 @@
20#include <linux/string.h> 20#include <linux/string.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/vmalloc.h>
23#include <linux/jbd2.h> 24#include <linux/jbd2.h>
24#include <linux/slab.h> 25#include <linux/slab.h>
25#include <linux/init.h> 26#include <linux/init.h>
@@ -45,16 +46,20 @@
45#include "ext4_jbd2.h" 46#include "ext4_jbd2.h"
46#include "xattr.h" 47#include "xattr.h"
47#include "acl.h" 48#include "acl.h"
48#include "namei.h" 49
49#include "group.h" 50static int default_mb_history_length = 1000;
51
52module_param_named(default_mb_history_length, default_mb_history_length,
53 int, 0644);
54MODULE_PARM_DESC(default_mb_history_length,
55 "Default number of entries saved for mb_history");
50 56
51struct proc_dir_entry *ext4_proc_root; 57struct proc_dir_entry *ext4_proc_root;
52static struct kset *ext4_kset; 58static struct kset *ext4_kset;
53 59
54static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 60static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
55 unsigned long journal_devnum); 61 unsigned long journal_devnum);
56static int ext4_commit_super(struct super_block *sb, 62static int ext4_commit_super(struct super_block *sb, int sync);
57 struct ext4_super_block *es, int sync);
58static void ext4_mark_recovery_complete(struct super_block *sb, 63static void ext4_mark_recovery_complete(struct super_block *sb,
59 struct ext4_super_block *es); 64 struct ext4_super_block *es);
60static void ext4_clear_journal_err(struct super_block *sb, 65static void ext4_clear_journal_err(struct super_block *sb,
@@ -74,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
74{ 79{
75 return le32_to_cpu(bg->bg_block_bitmap_lo) | 80 return le32_to_cpu(bg->bg_block_bitmap_lo) |
76 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 81 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
77 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 82 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
78} 83}
79 84
80ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 85ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -82,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
82{ 87{
83 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 88 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
84 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 89 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
85 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 90 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
86} 91}
87 92
88ext4_fsblk_t ext4_inode_table(struct super_block *sb, 93ext4_fsblk_t ext4_inode_table(struct super_block *sb,
@@ -90,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
90{ 95{
91 return le32_to_cpu(bg->bg_inode_table_lo) | 96 return le32_to_cpu(bg->bg_inode_table_lo) |
92 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 97 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
93 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 98 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
94} 99}
95 100
96__u32 ext4_free_blks_count(struct super_block *sb, 101__u32 ext4_free_blks_count(struct super_block *sb,
@@ -98,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb,
98{ 103{
99 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 104 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
100 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 105 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
101 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 106 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
102} 107}
103 108
104__u32 ext4_free_inodes_count(struct super_block *sb, 109__u32 ext4_free_inodes_count(struct super_block *sb,
@@ -106,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb,
106{ 111{
107 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 112 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
108 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 113 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
109 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 114 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
110} 115}
111 116
112__u32 ext4_used_dirs_count(struct super_block *sb, 117__u32 ext4_used_dirs_count(struct super_block *sb,
@@ -114,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb,
114{ 119{
115 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 120 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
116 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 121 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
117 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 122 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
118} 123}
119 124
120__u32 ext4_itable_unused_count(struct super_block *sb, 125__u32 ext4_itable_unused_count(struct super_block *sb,
@@ -122,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb,
122{ 127{
123 return le16_to_cpu(bg->bg_itable_unused_lo) | 128 return le16_to_cpu(bg->bg_itable_unused_lo) |
124 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 129 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
125 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 130 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
126} 131}
127 132
128void ext4_block_bitmap_set(struct super_block *sb, 133void ext4_block_bitmap_set(struct super_block *sb,
@@ -202,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
202 journal = EXT4_SB(sb)->s_journal; 207 journal = EXT4_SB(sb)->s_journal;
203 if (journal) { 208 if (journal) {
204 if (is_journal_aborted(journal)) { 209 if (is_journal_aborted(journal)) {
205 ext4_abort(sb, __func__, 210 ext4_abort(sb, __func__, "Detected aborted journal");
206 "Detected aborted journal");
207 return ERR_PTR(-EROFS); 211 return ERR_PTR(-EROFS);
208 } 212 }
209 return jbd2_journal_start(journal, nblocks); 213 return jbd2_journal_start(journal, nblocks);
@@ -302,10 +306,10 @@ static void ext4_handle_error(struct super_block *sb)
302 jbd2_journal_abort(journal, -EIO); 306 jbd2_journal_abort(journal, -EIO);
303 } 307 }
304 if (test_opt(sb, ERRORS_RO)) { 308 if (test_opt(sb, ERRORS_RO)) {
305 printk(KERN_CRIT "Remounting filesystem read-only\n"); 309 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
306 sb->s_flags |= MS_RDONLY; 310 sb->s_flags |= MS_RDONLY;
307 } 311 }
308 ext4_commit_super(sb, es, 1); 312 ext4_commit_super(sb, 1);
309 if (test_opt(sb, ERRORS_PANIC)) 313 if (test_opt(sb, ERRORS_PANIC))
310 panic("EXT4-fs (device %s): panic forced after error\n", 314 panic("EXT4-fs (device %s): panic forced after error\n",
311 sb->s_id); 315 sb->s_id);
@@ -395,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function,
395{ 399{
396 va_list args; 400 va_list args;
397 401
398 printk(KERN_CRIT "ext4_abort called.\n");
399
400 va_start(args, fmt); 402 va_start(args, fmt);
401 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 403 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
402 vprintk(fmt, args); 404 vprintk(fmt, args);
@@ -409,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function,
409 if (sb->s_flags & MS_RDONLY) 411 if (sb->s_flags & MS_RDONLY)
410 return; 412 return;
411 413
412 printk(KERN_CRIT "Remounting filesystem read-only\n"); 414 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
413 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 415 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
414 sb->s_flags |= MS_RDONLY; 416 sb->s_flags |= MS_RDONLY;
415 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 417 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
@@ -417,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function,
417 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 419 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
418} 420}
419 421
422void ext4_msg (struct super_block * sb, const char *prefix,
423 const char *fmt, ...)
424{
425 va_list args;
426
427 va_start(args, fmt);
428 printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
429 vprintk(fmt, args);
430 printk("\n");
431 va_end(args);
432}
433
420void ext4_warning(struct super_block *sb, const char *function, 434void ext4_warning(struct super_block *sb, const char *function,
421 const char *fmt, ...) 435 const char *fmt, ...)
422{ 436{
@@ -431,7 +445,7 @@ void ext4_warning(struct super_block *sb, const char *function,
431} 445}
432 446
433void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 447void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
434 const char *function, const char *fmt, ...) 448 const char *function, const char *fmt, ...)
435__releases(bitlock) 449__releases(bitlock)
436__acquires(bitlock) 450__acquires(bitlock)
437{ 451{
@@ -447,7 +461,7 @@ __acquires(bitlock)
447 if (test_opt(sb, ERRORS_CONT)) { 461 if (test_opt(sb, ERRORS_CONT)) {
448 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 462 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
449 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 463 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
450 ext4_commit_super(sb, es, 0); 464 ext4_commit_super(sb, 0);
451 return; 465 return;
452 } 466 }
453 ext4_unlock_group(sb, grp); 467 ext4_unlock_group(sb, grp);
@@ -467,7 +481,6 @@ __acquires(bitlock)
467 return; 481 return;
468} 482}
469 483
470
471void ext4_update_dynamic_rev(struct super_block *sb) 484void ext4_update_dynamic_rev(struct super_block *sb)
472{ 485{
473 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 486 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -496,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
496/* 509/*
497 * Open the external journal device 510 * Open the external journal device
498 */ 511 */
499static struct block_device *ext4_blkdev_get(dev_t dev) 512static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
500{ 513{
501 struct block_device *bdev; 514 struct block_device *bdev;
502 char b[BDEVNAME_SIZE]; 515 char b[BDEVNAME_SIZE];
@@ -507,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev)
507 return bdev; 520 return bdev;
508 521
509fail: 522fail:
510 printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", 523 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
511 __bdevname(dev, b), PTR_ERR(bdev)); 524 __bdevname(dev, b), PTR_ERR(bdev));
512 return NULL; 525 return NULL;
513} 526}
@@ -543,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
543{ 556{
544 struct list_head *l; 557 struct list_head *l;
545 558
546 printk(KERN_ERR "sb orphan head is %d\n", 559 ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
547 le32_to_cpu(sbi->s_es->s_last_orphan)); 560 le32_to_cpu(sbi->s_es->s_last_orphan));
548 561
549 printk(KERN_ERR "sb_info orphan list:\n"); 562 printk(KERN_ERR "sb_info orphan list:\n");
550 list_for_each(l, &sbi->s_orphan) { 563 list_for_each(l, &sbi->s_orphan) {
@@ -563,6 +576,12 @@ static void ext4_put_super(struct super_block *sb)
563 struct ext4_super_block *es = sbi->s_es; 576 struct ext4_super_block *es = sbi->s_es;
564 int i, err; 577 int i, err;
565 578
579 lock_super(sb);
580 lock_kernel();
581 if (sb->s_dirt)
582 ext4_commit_super(sb, 1);
583
584 ext4_release_system_zone(sb);
566 ext4_mb_release(sb); 585 ext4_mb_release(sb);
567 ext4_ext_release(sb); 586 ext4_ext_release(sb);
568 ext4_xattr_put_super(sb); 587 ext4_xattr_put_super(sb);
@@ -576,7 +595,7 @@ static void ext4_put_super(struct super_block *sb)
576 if (!(sb->s_flags & MS_RDONLY)) { 595 if (!(sb->s_flags & MS_RDONLY)) {
577 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 596 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
578 es->s_state = cpu_to_le16(sbi->s_mount_state); 597 es->s_state = cpu_to_le16(sbi->s_mount_state);
579 ext4_commit_super(sb, es, 1); 598 ext4_commit_super(sb, 1);
580 } 599 }
581 if (sbi->s_proc) { 600 if (sbi->s_proc) {
582 remove_proc_entry(sb->s_id, ext4_proc_root); 601 remove_proc_entry(sb->s_id, ext4_proc_root);
@@ -586,7 +605,10 @@ static void ext4_put_super(struct super_block *sb)
586 for (i = 0; i < sbi->s_gdb_count; i++) 605 for (i = 0; i < sbi->s_gdb_count; i++)
587 brelse(sbi->s_group_desc[i]); 606 brelse(sbi->s_group_desc[i]);
588 kfree(sbi->s_group_desc); 607 kfree(sbi->s_group_desc);
589 kfree(sbi->s_flex_groups); 608 if (is_vmalloc_addr(sbi->s_flex_groups))
609 vfree(sbi->s_flex_groups);
610 else
611 kfree(sbi->s_flex_groups);
590 percpu_counter_destroy(&sbi->s_freeblocks_counter); 612 percpu_counter_destroy(&sbi->s_freeblocks_counter);
591 percpu_counter_destroy(&sbi->s_freeinodes_counter); 613 percpu_counter_destroy(&sbi->s_freeinodes_counter);
592 percpu_counter_destroy(&sbi->s_dirs_counter); 614 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -625,11 +647,8 @@ static void ext4_put_super(struct super_block *sb)
625 unlock_super(sb); 647 unlock_super(sb);
626 kobject_put(&sbi->s_kobj); 648 kobject_put(&sbi->s_kobj);
627 wait_for_completion(&sbi->s_kobj_unregister); 649 wait_for_completion(&sbi->s_kobj_unregister);
628 lock_super(sb);
629 lock_kernel();
630 kfree(sbi->s_blockgroup_lock); 650 kfree(sbi->s_blockgroup_lock);
631 kfree(sbi); 651 kfree(sbi);
632 return;
633} 652}
634 653
635static struct kmem_cache *ext4_inode_cachep; 654static struct kmem_cache *ext4_inode_cachep;
@@ -644,6 +663,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
644 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 663 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
645 if (!ei) 664 if (!ei)
646 return NULL; 665 return NULL;
666
647#ifdef CONFIG_EXT4_FS_POSIX_ACL 667#ifdef CONFIG_EXT4_FS_POSIX_ACL
648 ei->i_acl = EXT4_ACL_NOT_CACHED; 668 ei->i_acl = EXT4_ACL_NOT_CACHED;
649 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 669 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -664,14 +684,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
664 ei->i_allocated_meta_blocks = 0; 684 ei->i_allocated_meta_blocks = 0;
665 ei->i_delalloc_reserved_flag = 0; 685 ei->i_delalloc_reserved_flag = 0;
666 spin_lock_init(&(ei->i_block_reservation_lock)); 686 spin_lock_init(&(ei->i_block_reservation_lock));
687
667 return &ei->vfs_inode; 688 return &ei->vfs_inode;
668} 689}
669 690
670static void ext4_destroy_inode(struct inode *inode) 691static void ext4_destroy_inode(struct inode *inode)
671{ 692{
672 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 693 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
673 printk("EXT4 Inode %p: orphan list check failed!\n", 694 ext4_msg(inode->i_sb, KERN_ERR,
674 EXT4_I(inode)); 695 "Inode %lu (%p): orphan list check failed!",
696 inode->i_ino, EXT4_I(inode));
675 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 697 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
676 EXT4_I(inode), sizeof(struct ext4_inode_info), 698 EXT4_I(inode), sizeof(struct ext4_inode_info),
677 true); 699 true);
@@ -870,12 +892,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
870 seq_puts(seq, ",noauto_da_alloc"); 892 seq_puts(seq, ",noauto_da_alloc");
871 893
872 ext4_show_quota_options(seq, sb); 894 ext4_show_quota_options(seq, sb);
895
873 return 0; 896 return 0;
874} 897}
875 898
876
877static struct inode *ext4_nfs_get_inode(struct super_block *sb, 899static struct inode *ext4_nfs_get_inode(struct super_block *sb,
878 u64 ino, u32 generation) 900 u64 ino, u32 generation)
879{ 901{
880 struct inode *inode; 902 struct inode *inode;
881 903
@@ -904,14 +926,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
904} 926}
905 927
906static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 928static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
907 int fh_len, int fh_type) 929 int fh_len, int fh_type)
908{ 930{
909 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 931 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
910 ext4_nfs_get_inode); 932 ext4_nfs_get_inode);
911} 933}
912 934
913static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 935static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
914 int fh_len, int fh_type) 936 int fh_len, int fh_type)
915{ 937{
916 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 938 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
917 ext4_nfs_get_inode); 939 ext4_nfs_get_inode);
@@ -923,7 +945,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
923 * which would prevent try_to_free_buffers() from freeing them, we must use 945 * which would prevent try_to_free_buffers() from freeing them, we must use
924 * jbd2 layer's try_to_free_buffers() function to release them. 946 * jbd2 layer's try_to_free_buffers() function to release them.
925 */ 947 */
926static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) 948static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
949 gfp_t wait)
927{ 950{
928 journal_t *journal = EXT4_SB(sb)->s_journal; 951 journal_t *journal = EXT4_SB(sb)->s_journal;
929 952
@@ -992,7 +1015,6 @@ static const struct super_operations ext4_sops = {
992 .dirty_inode = ext4_dirty_inode, 1015 .dirty_inode = ext4_dirty_inode,
993 .delete_inode = ext4_delete_inode, 1016 .delete_inode = ext4_delete_inode,
994 .put_super = ext4_put_super, 1017 .put_super = ext4_put_super,
995 .write_super = ext4_write_super,
996 .sync_fs = ext4_sync_fs, 1018 .sync_fs = ext4_sync_fs,
997 .freeze_fs = ext4_freeze, 1019 .freeze_fs = ext4_freeze,
998 .unfreeze_fs = ext4_unfreeze, 1020 .unfreeze_fs = ext4_unfreeze,
@@ -1007,6 +1029,25 @@ static const struct super_operations ext4_sops = {
1007 .bdev_try_to_free_page = bdev_try_to_free_page, 1029 .bdev_try_to_free_page = bdev_try_to_free_page,
1008}; 1030};
1009 1031
1032static const struct super_operations ext4_nojournal_sops = {
1033 .alloc_inode = ext4_alloc_inode,
1034 .destroy_inode = ext4_destroy_inode,
1035 .write_inode = ext4_write_inode,
1036 .dirty_inode = ext4_dirty_inode,
1037 .delete_inode = ext4_delete_inode,
1038 .write_super = ext4_write_super,
1039 .put_super = ext4_put_super,
1040 .statfs = ext4_statfs,
1041 .remount_fs = ext4_remount,
1042 .clear_inode = ext4_clear_inode,
1043 .show_options = ext4_show_options,
1044#ifdef CONFIG_QUOTA
1045 .quota_read = ext4_quota_read,
1046 .quota_write = ext4_quota_write,
1047#endif
1048 .bdev_try_to_free_page = bdev_try_to_free_page,
1049};
1050
1010static const struct export_operations ext4_export_ops = { 1051static const struct export_operations ext4_export_ops = {
1011 .fh_to_dentry = ext4_fh_to_dentry, 1052 .fh_to_dentry = ext4_fh_to_dentry,
1012 .fh_to_parent = ext4_fh_to_parent, 1053 .fh_to_parent = ext4_fh_to_parent,
@@ -1023,12 +1064,13 @@ enum {
1023 Opt_journal_update, Opt_journal_dev, 1064 Opt_journal_update, Opt_journal_dev,
1024 Opt_journal_checksum, Opt_journal_async_commit, 1065 Opt_journal_checksum, Opt_journal_async_commit,
1025 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1066 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1026 Opt_data_err_abort, Opt_data_err_ignore, 1067 Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length,
1027 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1068 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1028 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1069 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
1029 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1070 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
1030 Opt_usrquota, Opt_grpquota, Opt_i_version, 1071 Opt_usrquota, Opt_grpquota, Opt_i_version,
1031 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1072 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1073 Opt_block_validity, Opt_noblock_validity,
1032 Opt_inode_readahead_blks, Opt_journal_ioprio 1074 Opt_inode_readahead_blks, Opt_journal_ioprio
1033}; 1075};
1034 1076
@@ -1069,6 +1111,7 @@ static const match_table_t tokens = {
1069 {Opt_data_writeback, "data=writeback"}, 1111 {Opt_data_writeback, "data=writeback"},
1070 {Opt_data_err_abort, "data_err=abort"}, 1112 {Opt_data_err_abort, "data_err=abort"},
1071 {Opt_data_err_ignore, "data_err=ignore"}, 1113 {Opt_data_err_ignore, "data_err=ignore"},
1114 {Opt_mb_history_length, "mb_history_length=%u"},
1072 {Opt_offusrjquota, "usrjquota="}, 1115 {Opt_offusrjquota, "usrjquota="},
1073 {Opt_usrjquota, "usrjquota=%s"}, 1116 {Opt_usrjquota, "usrjquota=%s"},
1074 {Opt_offgrpjquota, "grpjquota="}, 1117 {Opt_offgrpjquota, "grpjquota="},
@@ -1087,6 +1130,8 @@ static const match_table_t tokens = {
1087 {Opt_resize, "resize"}, 1130 {Opt_resize, "resize"},
1088 {Opt_delalloc, "delalloc"}, 1131 {Opt_delalloc, "delalloc"},
1089 {Opt_nodelalloc, "nodelalloc"}, 1132 {Opt_nodelalloc, "nodelalloc"},
1133 {Opt_block_validity, "block_validity"},
1134 {Opt_noblock_validity, "noblock_validity"},
1090 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1135 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1091 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1136 {Opt_journal_ioprio, "journal_ioprio=%u"},
1092 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1137 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
@@ -1102,8 +1147,9 @@ static ext4_fsblk_t get_sb_block(void **data)
1102 1147
1103 if (!options || strncmp(options, "sb=", 3) != 0) 1148 if (!options || strncmp(options, "sb=", 3) != 0)
1104 return 1; /* Default location */ 1149 return 1; /* Default location */
1150
1105 options += 3; 1151 options += 3;
1106 /*todo: use simple_strtoll with >32bit ext4 */ 1152 /* TODO: use simple_strtoll with >32bit ext4 */
1107 sb_block = simple_strtoul(options, &options, 0); 1153 sb_block = simple_strtoul(options, &options, 0);
1108 if (*options && *options != ',') { 1154 if (*options && *options != ',') {
1109 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1155 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
@@ -1113,6 +1159,7 @@ static ext4_fsblk_t get_sb_block(void **data)
1113 if (*options == ',') 1159 if (*options == ',')
1114 options++; 1160 options++;
1115 *data = (void *) options; 1161 *data = (void *) options;
1162
1116 return sb_block; 1163 return sb_block;
1117} 1164}
1118 1165
@@ -1206,8 +1253,7 @@ static int parse_options(char *options, struct super_block *sb,
1206#else 1253#else
1207 case Opt_user_xattr: 1254 case Opt_user_xattr:
1208 case Opt_nouser_xattr: 1255 case Opt_nouser_xattr:
1209 printk(KERN_ERR "EXT4 (no)user_xattr options " 1256 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
1210 "not supported\n");
1211 break; 1257 break;
1212#endif 1258#endif
1213#ifdef CONFIG_EXT4_FS_POSIX_ACL 1259#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1220,8 +1266,7 @@ static int parse_options(char *options, struct super_block *sb,
1220#else 1266#else
1221 case Opt_acl: 1267 case Opt_acl:
1222 case Opt_noacl: 1268 case Opt_noacl:
1223 printk(KERN_ERR "EXT4 (no)acl options " 1269 ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
1224 "not supported\n");
1225 break; 1270 break;
1226#endif 1271#endif
1227 case Opt_journal_update: 1272 case Opt_journal_update:
@@ -1231,16 +1276,16 @@ static int parse_options(char *options, struct super_block *sb,
1231 user to specify an existing inode to be the 1276 user to specify an existing inode to be the
1232 journal file. */ 1277 journal file. */
1233 if (is_remount) { 1278 if (is_remount) {
1234 printk(KERN_ERR "EXT4-fs: cannot specify " 1279 ext4_msg(sb, KERN_ERR,
1235 "journal on remount\n"); 1280 "Cannot specify journal on remount");
1236 return 0; 1281 return 0;
1237 } 1282 }
1238 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1283 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1239 break; 1284 break;
1240 case Opt_journal_dev: 1285 case Opt_journal_dev:
1241 if (is_remount) { 1286 if (is_remount) {
1242 printk(KERN_ERR "EXT4-fs: cannot specify " 1287 ext4_msg(sb, KERN_ERR,
1243 "journal on remount\n"); 1288 "Cannot specify journal on remount");
1244 return 0; 1289 return 0;
1245 } 1290 }
1246 if (match_int(&args[0], &option)) 1291 if (match_int(&args[0], &option))
@@ -1294,9 +1339,8 @@ static int parse_options(char *options, struct super_block *sb,
1294 if (is_remount) { 1339 if (is_remount) {
1295 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1340 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1296 != data_opt) { 1341 != data_opt) {
1297 printk(KERN_ERR 1342 ext4_msg(sb, KERN_ERR,
1298 "EXT4-fs: cannot change data " 1343 "Cannot change data mode on remount");
1299 "mode on remount\n");
1300 return 0; 1344 return 0;
1301 } 1345 }
1302 } else { 1346 } else {
@@ -1310,6 +1354,13 @@ static int parse_options(char *options, struct super_block *sb,
1310 case Opt_data_err_ignore: 1354 case Opt_data_err_ignore:
1311 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1355 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1312 break; 1356 break;
1357 case Opt_mb_history_length:
1358 if (match_int(&args[0], &option))
1359 return 0;
1360 if (option < 0)
1361 return 0;
1362 sbi->s_mb_history_max = option;
1363 break;
1313#ifdef CONFIG_QUOTA 1364#ifdef CONFIG_QUOTA
1314 case Opt_usrjquota: 1365 case Opt_usrjquota:
1315 qtype = USRQUOTA; 1366 qtype = USRQUOTA;
@@ -1319,31 +1370,31 @@ static int parse_options(char *options, struct super_block *sb,
1319set_qf_name: 1370set_qf_name:
1320 if (sb_any_quota_loaded(sb) && 1371 if (sb_any_quota_loaded(sb) &&
1321 !sbi->s_qf_names[qtype]) { 1372 !sbi->s_qf_names[qtype]) {
1322 printk(KERN_ERR 1373 ext4_msg(sb, KERN_ERR,
1323 "EXT4-fs: Cannot change journaled " 1374 "Cannot change journaled "
1324 "quota options when quota turned on.\n"); 1375 "quota options when quota turned on");
1325 return 0; 1376 return 0;
1326 } 1377 }
1327 qname = match_strdup(&args[0]); 1378 qname = match_strdup(&args[0]);
1328 if (!qname) { 1379 if (!qname) {
1329 printk(KERN_ERR 1380 ext4_msg(sb, KERN_ERR,
1330 "EXT4-fs: not enough memory for " 1381 "Not enough memory for "
1331 "storing quotafile name.\n"); 1382 "storing quotafile name");
1332 return 0; 1383 return 0;
1333 } 1384 }
1334 if (sbi->s_qf_names[qtype] && 1385 if (sbi->s_qf_names[qtype] &&
1335 strcmp(sbi->s_qf_names[qtype], qname)) { 1386 strcmp(sbi->s_qf_names[qtype], qname)) {
1336 printk(KERN_ERR 1387 ext4_msg(sb, KERN_ERR,
1337 "EXT4-fs: %s quota file already " 1388 "%s quota file already "
1338 "specified.\n", QTYPE2NAME(qtype)); 1389 "specified", QTYPE2NAME(qtype));
1339 kfree(qname); 1390 kfree(qname);
1340 return 0; 1391 return 0;
1341 } 1392 }
1342 sbi->s_qf_names[qtype] = qname; 1393 sbi->s_qf_names[qtype] = qname;
1343 if (strchr(sbi->s_qf_names[qtype], '/')) { 1394 if (strchr(sbi->s_qf_names[qtype], '/')) {
1344 printk(KERN_ERR 1395 ext4_msg(sb, KERN_ERR,
1345 "EXT4-fs: quotafile must be on " 1396 "quotafile must be on "
1346 "filesystem root.\n"); 1397 "filesystem root");
1347 kfree(sbi->s_qf_names[qtype]); 1398 kfree(sbi->s_qf_names[qtype]);
1348 sbi->s_qf_names[qtype] = NULL; 1399 sbi->s_qf_names[qtype] = NULL;
1349 return 0; 1400 return 0;
@@ -1358,9 +1409,9 @@ set_qf_name:
1358clear_qf_name: 1409clear_qf_name:
1359 if (sb_any_quota_loaded(sb) && 1410 if (sb_any_quota_loaded(sb) &&
1360 sbi->s_qf_names[qtype]) { 1411 sbi->s_qf_names[qtype]) {
1361 printk(KERN_ERR "EXT4-fs: Cannot change " 1412 ext4_msg(sb, KERN_ERR, "Cannot change "
1362 "journaled quota options when " 1413 "journaled quota options when "
1363 "quota turned on.\n"); 1414 "quota turned on");
1364 return 0; 1415 return 0;
1365 } 1416 }
1366 /* 1417 /*
@@ -1377,9 +1428,9 @@ clear_qf_name:
1377set_qf_format: 1428set_qf_format:
1378 if (sb_any_quota_loaded(sb) && 1429 if (sb_any_quota_loaded(sb) &&
1379 sbi->s_jquota_fmt != qfmt) { 1430 sbi->s_jquota_fmt != qfmt) {
1380 printk(KERN_ERR "EXT4-fs: Cannot change " 1431 ext4_msg(sb, KERN_ERR, "Cannot change "
1381 "journaled quota options when " 1432 "journaled quota options when "
1382 "quota turned on.\n"); 1433 "quota turned on");
1383 return 0; 1434 return 0;
1384 } 1435 }
1385 sbi->s_jquota_fmt = qfmt; 1436 sbi->s_jquota_fmt = qfmt;
@@ -1395,8 +1446,8 @@ set_qf_format:
1395 break; 1446 break;
1396 case Opt_noquota: 1447 case Opt_noquota:
1397 if (sb_any_quota_loaded(sb)) { 1448 if (sb_any_quota_loaded(sb)) {
1398 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1449 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1399 "options when quota turned on.\n"); 1450 "options when quota turned on");
1400 return 0; 1451 return 0;
1401 } 1452 }
1402 clear_opt(sbi->s_mount_opt, QUOTA); 1453 clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1407,8 +1458,8 @@ set_qf_format:
1407 case Opt_quota: 1458 case Opt_quota:
1408 case Opt_usrquota: 1459 case Opt_usrquota:
1409 case Opt_grpquota: 1460 case Opt_grpquota:
1410 printk(KERN_ERR 1461 ext4_msg(sb, KERN_ERR,
1411 "EXT4-fs: quota options not supported.\n"); 1462 "quota options not supported");
1412 break; 1463 break;
1413 case Opt_usrjquota: 1464 case Opt_usrjquota:
1414 case Opt_grpjquota: 1465 case Opt_grpjquota:
@@ -1416,9 +1467,8 @@ set_qf_format:
1416 case Opt_offgrpjquota: 1467 case Opt_offgrpjquota:
1417 case Opt_jqfmt_vfsold: 1468 case Opt_jqfmt_vfsold:
1418 case Opt_jqfmt_vfsv0: 1469 case Opt_jqfmt_vfsv0:
1419 printk(KERN_ERR 1470 ext4_msg(sb, KERN_ERR,
1420 "EXT4-fs: journaled quota options not " 1471 "journaled quota options not supported");
1421 "supported.\n");
1422 break; 1472 break;
1423 case Opt_noquota: 1473 case Opt_noquota:
1424 break; 1474 break;
@@ -1443,8 +1493,9 @@ set_qf_format:
1443 break; 1493 break;
1444 case Opt_resize: 1494 case Opt_resize:
1445 if (!is_remount) { 1495 if (!is_remount) {
1446 printk("EXT4-fs: resize option only available " 1496 ext4_msg(sb, KERN_ERR,
1447 "for remount\n"); 1497 "resize option only available "
1498 "for remount");
1448 return 0; 1499 return 0;
1449 } 1500 }
1450 if (match_int(&args[0], &option) != 0) 1501 if (match_int(&args[0], &option) != 0)
@@ -1474,14 +1525,21 @@ set_qf_format:
1474 case Opt_delalloc: 1525 case Opt_delalloc:
1475 set_opt(sbi->s_mount_opt, DELALLOC); 1526 set_opt(sbi->s_mount_opt, DELALLOC);
1476 break; 1527 break;
1528 case Opt_block_validity:
1529 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1530 break;
1531 case Opt_noblock_validity:
1532 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1533 break;
1477 case Opt_inode_readahead_blks: 1534 case Opt_inode_readahead_blks:
1478 if (match_int(&args[0], &option)) 1535 if (match_int(&args[0], &option))
1479 return 0; 1536 return 0;
1480 if (option < 0 || option > (1 << 30)) 1537 if (option < 0 || option > (1 << 30))
1481 return 0; 1538 return 0;
1482 if (option & (option - 1)) { 1539 if (!is_power_of_2(option)) {
1483 printk(KERN_ERR "EXT4-fs: inode_readahead_blks" 1540 ext4_msg(sb, KERN_ERR,
1484 " must be a power of 2\n"); 1541 "EXT4-fs: inode_readahead_blks"
1542 " must be a power of 2");
1485 return 0; 1543 return 0;
1486 } 1544 }
1487 sbi->s_inode_readahead_blks = option; 1545 sbi->s_inode_readahead_blks = option;
@@ -1508,9 +1566,9 @@ set_qf_format:
1508 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1566 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1509 break; 1567 break;
1510 default: 1568 default:
1511 printk(KERN_ERR 1569 ext4_msg(sb, KERN_ERR,
1512 "EXT4-fs: Unrecognized mount option \"%s\" " 1570 "Unrecognized mount option \"%s\" "
1513 "or missing value\n", p); 1571 "or missing value", p);
1514 return 0; 1572 return 0;
1515 } 1573 }
1516 } 1574 }
@@ -1528,21 +1586,21 @@ set_qf_format:
1528 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1586 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1529 (sbi->s_qf_names[GRPQUOTA] && 1587 (sbi->s_qf_names[GRPQUOTA] &&
1530 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1588 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1531 printk(KERN_ERR "EXT4-fs: old and new quota " 1589 ext4_msg(sb, KERN_ERR, "old and new quota "
1532 "format mixing.\n"); 1590 "format mixing");
1533 return 0; 1591 return 0;
1534 } 1592 }
1535 1593
1536 if (!sbi->s_jquota_fmt) { 1594 if (!sbi->s_jquota_fmt) {
1537 printk(KERN_ERR "EXT4-fs: journaled quota format " 1595 ext4_msg(sb, KERN_ERR, "journaled quota format "
1538 "not specified.\n"); 1596 "not specified");
1539 return 0; 1597 return 0;
1540 } 1598 }
1541 } else { 1599 } else {
1542 if (sbi->s_jquota_fmt) { 1600 if (sbi->s_jquota_fmt) {
1543 printk(KERN_ERR "EXT4-fs: journaled quota format " 1601 ext4_msg(sb, KERN_ERR, "journaled quota format "
1544 "specified with no journaling " 1602 "specified with no journaling "
1545 "enabled.\n"); 1603 "enabled");
1546 return 0; 1604 return 0;
1547 } 1605 }
1548 } 1606 }
@@ -1557,32 +1615,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1557 int res = 0; 1615 int res = 0;
1558 1616
1559 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1617 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1560 printk(KERN_ERR "EXT4-fs warning: revision level too high, " 1618 ext4_msg(sb, KERN_ERR, "revision level too high, "
1561 "forcing read-only mode\n"); 1619 "forcing read-only mode");
1562 res = MS_RDONLY; 1620 res = MS_RDONLY;
1563 } 1621 }
1564 if (read_only) 1622 if (read_only)
1565 return res; 1623 return res;
1566 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1624 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1567 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1625 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1568 "running e2fsck is recommended\n"); 1626 "running e2fsck is recommended");
1569 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1627 else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1570 printk(KERN_WARNING 1628 ext4_msg(sb, KERN_WARNING,
1571 "EXT4-fs warning: mounting fs with errors, " 1629 "warning: mounting fs with errors, "
1572 "running e2fsck is recommended\n"); 1630 "running e2fsck is recommended");
1573 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1631 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1574 le16_to_cpu(es->s_mnt_count) >= 1632 le16_to_cpu(es->s_mnt_count) >=
1575 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1633 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1576 printk(KERN_WARNING 1634 ext4_msg(sb, KERN_WARNING,
1577 "EXT4-fs warning: maximal mount count reached, " 1635 "warning: maximal mount count reached, "
1578 "running e2fsck is recommended\n"); 1636 "running e2fsck is recommended");
1579 else if (le32_to_cpu(es->s_checkinterval) && 1637 else if (le32_to_cpu(es->s_checkinterval) &&
1580 (le32_to_cpu(es->s_lastcheck) + 1638 (le32_to_cpu(es->s_lastcheck) +
1581 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1639 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1582 printk(KERN_WARNING 1640 ext4_msg(sb, KERN_WARNING,
1583 "EXT4-fs warning: checktime reached, " 1641 "warning: checktime reached, "
1584 "running e2fsck is recommended\n"); 1642 "running e2fsck is recommended");
1585 if (!sbi->s_journal) 1643 if (!sbi->s_journal)
1586 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1644 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1587 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1645 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1588 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1646 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
@@ -1592,7 +1650,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1592 if (sbi->s_journal) 1650 if (sbi->s_journal)
1593 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1651 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1594 1652
1595 ext4_commit_super(sb, es, 1); 1653 ext4_commit_super(sb, 1);
1596 if (test_opt(sb, DEBUG)) 1654 if (test_opt(sb, DEBUG))
1597 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1655 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1598 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1656 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
@@ -1603,11 +1661,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1603 sbi->s_mount_opt); 1661 sbi->s_mount_opt);
1604 1662
1605 if (EXT4_SB(sb)->s_journal) { 1663 if (EXT4_SB(sb)->s_journal) {
1606 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", 1664 ext4_msg(sb, KERN_INFO, "%s journal on %s",
1607 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : 1665 EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1608 "external", EXT4_SB(sb)->s_journal->j_devname); 1666 "external", EXT4_SB(sb)->s_journal->j_devname);
1609 } else { 1667 } else {
1610 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); 1668 ext4_msg(sb, KERN_INFO, "no journal");
1611 } 1669 }
1612 return res; 1670 return res;
1613} 1671}
@@ -1616,10 +1674,10 @@ static int ext4_fill_flex_info(struct super_block *sb)
1616{ 1674{
1617 struct ext4_sb_info *sbi = EXT4_SB(sb); 1675 struct ext4_sb_info *sbi = EXT4_SB(sb);
1618 struct ext4_group_desc *gdp = NULL; 1676 struct ext4_group_desc *gdp = NULL;
1619 struct buffer_head *bh;
1620 ext4_group_t flex_group_count; 1677 ext4_group_t flex_group_count;
1621 ext4_group_t flex_group; 1678 ext4_group_t flex_group;
1622 int groups_per_flex = 0; 1679 int groups_per_flex = 0;
1680 size_t size;
1623 int i; 1681 int i;
1624 1682
1625 if (!sbi->s_es->s_log_groups_per_flex) { 1683 if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1634,16 +1692,21 @@ static int ext4_fill_flex_info(struct super_block *sb)
1634 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1692 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1635 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1693 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1636 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1694 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1637 sbi->s_flex_groups = kzalloc(flex_group_count * 1695 size = flex_group_count * sizeof(struct flex_groups);
1638 sizeof(struct flex_groups), GFP_KERNEL); 1696 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1697 if (sbi->s_flex_groups == NULL) {
1698 sbi->s_flex_groups = vmalloc(size);
1699 if (sbi->s_flex_groups)
1700 memset(sbi->s_flex_groups, 0, size);
1701 }
1639 if (sbi->s_flex_groups == NULL) { 1702 if (sbi->s_flex_groups == NULL) {
1640 printk(KERN_ERR "EXT4-fs: not enough memory for " 1703 ext4_msg(sb, KERN_ERR, "not enough memory for "
1641 "%u flex groups\n", flex_group_count); 1704 "%u flex groups", flex_group_count);
1642 goto failed; 1705 goto failed;
1643 } 1706 }
1644 1707
1645 for (i = 0; i < sbi->s_groups_count; i++) { 1708 for (i = 0; i < sbi->s_groups_count; i++) {
1646 gdp = ext4_get_group_desc(sb, i, &bh); 1709 gdp = ext4_get_group_desc(sb, i, NULL);
1647 1710
1648 flex_group = ext4_flex_group(sbi, i); 1711 flex_group = ext4_flex_group(sbi, i);
1649 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, 1712 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
@@ -1724,44 +1787,44 @@ static int ext4_check_descriptors(struct super_block *sb)
1724 1787
1725 block_bitmap = ext4_block_bitmap(sb, gdp); 1788 block_bitmap = ext4_block_bitmap(sb, gdp);
1726 if (block_bitmap < first_block || block_bitmap > last_block) { 1789 if (block_bitmap < first_block || block_bitmap > last_block) {
1727 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1790 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1728 "Block bitmap for group %u not in group " 1791 "Block bitmap for group %u not in group "
1729 "(block %llu)!\n", i, block_bitmap); 1792 "(block %llu)!", i, block_bitmap);
1730 return 0; 1793 return 0;
1731 } 1794 }
1732 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1795 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1733 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1796 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1734 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1797 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1735 "Inode bitmap for group %u not in group " 1798 "Inode bitmap for group %u not in group "
1736 "(block %llu)!\n", i, inode_bitmap); 1799 "(block %llu)!", i, inode_bitmap);
1737 return 0; 1800 return 0;
1738 } 1801 }
1739 inode_table = ext4_inode_table(sb, gdp); 1802 inode_table = ext4_inode_table(sb, gdp);
1740 if (inode_table < first_block || 1803 if (inode_table < first_block ||
1741 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1804 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1742 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1805 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1743 "Inode table for group %u not in group " 1806 "Inode table for group %u not in group "
1744 "(block %llu)!\n", i, inode_table); 1807 "(block %llu)!", i, inode_table);
1745 return 0; 1808 return 0;
1746 } 1809 }
1747 spin_lock(sb_bgl_lock(sbi, i)); 1810 ext4_lock_group(sb, i);
1748 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1811 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1749 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1812 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1750 "Checksum for group %u failed (%u!=%u)\n", 1813 "Checksum for group %u failed (%u!=%u)",
1751 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1814 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1752 gdp)), le16_to_cpu(gdp->bg_checksum)); 1815 gdp)), le16_to_cpu(gdp->bg_checksum));
1753 if (!(sb->s_flags & MS_RDONLY)) { 1816 if (!(sb->s_flags & MS_RDONLY)) {
1754 spin_unlock(sb_bgl_lock(sbi, i)); 1817 ext4_unlock_group(sb, i);
1755 return 0; 1818 return 0;
1756 } 1819 }
1757 } 1820 }
1758 spin_unlock(sb_bgl_lock(sbi, i)); 1821 ext4_unlock_group(sb, i);
1759 if (!flexbg_flag) 1822 if (!flexbg_flag)
1760 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1823 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1761 } 1824 }
1762 1825
1763 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1826 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1764 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 1827 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
1765 return 1; 1828 return 1;
1766} 1829}
1767 1830
@@ -1796,8 +1859,8 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1796 } 1859 }
1797 1860
1798 if (bdev_read_only(sb->s_bdev)) { 1861 if (bdev_read_only(sb->s_bdev)) {
1799 printk(KERN_ERR "EXT4-fs: write access " 1862 ext4_msg(sb, KERN_ERR, "write access "
1800 "unavailable, skipping orphan cleanup.\n"); 1863 "unavailable, skipping orphan cleanup");
1801 return; 1864 return;
1802 } 1865 }
1803 1866
@@ -1811,8 +1874,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1811 } 1874 }
1812 1875
1813 if (s_flags & MS_RDONLY) { 1876 if (s_flags & MS_RDONLY) {
1814 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", 1877 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1815 sb->s_id);
1816 sb->s_flags &= ~MS_RDONLY; 1878 sb->s_flags &= ~MS_RDONLY;
1817 } 1879 }
1818#ifdef CONFIG_QUOTA 1880#ifdef CONFIG_QUOTA
@@ -1823,9 +1885,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1823 if (EXT4_SB(sb)->s_qf_names[i]) { 1885 if (EXT4_SB(sb)->s_qf_names[i]) {
1824 int ret = ext4_quota_on_mount(sb, i); 1886 int ret = ext4_quota_on_mount(sb, i);
1825 if (ret < 0) 1887 if (ret < 0)
1826 printk(KERN_ERR 1888 ext4_msg(sb, KERN_ERR,
1827 "EXT4-fs: Cannot turn on journaled " 1889 "Cannot turn on journaled "
1828 "quota: error %d\n", ret); 1890 "quota: error %d", ret);
1829 } 1891 }
1830 } 1892 }
1831#endif 1893#endif
@@ -1842,16 +1904,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1842 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1904 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1843 vfs_dq_init(inode); 1905 vfs_dq_init(inode);
1844 if (inode->i_nlink) { 1906 if (inode->i_nlink) {
1845 printk(KERN_DEBUG 1907 ext4_msg(sb, KERN_DEBUG,
1846 "%s: truncating inode %lu to %lld bytes\n", 1908 "%s: truncating inode %lu to %lld bytes",
1847 __func__, inode->i_ino, inode->i_size); 1909 __func__, inode->i_ino, inode->i_size);
1848 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 1910 jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1849 inode->i_ino, inode->i_size); 1911 inode->i_ino, inode->i_size);
1850 ext4_truncate(inode); 1912 ext4_truncate(inode);
1851 nr_truncates++; 1913 nr_truncates++;
1852 } else { 1914 } else {
1853 printk(KERN_DEBUG 1915 ext4_msg(sb, KERN_DEBUG,
1854 "%s: deleting unreferenced inode %lu\n", 1916 "%s: deleting unreferenced inode %lu",
1855 __func__, inode->i_ino); 1917 __func__, inode->i_ino);
1856 jbd_debug(2, "deleting unreferenced inode %lu\n", 1918 jbd_debug(2, "deleting unreferenced inode %lu\n",
1857 inode->i_ino); 1919 inode->i_ino);
@@ -1863,11 +1925,11 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1863#define PLURAL(x) (x), ((x) == 1) ? "" : "s" 1925#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1864 1926
1865 if (nr_orphans) 1927 if (nr_orphans)
1866 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1928 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1867 sb->s_id, PLURAL(nr_orphans)); 1929 PLURAL(nr_orphans));
1868 if (nr_truncates) 1930 if (nr_truncates)
1869 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", 1931 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1870 sb->s_id, PLURAL(nr_truncates)); 1932 PLURAL(nr_truncates));
1871#ifdef CONFIG_QUOTA 1933#ifdef CONFIG_QUOTA
1872 /* Turn quotas off */ 1934 /* Turn quotas off */
1873 for (i = 0; i < MAXQUOTAS; i++) { 1935 for (i = 0; i < MAXQUOTAS; i++) {
@@ -1877,6 +1939,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1877#endif 1939#endif
1878 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1940 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1879} 1941}
1942
1880/* 1943/*
1881 * Maximal extent format file size. 1944 * Maximal extent format file size.
1882 * Resulting logical blkno at s_maxbytes must fit in our on-disk 1945 * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -1927,19 +1990,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1927 loff_t res = EXT4_NDIR_BLOCKS; 1990 loff_t res = EXT4_NDIR_BLOCKS;
1928 int meta_blocks; 1991 int meta_blocks;
1929 loff_t upper_limit; 1992 loff_t upper_limit;
1930 /* This is calculated to be the largest file size for a 1993 /* This is calculated to be the largest file size for a dense, block
1931 * dense, bitmapped file such that the total number of 1994 * mapped file such that the file's total number of 512-byte sectors,
1932 * sectors in the file, including data and all indirect blocks, 1995 * including data and all indirect blocks, does not exceed (2^48 - 1).
1933 * does not exceed 2^48 -1 1996 *
1934 * __u32 i_blocks_lo and _u16 i_blocks_high representing the 1997 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
1935 * total number of 512 bytes blocks of the file 1998 * number of 512-byte sectors of the file.
1936 */ 1999 */
1937 2000
1938 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2001 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1939 /* 2002 /*
1940 * !has_huge_files or CONFIG_LBD is not enabled 2003 * !has_huge_files or CONFIG_LBD not enabled implies that
1941 * implies the inode i_block represent total blocks in 2004 * the inode i_block field represents total file blocks in
1942 * 512 bytes 32 == size of vfs inode i_blocks * 8 2005 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
1943 */ 2006 */
1944 upper_limit = (1LL << 32) - 1; 2007 upper_limit = (1LL << 32) - 1;
1945 2008
@@ -1981,7 +2044,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1981} 2044}
1982 2045
1983static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2046static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1984 ext4_fsblk_t logical_sb_block, int nr) 2047 ext4_fsblk_t logical_sb_block, int nr)
1985{ 2048{
1986 struct ext4_sb_info *sbi = EXT4_SB(sb); 2049 struct ext4_sb_info *sbi = EXT4_SB(sb);
1987 ext4_group_t bg, first_meta_bg; 2050 ext4_group_t bg, first_meta_bg;
@@ -1995,6 +2058,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1995 bg = sbi->s_desc_per_block * nr; 2058 bg = sbi->s_desc_per_block * nr;
1996 if (ext4_bg_has_super(sb, bg)) 2059 if (ext4_bg_has_super(sb, bg))
1997 has_super = 1; 2060 has_super = 1;
2061
1998 return (has_super + ext4_group_first_block_no(sb, bg)); 2062 return (has_super + ext4_group_first_block_no(sb, bg));
1999} 2063}
2000 2064
@@ -2091,8 +2155,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2091 if (parse_strtoul(buf, 0x40000000, &t)) 2155 if (parse_strtoul(buf, 0x40000000, &t))
2092 return -EINVAL; 2156 return -EINVAL;
2093 2157
2094 /* inode_readahead_blks must be a power of 2 */ 2158 if (!is_power_of_2(t))
2095 if (t & (t-1))
2096 return -EINVAL; 2159 return -EINVAL;
2097 2160
2098 sbi->s_inode_readahead_blks = t; 2161 sbi->s_inode_readahead_blks = t;
@@ -2100,7 +2163,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2100} 2163}
2101 2164
2102static ssize_t sbi_ui_show(struct ext4_attr *a, 2165static ssize_t sbi_ui_show(struct ext4_attr *a,
2103 struct ext4_sb_info *sbi, char *buf) 2166 struct ext4_sb_info *sbi, char *buf)
2104{ 2167{
2105 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2168 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2106 2169
@@ -2205,7 +2268,6 @@ static struct kobj_type ext4_ktype = {
2205static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2268static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2206 __releases(kernel_lock) 2269 __releases(kernel_lock)
2207 __acquires(kernel_lock) 2270 __acquires(kernel_lock)
2208
2209{ 2271{
2210 struct buffer_head *bh; 2272 struct buffer_head *bh;
2211 struct ext4_super_block *es = NULL; 2273 struct ext4_super_block *es = NULL;
@@ -2256,7 +2318,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2256 2318
2257 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2319 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2258 if (!blocksize) { 2320 if (!blocksize) {
2259 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 2321 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
2260 goto out_fail; 2322 goto out_fail;
2261 } 2323 }
2262 2324
@@ -2272,7 +2334,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2272 } 2334 }
2273 2335
2274 if (!(bh = sb_bread(sb, logical_sb_block))) { 2336 if (!(bh = sb_bread(sb, logical_sb_block))) {
2275 printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); 2337 ext4_msg(sb, KERN_ERR, "unable to read superblock");
2276 goto out_fail; 2338 goto out_fail;
2277 } 2339 }
2278 /* 2340 /*
@@ -2321,6 +2383,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2321 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2383 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2322 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2384 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2323 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2385 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2386 sbi->s_mb_history_max = default_mb_history_length;
2324 2387
2325 set_opt(sbi->s_mount_opt, BARRIER); 2388 set_opt(sbi->s_mount_opt, BARRIER);
2326 2389
@@ -2330,7 +2393,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2330 */ 2393 */
2331 set_opt(sbi->s_mount_opt, DELALLOC); 2394 set_opt(sbi->s_mount_opt, DELALLOC);
2332 2395
2333
2334 if (!parse_options((char *) data, sb, &journal_devnum, 2396 if (!parse_options((char *) data, sb, &journal_devnum,
2335 &journal_ioprio, NULL, 0)) 2397 &journal_ioprio, NULL, 0))
2336 goto failed_mount; 2398 goto failed_mount;
@@ -2342,9 +2404,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2342 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2404 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2343 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2405 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2344 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2406 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2345 printk(KERN_WARNING 2407 ext4_msg(sb, KERN_WARNING,
2346 "EXT4-fs warning: feature flags set on rev 0 fs, " 2408 "feature flags set on rev 0 fs, "
2347 "running e2fsck is recommended\n"); 2409 "running e2fsck is recommended");
2348 2410
2349 /* 2411 /*
2350 * Check feature flags regardless of the revision level, since we 2412 * Check feature flags regardless of the revision level, since we
@@ -2353,16 +2415,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2353 */ 2415 */
2354 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 2416 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2355 if (features) { 2417 if (features) {
2356 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 2418 ext4_msg(sb, KERN_ERR,
2357 "unsupported optional features (%x).\n", sb->s_id, 2419 "Couldn't mount because of "
2420 "unsupported optional features (%x)",
2358 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2421 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2359 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2422 ~EXT4_FEATURE_INCOMPAT_SUPP));
2360 goto failed_mount; 2423 goto failed_mount;
2361 } 2424 }
2362 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 2425 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2363 if (!(sb->s_flags & MS_RDONLY) && features) { 2426 if (!(sb->s_flags & MS_RDONLY) && features) {
2364 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 2427 ext4_msg(sb, KERN_ERR,
2365 "unsupported optional features (%x).\n", sb->s_id, 2428 "Couldn't mount RDWR because of "
2429 "unsupported optional features (%x)",
2366 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2430 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2367 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2431 ~EXT4_FEATURE_RO_COMPAT_SUPP));
2368 goto failed_mount; 2432 goto failed_mount;
@@ -2376,9 +2440,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2376 */ 2440 */
2377 if (sizeof(root->i_blocks) < sizeof(u64) && 2441 if (sizeof(root->i_blocks) < sizeof(u64) &&
2378 !(sb->s_flags & MS_RDONLY)) { 2442 !(sb->s_flags & MS_RDONLY)) {
2379 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2443 ext4_msg(sb, KERN_ERR, "Filesystem with huge "
2380 "files cannot be mounted read-write " 2444 "files cannot be mounted read-write "
2381 "without CONFIG_LBD.\n", sb->s_id); 2445 "without CONFIG_LBD");
2382 goto failed_mount; 2446 goto failed_mount;
2383 } 2447 }
2384 } 2448 }
@@ -2386,17 +2450,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2386 2450
2387 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2451 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2388 blocksize > EXT4_MAX_BLOCK_SIZE) { 2452 blocksize > EXT4_MAX_BLOCK_SIZE) {
2389 printk(KERN_ERR 2453 ext4_msg(sb, KERN_ERR,
2390 "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", 2454 "Unsupported filesystem blocksize %d", blocksize);
2391 blocksize, sb->s_id);
2392 goto failed_mount; 2455 goto failed_mount;
2393 } 2456 }
2394 2457
2395 if (sb->s_blocksize != blocksize) { 2458 if (sb->s_blocksize != blocksize) {
2396
2397 /* Validate the filesystem blocksize */ 2459 /* Validate the filesystem blocksize */
2398 if (!sb_set_blocksize(sb, blocksize)) { 2460 if (!sb_set_blocksize(sb, blocksize)) {
2399 printk(KERN_ERR "EXT4-fs: bad block size %d.\n", 2461 ext4_msg(sb, KERN_ERR, "bad block size %d",
2400 blocksize); 2462 blocksize);
2401 goto failed_mount; 2463 goto failed_mount;
2402 } 2464 }
@@ -2406,15 +2468,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2406 offset = do_div(logical_sb_block, blocksize); 2468 offset = do_div(logical_sb_block, blocksize);
2407 bh = sb_bread(sb, logical_sb_block); 2469 bh = sb_bread(sb, logical_sb_block);
2408 if (!bh) { 2470 if (!bh) {
2409 printk(KERN_ERR 2471 ext4_msg(sb, KERN_ERR,
2410 "EXT4-fs: Can't read superblock on 2nd try.\n"); 2472 "Can't read superblock on 2nd try");
2411 goto failed_mount; 2473 goto failed_mount;
2412 } 2474 }
2413 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2475 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2414 sbi->s_es = es; 2476 sbi->s_es = es;
2415 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2477 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2416 printk(KERN_ERR 2478 ext4_msg(sb, KERN_ERR,
2417 "EXT4-fs: Magic mismatch, very weird !\n"); 2479 "Magic mismatch, very weird!");
2418 goto failed_mount; 2480 goto failed_mount;
2419 } 2481 }
2420 } 2482 }
@@ -2432,30 +2494,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2432 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2494 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2433 (!is_power_of_2(sbi->s_inode_size)) || 2495 (!is_power_of_2(sbi->s_inode_size)) ||
2434 (sbi->s_inode_size > blocksize)) { 2496 (sbi->s_inode_size > blocksize)) {
2435 printk(KERN_ERR 2497 ext4_msg(sb, KERN_ERR,
2436 "EXT4-fs: unsupported inode size: %d\n", 2498 "unsupported inode size: %d",
2437 sbi->s_inode_size); 2499 sbi->s_inode_size);
2438 goto failed_mount; 2500 goto failed_mount;
2439 } 2501 }
2440 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2502 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2441 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2503 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2442 } 2504 }
2505
2443 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2506 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2444 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2507 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2445 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2508 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2446 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2509 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2447 !is_power_of_2(sbi->s_desc_size)) { 2510 !is_power_of_2(sbi->s_desc_size)) {
2448 printk(KERN_ERR 2511 ext4_msg(sb, KERN_ERR,
2449 "EXT4-fs: unsupported descriptor size %lu\n", 2512 "unsupported descriptor size %lu",
2450 sbi->s_desc_size); 2513 sbi->s_desc_size);
2451 goto failed_mount; 2514 goto failed_mount;
2452 } 2515 }
2453 } else 2516 } else
2454 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2517 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2518
2455 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2519 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2456 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2520 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2457 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2521 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2458 goto cantfind_ext4; 2522 goto cantfind_ext4;
2523
2459 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2524 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2460 if (sbi->s_inodes_per_block == 0) 2525 if (sbi->s_inodes_per_block == 0)
2461 goto cantfind_ext4; 2526 goto cantfind_ext4;
@@ -2466,6 +2531,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2466 sbi->s_mount_state = le16_to_cpu(es->s_state); 2531 sbi->s_mount_state = le16_to_cpu(es->s_state);
2467 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2532 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2468 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2533 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2534
2469 for (i = 0; i < 4; i++) 2535 for (i = 0; i < 4; i++)
2470 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2536 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2471 sbi->s_def_hash_version = es->s_def_hash_version; 2537 sbi->s_def_hash_version = es->s_def_hash_version;
@@ -2483,25 +2549,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2483 } 2549 }
2484 2550
2485 if (sbi->s_blocks_per_group > blocksize * 8) { 2551 if (sbi->s_blocks_per_group > blocksize * 8) {
2486 printk(KERN_ERR 2552 ext4_msg(sb, KERN_ERR,
2487 "EXT4-fs: #blocks per group too big: %lu\n", 2553 "#blocks per group too big: %lu",
2488 sbi->s_blocks_per_group); 2554 sbi->s_blocks_per_group);
2489 goto failed_mount; 2555 goto failed_mount;
2490 } 2556 }
2491 if (sbi->s_inodes_per_group > blocksize * 8) { 2557 if (sbi->s_inodes_per_group > blocksize * 8) {
2492 printk(KERN_ERR 2558 ext4_msg(sb, KERN_ERR,
2493 "EXT4-fs: #inodes per group too big: %lu\n", 2559 "#inodes per group too big: %lu",
2494 sbi->s_inodes_per_group); 2560 sbi->s_inodes_per_group);
2495 goto failed_mount; 2561 goto failed_mount;
2496 } 2562 }
2497 2563
2498 if (ext4_blocks_count(es) > 2564 if (ext4_blocks_count(es) >
2499 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 2565 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2500 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 2566 ext4_msg(sb, KERN_ERR, "filesystem"
2501 " too large to mount safely\n", sb->s_id); 2567 " too large to mount safely");
2502 if (sizeof(sector_t) < 8) 2568 if (sizeof(sector_t) < 8)
2503 printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " 2569 ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
2504 "enabled\n");
2505 goto failed_mount; 2570 goto failed_mount;
2506 } 2571 }
2507 2572
@@ -2511,21 +2576,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2511 /* check blocks count against device size */ 2576 /* check blocks count against device size */
2512 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 2577 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2513 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 2578 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2514 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " 2579 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
2515 "exceeds size of device (%llu blocks)\n", 2580 "exceeds size of device (%llu blocks)",
2516 ext4_blocks_count(es), blocks_count); 2581 ext4_blocks_count(es), blocks_count);
2517 goto failed_mount; 2582 goto failed_mount;
2518 } 2583 }
2519 2584
2520 /* 2585 /*
2521 * It makes no sense for the first data block to be beyond the end 2586 * It makes no sense for the first data block to be beyond the end
2522 * of the filesystem. 2587 * of the filesystem.
2523 */ 2588 */
2524 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2589 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2525 printk(KERN_WARNING "EXT4-fs: bad geometry: first data" 2590 ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
2526 "block %u is beyond end of filesystem (%llu)\n", 2591 "block %u is beyond end of filesystem (%llu)",
2527 le32_to_cpu(es->s_first_data_block), 2592 le32_to_cpu(es->s_first_data_block),
2528 ext4_blocks_count(es)); 2593 ext4_blocks_count(es));
2529 goto failed_mount; 2594 goto failed_mount;
2530 } 2595 }
2531 blocks_count = (ext4_blocks_count(es) - 2596 blocks_count = (ext4_blocks_count(es) -
@@ -2533,9 +2598,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2533 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2598 EXT4_BLOCKS_PER_GROUP(sb) - 1);
2534 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2599 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2535 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2600 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2536 printk(KERN_WARNING "EXT4-fs: groups count too large: %u " 2601 ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
2537 "(block count %llu, first data block %u, " 2602 "(block count %llu, first data block %u, "
2538 "blocks per group %lu)\n", sbi->s_groups_count, 2603 "blocks per group %lu)", sbi->s_groups_count,
2539 ext4_blocks_count(es), 2604 ext4_blocks_count(es),
2540 le32_to_cpu(es->s_first_data_block), 2605 le32_to_cpu(es->s_first_data_block),
2541 EXT4_BLOCKS_PER_GROUP(sb)); 2606 EXT4_BLOCKS_PER_GROUP(sb));
@@ -2547,7 +2612,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2547 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2612 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2548 GFP_KERNEL); 2613 GFP_KERNEL);
2549 if (sbi->s_group_desc == NULL) { 2614 if (sbi->s_group_desc == NULL) {
2550 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 2615 ext4_msg(sb, KERN_ERR, "not enough memory");
2551 goto failed_mount; 2616 goto failed_mount;
2552 } 2617 }
2553 2618
@@ -2562,21 +2627,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2562 block = descriptor_loc(sb, logical_sb_block, i); 2627 block = descriptor_loc(sb, logical_sb_block, i);
2563 sbi->s_group_desc[i] = sb_bread(sb, block); 2628 sbi->s_group_desc[i] = sb_bread(sb, block);
2564 if (!sbi->s_group_desc[i]) { 2629 if (!sbi->s_group_desc[i]) {
2565 printk(KERN_ERR "EXT4-fs: " 2630 ext4_msg(sb, KERN_ERR,
2566 "can't read group descriptor %d\n", i); 2631 "can't read group descriptor %d", i);
2567 db_count = i; 2632 db_count = i;
2568 goto failed_mount2; 2633 goto failed_mount2;
2569 } 2634 }
2570 } 2635 }
2571 if (!ext4_check_descriptors(sb)) { 2636 if (!ext4_check_descriptors(sb)) {
2572 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2637 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
2573 goto failed_mount2; 2638 goto failed_mount2;
2574 } 2639 }
2575 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2640 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2576 if (!ext4_fill_flex_info(sb)) { 2641 if (!ext4_fill_flex_info(sb)) {
2577 printk(KERN_ERR 2642 ext4_msg(sb, KERN_ERR,
2578 "EXT4-fs: unable to initialize " 2643 "unable to initialize "
2579 "flex_bg meta info!\n"); 2644 "flex_bg meta info!");
2580 goto failed_mount2; 2645 goto failed_mount2;
2581 } 2646 }
2582 2647
@@ -2598,7 +2663,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2598 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2663 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2599 } 2664 }
2600 if (err) { 2665 if (err) {
2601 printk(KERN_ERR "EXT4-fs: insufficient memory\n"); 2666 ext4_msg(sb, KERN_ERR, "insufficient memory");
2602 goto failed_mount3; 2667 goto failed_mount3;
2603 } 2668 }
2604 2669
@@ -2607,7 +2672,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2607 /* 2672 /*
2608 * set up enough so that it can read an inode 2673 * set up enough so that it can read an inode
2609 */ 2674 */
2610 sb->s_op = &ext4_sops; 2675 if (!test_opt(sb, NOLOAD) &&
2676 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2677 sb->s_op = &ext4_sops;
2678 else
2679 sb->s_op = &ext4_nojournal_sops;
2611 sb->s_export_op = &ext4_export_ops; 2680 sb->s_export_op = &ext4_export_ops;
2612 sb->s_xattr = ext4_xattr_handlers; 2681 sb->s_xattr = ext4_xattr_handlers;
2613#ifdef CONFIG_QUOTA 2682#ifdef CONFIG_QUOTA
@@ -2615,6 +2684,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2615 sb->dq_op = &ext4_quota_operations; 2684 sb->dq_op = &ext4_quota_operations;
2616#endif 2685#endif
2617 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2686 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2687 mutex_init(&sbi->s_orphan_lock);
2688 mutex_init(&sbi->s_resize_lock);
2618 2689
2619 sb->s_root = NULL; 2690 sb->s_root = NULL;
2620 2691
@@ -2632,13 +2703,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2632 goto failed_mount3; 2703 goto failed_mount3;
2633 if (!(sb->s_flags & MS_RDONLY) && 2704 if (!(sb->s_flags & MS_RDONLY) &&
2634 EXT4_SB(sb)->s_journal->j_failed_commit) { 2705 EXT4_SB(sb)->s_journal->j_failed_commit) {
2635 printk(KERN_CRIT "EXT4-fs error (device %s): " 2706 ext4_msg(sb, KERN_CRIT, "error: "
2636 "ext4_fill_super: Journal transaction " 2707 "ext4_fill_super: Journal transaction "
2637 "%u is corrupt\n", sb->s_id, 2708 "%u is corrupt",
2638 EXT4_SB(sb)->s_journal->j_failed_commit); 2709 EXT4_SB(sb)->s_journal->j_failed_commit);
2639 if (test_opt(sb, ERRORS_RO)) { 2710 if (test_opt(sb, ERRORS_RO)) {
2640 printk(KERN_CRIT 2711 ext4_msg(sb, KERN_CRIT,
2641 "Mounting filesystem read-only\n"); 2712 "Mounting filesystem read-only");
2642 sb->s_flags |= MS_RDONLY; 2713 sb->s_flags |= MS_RDONLY;
2643 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2714 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2644 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2715 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2646,14 +2717,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2646 if (test_opt(sb, ERRORS_PANIC)) { 2717 if (test_opt(sb, ERRORS_PANIC)) {
2647 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2718 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2648 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2719 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2649 ext4_commit_super(sb, es, 1); 2720 ext4_commit_super(sb, 1);
2650 goto failed_mount4; 2721 goto failed_mount4;
2651 } 2722 }
2652 } 2723 }
2653 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2724 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2654 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2725 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2655 printk(KERN_ERR "EXT4-fs: required journal recovery " 2726 ext4_msg(sb, KERN_ERR, "required journal recovery "
2656 "suppressed and not mounted read-only\n"); 2727 "suppressed and not mounted read-only");
2657 goto failed_mount4; 2728 goto failed_mount4;
2658 } else { 2729 } else {
2659 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2730 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
@@ -2666,7 +2737,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2666 if (ext4_blocks_count(es) > 0xffffffffULL && 2737 if (ext4_blocks_count(es) > 0xffffffffULL &&
2667 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2738 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2668 JBD2_FEATURE_INCOMPAT_64BIT)) { 2739 JBD2_FEATURE_INCOMPAT_64BIT)) {
2669 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); 2740 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
2670 goto failed_mount4; 2741 goto failed_mount4;
2671 } 2742 }
2672 2743
@@ -2704,8 +2775,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2704 case EXT4_MOUNT_WRITEBACK_DATA: 2775 case EXT4_MOUNT_WRITEBACK_DATA:
2705 if (!jbd2_journal_check_available_features 2776 if (!jbd2_journal_check_available_features
2706 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2777 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2707 printk(KERN_ERR "EXT4-fs: Journal does not support " 2778 ext4_msg(sb, KERN_ERR, "Journal does not support "
2708 "requested data journaling mode\n"); 2779 "requested data journaling mode");
2709 goto failed_mount4; 2780 goto failed_mount4;
2710 } 2781 }
2711 default: 2782 default:
@@ -2717,8 +2788,8 @@ no_journal:
2717 2788
2718 if (test_opt(sb, NOBH)) { 2789 if (test_opt(sb, NOBH)) {
2719 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2790 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2720 printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " 2791 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
2721 "its supported only with writeback mode\n"); 2792 "its supported only with writeback mode");
2722 clear_opt(sbi->s_mount_opt, NOBH); 2793 clear_opt(sbi->s_mount_opt, NOBH);
2723 } 2794 }
2724 } 2795 }
@@ -2729,18 +2800,18 @@ no_journal:
2729 2800
2730 root = ext4_iget(sb, EXT4_ROOT_INO); 2801 root = ext4_iget(sb, EXT4_ROOT_INO);
2731 if (IS_ERR(root)) { 2802 if (IS_ERR(root)) {
2732 printk(KERN_ERR "EXT4-fs: get root inode failed\n"); 2803 ext4_msg(sb, KERN_ERR, "get root inode failed");
2733 ret = PTR_ERR(root); 2804 ret = PTR_ERR(root);
2734 goto failed_mount4; 2805 goto failed_mount4;
2735 } 2806 }
2736 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2807 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2737 iput(root); 2808 iput(root);
2738 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); 2809 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
2739 goto failed_mount4; 2810 goto failed_mount4;
2740 } 2811 }
2741 sb->s_root = d_alloc_root(root); 2812 sb->s_root = d_alloc_root(root);
2742 if (!sb->s_root) { 2813 if (!sb->s_root) {
2743 printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); 2814 ext4_msg(sb, KERN_ERR, "get root dentry failed");
2744 iput(root); 2815 iput(root);
2745 ret = -ENOMEM; 2816 ret = -ENOMEM;
2746 goto failed_mount4; 2817 goto failed_mount4;
@@ -2769,22 +2840,29 @@ no_journal:
2769 sbi->s_inode_size) { 2840 sbi->s_inode_size) {
2770 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2841 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2771 EXT4_GOOD_OLD_INODE_SIZE; 2842 EXT4_GOOD_OLD_INODE_SIZE;
2772 printk(KERN_INFO "EXT4-fs: required extra inode space not" 2843 ext4_msg(sb, KERN_INFO, "required extra inode space not"
2773 "available.\n"); 2844 "available");
2774 } 2845 }
2775 2846
2776 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2847 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2777 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2848 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
2778 "requested data journaling mode\n"); 2849 "requested data journaling mode");
2779 clear_opt(sbi->s_mount_opt, DELALLOC); 2850 clear_opt(sbi->s_mount_opt, DELALLOC);
2780 } else if (test_opt(sb, DELALLOC)) 2851 } else if (test_opt(sb, DELALLOC))
2781 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); 2852 ext4_msg(sb, KERN_INFO, "delayed allocation enabled");
2853
2854 err = ext4_setup_system_zone(sb);
2855 if (err) {
2856 ext4_msg(sb, KERN_ERR, "failed to initialize system "
2857 "zone (%d)\n", err);
2858 goto failed_mount4;
2859 }
2782 2860
2783 ext4_ext_init(sb); 2861 ext4_ext_init(sb);
2784 err = ext4_mb_init(sb, needs_recovery); 2862 err = ext4_mb_init(sb, needs_recovery);
2785 if (err) { 2863 if (err) {
2786 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", 2864 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
2787 err); 2865 err);
2788 goto failed_mount4; 2866 goto failed_mount4;
2789 } 2867 }
2790 2868
@@ -2798,19 +2876,11 @@ no_journal:
2798 goto failed_mount4; 2876 goto failed_mount4;
2799 }; 2877 };
2800 2878
2801 /*
2802 * akpm: core read_super() calls in here with the superblock locked.
2803 * That deadlocks, because orphan cleanup needs to lock the superblock
2804 * in numerous places. Here we just pop the lock - it's relatively
2805 * harmless, because we are now ready to accept write_super() requests,
2806 * and aviro says that's the only reason for hanging onto the
2807 * superblock lock.
2808 */
2809 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2879 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2810 ext4_orphan_cleanup(sb, es); 2880 ext4_orphan_cleanup(sb, es);
2811 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2881 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2812 if (needs_recovery) { 2882 if (needs_recovery) {
2813 printk(KERN_INFO "EXT4-fs: recovery complete.\n"); 2883 ext4_msg(sb, KERN_INFO, "recovery complete");
2814 ext4_mark_recovery_complete(sb, es); 2884 ext4_mark_recovery_complete(sb, es);
2815 } 2885 }
2816 if (EXT4_SB(sb)->s_journal) { 2886 if (EXT4_SB(sb)->s_journal) {
@@ -2823,25 +2893,30 @@ no_journal:
2823 } else 2893 } else
2824 descr = "out journal"; 2894 descr = "out journal";
2825 2895
2826 printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", 2896 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
2827 sb->s_id, descr);
2828 2897
2829 lock_kernel(); 2898 lock_kernel();
2830 return 0; 2899 return 0;
2831 2900
2832cantfind_ext4: 2901cantfind_ext4:
2833 if (!silent) 2902 if (!silent)
2834 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", 2903 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
2835 sb->s_id);
2836 goto failed_mount; 2904 goto failed_mount;
2837 2905
2838failed_mount4: 2906failed_mount4:
2839 printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); 2907 ext4_msg(sb, KERN_ERR, "mount failed");
2908 ext4_release_system_zone(sb);
2840 if (sbi->s_journal) { 2909 if (sbi->s_journal) {
2841 jbd2_journal_destroy(sbi->s_journal); 2910 jbd2_journal_destroy(sbi->s_journal);
2842 sbi->s_journal = NULL; 2911 sbi->s_journal = NULL;
2843 } 2912 }
2844failed_mount3: 2913failed_mount3:
2914 if (sbi->s_flex_groups) {
2915 if (is_vmalloc_addr(sbi->s_flex_groups))
2916 vfree(sbi->s_flex_groups);
2917 else
2918 kfree(sbi->s_flex_groups);
2919 }
2845 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2920 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2846 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2921 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2847 percpu_counter_destroy(&sbi->s_dirs_counter); 2922 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -2862,6 +2937,7 @@ failed_mount:
2862 brelse(bh); 2937 brelse(bh);
2863out_fail: 2938out_fail:
2864 sb->s_fs_info = NULL; 2939 sb->s_fs_info = NULL;
2940 kfree(sbi->s_blockgroup_lock);
2865 kfree(sbi); 2941 kfree(sbi);
2866 lock_kernel(); 2942 lock_kernel();
2867 return ret; 2943 return ret;
@@ -2906,27 +2982,27 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2906 2982
2907 journal_inode = ext4_iget(sb, journal_inum); 2983 journal_inode = ext4_iget(sb, journal_inum);
2908 if (IS_ERR(journal_inode)) { 2984 if (IS_ERR(journal_inode)) {
2909 printk(KERN_ERR "EXT4-fs: no journal found.\n"); 2985 ext4_msg(sb, KERN_ERR, "no journal found");
2910 return NULL; 2986 return NULL;
2911 } 2987 }
2912 if (!journal_inode->i_nlink) { 2988 if (!journal_inode->i_nlink) {
2913 make_bad_inode(journal_inode); 2989 make_bad_inode(journal_inode);
2914 iput(journal_inode); 2990 iput(journal_inode);
2915 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); 2991 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
2916 return NULL; 2992 return NULL;
2917 } 2993 }
2918 2994
2919 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 2995 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2920 journal_inode, journal_inode->i_size); 2996 journal_inode, journal_inode->i_size);
2921 if (!S_ISREG(journal_inode->i_mode)) { 2997 if (!S_ISREG(journal_inode->i_mode)) {
2922 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 2998 ext4_msg(sb, KERN_ERR, "invalid journal inode");
2923 iput(journal_inode); 2999 iput(journal_inode);
2924 return NULL; 3000 return NULL;
2925 } 3001 }
2926 3002
2927 journal = jbd2_journal_init_inode(journal_inode); 3003 journal = jbd2_journal_init_inode(journal_inode);
2928 if (!journal) { 3004 if (!journal) {
2929 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); 3005 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
2930 iput(journal_inode); 3006 iput(journal_inode);
2931 return NULL; 3007 return NULL;
2932 } 3008 }
@@ -2950,22 +3026,22 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2950 3026
2951 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3027 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2952 3028
2953 bdev = ext4_blkdev_get(j_dev); 3029 bdev = ext4_blkdev_get(j_dev, sb);
2954 if (bdev == NULL) 3030 if (bdev == NULL)
2955 return NULL; 3031 return NULL;
2956 3032
2957 if (bd_claim(bdev, sb)) { 3033 if (bd_claim(bdev, sb)) {
2958 printk(KERN_ERR 3034 ext4_msg(sb, KERN_ERR,
2959 "EXT4-fs: failed to claim external journal device.\n"); 3035 "failed to claim external journal device");
2960 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 3036 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2961 return NULL; 3037 return NULL;
2962 } 3038 }
2963 3039
2964 blocksize = sb->s_blocksize; 3040 blocksize = sb->s_blocksize;
2965 hblock = bdev_hardsect_size(bdev); 3041 hblock = bdev_logical_block_size(bdev);
2966 if (blocksize < hblock) { 3042 if (blocksize < hblock) {
2967 printk(KERN_ERR 3043 ext4_msg(sb, KERN_ERR,
2968 "EXT4-fs: blocksize too small for journal device.\n"); 3044 "blocksize too small for journal device");
2969 goto out_bdev; 3045 goto out_bdev;
2970 } 3046 }
2971 3047
@@ -2973,8 +3049,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2973 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3049 offset = EXT4_MIN_BLOCK_SIZE % blocksize;
2974 set_blocksize(bdev, blocksize); 3050 set_blocksize(bdev, blocksize);
2975 if (!(bh = __bread(bdev, sb_block, blocksize))) { 3051 if (!(bh = __bread(bdev, sb_block, blocksize))) {
2976 printk(KERN_ERR "EXT4-fs: couldn't read superblock of " 3052 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
2977 "external journal\n"); 3053 "external journal");
2978 goto out_bdev; 3054 goto out_bdev;
2979 } 3055 }
2980 3056
@@ -2982,14 +3058,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2982 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3058 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
2983 !(le32_to_cpu(es->s_feature_incompat) & 3059 !(le32_to_cpu(es->s_feature_incompat) &
2984 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3060 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2985 printk(KERN_ERR "EXT4-fs: external journal has " 3061 ext4_msg(sb, KERN_ERR, "external journal has "
2986 "bad superblock\n"); 3062 "bad superblock");
2987 brelse(bh); 3063 brelse(bh);
2988 goto out_bdev; 3064 goto out_bdev;
2989 } 3065 }
2990 3066
2991 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3067 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2992 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); 3068 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
2993 brelse(bh); 3069 brelse(bh);
2994 goto out_bdev; 3070 goto out_bdev;
2995 } 3071 }
@@ -3001,25 +3077,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
3001 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3077 journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3002 start, len, blocksize); 3078 start, len, blocksize);
3003 if (!journal) { 3079 if (!journal) {
3004 printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); 3080 ext4_msg(sb, KERN_ERR, "failed to create device journal");
3005 goto out_bdev; 3081 goto out_bdev;
3006 } 3082 }
3007 journal->j_private = sb; 3083 journal->j_private = sb;
3008 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3084 ll_rw_block(READ, 1, &journal->j_sb_buffer);
3009 wait_on_buffer(journal->j_sb_buffer); 3085 wait_on_buffer(journal->j_sb_buffer);
3010 if (!buffer_uptodate(journal->j_sb_buffer)) { 3086 if (!buffer_uptodate(journal->j_sb_buffer)) {
3011 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); 3087 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
3012 goto out_journal; 3088 goto out_journal;
3013 } 3089 }
3014 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3090 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3015 printk(KERN_ERR "EXT4-fs: External journal has more than one " 3091 ext4_msg(sb, KERN_ERR, "External journal has more than one "
3016 "user (unsupported) - %d\n", 3092 "user (unsupported) - %d",
3017 be32_to_cpu(journal->j_superblock->s_nr_users)); 3093 be32_to_cpu(journal->j_superblock->s_nr_users));
3018 goto out_journal; 3094 goto out_journal;
3019 } 3095 }
3020 EXT4_SB(sb)->journal_bdev = bdev; 3096 EXT4_SB(sb)->journal_bdev = bdev;
3021 ext4_init_journal_params(sb, journal); 3097 ext4_init_journal_params(sb, journal);
3022 return journal; 3098 return journal;
3099
3023out_journal: 3100out_journal:
3024 jbd2_journal_destroy(journal); 3101 jbd2_journal_destroy(journal);
3025out_bdev: 3102out_bdev:
@@ -3041,8 +3118,8 @@ static int ext4_load_journal(struct super_block *sb,
3041 3118
3042 if (journal_devnum && 3119 if (journal_devnum &&
3043 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3120 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3044 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 3121 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
3045 "numbers have changed\n"); 3122 "numbers have changed");
3046 journal_dev = new_decode_dev(journal_devnum); 3123 journal_dev = new_decode_dev(journal_devnum);
3047 } else 3124 } else
3048 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3125 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -3054,24 +3131,23 @@ static int ext4_load_journal(struct super_block *sb,
3054 * crash? For recovery, we need to check in advance whether we 3131 * crash? For recovery, we need to check in advance whether we
3055 * can get read-write access to the device. 3132 * can get read-write access to the device.
3056 */ 3133 */
3057
3058 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3134 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3059 if (sb->s_flags & MS_RDONLY) { 3135 if (sb->s_flags & MS_RDONLY) {
3060 printk(KERN_INFO "EXT4-fs: INFO: recovery " 3136 ext4_msg(sb, KERN_INFO, "INFO: recovery "
3061 "required on readonly filesystem.\n"); 3137 "required on readonly filesystem");
3062 if (really_read_only) { 3138 if (really_read_only) {
3063 printk(KERN_ERR "EXT4-fs: write access " 3139 ext4_msg(sb, KERN_ERR, "write access "
3064 "unavailable, cannot proceed.\n"); 3140 "unavailable, cannot proceed");
3065 return -EROFS; 3141 return -EROFS;
3066 } 3142 }
3067 printk(KERN_INFO "EXT4-fs: write access will " 3143 ext4_msg(sb, KERN_INFO, "write access will "
3068 "be enabled during recovery.\n"); 3144 "be enabled during recovery");
3069 } 3145 }
3070 } 3146 }
3071 3147
3072 if (journal_inum && journal_dev) { 3148 if (journal_inum && journal_dev) {
3073 printk(KERN_ERR "EXT4-fs: filesystem has both journal " 3149 ext4_msg(sb, KERN_ERR, "filesystem has both journal "
3074 "and inode journals!\n"); 3150 "and inode journals!");
3075 return -EINVAL; 3151 return -EINVAL;
3076 } 3152 }
3077 3153
@@ -3084,14 +3160,14 @@ static int ext4_load_journal(struct super_block *sb,
3084 } 3160 }
3085 3161
3086 if (journal->j_flags & JBD2_BARRIER) 3162 if (journal->j_flags & JBD2_BARRIER)
3087 printk(KERN_INFO "EXT4-fs: barriers enabled\n"); 3163 ext4_msg(sb, KERN_INFO, "barriers enabled");
3088 else 3164 else
3089 printk(KERN_INFO "EXT4-fs: barriers disabled\n"); 3165 ext4_msg(sb, KERN_INFO, "barriers disabled");
3090 3166
3091 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3167 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3092 err = jbd2_journal_update_format(journal); 3168 err = jbd2_journal_update_format(journal);
3093 if (err) { 3169 if (err) {
3094 printk(KERN_ERR "EXT4-fs: error updating journal.\n"); 3170 ext4_msg(sb, KERN_ERR, "error updating journal");
3095 jbd2_journal_destroy(journal); 3171 jbd2_journal_destroy(journal);
3096 return err; 3172 return err;
3097 } 3173 }
@@ -3103,7 +3179,7 @@ static int ext4_load_journal(struct super_block *sb,
3103 err = jbd2_journal_load(journal); 3179 err = jbd2_journal_load(journal);
3104 3180
3105 if (err) { 3181 if (err) {
3106 printk(KERN_ERR "EXT4-fs: error loading journal.\n"); 3182 ext4_msg(sb, KERN_ERR, "error loading journal");
3107 jbd2_journal_destroy(journal); 3183 jbd2_journal_destroy(journal);
3108 return err; 3184 return err;
3109 } 3185 }
@@ -3114,18 +3190,17 @@ static int ext4_load_journal(struct super_block *sb,
3114 if (journal_devnum && 3190 if (journal_devnum &&
3115 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3191 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3116 es->s_journal_dev = cpu_to_le32(journal_devnum); 3192 es->s_journal_dev = cpu_to_le32(journal_devnum);
3117 sb->s_dirt = 1;
3118 3193
3119 /* Make sure we flush the recovery flag to disk. */ 3194 /* Make sure we flush the recovery flag to disk. */
3120 ext4_commit_super(sb, es, 1); 3195 ext4_commit_super(sb, 1);
3121 } 3196 }
3122 3197
3123 return 0; 3198 return 0;
3124} 3199}
3125 3200
3126static int ext4_commit_super(struct super_block *sb, 3201static int ext4_commit_super(struct super_block *sb, int sync)
3127 struct ext4_super_block *es, int sync)
3128{ 3202{
3203 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3129 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3204 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3130 int error = 0; 3205 int error = 0;
3131 3206
@@ -3140,8 +3215,8 @@ static int ext4_commit_super(struct super_block *sb,
3140 * be remapped. Nothing we can do but to retry the 3215 * be remapped. Nothing we can do but to retry the
3141 * write and hope for the best. 3216 * write and hope for the best.
3142 */ 3217 */
3143 printk(KERN_ERR "EXT4-fs: previous I/O error to " 3218 ext4_msg(sb, KERN_ERR, "previous I/O error to "
3144 "superblock detected for %s.\n", sb->s_id); 3219 "superblock detected");
3145 clear_buffer_write_io_error(sbh); 3220 clear_buffer_write_io_error(sbh);
3146 set_buffer_uptodate(sbh); 3221 set_buffer_uptodate(sbh);
3147 } 3222 }
@@ -3154,7 +3229,7 @@ static int ext4_commit_super(struct super_block *sb,
3154 &EXT4_SB(sb)->s_freeblocks_counter)); 3229 &EXT4_SB(sb)->s_freeblocks_counter));
3155 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3230 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3156 &EXT4_SB(sb)->s_freeinodes_counter)); 3231 &EXT4_SB(sb)->s_freeinodes_counter));
3157 3232 sb->s_dirt = 0;
3158 BUFFER_TRACE(sbh, "marking dirty"); 3233 BUFFER_TRACE(sbh, "marking dirty");
3159 mark_buffer_dirty(sbh); 3234 mark_buffer_dirty(sbh);
3160 if (sync) { 3235 if (sync) {
@@ -3164,8 +3239,8 @@ static int ext4_commit_super(struct super_block *sb,
3164 3239
3165 error = buffer_write_io_error(sbh); 3240 error = buffer_write_io_error(sbh);
3166 if (error) { 3241 if (error) {
3167 printk(KERN_ERR "EXT4-fs: I/O error while writing " 3242 ext4_msg(sb, KERN_ERR, "I/O error while writing "
3168 "superblock for %s.\n", sb->s_id); 3243 "superblock");
3169 clear_buffer_write_io_error(sbh); 3244 clear_buffer_write_io_error(sbh);
3170 set_buffer_uptodate(sbh); 3245 set_buffer_uptodate(sbh);
3171 } 3246 }
@@ -3173,7 +3248,6 @@ static int ext4_commit_super(struct super_block *sb,
3173 return error; 3248 return error;
3174} 3249}
3175 3250
3176
3177/* 3251/*
3178 * Have we just finished recovery? If so, and if we are mounting (or 3252 * Have we just finished recovery? If so, and if we are mounting (or
3179 * remounting) the filesystem readonly, then we will end up with a 3253 * remounting) the filesystem readonly, then we will end up with a
@@ -3192,14 +3266,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
3192 if (jbd2_journal_flush(journal) < 0) 3266 if (jbd2_journal_flush(journal) < 0)
3193 goto out; 3267 goto out;
3194 3268
3195 lock_super(sb);
3196 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 3269 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3197 sb->s_flags & MS_RDONLY) { 3270 sb->s_flags & MS_RDONLY) {
3198 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3271 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3199 sb->s_dirt = 0; 3272 ext4_commit_super(sb, 1);
3200 ext4_commit_super(sb, es, 1);
3201 } 3273 }
3202 unlock_super(sb);
3203 3274
3204out: 3275out:
3205 jbd2_journal_unlock_updates(journal); 3276 jbd2_journal_unlock_updates(journal);
@@ -3238,7 +3309,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
3238 3309
3239 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3310 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3240 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3311 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3241 ext4_commit_super(sb, es, 1); 3312 ext4_commit_super(sb, 1);
3242 3313
3243 jbd2_journal_clear_err(journal); 3314 jbd2_journal_clear_err(journal);
3244 } 3315 }
@@ -3257,29 +3328,17 @@ int ext4_force_commit(struct super_block *sb)
3257 return 0; 3328 return 0;
3258 3329
3259 journal = EXT4_SB(sb)->s_journal; 3330 journal = EXT4_SB(sb)->s_journal;
3260 if (journal) { 3331 if (journal)
3261 sb->s_dirt = 0;
3262 ret = ext4_journal_force_commit(journal); 3332 ret = ext4_journal_force_commit(journal);
3263 }
3264 3333
3265 return ret; 3334 return ret;
3266} 3335}
3267 3336
3268/*
3269 * Ext4 always journals updates to the superblock itself, so we don't
3270 * have to propagate any other updates to the superblock on disk at this
3271 * point. (We can probably nuke this function altogether, and remove
3272 * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
3273 */
3274static void ext4_write_super(struct super_block *sb) 3337static void ext4_write_super(struct super_block *sb)
3275{ 3338{
3276 if (EXT4_SB(sb)->s_journal) { 3339 lock_super(sb);
3277 if (mutex_trylock(&sb->s_lock) != 0) 3340 ext4_commit_super(sb, 1);
3278 BUG(); 3341 unlock_super(sb);
3279 sb->s_dirt = 0;
3280 } else {
3281 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3282 }
3283} 3342}
3284 3343
3285static int ext4_sync_fs(struct super_block *sb, int wait) 3344static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3288,16 +3347,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
3288 tid_t target; 3347 tid_t target;
3289 3348
3290 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3349 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3291 sb->s_dirt = 0; 3350 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3292 if (EXT4_SB(sb)->s_journal) { 3351 if (wait)
3293 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, 3352 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
3294 &target)) {
3295 if (wait)
3296 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
3297 target);
3298 }
3299 } else {
3300 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3301 } 3353 }
3302 return ret; 3354 return ret;
3303} 3355}
@@ -3310,34 +3362,32 @@ static int ext4_freeze(struct super_block *sb)
3310{ 3362{
3311 int error = 0; 3363 int error = 0;
3312 journal_t *journal; 3364 journal_t *journal;
3313 sb->s_dirt = 0;
3314 3365
3315 if (!(sb->s_flags & MS_RDONLY)) { 3366 if (sb->s_flags & MS_RDONLY)
3316 journal = EXT4_SB(sb)->s_journal; 3367 return 0;
3317 3368
3318 if (journal) { 3369 journal = EXT4_SB(sb)->s_journal;
3319 /* Now we set up the journal barrier. */
3320 jbd2_journal_lock_updates(journal);
3321 3370
3322 /* 3371 /* Now we set up the journal barrier. */
3323 * We don't want to clear needs_recovery flag when we 3372 jbd2_journal_lock_updates(journal);
3324 * failed to flush the journal.
3325 */
3326 error = jbd2_journal_flush(journal);
3327 if (error < 0)
3328 goto out;
3329 }
3330 3373
3331 /* Journal blocked and flushed, clear needs_recovery flag. */ 3374 /*
3332 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3375 * Don't clear the needs_recovery flag if we failed to flush
3333 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3376 * the journal.
3334 if (error) 3377 */
3335 goto out; 3378 error = jbd2_journal_flush(journal);
3379 if (error < 0) {
3380 out:
3381 jbd2_journal_unlock_updates(journal);
3382 return error;
3336 } 3383 }
3384
3385 /* Journal blocked and flushed, clear needs_recovery flag. */
3386 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3387 error = ext4_commit_super(sb, 1);
3388 if (error)
3389 goto out;
3337 return 0; 3390 return 0;
3338out:
3339 jbd2_journal_unlock_updates(journal);
3340 return error;
3341} 3391}
3342 3392
3343/* 3393/*
@@ -3346,14 +3396,15 @@ out:
3346 */ 3396 */
3347static int ext4_unfreeze(struct super_block *sb) 3397static int ext4_unfreeze(struct super_block *sb)
3348{ 3398{
3349 if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { 3399 if (sb->s_flags & MS_RDONLY)
3350 lock_super(sb); 3400 return 0;
3351 /* Reser the needs_recovery flag before the fs is unlocked. */ 3401
3352 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3402 lock_super(sb);
3353 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3403 /* Reset the needs_recovery flag before the fs is unlocked. */
3354 unlock_super(sb); 3404 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3355 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3405 ext4_commit_super(sb, 1);
3356 } 3406 unlock_super(sb);
3407 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3357 return 0; 3408 return 0;
3358} 3409}
3359 3410
@@ -3371,7 +3422,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3371 int i; 3422 int i;
3372#endif 3423#endif
3373 3424
3425 lock_kernel();
3426
3374 /* Store the original options */ 3427 /* Store the original options */
3428 lock_super(sb);
3375 old_sb_flags = sb->s_flags; 3429 old_sb_flags = sb->s_flags;
3376 old_opts.s_mount_opt = sbi->s_mount_opt; 3430 old_opts.s_mount_opt = sbi->s_mount_opt;
3377 old_opts.s_resuid = sbi->s_resuid; 3431 old_opts.s_resuid = sbi->s_resuid;
@@ -3432,22 +3486,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3432 (sbi->s_mount_state & EXT4_VALID_FS)) 3486 (sbi->s_mount_state & EXT4_VALID_FS))
3433 es->s_state = cpu_to_le16(sbi->s_mount_state); 3487 es->s_state = cpu_to_le16(sbi->s_mount_state);
3434 3488
3435 /* 3489 if (sbi->s_journal)
3436 * We have to unlock super so that we can wait for
3437 * transactions.
3438 */
3439 if (sbi->s_journal) {
3440 unlock_super(sb);
3441 ext4_mark_recovery_complete(sb, es); 3490 ext4_mark_recovery_complete(sb, es);
3442 lock_super(sb);
3443 }
3444 } else { 3491 } else {
3445 int ret; 3492 int ret;
3446 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3493 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3447 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 3494 ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3448 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3495 ext4_msg(sb, KERN_WARNING, "couldn't "
3449 "remount RDWR because of unsupported " 3496 "remount RDWR because of unsupported "
3450 "optional features (%x).\n", sb->s_id, 3497 "optional features (%x)",
3451 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & 3498 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3452 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 3499 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3453 err = -EROFS; 3500 err = -EROFS;
@@ -3456,17 +3503,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3456 3503
3457 /* 3504 /*
3458 * Make sure the group descriptor checksums 3505 * Make sure the group descriptor checksums
3459 * are sane. If they aren't, refuse to 3506 * are sane. If they aren't, refuse to remount r/w.
3460 * remount r/w.
3461 */ 3507 */
3462 for (g = 0; g < sbi->s_groups_count; g++) { 3508 for (g = 0; g < sbi->s_groups_count; g++) {
3463 struct ext4_group_desc *gdp = 3509 struct ext4_group_desc *gdp =
3464 ext4_get_group_desc(sb, g, NULL); 3510 ext4_get_group_desc(sb, g, NULL);
3465 3511
3466 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3512 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3467 printk(KERN_ERR 3513 ext4_msg(sb, KERN_ERR,
3468 "EXT4-fs: ext4_remount: " 3514 "ext4_remount: Checksum for group %u failed (%u!=%u)",
3469 "Checksum for group %u failed (%u!=%u)\n",
3470 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3515 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3471 le16_to_cpu(gdp->bg_checksum)); 3516 le16_to_cpu(gdp->bg_checksum));
3472 err = -EINVAL; 3517 err = -EINVAL;
@@ -3480,11 +3525,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3480 * require a full umount/remount for now. 3525 * require a full umount/remount for now.
3481 */ 3526 */
3482 if (es->s_last_orphan) { 3527 if (es->s_last_orphan) {
3483 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3528 ext4_msg(sb, KERN_WARNING, "Couldn't "
3484 "remount RDWR because of unprocessed " 3529 "remount RDWR because of unprocessed "
3485 "orphan inode list. Please " 3530 "orphan inode list. Please "
3486 "umount/remount instead.\n", 3531 "umount/remount instead");
3487 sb->s_id);
3488 err = -EINVAL; 3532 err = -EINVAL;
3489 goto restore_opts; 3533 goto restore_opts;
3490 } 3534 }
@@ -3504,8 +3548,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3504 sb->s_flags &= ~MS_RDONLY; 3548 sb->s_flags &= ~MS_RDONLY;
3505 } 3549 }
3506 } 3550 }
3551 ext4_setup_system_zone(sb);
3507 if (sbi->s_journal == NULL) 3552 if (sbi->s_journal == NULL)
3508 ext4_commit_super(sb, es, 1); 3553 ext4_commit_super(sb, 1);
3509 3554
3510#ifdef CONFIG_QUOTA 3555#ifdef CONFIG_QUOTA
3511 /* Release old quota file names */ 3556 /* Release old quota file names */
@@ -3514,7 +3559,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3514 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3559 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3515 kfree(old_opts.s_qf_names[i]); 3560 kfree(old_opts.s_qf_names[i]);
3516#endif 3561#endif
3562 unlock_super(sb);
3563 unlock_kernel();
3517 return 0; 3564 return 0;
3565
3518restore_opts: 3566restore_opts:
3519 sb->s_flags = old_sb_flags; 3567 sb->s_flags = old_sb_flags;
3520 sbi->s_mount_opt = old_opts.s_mount_opt; 3568 sbi->s_mount_opt = old_opts.s_mount_opt;
@@ -3532,6 +3580,8 @@ restore_opts:
3532 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3580 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
3533 } 3581 }
3534#endif 3582#endif
3583 unlock_super(sb);
3584 unlock_kernel();
3535 return err; 3585 return err;
3536} 3586}
3537 3587
@@ -3545,9 +3595,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3545 if (test_opt(sb, MINIX_DF)) { 3595 if (test_opt(sb, MINIX_DF)) {
3546 sbi->s_overhead_last = 0; 3596 sbi->s_overhead_last = 0;
3547 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3597 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3548 ext4_group_t ngroups = sbi->s_groups_count, i; 3598 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3549 ext4_fsblk_t overhead = 0; 3599 ext4_fsblk_t overhead = 0;
3550 smp_rmb();
3551 3600
3552 /* 3601 /*
3553 * Compute the overhead (FS structures). This is constant 3602 * Compute the overhead (FS structures). This is constant
@@ -3599,11 +3648,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3599 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3648 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3600 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3649 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3601 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3650 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3651
3602 return 0; 3652 return 0;
3603} 3653}
3604 3654
3605/* Helper function for writing quotas on sync - we need to start transaction before quota file 3655/* Helper function for writing quotas on sync - we need to start transaction
3606 * is locked for write. Otherwise the are possible deadlocks: 3656 * before quota file is locked for write. Otherwise the are possible deadlocks:
3607 * Process 1 Process 2 3657 * Process 1 Process 2
3608 * ext4_create() quota_sync() 3658 * ext4_create() quota_sync()
3609 * jbd2_journal_start() write_dquot() 3659 * jbd2_journal_start() write_dquot()
@@ -3627,7 +3677,7 @@ static int ext4_write_dquot(struct dquot *dquot)
3627 3677
3628 inode = dquot_to_inode(dquot); 3678 inode = dquot_to_inode(dquot);
3629 handle = ext4_journal_start(inode, 3679 handle = ext4_journal_start(inode,
3630 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3680 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3631 if (IS_ERR(handle)) 3681 if (IS_ERR(handle))
3632 return PTR_ERR(handle); 3682 return PTR_ERR(handle);
3633 ret = dquot_commit(dquot); 3683 ret = dquot_commit(dquot);
@@ -3643,7 +3693,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)
3643 handle_t *handle; 3693 handle_t *handle;
3644 3694
3645 handle = ext4_journal_start(dquot_to_inode(dquot), 3695 handle = ext4_journal_start(dquot_to_inode(dquot),
3646 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3696 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3647 if (IS_ERR(handle)) 3697 if (IS_ERR(handle))
3648 return PTR_ERR(handle); 3698 return PTR_ERR(handle);
3649 ret = dquot_acquire(dquot); 3699 ret = dquot_acquire(dquot);
@@ -3659,7 +3709,7 @@ static int ext4_release_dquot(struct dquot *dquot)
3659 handle_t *handle; 3709 handle_t *handle;
3660 3710
3661 handle = ext4_journal_start(dquot_to_inode(dquot), 3711 handle = ext4_journal_start(dquot_to_inode(dquot),
3662 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3712 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3663 if (IS_ERR(handle)) { 3713 if (IS_ERR(handle)) {
3664 /* Release dquot anyway to avoid endless cycle in dqput() */ 3714 /* Release dquot anyway to avoid endless cycle in dqput() */
3665 dquot_release(dquot); 3715 dquot_release(dquot);
@@ -3707,7 +3757,7 @@ static int ext4_write_info(struct super_block *sb, int type)
3707static int ext4_quota_on_mount(struct super_block *sb, int type) 3757static int ext4_quota_on_mount(struct super_block *sb, int type)
3708{ 3758{
3709 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3759 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3710 EXT4_SB(sb)->s_jquota_fmt, type); 3760 EXT4_SB(sb)->s_jquota_fmt, type);
3711} 3761}
3712 3762
3713/* 3763/*
@@ -3738,9 +3788,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3738 if (EXT4_SB(sb)->s_qf_names[type]) { 3788 if (EXT4_SB(sb)->s_qf_names[type]) {
3739 /* Quotafile not in fs root? */ 3789 /* Quotafile not in fs root? */
3740 if (path.dentry->d_parent != sb->s_root) 3790 if (path.dentry->d_parent != sb->s_root)
3741 printk(KERN_WARNING 3791 ext4_msg(sb, KERN_WARNING,
3742 "EXT4-fs: Quota file not on filesystem root. " 3792 "Quota file not on filesystem root. "
3743 "Journaled quota will not work.\n"); 3793 "Journaled quota will not work");
3744 } 3794 }
3745 3795
3746 /* 3796 /*
@@ -3823,8 +3873,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3823 handle_t *handle = journal_current_handle(); 3873 handle_t *handle = journal_current_handle();
3824 3874
3825 if (EXT4_SB(sb)->s_journal && !handle) { 3875 if (EXT4_SB(sb)->s_journal && !handle) {
3826 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" 3876 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3827 " cancelled because transaction is not started.\n", 3877 " cancelled because transaction is not started",
3828 (unsigned long long)off, (unsigned long long)len); 3878 (unsigned long long)off, (unsigned long long)len);
3829 return -EIO; 3879 return -EIO;
3830 } 3880 }
@@ -3878,10 +3928,10 @@ out:
3878 3928
3879#endif 3929#endif
3880 3930
3881static int ext4_get_sb(struct file_system_type *fs_type, 3931static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3882 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3932 const char *dev_name, void *data, struct vfsmount *mnt)
3883{ 3933{
3884 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3934 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3885} 3935}
3886 3936
3887static struct file_system_type ext4_fs_type = { 3937static struct file_system_type ext4_fs_type = {
@@ -3893,14 +3943,14 @@ static struct file_system_type ext4_fs_type = {
3893}; 3943};
3894 3944
3895#ifdef CONFIG_EXT4DEV_COMPAT 3945#ifdef CONFIG_EXT4DEV_COMPAT
3896static int ext4dev_get_sb(struct file_system_type *fs_type, 3946static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
3897 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3947 const char *dev_name, void *data,struct vfsmount *mnt)
3898{ 3948{
3899 printk(KERN_WARNING "EXT4-fs: Update your userspace programs " 3949 printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs "
3900 "to mount using ext4\n"); 3950 "to mount using ext4\n", dev_name);
3901 printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " 3951 printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility "
3902 "will go away by 2.6.31\n"); 3952 "will go away by 2.6.31\n", dev_name);
3903 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3953 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3904} 3954}
3905 3955
3906static struct file_system_type ext4dev_fs_type = { 3956static struct file_system_type ext4dev_fs_type = {
@@ -3917,13 +3967,16 @@ static int __init init_ext4_fs(void)
3917{ 3967{
3918 int err; 3968 int err;
3919 3969
3970 err = init_ext4_system_zone();
3971 if (err)
3972 return err;
3920 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 3973 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3921 if (!ext4_kset) 3974 if (!ext4_kset)
3922 return -ENOMEM; 3975 goto out4;
3923 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3976 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3924 err = init_ext4_mballoc(); 3977 err = init_ext4_mballoc();
3925 if (err) 3978 if (err)
3926 return err; 3979 goto out3;
3927 3980
3928 err = init_ext4_xattr(); 3981 err = init_ext4_xattr();
3929 if (err) 3982 if (err)
@@ -3948,6 +4001,11 @@ out1:
3948 exit_ext4_xattr(); 4001 exit_ext4_xattr();
3949out2: 4002out2:
3950 exit_ext4_mballoc(); 4003 exit_ext4_mballoc();
4004out3:
4005 remove_proc_entry("fs/ext4", NULL);
4006 kset_unregister(ext4_kset);
4007out4:
4008 exit_ext4_system_zone();
3951 return err; 4009 return err;
3952} 4010}
3953 4011
@@ -3962,6 +4020,7 @@ static void __exit exit_ext4_fs(void)
3962 exit_ext4_mballoc(); 4020 exit_ext4_mballoc();
3963 remove_proc_entry("fs/ext4", NULL); 4021 remove_proc_entry("fs/ext4", NULL);
3964 kset_unregister(ext4_kset); 4022 kset_unregister(ext4_kset);
4023 exit_ext4_system_zone();
3965} 4024}
3966 4025
3967MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4026MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");