diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Makefile | 4 | ||||
-rw-r--r-- | fs/ext4/acl.c | 13 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 28 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 244 | ||||
-rw-r--r-- | fs/ext4/dir.c | 3 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 354 | ||||
-rw-r--r-- | fs/ext4/ext4_i.h | 140 | ||||
-rw-r--r-- | fs/ext4/ext4_sb.h | 161 | ||||
-rw-r--r-- | fs/ext4/extents.c | 85 | ||||
-rw-r--r-- | fs/ext4/group.h | 29 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 73 | ||||
-rw-r--r-- | fs/ext4/inode.c | 593 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 166 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 1 | ||||
-rw-r--r-- | fs/ext4/namei.c | 27 | ||||
-rw-r--r-- | fs/ext4/namei.h | 8 | ||||
-rw-r--r-- | fs/ext4/resize.c | 36 | ||||
-rw-r--r-- | fs/ext4/super.c | 849 |
18 files changed, 1583 insertions, 1231 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index a8ff003a00f7..8a34710ecf40 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -5,8 +5,8 @@ | |||
5 | obj-$(CONFIG_EXT4_FS) += ext4.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o |
10 | 10 | ||
11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o | 12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 647e0d65a284..605aeed96d68 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -129,12 +129,15 @@ fail: | |||
129 | static inline struct posix_acl * | 129 | static inline struct posix_acl * |
130 | ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) | 130 | ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) |
131 | { | 131 | { |
132 | struct posix_acl *acl = EXT4_ACL_NOT_CACHED; | 132 | struct posix_acl *acl = ACCESS_ONCE(*i_acl); |
133 | 133 | ||
134 | spin_lock(&inode->i_lock); | 134 | if (acl) { |
135 | if (*i_acl != EXT4_ACL_NOT_CACHED) | 135 | spin_lock(&inode->i_lock); |
136 | acl = posix_acl_dup(*i_acl); | 136 | acl = *i_acl; |
137 | spin_unlock(&inode->i_lock); | 137 | if (acl != EXT4_ACL_NOT_CACHED) |
138 | acl = posix_acl_dup(acl); | ||
139 | spin_unlock(&inode->i_lock); | ||
140 | } | ||
138 | 141 | ||
139 | return acl; | 142 | return acl; |
140 | } | 143 | } |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 53c72ad85877..e2126d70dff5 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/buffer_head.h> | 19 | #include <linux/buffer_head.h> |
20 | #include "ext4.h" | 20 | #include "ext4.h" |
21 | #include "ext4_jbd2.h" | 21 | #include "ext4_jbd2.h" |
22 | #include "group.h" | ||
23 | #include "mballoc.h" | 22 | #include "mballoc.h" |
24 | 23 | ||
25 | /* | 24 | /* |
@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
88 | ext4_group_t block_group, struct ext4_group_desc *gdp) | 87 | ext4_group_t block_group, struct ext4_group_desc *gdp) |
89 | { | 88 | { |
90 | int bit, bit_max; | 89 | int bit, bit_max; |
90 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
91 | unsigned free_blocks, group_blocks; | 91 | unsigned free_blocks, group_blocks; |
92 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 92 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
93 | 93 | ||
@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
123 | bit_max += ext4_bg_num_gdb(sb, block_group); | 123 | bit_max += ext4_bg_num_gdb(sb, block_group); |
124 | } | 124 | } |
125 | 125 | ||
126 | if (block_group == sbi->s_groups_count - 1) { | 126 | if (block_group == ngroups - 1) { |
127 | /* | 127 | /* |
128 | * Even though mke2fs always initialize first and last group | 128 | * Even though mke2fs always initialize first and last group |
129 | * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need | 129 | * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need |
@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
131 | */ | 131 | */ |
132 | group_blocks = ext4_blocks_count(sbi->s_es) - | 132 | group_blocks = ext4_blocks_count(sbi->s_es) - |
133 | le32_to_cpu(sbi->s_es->s_first_data_block) - | 133 | le32_to_cpu(sbi->s_es->s_first_data_block) - |
134 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); | 134 | (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1)); |
135 | } else { | 135 | } else { |
136 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); | 136 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); |
137 | } | 137 | } |
@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, | |||
205 | { | 205 | { |
206 | unsigned int group_desc; | 206 | unsigned int group_desc; |
207 | unsigned int offset; | 207 | unsigned int offset; |
208 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
208 | struct ext4_group_desc *desc; | 209 | struct ext4_group_desc *desc; |
209 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 210 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
210 | 211 | ||
211 | if (block_group >= sbi->s_groups_count) { | 212 | if (block_group >= ngroups) { |
212 | ext4_error(sb, "ext4_get_group_desc", | 213 | ext4_error(sb, "ext4_get_group_desc", |
213 | "block_group >= groups_count - " | 214 | "block_group >= groups_count - " |
214 | "block_group = %u, groups_count = %u", | 215 | "block_group = %u, groups_count = %u", |
215 | block_group, sbi->s_groups_count); | 216 | block_group, ngroups); |
216 | 217 | ||
217 | return NULL; | 218 | return NULL; |
218 | } | 219 | } |
219 | smp_rmb(); | ||
220 | 220 | ||
221 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); | 221 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); |
222 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 222 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
326 | unlock_buffer(bh); | 326 | unlock_buffer(bh); |
327 | return bh; | 327 | return bh; |
328 | } | 328 | } |
329 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 329 | ext4_lock_group(sb, block_group); |
330 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 330 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
331 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 331 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
332 | set_bitmap_uptodate(bh); | 332 | set_bitmap_uptodate(bh); |
333 | set_buffer_uptodate(bh); | 333 | set_buffer_uptodate(bh); |
334 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 334 | ext4_unlock_group(sb, block_group); |
335 | unlock_buffer(bh); | 335 | unlock_buffer(bh); |
336 | return bh; | 336 | return bh; |
337 | } | 337 | } |
338 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 338 | ext4_unlock_group(sb, block_group); |
339 | if (buffer_uptodate(bh)) { | 339 | if (buffer_uptodate(bh)) { |
340 | /* | 340 | /* |
341 | * if not uninit if bh is uptodate, | 341 | * if not uninit if bh is uptodate, |
@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
451 | down_write(&grp->alloc_sem); | 451 | down_write(&grp->alloc_sem); |
452 | for (i = 0, blocks_freed = 0; i < count; i++) { | 452 | for (i = 0, blocks_freed = 0; i < count; i++) { |
453 | BUFFER_TRACE(bitmap_bh, "clear bit"); | 453 | BUFFER_TRACE(bitmap_bh, "clear bit"); |
454 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), | 454 | if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), |
455 | bit + i, bitmap_bh->b_data)) { | 455 | bit + i, bitmap_bh->b_data)) { |
456 | ext4_error(sb, __func__, | 456 | ext4_error(sb, __func__, |
457 | "bit already cleared for block %llu", | 457 | "bit already cleared for block %llu", |
@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
461 | blocks_freed++; | 461 | blocks_freed++; |
462 | } | 462 | } |
463 | } | 463 | } |
464 | spin_lock(sb_bgl_lock(sbi, block_group)); | 464 | ext4_lock_group(sb, block_group); |
465 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | 465 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); |
466 | ext4_free_blks_set(sb, desc, blk_free_count); | 466 | ext4_free_blks_set(sb, desc, blk_free_count); |
467 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | 467 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); |
468 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 468 | ext4_unlock_group(sb, block_group); |
469 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | 469 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); |
470 | 470 | ||
471 | if (sbi->s_log_groups_per_flex) { | 471 | if (sbi->s_log_groups_per_flex) { |
@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
665 | ext4_fsblk_t desc_count; | 665 | ext4_fsblk_t desc_count; |
666 | struct ext4_group_desc *gdp; | 666 | struct ext4_group_desc *gdp; |
667 | ext4_group_t i; | 667 | ext4_group_t i; |
668 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 668 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
669 | #ifdef EXT4FS_DEBUG | 669 | #ifdef EXT4FS_DEBUG |
670 | struct ext4_super_block *es; | 670 | struct ext4_super_block *es; |
671 | ext4_fsblk_t bitmap_count; | 671 | ext4_fsblk_t bitmap_count; |
@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
677 | bitmap_count = 0; | 677 | bitmap_count = 0; |
678 | gdp = NULL; | 678 | gdp = NULL; |
679 | 679 | ||
680 | smp_rmb(); | ||
681 | for (i = 0; i < ngroups; i++) { | 680 | for (i = 0; i < ngroups; i++) { |
682 | gdp = ext4_get_group_desc(sb, i, NULL); | 681 | gdp = ext4_get_group_desc(sb, i, NULL); |
683 | if (!gdp) | 682 | if (!gdp) |
@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
700 | return bitmap_count; | 699 | return bitmap_count; |
701 | #else | 700 | #else |
702 | desc_count = 0; | 701 | desc_count = 0; |
703 | smp_rmb(); | ||
704 | for (i = 0; i < ngroups; i++) { | 702 | for (i = 0; i < ngroups; i++) { |
705 | gdp = ext4_get_group_desc(sb, i, NULL); | 703 | gdp = ext4_get_group_desc(sb, i, NULL); |
706 | if (!gdp) | 704 | if (!gdp) |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c new file mode 100644 index 000000000000..50784ef07563 --- /dev/null +++ b/fs/ext4/block_validity.c | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/block_validity.c | ||
3 | * | ||
4 | * Copyright (C) 2009 | ||
5 | * Theodore Ts'o (tytso@mit.edu) | ||
6 | * | ||
7 | * Track which blocks in the filesystem are metadata blocks that | ||
8 | * should never be used as data blocks by files or directories. | ||
9 | */ | ||
10 | |||
11 | #include <linux/time.h> | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/namei.h> | ||
14 | #include <linux/quotaops.h> | ||
15 | #include <linux/buffer_head.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/swap.h> | ||
18 | #include <linux/pagemap.h> | ||
19 | #include <linux/version.h> | ||
20 | #include <linux/blkdev.h> | ||
21 | #include <linux/mutex.h> | ||
22 | #include "ext4.h" | ||
23 | |||
24 | struct ext4_system_zone { /* one contiguous block range recorded in sbi->system_blks */ | ||
25 | struct rb_node node; /* links this zone into the sbi->system_blks rb-tree */ | ||
26 | ext4_fsblk_t start_blk; /* first block of the range */ | ||
27 | unsigned int count; /* number of blocks in the range */ | ||
28 | }; | ||
29 | |||
30 | static struct kmem_cache *ext4_system_zone_cachep; /* slab cache that zone entries are allocated from and freed to */ | ||
31 | |||
32 | int __init init_ext4_system_zone(void) | ||
33 | { | ||
34 | ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, | ||
35 | SLAB_RECLAIM_ACCOUNT); | ||
36 | if (ext4_system_zone_cachep == NULL) | ||
37 | return -ENOMEM; | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | void exit_ext4_system_zone(void) | ||
42 | { | ||
43 | kmem_cache_destroy(ext4_system_zone_cachep); | ||
44 | } | ||
45 | |||
46 | static inline int can_merge(struct ext4_system_zone *entry1, | ||
47 | struct ext4_system_zone *entry2) | ||
48 | { | ||
49 | if ((entry1->start_blk + entry1->count) == entry2->start_blk) | ||
50 | return 1; | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * Mark a range of blocks as belonging to the "system zone" --- that | ||
56 | * is, filesystem metadata blocks which should never be used by | ||
57 | * inodes. | ||
58 | */ | ||
59 | static int add_system_zone(struct ext4_sb_info *sbi, | ||
60 | ext4_fsblk_t start_blk, | ||
61 | unsigned int count) | ||
62 | { | ||
63 | struct ext4_system_zone *new_entry = NULL, *entry; | ||
64 | struct rb_node **n = &sbi->system_blks.rb_node, *node; | ||
65 | struct rb_node *parent = NULL, *new_node = NULL; | ||
66 | |||
67 | while (*n) { | ||
68 | parent = *n; | ||
69 | entry = rb_entry(parent, struct ext4_system_zone, node); | ||
70 | if (start_blk < entry->start_blk) | ||
71 | n = &(*n)->rb_left; | ||
72 | else if (start_blk >= (entry->start_blk + entry->count)) | ||
73 | n = &(*n)->rb_right; | ||
74 | else { | ||
75 | if (start_blk + count > (entry->start_blk + | ||
76 | entry->count)) | ||
77 | entry->count = (start_blk + count - | ||
78 | entry->start_blk); | ||
79 | new_node = *n; | ||
80 | new_entry = rb_entry(new_node, struct ext4_system_zone, | ||
81 | node); | ||
82 | break; | ||
83 | } | ||
84 | } | ||
85 | |||
86 | if (!new_entry) { | ||
87 | new_entry = kmem_cache_alloc(ext4_system_zone_cachep, | ||
88 | GFP_KERNEL); | ||
89 | if (!new_entry) | ||
90 | return -ENOMEM; | ||
91 | new_entry->start_blk = start_blk; | ||
92 | new_entry->count = count; | ||
93 | new_node = &new_entry->node; | ||
94 | |||
95 | rb_link_node(new_node, parent, n); | ||
96 | rb_insert_color(new_node, &sbi->system_blks); | ||
97 | } | ||
98 | |||
99 | /* Can we merge to the left? */ | ||
100 | node = rb_prev(new_node); | ||
101 | if (node) { | ||
102 | entry = rb_entry(node, struct ext4_system_zone, node); | ||
103 | if (can_merge(entry, new_entry)) { | ||
104 | new_entry->start_blk = entry->start_blk; | ||
105 | new_entry->count += entry->count; | ||
106 | rb_erase(node, &sbi->system_blks); | ||
107 | kmem_cache_free(ext4_system_zone_cachep, entry); | ||
108 | } | ||
109 | } | ||
110 | |||
111 | /* Can we merge to the right? */ | ||
112 | node = rb_next(new_node); | ||
113 | if (node) { | ||
114 | entry = rb_entry(node, struct ext4_system_zone, node); | ||
115 | if (can_merge(new_entry, entry)) { | ||
116 | new_entry->count += entry->count; | ||
117 | rb_erase(node, &sbi->system_blks); | ||
118 | kmem_cache_free(ext4_system_zone_cachep, entry); | ||
119 | } | ||
120 | } | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | static void debug_print_tree(struct ext4_sb_info *sbi) | ||
125 | { | ||
126 | struct rb_node *node; | ||
127 | struct ext4_system_zone *entry; | ||
128 | int first = 1; | ||
129 | |||
130 | printk(KERN_INFO "System zones: "); | ||
131 | node = rb_first(&sbi->system_blks); | ||
132 | while (node) { | ||
133 | entry = rb_entry(node, struct ext4_system_zone, node); | ||
134 | printk("%s%llu-%llu", first ? "" : ", ", | ||
135 | entry->start_blk, entry->start_blk + entry->count - 1); | ||
136 | first = 0; | ||
137 | node = rb_next(node); | ||
138 | } | ||
139 | printk("\n"); | ||
140 | } | ||
141 | |||
142 | int ext4_setup_system_zone(struct super_block *sb) | ||
143 | { | ||
144 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
145 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
146 | struct ext4_group_desc *gdp; | ||
147 | ext4_group_t i; | ||
148 | int flex_size = ext4_flex_bg_size(sbi); | ||
149 | int ret; | ||
150 | |||
151 | if (!test_opt(sb, BLOCK_VALIDITY)) { | ||
152 | if (EXT4_SB(sb)->system_blks.rb_node) | ||
153 | ext4_release_system_zone(sb); | ||
154 | return 0; | ||
155 | } | ||
156 | if (EXT4_SB(sb)->system_blks.rb_node) | ||
157 | return 0; | ||
158 | |||
159 | for (i=0; i < ngroups; i++) { | ||
160 | if (ext4_bg_has_super(sb, i) && | ||
161 | ((i < 5) || ((i % flex_size) == 0))) | ||
162 | add_system_zone(sbi, ext4_group_first_block_no(sb, i), | ||
163 | sbi->s_gdb_count + 1); | ||
164 | gdp = ext4_get_group_desc(sb, i, NULL); | ||
165 | ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); | ||
166 | if (ret) | ||
167 | return ret; | ||
168 | ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1); | ||
169 | if (ret) | ||
170 | return ret; | ||
171 | ret = add_system_zone(sbi, ext4_inode_table(sb, gdp), | ||
172 | sbi->s_itb_per_group); | ||
173 | if (ret) | ||
174 | return ret; | ||
175 | } | ||
176 | |||
177 | if (test_opt(sb, DEBUG)) | ||
178 | debug_print_tree(EXT4_SB(sb)); | ||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | /* Called when the filesystem is unmounted */ | ||
183 | void ext4_release_system_zone(struct super_block *sb) | ||
184 | { | ||
185 | struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node; | ||
186 | struct rb_node *parent; | ||
187 | struct ext4_system_zone *entry; | ||
188 | |||
189 | while (n) { | ||
190 | /* Do the node's children first */ | ||
191 | if (n->rb_left) { | ||
192 | n = n->rb_left; | ||
193 | continue; | ||
194 | } | ||
195 | if (n->rb_right) { | ||
196 | n = n->rb_right; | ||
197 | continue; | ||
198 | } | ||
199 | /* | ||
200 | * The node has no children; free it, and then zero | ||
201 | * out parent's link to it. Finally go to the | ||
202 | * beginning of the loop and try to free the parent | ||
203 | * node. | ||
204 | */ | ||
205 | parent = rb_parent(n); | ||
206 | entry = rb_entry(n, struct ext4_system_zone, node); | ||
207 | kmem_cache_free(ext4_system_zone_cachep, entry); | ||
208 | if (!parent) | ||
209 | EXT4_SB(sb)->system_blks.rb_node = NULL; | ||
210 | else if (parent->rb_left == n) | ||
211 | parent->rb_left = NULL; | ||
212 | else if (parent->rb_right == n) | ||
213 | parent->rb_right = NULL; | ||
214 | n = parent; | ||
215 | } | ||
216 | EXT4_SB(sb)->system_blks.rb_node = NULL; | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Returns 1 if the passed-in block region (start_blk, | ||
221 | * start_blk+count) is valid; 0 if some part of the block region | ||
222 | * overlaps with filesystem metadata blocks. | ||
223 | */ | ||
224 | int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, | ||
225 | unsigned int count) | ||
226 | { | ||
227 | struct ext4_system_zone *entry; | ||
228 | struct rb_node *n = sbi->system_blks.rb_node; | ||
229 | |||
230 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || | ||
231 | (start_blk + count > ext4_blocks_count(sbi->s_es))) | ||
232 | return 0; | ||
233 | while (n) { | ||
234 | entry = rb_entry(n, struct ext4_system_zone, node); | ||
235 | if (start_blk + count - 1 < entry->start_blk) | ||
236 | n = n->rb_left; | ||
237 | else if (start_blk >= (entry->start_blk + entry->count)) | ||
238 | n = n->rb_right; | ||
239 | else | ||
240 | return 0; | ||
241 | } | ||
242 | return 1; | ||
243 | } | ||
244 | |||
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index b64789929a65..9dc93168e262 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp, | |||
131 | struct buffer_head *bh = NULL; | 131 | struct buffer_head *bh = NULL; |
132 | 132 | ||
133 | map_bh.b_state = 0; | 133 | map_bh.b_state = 0; |
134 | err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, | 134 | err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0); |
135 | 0, 0, 0); | ||
136 | if (err > 0) { | 135 | if (err > 0) { |
137 | pgoff_t index = map_bh.b_blocknr >> | 136 | pgoff_t index = map_bh.b_blocknr >> |
138 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 137 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d0f15ef56de1..cc7d5edc38c9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -21,7 +21,14 @@ | |||
21 | #include <linux/magic.h> | 21 | #include <linux/magic.h> |
22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
23 | #include <linux/quota.h> | 23 | #include <linux/quota.h> |
24 | #include "ext4_i.h" | 24 | #include <linux/rwsem.h> |
25 | #include <linux/rbtree.h> | ||
26 | #include <linux/seqlock.h> | ||
27 | #include <linux/mutex.h> | ||
28 | #include <linux/timer.h> | ||
29 | #include <linux/wait.h> | ||
30 | #include <linux/blockgroup_lock.h> | ||
31 | #include <linux/percpu_counter.h> | ||
25 | 32 | ||
26 | /* | 33 | /* |
27 | * The fourth extended filesystem constants/structures | 34 | * The fourth extended filesystem constants/structures |
@@ -46,6 +53,19 @@ | |||
46 | #define ext4_debug(f, a...) do {} while (0) | 53 | #define ext4_debug(f, a...) do {} while (0) |
47 | #endif | 54 | #endif |
48 | 55 | ||
56 | /* data type for block offset of block group */ | ||
57 | typedef int ext4_grpblk_t; | ||
58 | |||
59 | /* data type for filesystem-wide blocks number */ | ||
60 | typedef unsigned long long ext4_fsblk_t; | ||
61 | |||
62 | /* data type for file logical block number */ | ||
63 | typedef __u32 ext4_lblk_t; | ||
64 | |||
65 | /* data type for block group number */ | ||
66 | typedef unsigned int ext4_group_t; | ||
67 | |||
68 | |||
49 | /* prefer goal again. length */ | 69 | /* prefer goal again. length */ |
50 | #define EXT4_MB_HINT_MERGE 1 | 70 | #define EXT4_MB_HINT_MERGE 1 |
51 | /* blocks already reserved */ | 71 | /* blocks already reserved */ |
@@ -179,9 +199,6 @@ struct flex_groups { | |||
179 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ | 199 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ |
180 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ | 200 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ |
181 | 201 | ||
182 | #ifdef __KERNEL__ | ||
183 | #include "ext4_sb.h" | ||
184 | #endif | ||
185 | /* | 202 | /* |
186 | * Macro-instructions used to manage group descriptors | 203 | * Macro-instructions used to manage group descriptors |
187 | */ | 204 | */ |
@@ -297,10 +314,23 @@ struct ext4_new_group_data { | |||
297 | }; | 314 | }; |
298 | 315 | ||
299 | /* | 316 | /* |
300 | * Following is used by preallocation code to tell get_blocks() that we | 317 | * Flags used by ext4_get_blocks() |
301 | * want uninitialzed extents. | ||
302 | */ | 318 | */ |
303 | #define EXT4_CREATE_UNINITIALIZED_EXT 2 | 319 | /* Allocate any needed blocks and/or convert an uninitialized |
320 | extent to be an initialized extent */ | ||
321 | #define EXT4_GET_BLOCKS_CREATE 0x0001 | ||
322 | /* Request the creation of an uninitialized extent */ | ||
323 | #define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002 | ||
324 | #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ | ||
325 | EXT4_GET_BLOCKS_CREATE) | ||
326 | /* Caller is from the delayed allocation writeout path, | ||
327 | so set the magic i_delalloc_reserved_flag after taking the | ||
328 | inode allocation semaphore */ | ||
329 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 | ||
330 | /* Call ext4_da_update_reserve_space() after successfully | ||
331 | allocating the blocks */ | ||
332 | #define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 | ||
333 | |||
304 | 334 | ||
305 | /* | 335 | /* |
306 | * ioctl commands | 336 | * ioctl commands |
@@ -516,6 +546,110 @@ do { \ | |||
516 | #endif /* defined(__KERNEL__) || defined(__linux__) */ | 546 | #endif /* defined(__KERNEL__) || defined(__linux__) */ |
517 | 547 | ||
518 | /* | 548 | /* |
549 | * storage for cached extent | ||
550 | */ | ||
551 | struct ext4_ext_cache { | ||
552 | ext4_fsblk_t ec_start; | ||
553 | ext4_lblk_t ec_block; | ||
554 | __u32 ec_len; /* must be 32bit to return holes */ | ||
555 | __u32 ec_type; | ||
556 | }; | ||
557 | |||
558 | /* | ||
559 | * fourth extended file system inode data in memory | ||
560 | */ | ||
561 | struct ext4_inode_info { | ||
562 | __le32 i_data[15]; /* unconverted */ | ||
563 | __u32 i_flags; | ||
564 | ext4_fsblk_t i_file_acl; | ||
565 | __u32 i_dtime; | ||
566 | |||
567 | /* | ||
568 | * i_block_group is the number of the block group which contains | ||
569 | * this file's inode. Constant across the lifetime of the inode, | ||
570 | * it is used for making block allocation decisions - we try to | ||
571 | * place a file's data blocks near its inode block, and new inodes | ||
572 | * near to their parent directory's inode. | ||
573 | */ | ||
574 | ext4_group_t i_block_group; | ||
575 | __u32 i_state; /* Dynamic state flags for ext4 */ | ||
576 | |||
577 | ext4_lblk_t i_dir_start_lookup; | ||
578 | #ifdef CONFIG_EXT4_FS_XATTR | ||
579 | /* | ||
580 | * Extended attributes can be read independently of the main file | ||
581 | * data. Taking i_mutex even when reading would cause contention | ||
582 | * between readers of EAs and writers of regular file data, so | ||
583 | * instead we synchronize on xattr_sem when reading or changing | ||
584 | * EAs. | ||
585 | */ | ||
586 | struct rw_semaphore xattr_sem; | ||
587 | #endif | ||
588 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | ||
589 | struct posix_acl *i_acl; | ||
590 | struct posix_acl *i_default_acl; | ||
591 | #endif | ||
592 | |||
593 | struct list_head i_orphan; /* unlinked but open inodes */ | ||
594 | |||
595 | /* | ||
596 | * i_disksize keeps track of what the inode size is ON DISK, not | ||
597 | * in memory. During truncate, i_size is set to the new size by | ||
598 | * the VFS prior to calling ext4_truncate(), but the filesystem won't | ||
599 | * set i_disksize to 0 until the truncate is actually under way. | ||
600 | * | ||
601 | * The intent is that i_disksize always represents the blocks which | ||
602 | * are used by this file. This allows recovery to restart truncate | ||
603 | * on orphans if we crash during truncate. We actually write i_disksize | ||
604 | * into the on-disk inode when writing inodes out, instead of i_size. | ||
605 | * | ||
606 | * The only time when i_disksize and i_size may be different is when | ||
607 | * a truncate is in progress. The only things which change i_disksize | ||
608 | * are ext4_get_block (growth) and ext4_truncate (shrinkth). | ||
609 | */ | ||
610 | loff_t i_disksize; | ||
611 | |||
612 | /* | ||
613 | * i_data_sem is for serialising ext4_truncate() against | ||
614 | * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's | ||
615 | * data tree are chopped off during truncate. We can't do that in | ||
616 | * ext4 because whenever we perform intermediate commits during | ||
617 | * truncate, the inode and all the metadata blocks *must* be in a | ||
618 | * consistent state which allows truncation of the orphans to restart | ||
619 | * during recovery. Hence we must fix the get_block-vs-truncate race | ||
620 | * by other means, so we have i_data_sem. | ||
621 | */ | ||
622 | struct rw_semaphore i_data_sem; | ||
623 | struct inode vfs_inode; | ||
624 | struct jbd2_inode jinode; | ||
625 | |||
626 | struct ext4_ext_cache i_cached_extent; | ||
627 | /* | ||
628 | * File creation time. Its function is same as that of | ||
629 | * struct timespec i_{a,c,m}time in the generic inode. | ||
630 | */ | ||
631 | struct timespec i_crtime; | ||
632 | |||
633 | /* mballoc */ | ||
634 | struct list_head i_prealloc_list; | ||
635 | spinlock_t i_prealloc_lock; | ||
636 | |||
637 | /* ialloc */ | ||
638 | ext4_group_t i_last_alloc_group; | ||
639 | |||
640 | /* allocation reservation info for delalloc */ | ||
641 | unsigned int i_reserved_data_blocks; | ||
642 | unsigned int i_reserved_meta_blocks; | ||
643 | unsigned int i_allocated_meta_blocks; | ||
644 | unsigned short i_delalloc_reserved_flag; | ||
645 | |||
646 | /* on-disk additional length */ | ||
647 | __u16 i_extra_isize; | ||
648 | |||
649 | spinlock_t i_block_reservation_lock; | ||
650 | }; | ||
651 | |||
652 | /* | ||
519 | * File system states | 653 | * File system states |
520 | */ | 654 | */ |
521 | #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ | 655 | #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ |
@@ -560,6 +694,7 @@ do { \ | |||
560 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 694 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
561 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 695 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
562 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 696 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
697 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | ||
563 | 698 | ||
564 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 699 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
565 | #ifndef _LINUX_EXT2_FS_H | 700 | #ifndef _LINUX_EXT2_FS_H |
@@ -689,6 +824,137 @@ struct ext4_super_block { | |||
689 | }; | 824 | }; |
690 | 825 | ||
691 | #ifdef __KERNEL__ | 826 | #ifdef __KERNEL__ |
827 | /* | ||
828 | * fourth extended-fs super-block data in memory | ||
829 | */ | ||
830 | struct ext4_sb_info { | ||
831 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ | ||
832 | unsigned long s_inodes_per_block;/* Number of inodes per block */ | ||
833 | unsigned long s_blocks_per_group;/* Number of blocks in a group */ | ||
834 | unsigned long s_inodes_per_group;/* Number of inodes in a group */ | ||
835 | unsigned long s_itb_per_group; /* Number of inode table blocks per group */ | ||
836 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | ||
837 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | ||
838 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | ||
839 | unsigned long s_overhead_last; /* Last calculated overhead */ | ||
840 | unsigned long s_blocks_last; /* Last seen block count */ | ||
841 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | ||
842 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | ||
843 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ | ||
844 | struct buffer_head **s_group_desc; | ||
845 | unsigned long s_mount_opt; | ||
846 | ext4_fsblk_t s_sb_block; | ||
847 | uid_t s_resuid; | ||
848 | gid_t s_resgid; | ||
849 | unsigned short s_mount_state; | ||
850 | unsigned short s_pad; | ||
851 | int s_addr_per_block_bits; | ||
852 | int s_desc_per_block_bits; | ||
853 | int s_inode_size; | ||
854 | int s_first_ino; | ||
855 | unsigned int s_inode_readahead_blks; | ||
856 | spinlock_t s_next_gen_lock; | ||
857 | u32 s_next_generation; | ||
858 | u32 s_hash_seed[4]; | ||
859 | int s_def_hash_version; | ||
860 | int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ | ||
861 | struct percpu_counter s_freeblocks_counter; | ||
862 | struct percpu_counter s_freeinodes_counter; | ||
863 | struct percpu_counter s_dirs_counter; | ||
864 | struct percpu_counter s_dirtyblocks_counter; | ||
865 | struct blockgroup_lock *s_blockgroup_lock; | ||
866 | struct proc_dir_entry *s_proc; | ||
867 | struct kobject s_kobj; | ||
868 | struct completion s_kobj_unregister; | ||
869 | |||
870 | /* Journaling */ | ||
871 | struct inode *s_journal_inode; | ||
872 | struct journal_s *s_journal; | ||
873 | struct list_head s_orphan; | ||
874 | struct mutex s_orphan_lock; | ||
875 | struct mutex s_resize_lock; | ||
876 | unsigned long s_commit_interval; | ||
877 | u32 s_max_batch_time; | ||
878 | u32 s_min_batch_time; | ||
879 | struct block_device *journal_bdev; | ||
880 | #ifdef CONFIG_JBD2_DEBUG | ||
881 | struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ | ||
882 | wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ | ||
883 | #endif | ||
884 | #ifdef CONFIG_QUOTA | ||
885 | char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ | ||
886 | int s_jquota_fmt; /* Format of quota to use */ | ||
887 | #endif | ||
888 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ | ||
889 | struct rb_root system_blks; | ||
890 | |||
891 | #ifdef EXTENTS_STATS | ||
892 | /* ext4 extents stats */ | ||
893 | unsigned long s_ext_min; | ||
894 | unsigned long s_ext_max; | ||
895 | unsigned long s_depth_max; | ||
896 | spinlock_t s_ext_stats_lock; | ||
897 | unsigned long s_ext_blocks; | ||
898 | unsigned long s_ext_extents; | ||
899 | #endif | ||
900 | |||
901 | /* for buddy allocator */ | ||
902 | struct ext4_group_info ***s_group_info; | ||
903 | struct inode *s_buddy_cache; | ||
904 | long s_blocks_reserved; | ||
905 | spinlock_t s_reserve_lock; | ||
906 | spinlock_t s_md_lock; | ||
907 | tid_t s_last_transaction; | ||
908 | unsigned short *s_mb_offsets; | ||
909 | unsigned int *s_mb_maxs; | ||
910 | |||
911 | /* tunables */ | ||
912 | unsigned long s_stripe; | ||
913 | unsigned int s_mb_stream_request; | ||
914 | unsigned int s_mb_max_to_scan; | ||
915 | unsigned int s_mb_min_to_scan; | ||
916 | unsigned int s_mb_stats; | ||
917 | unsigned int s_mb_order2_reqs; | ||
918 | unsigned int s_mb_group_prealloc; | ||
919 | /* where last allocation was done - for stream allocation */ | ||
920 | unsigned long s_mb_last_group; | ||
921 | unsigned long s_mb_last_start; | ||
922 | |||
923 | /* history to debug policy */ | ||
924 | struct ext4_mb_history *s_mb_history; | ||
925 | int s_mb_history_cur; | ||
926 | int s_mb_history_max; | ||
927 | int s_mb_history_num; | ||
928 | spinlock_t s_mb_history_lock; | ||
929 | int s_mb_history_filter; | ||
930 | |||
931 | /* stats for buddy allocator */ | ||
932 | spinlock_t s_mb_pa_lock; | ||
933 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | ||
934 | atomic_t s_bal_success; /* we found long enough chunks */ | ||
935 | atomic_t s_bal_allocated; /* in blocks */ | ||
936 | atomic_t s_bal_ex_scanned; /* total extents scanned */ | ||
937 | atomic_t s_bal_goals; /* goal hits */ | ||
938 | atomic_t s_bal_breaks; /* too long searches */ | ||
939 | atomic_t s_bal_2orders; /* 2^order hits */ | ||
940 | spinlock_t s_bal_lock; | ||
941 | unsigned long s_mb_buddies_generated; | ||
942 | unsigned long long s_mb_generation_time; | ||
943 | atomic_t s_mb_lost_chunks; | ||
944 | atomic_t s_mb_preallocated; | ||
945 | atomic_t s_mb_discarded; | ||
946 | |||
947 | /* locality groups */ | ||
948 | struct ext4_locality_group *s_locality_groups; | ||
949 | |||
950 | /* for write statistics */ | ||
951 | unsigned long s_sectors_written_start; | ||
952 | u64 s_kbytes_written; | ||
953 | |||
954 | unsigned int s_log_groups_per_flex; | ||
955 | struct flex_groups *s_flex_groups; | ||
956 | }; | ||
957 | |||
692 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 958 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
693 | { | 959 | { |
694 | return sb->s_fs_info; | 960 | return sb->s_fs_info; |
@@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode) | |||
704 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 970 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
705 | } | 971 | } |
706 | 972 | ||
707 | |||
708 | static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | 973 | static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) |
709 | { | 974 | { |
710 | return ino == EXT4_ROOT_INO || | 975 | return ino == EXT4_ROOT_INO || |
@@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
1014 | ext4_group_t block_group, | 1279 | ext4_group_t block_group, |
1015 | struct buffer_head ** bh); | 1280 | struct buffer_head ** bh); |
1016 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1281 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
1282 | struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | ||
1283 | ext4_group_t block_group); | ||
1284 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, | ||
1285 | struct buffer_head *bh, | ||
1286 | ext4_group_t group, | ||
1287 | struct ext4_group_desc *desc); | ||
1288 | #define ext4_free_blocks_after_init(sb, group, desc) \ | ||
1289 | ext4_init_block_bitmap(sb, NULL, group, desc) | ||
1017 | 1290 | ||
1018 | /* dir.c */ | 1291 | /* dir.c */ |
1019 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1292 | extern int ext4_check_dir_entry(const char *, struct inode *, |
@@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); | |||
1038 | extern unsigned long ext4_count_free_inodes(struct super_block *); | 1311 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1039 | extern unsigned long ext4_count_dirs(struct super_block *); | 1312 | extern unsigned long ext4_count_dirs(struct super_block *); |
1040 | extern void ext4_check_inodes_bitmap(struct super_block *); | 1313 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1314 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | ||
1315 | struct buffer_head *bh, | ||
1316 | ext4_group_t group, | ||
1317 | struct ext4_group_desc *desc); | ||
1318 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | ||
1041 | 1319 | ||
1042 | /* mballoc.c */ | 1320 | /* mballoc.c */ |
1043 | extern long ext4_mb_stats; | 1321 | extern long ext4_mb_stats; |
@@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...) | |||
1123 | __attribute__ ((format (printf, 3, 4))); | 1401 | __attribute__ ((format (printf, 3, 4))); |
1124 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) | 1402 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) |
1125 | __attribute__ ((format (printf, 3, 4))); | 1403 | __attribute__ ((format (printf, 3, 4))); |
1404 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) | ||
1405 | __attribute__ ((format (printf, 3, 4))); | ||
1126 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, | 1406 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, |
1127 | const char *, const char *, ...) | 1407 | const char *, const char *, ...) |
1128 | __attribute__ ((format (printf, 4, 5))); | 1408 | __attribute__ ((format (printf, 4, 5))); |
@@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb, | |||
1161 | struct ext4_group_desc *bg, __u32 count); | 1441 | struct ext4_group_desc *bg, __u32 count); |
1162 | extern void ext4_itable_unused_set(struct super_block *sb, | 1442 | extern void ext4_itable_unused_set(struct super_block *sb, |
1163 | struct ext4_group_desc *bg, __u32 count); | 1443 | struct ext4_group_desc *bg, __u32 count); |
1444 | extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, | ||
1445 | struct ext4_group_desc *gdp); | ||
1446 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, | ||
1447 | struct ext4_group_desc *gdp); | ||
1164 | 1448 | ||
1165 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) | 1449 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) |
1166 | { | 1450 | { |
@@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | |||
1228 | return grp_info[indexv][indexh]; | 1512 | return grp_info[indexv][indexh]; |
1229 | } | 1513 | } |
1230 | 1514 | ||
1515 | /* | ||
1516 | * Reading s_groups_count requires using smp_rmb() afterwards. See | ||
1517 | * the locking protocol documented in the comments of ext4_group_add() | ||
1518 | * in resize.c | ||
1519 | */ | ||
1520 | static inline ext4_group_t ext4_get_groups_count(struct super_block *sb) | ||
1521 | { | ||
1522 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | ||
1523 | |||
1524 | smp_rmb(); | ||
1525 | return ngroups; | ||
1526 | } | ||
1231 | 1527 | ||
1232 | static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, | 1528 | static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, |
1233 | ext4_group_t block_group) | 1529 | ext4_group_t block_group) |
@@ -1283,33 +1579,25 @@ struct ext4_group_info { | |||
1283 | }; | 1579 | }; |
1284 | 1580 | ||
1285 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 1581 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
1286 | #define EXT4_GROUP_INFO_LOCKED_BIT 1 | ||
1287 | 1582 | ||
1288 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 1583 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
1289 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 1584 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
1290 | 1585 | ||
1291 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 1586 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, |
1587 | ext4_group_t group) | ||
1292 | { | 1588 | { |
1293 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | 1589 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); |
1294 | |||
1295 | bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
1296 | } | 1590 | } |
1297 | 1591 | ||
1298 | static inline void ext4_unlock_group(struct super_block *sb, | 1592 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
1299 | ext4_group_t group) | ||
1300 | { | 1593 | { |
1301 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | 1594 | spin_lock(ext4_group_lock_ptr(sb, group)); |
1302 | |||
1303 | bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
1304 | } | 1595 | } |
1305 | 1596 | ||
1306 | static inline int ext4_is_group_locked(struct super_block *sb, | 1597 | static inline void ext4_unlock_group(struct super_block *sb, |
1307 | ext4_group_t group) | 1598 | ext4_group_t group) |
1308 | { | 1599 | { |
1309 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | 1600 | spin_unlock(ext4_group_lock_ptr(sb, group)); |
1310 | |||
1311 | return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, | ||
1312 | &(grinfo->bb_state)); | ||
1313 | } | 1601 | } |
1314 | 1602 | ||
1315 | /* | 1603 | /* |
@@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations; | |||
1326 | /* namei.c */ | 1614 | /* namei.c */ |
1327 | extern const struct inode_operations ext4_dir_inode_operations; | 1615 | extern const struct inode_operations ext4_dir_inode_operations; |
1328 | extern const struct inode_operations ext4_special_inode_operations; | 1616 | extern const struct inode_operations ext4_special_inode_operations; |
1617 | extern struct dentry *ext4_get_parent(struct dentry *child); | ||
1329 | 1618 | ||
1330 | /* symlink.c */ | 1619 | /* symlink.c */ |
1331 | extern const struct inode_operations ext4_symlink_inode_operations; | 1620 | extern const struct inode_operations ext4_symlink_inode_operations; |
1332 | extern const struct inode_operations ext4_fast_symlink_inode_operations; | 1621 | extern const struct inode_operations ext4_fast_symlink_inode_operations; |
1333 | 1622 | ||
1623 | /* block_validity */ | ||
1624 | extern void ext4_release_system_zone(struct super_block *sb); | ||
1625 | extern int ext4_setup_system_zone(struct super_block *sb); | ||
1626 | extern int __init init_ext4_system_zone(void); | ||
1627 | extern void exit_ext4_system_zone(void); | ||
1628 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, | ||
1629 | ext4_fsblk_t start_blk, | ||
1630 | unsigned int count); | ||
1631 | |||
1334 | /* extents.c */ | 1632 | /* extents.c */ |
1335 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 1633 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
1336 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1634 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
@@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | |||
1338 | int chunk); | 1636 | int chunk); |
1339 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1637 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
1340 | ext4_lblk_t iblock, unsigned int max_blocks, | 1638 | ext4_lblk_t iblock, unsigned int max_blocks, |
1341 | struct buffer_head *bh_result, | 1639 | struct buffer_head *bh_result, int flags); |
1342 | int create, int extend_disksize); | ||
1343 | extern void ext4_ext_truncate(struct inode *); | 1640 | extern void ext4_ext_truncate(struct inode *); |
1344 | extern void ext4_ext_init(struct super_block *); | 1641 | extern void ext4_ext_init(struct super_block *); |
1345 | extern void ext4_ext_release(struct super_block *); | 1642 | extern void ext4_ext_release(struct super_block *); |
1346 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | 1643 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, |
1347 | loff_t len); | 1644 | loff_t len); |
1348 | extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, | 1645 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, |
1349 | sector_t block, unsigned int max_blocks, | 1646 | sector_t block, unsigned int max_blocks, |
1350 | struct buffer_head *bh, int create, | 1647 | struct buffer_head *bh, int flags); |
1351 | int extend_disksize, int flag); | ||
1352 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1648 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1353 | __u64 start, __u64 len); | 1649 | __u64 start, __u64 len); |
1354 | 1650 | ||
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h deleted file mode 100644 index 4ce2187123aa..000000000000 --- a/fs/ext4/ext4_i.h +++ /dev/null | |||
@@ -1,140 +0,0 @@ | |||
1 | /* | ||
2 | * ext4_i.h | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/include/linux/minix_fs_i.h | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | */ | ||
15 | |||
16 | #ifndef _EXT4_I | ||
17 | #define _EXT4_I | ||
18 | |||
19 | #include <linux/rwsem.h> | ||
20 | #include <linux/rbtree.h> | ||
21 | #include <linux/seqlock.h> | ||
22 | #include <linux/mutex.h> | ||
23 | |||
24 | /* data type for block offset of block group */ | ||
25 | typedef int ext4_grpblk_t; | ||
26 | |||
27 | /* data type for filesystem-wide blocks number */ | ||
28 | typedef unsigned long long ext4_fsblk_t; | ||
29 | |||
30 | /* data type for file logical block number */ | ||
31 | typedef __u32 ext4_lblk_t; | ||
32 | |||
33 | /* data type for block group number */ | ||
34 | typedef unsigned int ext4_group_t; | ||
35 | |||
36 | /* | ||
37 | * storage for cached extent | ||
38 | */ | ||
39 | struct ext4_ext_cache { | ||
40 | ext4_fsblk_t ec_start; | ||
41 | ext4_lblk_t ec_block; | ||
42 | __u32 ec_len; /* must be 32bit to return holes */ | ||
43 | __u32 ec_type; | ||
44 | }; | ||
45 | |||
46 | /* | ||
47 | * fourth extended file system inode data in memory | ||
48 | */ | ||
49 | struct ext4_inode_info { | ||
50 | __le32 i_data[15]; /* unconverted */ | ||
51 | __u32 i_flags; | ||
52 | ext4_fsblk_t i_file_acl; | ||
53 | __u32 i_dtime; | ||
54 | |||
55 | /* | ||
56 | * i_block_group is the number of the block group which contains | ||
57 | * this file's inode. Constant across the lifetime of the inode, | ||
58 | * it is ued for making block allocation decisions - we try to | ||
59 | * place a file's data blocks near its inode block, and new inodes | ||
60 | * near to their parent directory's inode. | ||
61 | */ | ||
62 | ext4_group_t i_block_group; | ||
63 | __u32 i_state; /* Dynamic state flags for ext4 */ | ||
64 | |||
65 | ext4_lblk_t i_dir_start_lookup; | ||
66 | #ifdef CONFIG_EXT4_FS_XATTR | ||
67 | /* | ||
68 | * Extended attributes can be read independently of the main file | ||
69 | * data. Taking i_mutex even when reading would cause contention | ||
70 | * between readers of EAs and writers of regular file data, so | ||
71 | * instead we synchronize on xattr_sem when reading or changing | ||
72 | * EAs. | ||
73 | */ | ||
74 | struct rw_semaphore xattr_sem; | ||
75 | #endif | ||
76 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | ||
77 | struct posix_acl *i_acl; | ||
78 | struct posix_acl *i_default_acl; | ||
79 | #endif | ||
80 | |||
81 | struct list_head i_orphan; /* unlinked but open inodes */ | ||
82 | |||
83 | /* | ||
84 | * i_disksize keeps track of what the inode size is ON DISK, not | ||
85 | * in memory. During truncate, i_size is set to the new size by | ||
86 | * the VFS prior to calling ext4_truncate(), but the filesystem won't | ||
87 | * set i_disksize to 0 until the truncate is actually under way. | ||
88 | * | ||
89 | * The intent is that i_disksize always represents the blocks which | ||
90 | * are used by this file. This allows recovery to restart truncate | ||
91 | * on orphans if we crash during truncate. We actually write i_disksize | ||
92 | * into the on-disk inode when writing inodes out, instead of i_size. | ||
93 | * | ||
94 | * The only time when i_disksize and i_size may be different is when | ||
95 | * a truncate is in progress. The only things which change i_disksize | ||
96 | * are ext4_get_block (growth) and ext4_truncate (shrinkth). | ||
97 | */ | ||
98 | loff_t i_disksize; | ||
99 | |||
100 | /* | ||
101 | * i_data_sem is for serialising ext4_truncate() against | ||
102 | * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's | ||
103 | * data tree are chopped off during truncate. We can't do that in | ||
104 | * ext4 because whenever we perform intermediate commits during | ||
105 | * truncate, the inode and all the metadata blocks *must* be in a | ||
106 | * consistent state which allows truncation of the orphans to restart | ||
107 | * during recovery. Hence we must fix the get_block-vs-truncate race | ||
108 | * by other means, so we have i_data_sem. | ||
109 | */ | ||
110 | struct rw_semaphore i_data_sem; | ||
111 | struct inode vfs_inode; | ||
112 | struct jbd2_inode jinode; | ||
113 | |||
114 | struct ext4_ext_cache i_cached_extent; | ||
115 | /* | ||
116 | * File creation time. Its function is same as that of | ||
117 | * struct timespec i_{a,c,m}time in the generic inode. | ||
118 | */ | ||
119 | struct timespec i_crtime; | ||
120 | |||
121 | /* mballoc */ | ||
122 | struct list_head i_prealloc_list; | ||
123 | spinlock_t i_prealloc_lock; | ||
124 | |||
125 | /* ialloc */ | ||
126 | ext4_group_t i_last_alloc_group; | ||
127 | |||
128 | /* allocation reservation info for delalloc */ | ||
129 | unsigned int i_reserved_data_blocks; | ||
130 | unsigned int i_reserved_meta_blocks; | ||
131 | unsigned int i_allocated_meta_blocks; | ||
132 | unsigned short i_delalloc_reserved_flag; | ||
133 | |||
134 | /* on-disk additional length */ | ||
135 | __u16 i_extra_isize; | ||
136 | |||
137 | spinlock_t i_block_reservation_lock; | ||
138 | }; | ||
139 | |||
140 | #endif /* _EXT4_I */ | ||
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h deleted file mode 100644 index 57b71fefbccf..000000000000 --- a/fs/ext4/ext4_sb.h +++ /dev/null | |||
@@ -1,161 +0,0 @@ | |||
1 | /* | ||
2 | * ext4_sb.h | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/include/linux/minix_fs_sb.h | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | */ | ||
15 | |||
16 | #ifndef _EXT4_SB | ||
17 | #define _EXT4_SB | ||
18 | |||
19 | #ifdef __KERNEL__ | ||
20 | #include <linux/timer.h> | ||
21 | #include <linux/wait.h> | ||
22 | #include <linux/blockgroup_lock.h> | ||
23 | #include <linux/percpu_counter.h> | ||
24 | #endif | ||
25 | #include <linux/rbtree.h> | ||
26 | |||
27 | /* | ||
28 | * fourth extended-fs super-block data in memory | ||
29 | */ | ||
30 | struct ext4_sb_info { | ||
31 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ | ||
32 | unsigned long s_inodes_per_block;/* Number of inodes per block */ | ||
33 | unsigned long s_blocks_per_group;/* Number of blocks in a group */ | ||
34 | unsigned long s_inodes_per_group;/* Number of inodes in a group */ | ||
35 | unsigned long s_itb_per_group; /* Number of inode table blocks per group */ | ||
36 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | ||
37 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | ||
38 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | ||
39 | unsigned long s_overhead_last; /* Last calculated overhead */ | ||
40 | unsigned long s_blocks_last; /* Last seen block count */ | ||
41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | ||
42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | ||
43 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ | ||
44 | struct buffer_head **s_group_desc; | ||
45 | unsigned long s_mount_opt; | ||
46 | ext4_fsblk_t s_sb_block; | ||
47 | uid_t s_resuid; | ||
48 | gid_t s_resgid; | ||
49 | unsigned short s_mount_state; | ||
50 | unsigned short s_pad; | ||
51 | int s_addr_per_block_bits; | ||
52 | int s_desc_per_block_bits; | ||
53 | int s_inode_size; | ||
54 | int s_first_ino; | ||
55 | unsigned int s_inode_readahead_blks; | ||
56 | spinlock_t s_next_gen_lock; | ||
57 | u32 s_next_generation; | ||
58 | u32 s_hash_seed[4]; | ||
59 | int s_def_hash_version; | ||
60 | int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ | ||
61 | struct percpu_counter s_freeblocks_counter; | ||
62 | struct percpu_counter s_freeinodes_counter; | ||
63 | struct percpu_counter s_dirs_counter; | ||
64 | struct percpu_counter s_dirtyblocks_counter; | ||
65 | struct blockgroup_lock *s_blockgroup_lock; | ||
66 | struct proc_dir_entry *s_proc; | ||
67 | struct kobject s_kobj; | ||
68 | struct completion s_kobj_unregister; | ||
69 | |||
70 | /* Journaling */ | ||
71 | struct inode *s_journal_inode; | ||
72 | struct journal_s *s_journal; | ||
73 | struct list_head s_orphan; | ||
74 | unsigned long s_commit_interval; | ||
75 | u32 s_max_batch_time; | ||
76 | u32 s_min_batch_time; | ||
77 | struct block_device *journal_bdev; | ||
78 | #ifdef CONFIG_JBD2_DEBUG | ||
79 | struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ | ||
80 | wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ | ||
81 | #endif | ||
82 | #ifdef CONFIG_QUOTA | ||
83 | char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ | ||
84 | int s_jquota_fmt; /* Format of quota to use */ | ||
85 | #endif | ||
86 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ | ||
87 | |||
88 | #ifdef EXTENTS_STATS | ||
89 | /* ext4 extents stats */ | ||
90 | unsigned long s_ext_min; | ||
91 | unsigned long s_ext_max; | ||
92 | unsigned long s_depth_max; | ||
93 | spinlock_t s_ext_stats_lock; | ||
94 | unsigned long s_ext_blocks; | ||
95 | unsigned long s_ext_extents; | ||
96 | #endif | ||
97 | |||
98 | /* for buddy allocator */ | ||
99 | struct ext4_group_info ***s_group_info; | ||
100 | struct inode *s_buddy_cache; | ||
101 | long s_blocks_reserved; | ||
102 | spinlock_t s_reserve_lock; | ||
103 | spinlock_t s_md_lock; | ||
104 | tid_t s_last_transaction; | ||
105 | unsigned short *s_mb_offsets; | ||
106 | unsigned int *s_mb_maxs; | ||
107 | |||
108 | /* tunables */ | ||
109 | unsigned long s_stripe; | ||
110 | unsigned int s_mb_stream_request; | ||
111 | unsigned int s_mb_max_to_scan; | ||
112 | unsigned int s_mb_min_to_scan; | ||
113 | unsigned int s_mb_stats; | ||
114 | unsigned int s_mb_order2_reqs; | ||
115 | unsigned int s_mb_group_prealloc; | ||
116 | /* where last allocation was done - for stream allocation */ | ||
117 | unsigned long s_mb_last_group; | ||
118 | unsigned long s_mb_last_start; | ||
119 | |||
120 | /* history to debug policy */ | ||
121 | struct ext4_mb_history *s_mb_history; | ||
122 | int s_mb_history_cur; | ||
123 | int s_mb_history_max; | ||
124 | int s_mb_history_num; | ||
125 | spinlock_t s_mb_history_lock; | ||
126 | int s_mb_history_filter; | ||
127 | |||
128 | /* stats for buddy allocator */ | ||
129 | spinlock_t s_mb_pa_lock; | ||
130 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | ||
131 | atomic_t s_bal_success; /* we found long enough chunks */ | ||
132 | atomic_t s_bal_allocated; /* in blocks */ | ||
133 | atomic_t s_bal_ex_scanned; /* total extents scanned */ | ||
134 | atomic_t s_bal_goals; /* goal hits */ | ||
135 | atomic_t s_bal_breaks; /* too long searches */ | ||
136 | atomic_t s_bal_2orders; /* 2^order hits */ | ||
137 | spinlock_t s_bal_lock; | ||
138 | unsigned long s_mb_buddies_generated; | ||
139 | unsigned long long s_mb_generation_time; | ||
140 | atomic_t s_mb_lost_chunks; | ||
141 | atomic_t s_mb_preallocated; | ||
142 | atomic_t s_mb_discarded; | ||
143 | |||
144 | /* locality groups */ | ||
145 | struct ext4_locality_group *s_locality_groups; | ||
146 | |||
147 | /* for write statistics */ | ||
148 | unsigned long s_sectors_written_start; | ||
149 | u64 s_kbytes_written; | ||
150 | |||
151 | unsigned int s_log_groups_per_flex; | ||
152 | struct flex_groups *s_flex_groups; | ||
153 | }; | ||
154 | |||
155 | static inline spinlock_t * | ||
156 | sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group) | ||
157 | { | ||
158 | return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); | ||
159 | } | ||
160 | |||
161 | #endif /* _EXT4_SB */ | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e3a55eb8b26a..2593f748c3a4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
326 | 326 | ||
327 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | 327 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) |
328 | { | 328 | { |
329 | ext4_fsblk_t block = ext_pblock(ext), valid_block; | 329 | ext4_fsblk_t block = ext_pblock(ext); |
330 | int len = ext4_ext_get_actual_len(ext); | 330 | int len = ext4_ext_get_actual_len(ext); |
331 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
332 | 331 | ||
333 | valid_block = le32_to_cpu(es->s_first_data_block) + | 332 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
334 | EXT4_SB(inode->i_sb)->s_gdb_count; | ||
335 | if (unlikely(block <= valid_block || | ||
336 | ((block + len) > ext4_blocks_count(es)))) | ||
337 | return 0; | ||
338 | else | ||
339 | return 1; | ||
340 | } | 333 | } |
341 | 334 | ||
342 | static int ext4_valid_extent_idx(struct inode *inode, | 335 | static int ext4_valid_extent_idx(struct inode *inode, |
343 | struct ext4_extent_idx *ext_idx) | 336 | struct ext4_extent_idx *ext_idx) |
344 | { | 337 | { |
345 | ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; | 338 | ext4_fsblk_t block = idx_pblock(ext_idx); |
346 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
347 | 339 | ||
348 | valid_block = le32_to_cpu(es->s_first_data_block) + | 340 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); |
349 | EXT4_SB(inode->i_sb)->s_gdb_count; | ||
350 | if (unlikely(block <= valid_block || | ||
351 | (block >= ext4_blocks_count(es)))) | ||
352 | return 0; | ||
353 | else | ||
354 | return 1; | ||
355 | } | 341 | } |
356 | 342 | ||
357 | static int ext4_valid_extent_entries(struct inode *inode, | 343 | static int ext4_valid_extent_entries(struct inode *inode, |
@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2097 | ex = EXT_LAST_EXTENT(eh); | 2083 | ex = EXT_LAST_EXTENT(eh); |
2098 | 2084 | ||
2099 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2085 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2100 | if (ext4_ext_is_uninitialized(ex)) | ||
2101 | uninitialized = 1; | ||
2102 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2086 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2103 | 2087 | ||
2104 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2088 | while (ex >= EXT_FIRST_EXTENT(eh) && |
2105 | ex_ee_block + ex_ee_len > start) { | 2089 | ex_ee_block + ex_ee_len > start) { |
2090 | |||
2091 | if (ext4_ext_is_uninitialized(ex)) | ||
2092 | uninitialized = 1; | ||
2093 | else | ||
2094 | uninitialized = 0; | ||
2095 | |||
2106 | ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); | 2096 | ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); |
2107 | path[depth].p_ext = ex; | 2097 | path[depth].p_ext = ex; |
2108 | 2098 | ||
@@ -2784,7 +2774,7 @@ fix_extent_len: | |||
2784 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 2774 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
2785 | ext4_lblk_t iblock, | 2775 | ext4_lblk_t iblock, |
2786 | unsigned int max_blocks, struct buffer_head *bh_result, | 2776 | unsigned int max_blocks, struct buffer_head *bh_result, |
2787 | int create, int extend_disksize) | 2777 | int flags) |
2788 | { | 2778 | { |
2789 | struct ext4_ext_path *path = NULL; | 2779 | struct ext4_ext_path *path = NULL; |
2790 | struct ext4_extent_header *eh; | 2780 | struct ext4_extent_header *eh; |
@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2793 | int err = 0, depth, ret, cache_type; | 2783 | int err = 0, depth, ret, cache_type; |
2794 | unsigned int allocated = 0; | 2784 | unsigned int allocated = 0; |
2795 | struct ext4_allocation_request ar; | 2785 | struct ext4_allocation_request ar; |
2796 | loff_t disksize; | ||
2797 | 2786 | ||
2798 | __clear_bit(BH_New, &bh_result->b_state); | 2787 | __clear_bit(BH_New, &bh_result->b_state); |
2799 | ext_debug("blocks %u/%u requested for inode %u\n", | 2788 | ext_debug("blocks %u/%u requested for inode %u\n", |
@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2803 | cache_type = ext4_ext_in_cache(inode, iblock, &newex); | 2792 | cache_type = ext4_ext_in_cache(inode, iblock, &newex); |
2804 | if (cache_type) { | 2793 | if (cache_type) { |
2805 | if (cache_type == EXT4_EXT_CACHE_GAP) { | 2794 | if (cache_type == EXT4_EXT_CACHE_GAP) { |
2806 | if (!create) { | 2795 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
2807 | /* | 2796 | /* |
2808 | * block isn't allocated yet and | 2797 | * block isn't allocated yet and |
2809 | * user doesn't want to allocate it | 2798 | * user doesn't want to allocate it |
@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2869 | EXT4_EXT_CACHE_EXTENT); | 2858 | EXT4_EXT_CACHE_EXTENT); |
2870 | goto out; | 2859 | goto out; |
2871 | } | 2860 | } |
2872 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) | 2861 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) |
2873 | goto out; | 2862 | goto out; |
2874 | if (!create) { | 2863 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
2864 | if (allocated > max_blocks) | ||
2865 | allocated = max_blocks; | ||
2875 | /* | 2866 | /* |
2876 | * We have blocks reserved already. We | 2867 | * We have blocks reserved already. We |
2877 | * return allocated blocks so that delalloc | 2868 | * return allocated blocks so that delalloc |
@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2879 | * the buffer head will be unmapped so that | 2870 | * the buffer head will be unmapped so that |
2880 | * a read from the block returns 0s. | 2871 | * a read from the block returns 0s. |
2881 | */ | 2872 | */ |
2882 | if (allocated > max_blocks) | ||
2883 | allocated = max_blocks; | ||
2884 | set_buffer_unwritten(bh_result); | 2873 | set_buffer_unwritten(bh_result); |
2885 | bh_result->b_bdev = inode->i_sb->s_bdev; | 2874 | bh_result->b_bdev = inode->i_sb->s_bdev; |
2886 | bh_result->b_blocknr = newblock; | 2875 | bh_result->b_blocknr = newblock; |
@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2903 | * requested block isn't allocated yet; | 2892 | * requested block isn't allocated yet; |
2904 | * we couldn't try to create block if create flag is zero | 2893 | * we couldn't try to create block if create flag is zero |
2905 | */ | 2894 | */ |
2906 | if (!create) { | 2895 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
2907 | /* | 2896 | /* |
2908 | * put just found gap into cache to speed up | 2897 | * put just found gap into cache to speed up |
2909 | * subsequent requests | 2898 | * subsequent requests |
@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2932 | * EXT_UNINIT_MAX_LEN. | 2921 | * EXT_UNINIT_MAX_LEN. |
2933 | */ | 2922 | */ |
2934 | if (max_blocks > EXT_INIT_MAX_LEN && | 2923 | if (max_blocks > EXT_INIT_MAX_LEN && |
2935 | create != EXT4_CREATE_UNINITIALIZED_EXT) | 2924 | !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) |
2936 | max_blocks = EXT_INIT_MAX_LEN; | 2925 | max_blocks = EXT_INIT_MAX_LEN; |
2937 | else if (max_blocks > EXT_UNINIT_MAX_LEN && | 2926 | else if (max_blocks > EXT_UNINIT_MAX_LEN && |
2938 | create == EXT4_CREATE_UNINITIALIZED_EXT) | 2927 | (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) |
2939 | max_blocks = EXT_UNINIT_MAX_LEN; | 2928 | max_blocks = EXT_UNINIT_MAX_LEN; |
2940 | 2929 | ||
2941 | /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ | 2930 | /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ |
@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2966 | /* try to insert new extent into found leaf and return */ | 2955 | /* try to insert new extent into found leaf and return */ |
2967 | ext4_ext_store_pblock(&newex, newblock); | 2956 | ext4_ext_store_pblock(&newex, newblock); |
2968 | newex.ee_len = cpu_to_le16(ar.len); | 2957 | newex.ee_len = cpu_to_le16(ar.len); |
2969 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ | 2958 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */ |
2970 | ext4_ext_mark_uninitialized(&newex); | 2959 | ext4_ext_mark_uninitialized(&newex); |
2971 | err = ext4_ext_insert_extent(handle, inode, path, &newex); | 2960 | err = ext4_ext_insert_extent(handle, inode, path, &newex); |
2972 | if (err) { | 2961 | if (err) { |
@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2983 | newblock = ext_pblock(&newex); | 2972 | newblock = ext_pblock(&newex); |
2984 | allocated = ext4_ext_get_actual_len(&newex); | 2973 | allocated = ext4_ext_get_actual_len(&newex); |
2985 | outnew: | 2974 | outnew: |
2986 | if (extend_disksize) { | ||
2987 | disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits; | ||
2988 | if (disksize > i_size_read(inode)) | ||
2989 | disksize = i_size_read(inode); | ||
2990 | if (disksize > EXT4_I(inode)->i_disksize) | ||
2991 | EXT4_I(inode)->i_disksize = disksize; | ||
2992 | } | ||
2993 | |||
2994 | set_buffer_new(bh_result); | 2975 | set_buffer_new(bh_result); |
2995 | 2976 | ||
2996 | /* Cache only when it is _not_ an uninitialized extent */ | 2977 | /* Cache only when it is _not_ an uninitialized extent */ |
2997 | if (create != EXT4_CREATE_UNINITIALIZED_EXT) | 2978 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) |
2998 | ext4_ext_put_in_cache(inode, iblock, allocated, newblock, | 2979 | ext4_ext_put_in_cache(inode, iblock, allocated, newblock, |
2999 | EXT4_EXT_CACHE_EXTENT); | 2980 | EXT4_EXT_CACHE_EXTENT); |
3000 | out: | 2981 | out: |
@@ -3150,9 +3131,10 @@ retry: | |||
3150 | ret = PTR_ERR(handle); | 3131 | ret = PTR_ERR(handle); |
3151 | break; | 3132 | break; |
3152 | } | 3133 | } |
3153 | ret = ext4_get_blocks_wrap(handle, inode, block, | 3134 | map_bh.b_state = 0; |
3154 | max_blocks, &map_bh, | 3135 | ret = ext4_get_blocks(handle, inode, block, |
3155 | EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); | 3136 | max_blocks, &map_bh, |
3137 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); | ||
3156 | if (ret <= 0) { | 3138 | if (ret <= 0) { |
3157 | #ifdef EXT4FS_DEBUG | 3139 | #ifdef EXT4FS_DEBUG |
3158 | WARN_ON(ret <= 0); | 3140 | WARN_ON(ret <= 0); |
@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | |||
3195 | void *data) | 3177 | void *data) |
3196 | { | 3178 | { |
3197 | struct fiemap_extent_info *fieinfo = data; | 3179 | struct fiemap_extent_info *fieinfo = data; |
3198 | unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; | 3180 | unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; |
3199 | __u64 logical; | 3181 | __u64 logical; |
3200 | __u64 physical; | 3182 | __u64 physical; |
3201 | __u64 length; | 3183 | __u64 length; |
@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | |||
3242 | * | 3224 | * |
3243 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | 3225 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK |
3244 | */ | 3226 | */ |
3245 | if (logical + length - 1 == EXT_MAX_BLOCK || | 3227 | if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || |
3246 | ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) | 3228 | newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { |
3229 | loff_t size = i_size_read(inode); | ||
3230 | loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); | ||
3231 | |||
3247 | flags |= FIEMAP_EXTENT_LAST; | 3232 | flags |= FIEMAP_EXTENT_LAST; |
3233 | if ((flags & FIEMAP_EXTENT_DELALLOC) && | ||
3234 | logical+length > size) | ||
3235 | length = (size - logical + bs - 1) & ~(bs-1); | ||
3236 | } | ||
3248 | 3237 | ||
3249 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | 3238 | error = fiemap_fill_next_extent(fieinfo, logical, physical, |
3250 | length, flags); | 3239 | length, flags); |
@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3318 | * Walk the extent tree gathering extent information. | 3307 | * Walk the extent tree gathering extent information. |
3319 | * ext4_ext_fiemap_cb will push extents back to user. | 3308 | * ext4_ext_fiemap_cb will push extents back to user. |
3320 | */ | 3309 | */ |
3321 | down_write(&EXT4_I(inode)->i_data_sem); | 3310 | down_read(&EXT4_I(inode)->i_data_sem); |
3322 | error = ext4_ext_walk_space(inode, start_blk, len_blks, | 3311 | error = ext4_ext_walk_space(inode, start_blk, len_blks, |
3323 | ext4_ext_fiemap_cb, fieinfo); | 3312 | ext4_ext_fiemap_cb, fieinfo); |
3324 | up_write(&EXT4_I(inode)->i_data_sem); | 3313 | up_read(&EXT4_I(inode)->i_data_sem); |
3325 | } | 3314 | } |
3326 | 3315 | ||
3327 | return error; | 3316 | return error; |
diff --git a/fs/ext4/group.h b/fs/ext4/group.h deleted file mode 100644 index c2c0a8d06d0e..000000000000 --- a/fs/ext4/group.h +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/group.h | ||
3 | * | ||
4 | * Copyright (C) 2007 Cluster File Systems, Inc | ||
5 | * | ||
6 | * Author: Andreas Dilger <adilger@clusterfs.com> | ||
7 | */ | ||
8 | |||
9 | #ifndef _LINUX_EXT4_GROUP_H | ||
10 | #define _LINUX_EXT4_GROUP_H | ||
11 | |||
12 | extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, | ||
13 | struct ext4_group_desc *gdp); | ||
14 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, | ||
15 | struct ext4_group_desc *gdp); | ||
16 | struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | ||
17 | ext4_group_t block_group); | ||
18 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, | ||
19 | struct buffer_head *bh, | ||
20 | ext4_group_t group, | ||
21 | struct ext4_group_desc *desc); | ||
22 | #define ext4_free_blocks_after_init(sb, group, desc) \ | ||
23 | ext4_init_block_bitmap(sb, NULL, group, desc) | ||
24 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | ||
25 | struct buffer_head *bh, | ||
26 | ext4_group_t group, | ||
27 | struct ext4_group_desc *desc); | ||
28 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | ||
29 | #endif /* _LINUX_EXT4_GROUP_H */ | ||
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f18e0a08a6b5..3743bd849bce 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include "ext4_jbd2.h" | 27 | #include "ext4_jbd2.h" |
28 | #include "xattr.h" | 28 | #include "xattr.h" |
29 | #include "acl.h" | 29 | #include "acl.h" |
30 | #include "group.h" | ||
31 | 30 | ||
32 | /* | 31 | /* |
33 | * ialloc.c contains the inodes allocation and deallocation routines | 32 | * ialloc.c contains the inodes allocation and deallocation routines |
@@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
123 | unlock_buffer(bh); | 122 | unlock_buffer(bh); |
124 | return bh; | 123 | return bh; |
125 | } | 124 | } |
126 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 125 | ext4_lock_group(sb, block_group); |
127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 126 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 127 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
129 | set_bitmap_uptodate(bh); | 128 | set_bitmap_uptodate(bh); |
130 | set_buffer_uptodate(bh); | 129 | set_buffer_uptodate(bh); |
131 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 130 | ext4_unlock_group(sb, block_group); |
132 | unlock_buffer(bh); | 131 | unlock_buffer(bh); |
133 | return bh; | 132 | return bh; |
134 | } | 133 | } |
135 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 134 | ext4_unlock_group(sb, block_group); |
136 | if (buffer_uptodate(bh)) { | 135 | if (buffer_uptodate(bh)) { |
137 | /* | 136 | /* |
138 | * if not uninit if bh is uptodate, | 137 | * if not uninit if bh is uptodate, |
@@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
247 | goto error_return; | 246 | goto error_return; |
248 | 247 | ||
249 | /* Ok, now we can actually update the inode bitmaps.. */ | 248 | /* Ok, now we can actually update the inode bitmaps.. */ |
250 | spin_lock(sb_bgl_lock(sbi, block_group)); | 249 | cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), |
251 | cleared = ext4_clear_bit(bit, bitmap_bh->b_data); | 250 | bit, bitmap_bh->b_data); |
252 | spin_unlock(sb_bgl_lock(sbi, block_group)); | ||
253 | if (!cleared) | 251 | if (!cleared) |
254 | ext4_error(sb, "ext4_free_inode", | 252 | ext4_error(sb, "ext4_free_inode", |
255 | "bit already cleared for inode %lu", ino); | 253 | "bit already cleared for inode %lu", ino); |
@@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
261 | if (fatal) goto error_return; | 259 | if (fatal) goto error_return; |
262 | 260 | ||
263 | if (gdp) { | 261 | if (gdp) { |
264 | spin_lock(sb_bgl_lock(sbi, block_group)); | 262 | ext4_lock_group(sb, block_group); |
265 | count = ext4_free_inodes_count(sb, gdp) + 1; | 263 | count = ext4_free_inodes_count(sb, gdp) + 1; |
266 | ext4_free_inodes_set(sb, gdp, count); | 264 | ext4_free_inodes_set(sb, gdp, count); |
267 | if (is_directory) { | 265 | if (is_directory) { |
@@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
277 | } | 275 | } |
278 | gdp->bg_checksum = ext4_group_desc_csum(sbi, | 276 | gdp->bg_checksum = ext4_group_desc_csum(sbi, |
279 | block_group, gdp); | 277 | block_group, gdp); |
280 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 278 | ext4_unlock_group(sb, block_group); |
281 | percpu_counter_inc(&sbi->s_freeinodes_counter); | 279 | percpu_counter_inc(&sbi->s_freeinodes_counter); |
282 | if (is_directory) | 280 | if (is_directory) |
283 | percpu_counter_dec(&sbi->s_dirs_counter); | 281 | percpu_counter_dec(&sbi->s_dirs_counter); |
@@ -316,7 +314,7 @@ error_return: | |||
316 | static int find_group_dir(struct super_block *sb, struct inode *parent, | 314 | static int find_group_dir(struct super_block *sb, struct inode *parent, |
317 | ext4_group_t *best_group) | 315 | ext4_group_t *best_group) |
318 | { | 316 | { |
319 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 317 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
320 | unsigned int freei, avefreei; | 318 | unsigned int freei, avefreei; |
321 | struct ext4_group_desc *desc, *best_desc = NULL; | 319 | struct ext4_group_desc *desc, *best_desc = NULL; |
322 | ext4_group_t group; | 320 | ext4_group_t group; |
@@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, | |||
349 | { | 347 | { |
350 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 348 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
351 | struct ext4_group_desc *desc; | 349 | struct ext4_group_desc *desc; |
352 | struct buffer_head *bh; | ||
353 | struct flex_groups *flex_group = sbi->s_flex_groups; | 350 | struct flex_groups *flex_group = sbi->s_flex_groups; |
354 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 351 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
355 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); | 352 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); |
356 | ext4_group_t ngroups = sbi->s_groups_count; | 353 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
357 | int flex_size = ext4_flex_bg_size(sbi); | 354 | int flex_size = ext4_flex_bg_size(sbi); |
358 | ext4_group_t best_flex = parent_fbg_group; | 355 | ext4_group_t best_flex = parent_fbg_group; |
359 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; | 356 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; |
@@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, | |||
362 | ext4_group_t n_fbg_groups; | 359 | ext4_group_t n_fbg_groups; |
363 | ext4_group_t i; | 360 | ext4_group_t i; |
364 | 361 | ||
365 | n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> | 362 | n_fbg_groups = (ngroups + flex_size - 1) >> |
366 | sbi->s_log_groups_per_flex; | 363 | sbi->s_log_groups_per_flex; |
367 | 364 | ||
368 | find_close_to_parent: | 365 | find_close_to_parent: |
@@ -404,7 +401,7 @@ find_close_to_parent: | |||
404 | found_flexbg: | 401 | found_flexbg: |
405 | for (i = best_flex * flex_size; i < ngroups && | 402 | for (i = best_flex * flex_size; i < ngroups && |
406 | i < (best_flex + 1) * flex_size; i++) { | 403 | i < (best_flex + 1) * flex_size; i++) { |
407 | desc = ext4_get_group_desc(sb, i, &bh); | 404 | desc = ext4_get_group_desc(sb, i, NULL); |
408 | if (ext4_free_inodes_count(sb, desc)) { | 405 | if (ext4_free_inodes_count(sb, desc)) { |
409 | *best_group = i; | 406 | *best_group = i; |
410 | goto out; | 407 | goto out; |
@@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
478 | { | 475 | { |
479 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 476 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
480 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 477 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
481 | ext4_group_t ngroups = sbi->s_groups_count; | 478 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); |
482 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 479 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
483 | unsigned int freei, avefreei; | 480 | unsigned int freei, avefreei; |
484 | ext4_fsblk_t freeb, avefreeb; | 481 | ext4_fsblk_t freeb, avefreeb; |
485 | unsigned int ndirs; | 482 | unsigned int ndirs; |
486 | int max_dirs, min_inodes; | 483 | int max_dirs, min_inodes; |
487 | ext4_grpblk_t min_blocks; | 484 | ext4_grpblk_t min_blocks; |
488 | ext4_group_t i, grp, g; | 485 | ext4_group_t i, grp, g, ngroups; |
489 | struct ext4_group_desc *desc; | 486 | struct ext4_group_desc *desc; |
490 | struct orlov_stats stats; | 487 | struct orlov_stats stats; |
491 | int flex_size = ext4_flex_bg_size(sbi); | 488 | int flex_size = ext4_flex_bg_size(sbi); |
492 | 489 | ||
490 | ngroups = real_ngroups; | ||
493 | if (flex_size > 1) { | 491 | if (flex_size > 1) { |
494 | ngroups = (ngroups + flex_size - 1) >> | 492 | ngroups = (real_ngroups + flex_size - 1) >> |
495 | sbi->s_log_groups_per_flex; | 493 | sbi->s_log_groups_per_flex; |
496 | parent_group >>= sbi->s_log_groups_per_flex; | 494 | parent_group >>= sbi->s_log_groups_per_flex; |
497 | } | 495 | } |
@@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
543 | */ | 541 | */ |
544 | grp *= flex_size; | 542 | grp *= flex_size; |
545 | for (i = 0; i < flex_size; i++) { | 543 | for (i = 0; i < flex_size; i++) { |
546 | if (grp+i >= sbi->s_groups_count) | 544 | if (grp+i >= real_ngroups) |
547 | break; | 545 | break; |
548 | desc = ext4_get_group_desc(sb, grp+i, NULL); | 546 | desc = ext4_get_group_desc(sb, grp+i, NULL); |
549 | if (desc && ext4_free_inodes_count(sb, desc)) { | 547 | if (desc && ext4_free_inodes_count(sb, desc)) { |
@@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
583 | } | 581 | } |
584 | 582 | ||
585 | fallback: | 583 | fallback: |
586 | ngroups = sbi->s_groups_count; | 584 | ngroups = real_ngroups; |
587 | avefreei = freei / ngroups; | 585 | avefreei = freei / ngroups; |
588 | fallback_retry: | 586 | fallback_retry: |
589 | parent_group = EXT4_I(parent)->i_block_group; | 587 | parent_group = EXT4_I(parent)->i_block_group; |
@@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
613 | ext4_group_t *group, int mode) | 611 | ext4_group_t *group, int mode) |
614 | { | 612 | { |
615 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 613 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
616 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 614 | ext4_group_t i, last, ngroups = ext4_get_groups_count(sb); |
617 | struct ext4_group_desc *desc; | 615 | struct ext4_group_desc *desc; |
618 | ext4_group_t i, last; | ||
619 | int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); | 616 | int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); |
620 | 617 | ||
621 | /* | 618 | /* |
@@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
708 | 705 | ||
709 | /* | 706 | /* |
710 | * claim the inode from the inode bitmap. If the group | 707 | * claim the inode from the inode bitmap. If the group |
711 | * is uninit we need to take the groups's sb_bgl_lock | 708 | * is uninit we need to take the groups's ext4_group_lock |
712 | * and clear the uninit flag. The inode bitmap update | 709 | * and clear the uninit flag. The inode bitmap update |
713 | * and group desc uninit flag clear should be done | 710 | * and group desc uninit flag clear should be done |
714 | * after holding sb_bgl_lock so that ext4_read_inode_bitmap | 711 | * after holding ext4_group_lock so that ext4_read_inode_bitmap |
715 | * doesn't race with the ext4_claim_inode | 712 | * doesn't race with the ext4_claim_inode |
716 | */ | 713 | */ |
717 | static int ext4_claim_inode(struct super_block *sb, | 714 | static int ext4_claim_inode(struct super_block *sb, |
@@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
722 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 719 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
723 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 720 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
724 | 721 | ||
725 | spin_lock(sb_bgl_lock(sbi, group)); | 722 | ext4_lock_group(sb, group); |
726 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 723 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
727 | /* not a free inode */ | 724 | /* not a free inode */ |
728 | retval = 1; | 725 | retval = 1; |
@@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
731 | ino++; | 728 | ino++; |
732 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 729 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
733 | ino > EXT4_INODES_PER_GROUP(sb)) { | 730 | ino > EXT4_INODES_PER_GROUP(sb)) { |
734 | spin_unlock(sb_bgl_lock(sbi, group)); | 731 | ext4_unlock_group(sb, group); |
735 | ext4_error(sb, __func__, | 732 | ext4_error(sb, __func__, |
736 | "reserved inode or inode > inodes count - " | 733 | "reserved inode or inode > inodes count - " |
737 | "block_group = %u, inode=%lu", group, | 734 | "block_group = %u, inode=%lu", group, |
@@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
780 | } | 777 | } |
781 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 778 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
782 | err_ret: | 779 | err_ret: |
783 | spin_unlock(sb_bgl_lock(sbi, group)); | 780 | ext4_unlock_group(sb, group); |
784 | return retval; | 781 | return retval; |
785 | } | 782 | } |
786 | 783 | ||
@@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
799 | struct super_block *sb; | 796 | struct super_block *sb; |
800 | struct buffer_head *inode_bitmap_bh = NULL; | 797 | struct buffer_head *inode_bitmap_bh = NULL; |
801 | struct buffer_head *group_desc_bh; | 798 | struct buffer_head *group_desc_bh; |
802 | ext4_group_t group = 0; | 799 | ext4_group_t ngroups, group = 0; |
803 | unsigned long ino = 0; | 800 | unsigned long ino = 0; |
804 | struct inode *inode; | 801 | struct inode *inode; |
805 | struct ext4_group_desc *gdp = NULL; | 802 | struct ext4_group_desc *gdp = NULL; |
806 | struct ext4_super_block *es; | ||
807 | struct ext4_inode_info *ei; | 803 | struct ext4_inode_info *ei; |
808 | struct ext4_sb_info *sbi; | 804 | struct ext4_sb_info *sbi; |
809 | int ret2, err = 0; | 805 | int ret2, err = 0; |
@@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
818 | return ERR_PTR(-EPERM); | 814 | return ERR_PTR(-EPERM); |
819 | 815 | ||
820 | sb = dir->i_sb; | 816 | sb = dir->i_sb; |
817 | ngroups = ext4_get_groups_count(sb); | ||
821 | trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, | 818 | trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, |
822 | dir->i_ino, mode); | 819 | dir->i_ino, mode); |
823 | inode = new_inode(sb); | 820 | inode = new_inode(sb); |
824 | if (!inode) | 821 | if (!inode) |
825 | return ERR_PTR(-ENOMEM); | 822 | return ERR_PTR(-ENOMEM); |
826 | ei = EXT4_I(inode); | 823 | ei = EXT4_I(inode); |
827 | |||
828 | sbi = EXT4_SB(sb); | 824 | sbi = EXT4_SB(sb); |
829 | es = sbi->s_es; | ||
830 | 825 | ||
831 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { | 826 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { |
832 | ret2 = find_group_flex(sb, dir, &group); | 827 | ret2 = find_group_flex(sb, dir, &group); |
@@ -856,7 +851,7 @@ got_group: | |||
856 | if (ret2 == -1) | 851 | if (ret2 == -1) |
857 | goto out; | 852 | goto out; |
858 | 853 | ||
859 | for (i = 0; i < sbi->s_groups_count; i++) { | 854 | for (i = 0; i < ngroups; i++) { |
860 | err = -EIO; | 855 | err = -EIO; |
861 | 856 | ||
862 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | 857 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); |
@@ -917,7 +912,7 @@ repeat_in_this_group: | |||
917 | * group descriptor metadata has not yet been updated. | 912 | * group descriptor metadata has not yet been updated. |
918 | * So we just go onto the next blockgroup. | 913 | * So we just go onto the next blockgroup. |
919 | */ | 914 | */ |
920 | if (++group == sbi->s_groups_count) | 915 | if (++group == ngroups) |
921 | group = 0; | 916 | group = 0; |
922 | } | 917 | } |
923 | err = -ENOSPC; | 918 | err = -ENOSPC; |
@@ -938,7 +933,7 @@ got: | |||
938 | } | 933 | } |
939 | 934 | ||
940 | free = 0; | 935 | free = 0; |
941 | spin_lock(sb_bgl_lock(sbi, group)); | 936 | ext4_lock_group(sb, group); |
942 | /* recheck and clear flag under lock if we still need to */ | 937 | /* recheck and clear flag under lock if we still need to */ |
943 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 938 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
944 | free = ext4_free_blocks_after_init(sb, group, gdp); | 939 | free = ext4_free_blocks_after_init(sb, group, gdp); |
@@ -947,7 +942,7 @@ got: | |||
947 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, | 942 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, |
948 | gdp); | 943 | gdp); |
949 | } | 944 | } |
950 | spin_unlock(sb_bgl_lock(sbi, group)); | 945 | ext4_unlock_group(sb, group); |
951 | 946 | ||
952 | /* Don't need to dirty bitmap block if we didn't change it */ | 947 | /* Don't need to dirty bitmap block if we didn't change it */ |
953 | if (free) { | 948 | if (free) { |
@@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1158 | { | 1153 | { |
1159 | unsigned long desc_count; | 1154 | unsigned long desc_count; |
1160 | struct ext4_group_desc *gdp; | 1155 | struct ext4_group_desc *gdp; |
1161 | ext4_group_t i; | 1156 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
1162 | #ifdef EXT4FS_DEBUG | 1157 | #ifdef EXT4FS_DEBUG |
1163 | struct ext4_super_block *es; | 1158 | struct ext4_super_block *es; |
1164 | unsigned long bitmap_count, x; | 1159 | unsigned long bitmap_count, x; |
@@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1168 | desc_count = 0; | 1163 | desc_count = 0; |
1169 | bitmap_count = 0; | 1164 | bitmap_count = 0; |
1170 | gdp = NULL; | 1165 | gdp = NULL; |
1171 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1166 | for (i = 0; i < ngroups; i++) { |
1172 | gdp = ext4_get_group_desc(sb, i, NULL); | 1167 | gdp = ext4_get_group_desc(sb, i, NULL); |
1173 | if (!gdp) | 1168 | if (!gdp) |
1174 | continue; | 1169 | continue; |
@@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1190 | return desc_count; | 1185 | return desc_count; |
1191 | #else | 1186 | #else |
1192 | desc_count = 0; | 1187 | desc_count = 0; |
1193 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1188 | for (i = 0; i < ngroups; i++) { |
1194 | gdp = ext4_get_group_desc(sb, i, NULL); | 1189 | gdp = ext4_get_group_desc(sb, i, NULL); |
1195 | if (!gdp) | 1190 | if (!gdp) |
1196 | continue; | 1191 | continue; |
@@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1205 | unsigned long ext4_count_dirs(struct super_block * sb) | 1200 | unsigned long ext4_count_dirs(struct super_block * sb) |
1206 | { | 1201 | { |
1207 | unsigned long count = 0; | 1202 | unsigned long count = 0; |
1208 | ext4_group_t i; | 1203 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
1209 | 1204 | ||
1210 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1205 | for (i = 0; i < ngroups; i++) { |
1211 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); | 1206 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
1212 | if (!gdp) | 1207 | if (!gdp) |
1213 | continue; | 1208 | continue; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a9ffd528dd1..875db944b22f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode, | |||
372 | } | 372 | } |
373 | 373 | ||
374 | static int __ext4_check_blockref(const char *function, struct inode *inode, | 374 | static int __ext4_check_blockref(const char *function, struct inode *inode, |
375 | __le32 *p, unsigned int max) { | 375 | __le32 *p, unsigned int max) |
376 | 376 | { | |
377 | unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); | ||
378 | __le32 *bref = p; | 377 | __le32 *bref = p; |
378 | unsigned int blk; | ||
379 | |||
379 | while (bref < p+max) { | 380 | while (bref < p+max) { |
380 | if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { | 381 | blk = le32_to_cpu(*bref++); |
382 | if (blk && | ||
383 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
384 | blk, 1))) { | ||
381 | ext4_error(inode->i_sb, function, | 385 | ext4_error(inode->i_sb, function, |
382 | "block reference %u >= max (%u) " | 386 | "invalid block reference %u " |
383 | "in inode #%lu, offset=%d", | 387 | "in inode #%lu", blk, inode->i_ino); |
384 | le32_to_cpu(*bref), maxblocks, | ||
385 | inode->i_ino, (int)(bref-p)); | ||
386 | return -EIO; | 388 | return -EIO; |
387 | } | 389 | } |
388 | bref++; | ||
389 | } | 390 | } |
390 | return 0; | 391 | return 0; |
391 | } | 392 | } |
@@ -892,6 +893,10 @@ err_out: | |||
892 | } | 893 | } |
893 | 894 | ||
894 | /* | 895 | /* |
896 | * The ext4_ind_get_blocks() function handles non-extents inodes | ||
897 | * (i.e., using the traditional indirect/double-indirect i_blocks | ||
898 | * scheme) for ext4_get_blocks(). | ||
899 | * | ||
895 | * Allocation strategy is simple: if we have to allocate something, we will | 900 | * Allocation strategy is simple: if we have to allocate something, we will |
896 | * have to go the whole way to leaf. So let's do it before attaching anything | 901 | * have to go the whole way to leaf. So let's do it before attaching anything |
897 | * to tree, set linkage between the newborn blocks, write them if sync is | 902 | * to tree, set linkage between the newborn blocks, write them if sync is |
@@ -909,15 +914,16 @@ err_out: | |||
909 | * return = 0, if plain lookup failed. | 914 | * return = 0, if plain lookup failed. |
910 | * return < 0, error case. | 915 | * return < 0, error case. |
911 | * | 916 | * |
912 | * | 917 | * The ext4_ind_get_blocks() function should be called with |
913 | * Need to be called with | 918 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem |
914 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block | 919 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or |
915 | * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) | 920 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system |
921 | * blocks. | ||
916 | */ | 922 | */ |
917 | static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 923 | static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, |
918 | ext4_lblk_t iblock, unsigned int maxblocks, | 924 | ext4_lblk_t iblock, unsigned int maxblocks, |
919 | struct buffer_head *bh_result, | 925 | struct buffer_head *bh_result, |
920 | int create, int extend_disksize) | 926 | int flags) |
921 | { | 927 | { |
922 | int err = -EIO; | 928 | int err = -EIO; |
923 | ext4_lblk_t offsets[4]; | 929 | ext4_lblk_t offsets[4]; |
@@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
927 | int indirect_blks; | 933 | int indirect_blks; |
928 | int blocks_to_boundary = 0; | 934 | int blocks_to_boundary = 0; |
929 | int depth; | 935 | int depth; |
930 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
931 | int count = 0; | 936 | int count = 0; |
932 | ext4_fsblk_t first_block = 0; | 937 | ext4_fsblk_t first_block = 0; |
933 | loff_t disksize; | ||
934 | |||
935 | 938 | ||
936 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 939 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); |
937 | J_ASSERT(handle != NULL || create == 0); | 940 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
938 | depth = ext4_block_to_path(inode, iblock, offsets, | 941 | depth = ext4_block_to_path(inode, iblock, offsets, |
939 | &blocks_to_boundary); | 942 | &blocks_to_boundary); |
940 | 943 | ||
@@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
963 | } | 966 | } |
964 | 967 | ||
965 | /* Next simple case - plain lookup or failed read of indirect block */ | 968 | /* Next simple case - plain lookup or failed read of indirect block */ |
966 | if (!create || err == -EIO) | 969 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) |
967 | goto cleanup; | 970 | goto cleanup; |
968 | 971 | ||
969 | /* | 972 | /* |
@@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
997 | if (!err) | 1000 | if (!err) |
998 | err = ext4_splice_branch(handle, inode, iblock, | 1001 | err = ext4_splice_branch(handle, inode, iblock, |
999 | partial, indirect_blks, count); | 1002 | partial, indirect_blks, count); |
1000 | /* | 1003 | else |
1001 | * i_disksize growing is protected by i_data_sem. Don't forget to | ||
1002 | * protect it if you're about to implement concurrent | ||
1003 | * ext4_get_block() -bzzz | ||
1004 | */ | ||
1005 | if (!err && extend_disksize) { | ||
1006 | disksize = ((loff_t) iblock + count) << inode->i_blkbits; | ||
1007 | if (disksize > i_size_read(inode)) | ||
1008 | disksize = i_size_read(inode); | ||
1009 | if (disksize > ei->i_disksize) | ||
1010 | ei->i_disksize = disksize; | ||
1011 | } | ||
1012 | if (err) | ||
1013 | goto cleanup; | 1004 | goto cleanup; |
1014 | 1005 | ||
1015 | set_buffer_new(bh_result); | 1006 | set_buffer_new(bh_result); |
@@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1120 | ext4_discard_preallocations(inode); | 1111 | ext4_discard_preallocations(inode); |
1121 | } | 1112 | } |
1122 | 1113 | ||
1114 | static int check_block_validity(struct inode *inode, sector_t logical, | ||
1115 | sector_t phys, int len) | ||
1116 | { | ||
1117 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | ||
1118 | ext4_error(inode->i_sb, "check_block_validity", | ||
1119 | "inode #%lu logical block %llu mapped to %llu " | ||
1120 | "(size %d)", inode->i_ino, | ||
1121 | (unsigned long long) logical, | ||
1122 | (unsigned long long) phys, len); | ||
1123 | WARN_ON(1); | ||
1124 | return -EIO; | ||
1125 | } | ||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
1123 | /* | 1129 | /* |
1124 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1130 | * The ext4_get_blocks() function tries to look up the requested blocks, |
1125 | * and returns if the blocks are already mapped. | 1131 | * and returns if the blocks are already mapped. |
1126 | * | 1132 | * |
1127 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks | 1133 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks |
@@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1129 | * mapped. | 1135 | * mapped. |
1130 | * | 1136 | * |
1131 | * If file type is extents based, it will call ext4_ext_get_blocks(), | 1137 | * If file type is extents based, it will call ext4_ext_get_blocks(), |
1132 | * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping | 1138 | * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping |
1133 | * based files | 1139 | * based files |
1134 | * | 1140 | * |
1135 | * On success, it returns the number of blocks being mapped or allocated. | 1141 | * On success, it returns the number of blocks being mapped or allocated. |
@@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1142 | * | 1148 | * |
1143 | * It returns the error in case of allocation failure. | 1149 | * It returns the error in case of allocation failure. |
1144 | */ | 1150 | */ |
1145 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | 1151 | int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, |
1146 | unsigned int max_blocks, struct buffer_head *bh, | 1152 | unsigned int max_blocks, struct buffer_head *bh, |
1147 | int create, int extend_disksize, int flag) | 1153 | int flags) |
1148 | { | 1154 | { |
1149 | int retval; | 1155 | int retval; |
1150 | 1156 | ||
@@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1152 | clear_buffer_unwritten(bh); | 1158 | clear_buffer_unwritten(bh); |
1153 | 1159 | ||
1154 | /* | 1160 | /* |
1155 | * Try to see if we can get the block without requesting | 1161 | * Try to see if we can get the block without requesting a new |
1156 | * for new file system block. | 1162 | * file system block. |
1157 | */ | 1163 | */ |
1158 | down_read((&EXT4_I(inode)->i_data_sem)); | 1164 | down_read((&EXT4_I(inode)->i_data_sem)); |
1159 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1165 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
1160 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1166 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, |
1161 | bh, 0, 0); | 1167 | bh, 0); |
1162 | } else { | 1168 | } else { |
1163 | retval = ext4_get_blocks_handle(handle, | 1169 | retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, |
1164 | inode, block, max_blocks, bh, 0, 0); | 1170 | bh, 0); |
1165 | } | 1171 | } |
1166 | up_read((&EXT4_I(inode)->i_data_sem)); | 1172 | up_read((&EXT4_I(inode)->i_data_sem)); |
1167 | 1173 | ||
1174 | if (retval > 0 && buffer_mapped(bh)) { | ||
1175 | int ret = check_block_validity(inode, block, | ||
1176 | bh->b_blocknr, retval); | ||
1177 | if (ret != 0) | ||
1178 | return ret; | ||
1179 | } | ||
1180 | |||
1168 | /* If it is only a block(s) look up */ | 1181 | /* If it is only a block(s) look up */ |
1169 | if (!create) | 1182 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) |
1170 | return retval; | 1183 | return retval; |
1171 | 1184 | ||
1172 | /* | 1185 | /* |
@@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1205 | * let the underlying get_block() function know to | 1218 | * let the underlying get_block() function know to |
1206 | * avoid double accounting | 1219 | * avoid double accounting |
1207 | */ | 1220 | */ |
1208 | if (flag) | 1221 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1209 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; | 1222 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; |
1210 | /* | 1223 | /* |
1211 | * We need to check for EXT4 here because migrate | 1224 | * We need to check for EXT4 here because migrate |
@@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1213 | */ | 1226 | */ |
1214 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1227 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
1215 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1228 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, |
1216 | bh, create, extend_disksize); | 1229 | bh, flags); |
1217 | } else { | 1230 | } else { |
1218 | retval = ext4_get_blocks_handle(handle, inode, block, | 1231 | retval = ext4_ind_get_blocks(handle, inode, block, |
1219 | max_blocks, bh, create, extend_disksize); | 1232 | max_blocks, bh, flags); |
1220 | 1233 | ||
1221 | if (retval > 0 && buffer_new(bh)) { | 1234 | if (retval > 0 && buffer_new(bh)) { |
1222 | /* | 1235 | /* |
@@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1229 | } | 1242 | } |
1230 | } | 1243 | } |
1231 | 1244 | ||
1232 | if (flag) { | 1245 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1233 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1246 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; |
1234 | /* | 1247 | |
1235 | * Update reserved blocks/metadata blocks | 1248 | /* |
1236 | * after successful block allocation | 1249 | * Update reserved blocks/metadata blocks after successful |
1237 | * which were deferred till now | 1250 | * block allocation which had been deferred till now. |
1238 | */ | 1251 | */ |
1239 | if ((retval > 0) && buffer_delay(bh)) | 1252 | if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) |
1240 | ext4_da_update_reserve_space(inode, retval); | 1253 | ext4_da_update_reserve_space(inode, retval); |
1241 | } | ||
1242 | 1254 | ||
1243 | up_write((&EXT4_I(inode)->i_data_sem)); | 1255 | up_write((&EXT4_I(inode)->i_data_sem)); |
1256 | if (retval > 0 && buffer_mapped(bh)) { | ||
1257 | int ret = check_block_validity(inode, block, | ||
1258 | bh->b_blocknr, retval); | ||
1259 | if (ret != 0) | ||
1260 | return ret; | ||
1261 | } | ||
1244 | return retval; | 1262 | return retval; |
1245 | } | 1263 | } |
1246 | 1264 | ||
@@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock, | |||
1268 | started = 1; | 1286 | started = 1; |
1269 | } | 1287 | } |
1270 | 1288 | ||
1271 | ret = ext4_get_blocks_wrap(handle, inode, iblock, | 1289 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, |
1272 | max_blocks, bh_result, create, 0, 0); | 1290 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
1273 | if (ret > 0) { | 1291 | if (ret > 0) { |
1274 | bh_result->b_size = (ret << inode->i_blkbits); | 1292 | bh_result->b_size = (ret << inode->i_blkbits); |
1275 | ret = 0; | 1293 | ret = 0; |
@@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
1288 | { | 1306 | { |
1289 | struct buffer_head dummy; | 1307 | struct buffer_head dummy; |
1290 | int fatal = 0, err; | 1308 | int fatal = 0, err; |
1309 | int flags = 0; | ||
1291 | 1310 | ||
1292 | J_ASSERT(handle != NULL || create == 0); | 1311 | J_ASSERT(handle != NULL || create == 0); |
1293 | 1312 | ||
1294 | dummy.b_state = 0; | 1313 | dummy.b_state = 0; |
1295 | dummy.b_blocknr = -1000; | 1314 | dummy.b_blocknr = -1000; |
1296 | buffer_trace_init(&dummy.b_history); | 1315 | buffer_trace_init(&dummy.b_history); |
1297 | err = ext4_get_blocks_wrap(handle, inode, block, 1, | 1316 | if (create) |
1298 | &dummy, create, 1, 0); | 1317 | flags |= EXT4_GET_BLOCKS_CREATE; |
1318 | err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); | ||
1299 | /* | 1319 | /* |
1300 | * ext4_get_blocks_handle() returns number of blocks | 1320 | * ext4_get_blocks() returns number of blocks mapped. 0 in |
1301 | * mapped. 0 in case of a HOLE. | 1321 | * case of a HOLE. |
1302 | */ | 1322 | */ |
1303 | if (err > 0) { | 1323 | if (err > 0) { |
1304 | if (err > 1) | 1324 | if (err > 1) |
@@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1439 | struct page **pagep, void **fsdata) | 1459 | struct page **pagep, void **fsdata) |
1440 | { | 1460 | { |
1441 | struct inode *inode = mapping->host; | 1461 | struct inode *inode = mapping->host; |
1442 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | 1462 | int ret, needed_blocks; |
1443 | handle_t *handle; | 1463 | handle_t *handle; |
1444 | int retries = 0; | 1464 | int retries = 0; |
1445 | struct page *page; | 1465 | struct page *page; |
@@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1450 | "dev %s ino %lu pos %llu len %u flags %u", | 1470 | "dev %s ino %lu pos %llu len %u flags %u", |
1451 | inode->i_sb->s_id, inode->i_ino, | 1471 | inode->i_sb->s_id, inode->i_ino, |
1452 | (unsigned long long) pos, len, flags); | 1472 | (unsigned long long) pos, len, flags); |
1473 | /* | ||
1474 | * Reserve one block more for addition to orphan list in case | ||
1475 | * we allocate blocks but write fails for some reason | ||
1476 | */ | ||
1477 | needed_blocks = ext4_writepage_trans_blocks(inode) + 1; | ||
1453 | index = pos >> PAGE_CACHE_SHIFT; | 1478 | index = pos >> PAGE_CACHE_SHIFT; |
1454 | from = pos & (PAGE_CACHE_SIZE - 1); | 1479 | from = pos & (PAGE_CACHE_SIZE - 1); |
1455 | to = from + len; | 1480 | to = from + len; |
@@ -1483,15 +1508,30 @@ retry: | |||
1483 | 1508 | ||
1484 | if (ret) { | 1509 | if (ret) { |
1485 | unlock_page(page); | 1510 | unlock_page(page); |
1486 | ext4_journal_stop(handle); | ||
1487 | page_cache_release(page); | 1511 | page_cache_release(page); |
1488 | /* | 1512 | /* |
1489 | * block_write_begin may have instantiated a few blocks | 1513 | * block_write_begin may have instantiated a few blocks |
1490 | * outside i_size. Trim these off again. Don't need | 1514 | * outside i_size. Trim these off again. Don't need |
1491 | * i_size_read because we hold i_mutex. | 1515 | * i_size_read because we hold i_mutex. |
1516 | * | ||
1517 | * Add inode to orphan list in case we crash before | ||
1518 | * truncate finishes | ||
1492 | */ | 1519 | */ |
1493 | if (pos + len > inode->i_size) | 1520 | if (pos + len > inode->i_size) |
1521 | ext4_orphan_add(handle, inode); | ||
1522 | |||
1523 | ext4_journal_stop(handle); | ||
1524 | if (pos + len > inode->i_size) { | ||
1494 | vmtruncate(inode, inode->i_size); | 1525 | vmtruncate(inode, inode->i_size); |
1526 | /* | ||
1527 | * If vmtruncate failed early the inode might | ||
1528 | * still be on the orphan list; we need to | ||
1529 | * make sure the inode is removed from the | ||
1530 | * orphan list in that case. | ||
1531 | */ | ||
1532 | if (inode->i_nlink) | ||
1533 | ext4_orphan_del(NULL, inode); | ||
1534 | } | ||
1495 | } | 1535 | } |
1496 | 1536 | ||
1497 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 1537 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) | |||
1509 | return ext4_handle_dirty_metadata(handle, NULL, bh); | 1549 | return ext4_handle_dirty_metadata(handle, NULL, bh); |
1510 | } | 1550 | } |
1511 | 1551 | ||
1552 | static int ext4_generic_write_end(struct file *file, | ||
1553 | struct address_space *mapping, | ||
1554 | loff_t pos, unsigned len, unsigned copied, | ||
1555 | struct page *page, void *fsdata) | ||
1556 | { | ||
1557 | int i_size_changed = 0; | ||
1558 | struct inode *inode = mapping->host; | ||
1559 | handle_t *handle = ext4_journal_current_handle(); | ||
1560 | |||
1561 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | ||
1562 | |||
1563 | /* | ||
1564 | * No need to use i_size_read() here, the i_size | ||
1565 | * cannot change under us because we hold i_mutex. | ||
1566 | * | ||
1567 | * But it's important to update i_size while still holding page lock: | ||
1568 | * page writeout could otherwise come in and zero beyond i_size. | ||
1569 | */ | ||
1570 | if (pos + copied > inode->i_size) { | ||
1571 | i_size_write(inode, pos + copied); | ||
1572 | i_size_changed = 1; | ||
1573 | } | ||
1574 | |||
1575 | if (pos + copied > EXT4_I(inode)->i_disksize) { | ||
1576 | /* We need to mark inode dirty even if | ||
1577 | * new_i_size is less than inode->i_size | ||
1578 | * but greater than i_disksize. (hint delalloc) | ||
1579 | */ | ||
1580 | ext4_update_i_disksize(inode, (pos + copied)); | ||
1581 | i_size_changed = 1; | ||
1582 | } | ||
1583 | unlock_page(page); | ||
1584 | page_cache_release(page); | ||
1585 | |||
1586 | /* | ||
1587 | * Don't mark the inode dirty under page lock. First, it unnecessarily | ||
1588 | * makes the holding time of page lock longer. Second, it forces lock | ||
1589 | * ordering of page lock and transaction start for journaling | ||
1590 | * filesystems. | ||
1591 | */ | ||
1592 | if (i_size_changed) | ||
1593 | ext4_mark_inode_dirty(handle, inode); | ||
1594 | |||
1595 | return copied; | ||
1596 | } | ||
1597 | |||
1512 | /* | 1598 | /* |
1513 | * We need to pick up the new inode size which generic_commit_write gave us | 1599 | * We need to pick up the new inode size which generic_commit_write gave us |
1514 | * `file' can be NULL - eg, when called from page_symlink(). | 1600 | * `file' can be NULL - eg, when called from page_symlink(). |
@@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file, | |||
1532 | ret = ext4_jbd2_file_inode(handle, inode); | 1618 | ret = ext4_jbd2_file_inode(handle, inode); |
1533 | 1619 | ||
1534 | if (ret == 0) { | 1620 | if (ret == 0) { |
1535 | loff_t new_i_size; | 1621 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1536 | |||
1537 | new_i_size = pos + copied; | ||
1538 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
1539 | ext4_update_i_disksize(inode, new_i_size); | ||
1540 | /* We need to mark inode dirty even if | ||
1541 | * new_i_size is less than inode->i_size | ||
1542 | * but greater than i_disksize. (hint delalloc) | ||
1543 | */ | ||
1544 | ext4_mark_inode_dirty(handle, inode); | ||
1545 | } | ||
1546 | |||
1547 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
1548 | page, fsdata); | 1622 | page, fsdata); |
1549 | copied = ret2; | 1623 | copied = ret2; |
1624 | if (pos + len > inode->i_size) | ||
1625 | /* If we allocated more blocks than we copied data | ||
1626 | * into, we will have blocks allocated outside | ||
1627 | * inode->i_size, so they need to be truncated. | ||
1628 | */ | ||
1629 | ext4_orphan_add(handle, inode); | ||
1550 | if (ret2 < 0) | 1630 | if (ret2 < 0) |
1551 | ret = ret2; | 1631 | ret = ret2; |
1552 | } | 1632 | } |
@@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file, | |||
1554 | if (!ret) | 1634 | if (!ret) |
1555 | ret = ret2; | 1635 | ret = ret2; |
1556 | 1636 | ||
1637 | if (pos + len > inode->i_size) { | ||
1638 | vmtruncate(inode, inode->i_size); | ||
1639 | /* | ||
1640 | * If vmtruncate failed early the inode might still be | ||
1641 | * on the orphan list; we need to make sure the inode | ||
1642 | * is removed from the orphan list in that case. | ||
1643 | */ | ||
1644 | if (inode->i_nlink) | ||
1645 | ext4_orphan_del(NULL, inode); | ||
1646 | } | ||
1647 | |||
1648 | |||
1557 | return ret ? ret : copied; | 1649 | return ret ? ret : copied; |
1558 | } | 1650 | } |
1559 | 1651 | ||
@@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file, | |||
1565 | handle_t *handle = ext4_journal_current_handle(); | 1657 | handle_t *handle = ext4_journal_current_handle(); |
1566 | struct inode *inode = mapping->host; | 1658 | struct inode *inode = mapping->host; |
1567 | int ret = 0, ret2; | 1659 | int ret = 0, ret2; |
1568 | loff_t new_i_size; | ||
1569 | 1660 | ||
1570 | trace_mark(ext4_writeback_write_end, | 1661 | trace_mark(ext4_writeback_write_end, |
1571 | "dev %s ino %lu pos %llu len %u copied %u", | 1662 | "dev %s ino %lu pos %llu len %u copied %u", |
1572 | inode->i_sb->s_id, inode->i_ino, | 1663 | inode->i_sb->s_id, inode->i_ino, |
1573 | (unsigned long long) pos, len, copied); | 1664 | (unsigned long long) pos, len, copied); |
1574 | new_i_size = pos + copied; | 1665 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1575 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
1576 | ext4_update_i_disksize(inode, new_i_size); | ||
1577 | /* We need to mark inode dirty even if | ||
1578 | * new_i_size is less than inode->i_size | ||
1579 | * but greater than i_disksize. (hint delalloc) | ||
1580 | */ | ||
1581 | ext4_mark_inode_dirty(handle, inode); | ||
1582 | } | ||
1583 | |||
1584 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
1585 | page, fsdata); | 1666 | page, fsdata); |
1586 | copied = ret2; | 1667 | copied = ret2; |
1668 | if (pos + len > inode->i_size) | ||
1669 | /* If we allocated more blocks than we copied data | ||
1670 | * into, we will have blocks allocated outside | ||
1671 | * inode->i_size, so they need to be truncated. | ||
1672 | */ | ||
1673 | ext4_orphan_add(handle, inode); | ||
1674 | |||
1587 | if (ret2 < 0) | 1675 | if (ret2 < 0) |
1588 | ret = ret2; | 1676 | ret = ret2; |
1589 | 1677 | ||
@@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file, | |||
1591 | if (!ret) | 1679 | if (!ret) |
1592 | ret = ret2; | 1680 | ret = ret2; |
1593 | 1681 | ||
1682 | if (pos + len > inode->i_size) { | ||
1683 | vmtruncate(inode, inode->i_size); | ||
1684 | /* | ||
1685 | * If vmtruncate failed early the inode might still be | ||
1686 | * on the orphan list; we need to make sure the inode | ||
1687 | * is removed from the orphan list in that case. | ||
1688 | */ | ||
1689 | if (inode->i_nlink) | ||
1690 | ext4_orphan_del(NULL, inode); | ||
1691 | } | ||
1692 | |||
1594 | return ret ? ret : copied; | 1693 | return ret ? ret : copied; |
1595 | } | 1694 | } |
1596 | 1695 | ||
@@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file, | |||
1635 | } | 1734 | } |
1636 | 1735 | ||
1637 | unlock_page(page); | 1736 | unlock_page(page); |
1737 | page_cache_release(page); | ||
1738 | if (pos + len > inode->i_size) | ||
1739 | /* If we allocated more blocks than we copied data | ||
1740 | * into, we will have blocks allocated outside | ||
1741 | * inode->i_size, so they need to be truncated. | ||
1742 | */ | ||
1743 | ext4_orphan_add(handle, inode); | ||
1744 | |||
1638 | ret2 = ext4_journal_stop(handle); | 1745 | ret2 = ext4_journal_stop(handle); |
1639 | if (!ret) | 1746 | if (!ret) |
1640 | ret = ret2; | 1747 | ret = ret2; |
1641 | page_cache_release(page); | 1748 | if (pos + len > inode->i_size) { |
1749 | vmtruncate(inode, inode->i_size); | ||
1750 | /* | ||
1751 | * If vmtruncate failed early the inode might still be | ||
1752 | * on the orphan list; we need to make sure the inode | ||
1753 | * is removed from the orphan list in that case. | ||
1754 | */ | ||
1755 | if (inode->i_nlink) | ||
1756 | ext4_orphan_del(NULL, inode); | ||
1757 | } | ||
1642 | 1758 | ||
1643 | return ret ? ret : copied; | 1759 | return ret ? ret : copied; |
1644 | } | 1760 | } |
@@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1852 | * @logical - first logical block to start assignment with | 1968 | * @logical - first logical block to start assignment with |
1853 | * | 1969 | * |
1854 | * the function goes through all passed space and put actual disk | 1970 | * the function goes through all passed space and put actual disk |
1855 | * block numbers into buffer heads, dropping BH_Delay | 1971 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten |
1856 | */ | 1972 | */ |
1857 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | 1973 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, |
1858 | struct buffer_head *exbh) | 1974 | struct buffer_head *exbh) |
@@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1902 | do { | 2018 | do { |
1903 | if (cur_logical >= logical + blocks) | 2019 | if (cur_logical >= logical + blocks) |
1904 | break; | 2020 | break; |
1905 | if (buffer_delay(bh)) { | 2021 | |
1906 | bh->b_blocknr = pblock; | 2022 | if (buffer_delay(bh) || |
1907 | clear_buffer_delay(bh); | 2023 | buffer_unwritten(bh)) { |
1908 | bh->b_bdev = inode->i_sb->s_bdev; | 2024 | |
1909 | } else if (buffer_unwritten(bh)) { | 2025 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); |
1910 | bh->b_blocknr = pblock; | 2026 | |
1911 | clear_buffer_unwritten(bh); | 2027 | if (buffer_delay(bh)) { |
1912 | set_buffer_mapped(bh); | 2028 | clear_buffer_delay(bh); |
1913 | set_buffer_new(bh); | 2029 | bh->b_blocknr = pblock; |
1914 | bh->b_bdev = inode->i_sb->s_bdev; | 2030 | } else { |
2031 | /* | ||
2032 | * unwritten already should have | ||
2033 | * blocknr assigned. Verify that | ||
2034 | */ | ||
2035 | clear_buffer_unwritten(bh); | ||
2036 | BUG_ON(bh->b_blocknr != pblock); | ||
2037 | } | ||
2038 | |||
1915 | } else if (buffer_mapped(bh)) | 2039 | } else if (buffer_mapped(bh)) |
1916 | BUG_ON(bh->b_blocknr != pblock); | 2040 | BUG_ON(bh->b_blocknr != pblock); |
1917 | 2041 | ||
@@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1990 | return; | 2114 | return; |
1991 | } | 2115 | } |
1992 | 2116 | ||
1993 | #define EXT4_DELALLOC_RSVED 1 | ||
1994 | static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | ||
1995 | struct buffer_head *bh_result, int create) | ||
1996 | { | ||
1997 | int ret; | ||
1998 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
1999 | loff_t disksize = EXT4_I(inode)->i_disksize; | ||
2000 | handle_t *handle = NULL; | ||
2001 | |||
2002 | handle = ext4_journal_current_handle(); | ||
2003 | BUG_ON(!handle); | ||
2004 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
2005 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
2006 | if (ret <= 0) | ||
2007 | return ret; | ||
2008 | |||
2009 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2010 | |||
2011 | if (ext4_should_order_data(inode)) { | ||
2012 | int retval; | ||
2013 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2014 | if (retval) | ||
2015 | /* | ||
2016 | * Failed to add inode for ordered mode. Don't | ||
2017 | * update file size | ||
2018 | */ | ||
2019 | return retval; | ||
2020 | } | ||
2021 | |||
2022 | /* | ||
2023 | * Update on-disk size along with block allocation we don't | ||
2024 | * use 'extend_disksize' as size may change within already | ||
2025 | * allocated block -bzzz | ||
2026 | */ | ||
2027 | disksize = ((loff_t) iblock + ret) << inode->i_blkbits; | ||
2028 | if (disksize > i_size_read(inode)) | ||
2029 | disksize = i_size_read(inode); | ||
2030 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2031 | ext4_update_i_disksize(inode, disksize); | ||
2032 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2033 | return ret; | ||
2034 | } | ||
2035 | return 0; | ||
2036 | } | ||
2037 | |||
2038 | /* | 2117 | /* |
2039 | * mpage_da_map_blocks - go through given space | 2118 | * mpage_da_map_blocks - go through given space |
2040 | * | 2119 | * |
@@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2045 | */ | 2124 | */ |
2046 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2125 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
2047 | { | 2126 | { |
2048 | int err = 0; | 2127 | int err, blks, get_blocks_flags; |
2049 | struct buffer_head new; | 2128 | struct buffer_head new; |
2050 | sector_t next; | 2129 | sector_t next = mpd->b_blocknr; |
2130 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | ||
2131 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | ||
2132 | handle_t *handle = NULL; | ||
2051 | 2133 | ||
2052 | /* | 2134 | /* |
2053 | * We consider only non-mapped and non-allocated blocks | 2135 | * We consider only non-mapped and non-allocated blocks |
2054 | */ | 2136 | */ |
2055 | if ((mpd->b_state & (1 << BH_Mapped)) && | 2137 | if ((mpd->b_state & (1 << BH_Mapped)) && |
2056 | !(mpd->b_state & (1 << BH_Delay))) | 2138 | !(mpd->b_state & (1 << BH_Delay)) && |
2139 | !(mpd->b_state & (1 << BH_Unwritten))) | ||
2057 | return 0; | 2140 | return 0; |
2058 | new.b_state = mpd->b_state; | 2141 | |
2059 | new.b_blocknr = 0; | ||
2060 | new.b_size = mpd->b_size; | ||
2061 | next = mpd->b_blocknr; | ||
2062 | /* | 2142 | /* |
2063 | * If we didn't accumulate anything | 2143 | * If we didn't accumulate anything to write simply return |
2064 | * to write simply return | ||
2065 | */ | 2144 | */ |
2066 | if (!new.b_size) | 2145 | if (!mpd->b_size) |
2067 | return 0; | 2146 | return 0; |
2068 | 2147 | ||
2069 | err = ext4_da_get_block_write(mpd->inode, next, &new, 1); | 2148 | handle = ext4_journal_current_handle(); |
2070 | if (err) { | 2149 | BUG_ON(!handle); |
2150 | |||
2151 | /* | ||
2152 | * Call ext4_get_blocks() to allocate any delayed allocation | ||
2153 | * blocks, or to convert an uninitialized extent to be | ||
2154 | * initialized (in the case where we have written into | ||
2155 | * one or more preallocated blocks). | ||
2156 | * | ||
2157 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | ||
2158 | * indicate that we are on the delayed allocation path. This | ||
2159 | * affects functions in many different parts of the allocation | ||
2160 | * call path. This flag exists primarily because we don't | ||
2161 | * want to change *many* call functions, so ext4_get_blocks() | ||
2162 | * will set the magic i_delalloc_reserved_flag once the | ||
2163 | * inode's allocation semaphore is taken. | ||
2164 | * | ||
2165 | * If the blocks in question were delalloc blocks, set | ||
2166 | * EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE so the delalloc accounting | ||
2167 | * variables are updated after the blocks have been allocated. | ||
2168 | */ | ||
2169 | new.b_state = 0; | ||
2170 | get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | | ||
2171 | EXT4_GET_BLOCKS_DELALLOC_RESERVE); | ||
2172 | if (mpd->b_state & (1 << BH_Delay)) | ||
2173 | get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; | ||
2174 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, | ||
2175 | &new, get_blocks_flags); | ||
2176 | if (blks < 0) { | ||
2177 | err = blks; | ||
2071 | /* | 2178 | /* |
2072 | * If get block returns with error we simply | 2179 | * If get block returns with error we simply |
2073 | * return. Later writepage will redirty the page and | 2180 | * return. Later writepage will redirty the page and |
@@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2100 | if (err == -ENOSPC) { | 2207 | if (err == -ENOSPC) { |
2101 | ext4_print_free_blocks(mpd->inode); | 2208 | ext4_print_free_blocks(mpd->inode); |
2102 | } | 2209 | } |
2103 | /* invlaidate all the pages */ | 2210 | /* invalidate all the pages */ |
2104 | ext4_da_block_invalidatepages(mpd, next, | 2211 | ext4_da_block_invalidatepages(mpd, next, |
2105 | mpd->b_size >> mpd->inode->i_blkbits); | 2212 | mpd->b_size >> mpd->inode->i_blkbits); |
2106 | return err; | 2213 | return err; |
2107 | } | 2214 | } |
2108 | BUG_ON(new.b_size == 0); | 2215 | BUG_ON(blks == 0); |
2216 | |||
2217 | new.b_size = (blks << mpd->inode->i_blkbits); | ||
2109 | 2218 | ||
2110 | if (buffer_new(&new)) | 2219 | if (buffer_new(&new)) |
2111 | __unmap_underlying_blocks(mpd->inode, &new); | 2220 | __unmap_underlying_blocks(mpd->inode, &new); |
@@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2118 | (mpd->b_state & (1 << BH_Unwritten))) | 2227 | (mpd->b_state & (1 << BH_Unwritten))) |
2119 | mpage_put_bnr_to_bhs(mpd, next, &new); | 2228 | mpage_put_bnr_to_bhs(mpd, next, &new); |
2120 | 2229 | ||
2230 | if (ext4_should_order_data(mpd->inode)) { | ||
2231 | err = ext4_jbd2_file_inode(handle, mpd->inode); | ||
2232 | if (err) | ||
2233 | return err; | ||
2234 | } | ||
2235 | |||
2236 | /* | ||
2237 | * Update on-disk size along with block allocation. | ||
2238 | */ | ||
2239 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | ||
2240 | if (disksize > i_size_read(mpd->inode)) | ||
2241 | disksize = i_size_read(mpd->inode); | ||
2242 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | ||
2243 | ext4_update_i_disksize(mpd->inode, disksize); | ||
2244 | return ext4_mark_inode_dirty(handle, mpd->inode); | ||
2245 | } | ||
2246 | |||
2121 | return 0; | 2247 | return 0; |
2122 | } | 2248 | } |
2123 | 2249 | ||
@@ -2192,6 +2318,17 @@ flush_it: | |||
2192 | return; | 2318 | return; |
2193 | } | 2319 | } |
2194 | 2320 | ||
2321 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||
2322 | { | ||
2323 | /* | ||
2324 | * unmapped buffer is possible for holes. | ||
2325 | * delay buffer is possible with delayed allocation. | ||
2326 | * We also need to consider unwritten buffer as unmapped. | ||
2327 | */ | ||
2328 | return (!buffer_mapped(bh) || buffer_delay(bh) || | ||
2329 | buffer_unwritten(bh)) && buffer_dirty(bh); | ||
2330 | } | ||
2331 | |||
2195 | /* | 2332 | /* |
2196 | * __mpage_da_writepage - finds extent of pages and blocks | 2333 | * __mpage_da_writepage - finds extent of pages and blocks |
2197 | * | 2334 | * |
@@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page, | |||
2276 | * Otherwise we won't make progress | 2413 | * Otherwise we won't make progress |
2277 | * with the page in ext4_da_writepage | 2414 | * with the page in ext4_da_writepage |
2278 | */ | 2415 | */ |
2279 | if (buffer_dirty(bh) && | 2416 | if (ext4_bh_unmapped_or_delay(NULL, bh)) { |
2280 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
2281 | mpage_add_bh_to_extent(mpd, logical, | 2417 | mpage_add_bh_to_extent(mpd, logical, |
2282 | bh->b_size, | 2418 | bh->b_size, |
2283 | bh->b_state); | 2419 | bh->b_state); |
@@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page, | |||
2303 | } | 2439 | } |
2304 | 2440 | ||
2305 | /* | 2441 | /* |
2306 | * this is a special callback for ->write_begin() only | 2442 | * This is a special get_blocks_t callback which is used by |
2307 | * it's intention is to return mapped block or reserve space | 2443 | * ext4_da_write_begin(). It will either return mapped block or |
2444 | * reserve space for a single block. | ||
2445 | * | ||
2446 | * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. | ||
2447 | * We also have b_blocknr = -1 and b_bdev initialized properly | ||
2448 | * | ||
2449 | * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. | ||
2450 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev | ||
2451 | * initialized properly. | ||
2308 | */ | 2452 | */ |
2309 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2453 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2310 | struct buffer_head *bh_result, int create) | 2454 | struct buffer_head *bh_result, int create) |
@@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2323 | * preallocated blocks are unmapped but should be treated | 2467 | * preallocated blocks are unmapped but should be treated |
2324 | * the same as allocated blocks. | 2468 | * the same as allocated blocks. |
2325 | */ | 2469 | */ |
2326 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); | 2470 | ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); |
2327 | if ((ret == 0) && !buffer_delay(bh_result)) { | 2471 | if ((ret == 0) && !buffer_delay(bh_result)) { |
2328 | /* the block isn't (pre)allocated yet, let's reserve space */ | 2472 | /* the block isn't (pre)allocated yet, let's reserve space */ |
2329 | /* | 2473 | /* |
@@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2340 | set_buffer_delay(bh_result); | 2484 | set_buffer_delay(bh_result); |
2341 | } else if (ret > 0) { | 2485 | } else if (ret > 0) { |
2342 | bh_result->b_size = (ret << inode->i_blkbits); | 2486 | bh_result->b_size = (ret << inode->i_blkbits); |
2343 | /* | 2487 | if (buffer_unwritten(bh_result)) { |
2344 | * With sub-block writes into unwritten extents | 2488 | /* A delayed write to unwritten bh should |
2345 | * we also need to mark the buffer as new so that | 2489 | * be marked new and mapped. Mapped ensures |
2346 | * the unwritten parts of the buffer gets correctly zeroed. | 2490 | * that we don't do get_block multiple times |
2347 | */ | 2491 | * when we write to the same offset and new |
2348 | if (buffer_unwritten(bh_result)) | 2492 | * ensures that we do proper zero out for |
2493 | * partial write. | ||
2494 | */ | ||
2349 | set_buffer_new(bh_result); | 2495 | set_buffer_new(bh_result); |
2496 | set_buffer_mapped(bh_result); | ||
2497 | } | ||
2350 | ret = 0; | 2498 | ret = 0; |
2351 | } | 2499 | } |
2352 | 2500 | ||
2353 | return ret; | 2501 | return ret; |
2354 | } | 2502 | } |
2355 | 2503 | ||
2356 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | 2504 | /* |
2357 | { | 2505 | * This function is used as a standard get_block_t callback function |
2358 | /* | 2506 | * when there is no desire to allocate any blocks. It is used as a |
2359 | * unmapped buffer is possible for holes. | 2507 | * callback function for block_prepare_write(), nobh_writepage(), and |
2360 | * delay buffer is possible with delayed allocation | 2508 | * block_write_full_page(). These functions should only try to map a |
2361 | */ | 2509 | * single block at a time. |
2362 | return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); | 2510 | * |
2363 | } | 2511 | * Since this function doesn't do block allocations even if the caller |
2364 | 2512 | * requests it by passing in create=1, it is critically important that | |
2365 | static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | 2513 | * any caller checks to make sure that any buffer heads returned |
2514 | * by this function are either all already mapped or marked for | ||
2515 | * delayed allocation before calling nobh_writepage() or | ||
2516 | * block_write_full_page(). Otherwise, b_blocknr could be left | ||
2517 | * uninitialized, and the page write functions will be taken by | ||
2518 | * surprise. | ||
2519 | */ | ||
2520 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
2366 | struct buffer_head *bh_result, int create) | 2521 | struct buffer_head *bh_result, int create) |
2367 | { | 2522 | { |
2368 | int ret = 0; | 2523 | int ret = 0; |
2369 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 2524 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
2370 | 2525 | ||
2526 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | ||
2527 | |||
2371 | /* | 2528 | /* |
2372 | * we don't want to do block allocation in writepage | 2529 | * we don't want to do block allocation in writepage |
2373 | * so call get_block_wrap with create = 0 | 2530 | * so call get_block_wrap with create = 0 |
2374 | */ | 2531 | */ |
2375 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, | 2532 | ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); |
2376 | bh_result, 0, 0, 0); | 2533 | BUG_ON(create && ret == 0); |
2377 | if (ret > 0) { | 2534 | if (ret > 0) { |
2378 | bh_result->b_size = (ret << inode->i_blkbits); | 2535 | bh_result->b_size = (ret << inode->i_blkbits); |
2379 | ret = 0; | 2536 | ret = 0; |
@@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | |||
2382 | } | 2539 | } |
2383 | 2540 | ||
2384 | /* | 2541 | /* |
2385 | * get called vi ext4_da_writepages after taking page lock (have journal handle) | 2542 | * This function can get called via... |
2386 | * get called via journal_submit_inode_data_buffers (no journal handle) | 2543 | * - ext4_da_writepages after taking page lock (have journal handle) |
2387 | * get called via shrink_page_list via pdflush (no journal handle) | 2544 | * - journal_submit_inode_data_buffers (no journal handle) |
2388 | * or grab_page_cache when doing write_begin (have journal handle) | 2545 | * - shrink_page_list via pdflush (no journal handle) |
2546 | * - grab_page_cache when doing write_begin (have journal handle) | ||
2389 | */ | 2547 | */ |
2390 | static int ext4_da_writepage(struct page *page, | 2548 | static int ext4_da_writepage(struct page *page, |
2391 | struct writeback_control *wbc) | 2549 | struct writeback_control *wbc) |
@@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page, | |||
2436 | * do block allocation here. | 2594 | * do block allocation here. |
2437 | */ | 2595 | */ |
2438 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 2596 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
2439 | ext4_normal_get_block_write); | 2597 | noalloc_get_block_write); |
2440 | if (!ret) { | 2598 | if (!ret) { |
2441 | page_bufs = page_buffers(page); | 2599 | page_bufs = page_buffers(page); |
2442 | /* check whether all are mapped and non delay */ | 2600 | /* check whether all are mapped and non delay */ |
@@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page, | |||
2461 | } | 2619 | } |
2462 | 2620 | ||
2463 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2621 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) |
2464 | ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); | 2622 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); |
2465 | else | 2623 | else |
2466 | ret = block_write_full_page(page, | 2624 | ret = block_write_full_page(page, noalloc_get_block_write, |
2467 | ext4_normal_get_block_write, | 2625 | wbc); |
2468 | wbc); | ||
2469 | 2626 | ||
2470 | return ret; | 2627 | return ret; |
2471 | } | 2628 | } |
@@ -2777,7 +2934,7 @@ retry: | |||
2777 | *pagep = page; | 2934 | *pagep = page; |
2778 | 2935 | ||
2779 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 2936 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
2780 | ext4_da_get_block_prep); | 2937 | ext4_da_get_block_prep); |
2781 | if (ret < 0) { | 2938 | if (ret < 0) { |
2782 | unlock_page(page); | 2939 | unlock_page(page); |
2783 | ext4_journal_stop(handle); | 2940 | ext4_journal_stop(handle); |
@@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2815 | for (i = 0; i < idx; i++) | 2972 | for (i = 0; i < idx; i++) |
2816 | bh = bh->b_this_page; | 2973 | bh = bh->b_this_page; |
2817 | 2974 | ||
2818 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2975 | if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) |
2819 | return 0; | 2976 | return 0; |
2820 | return 1; | 2977 | return 1; |
2821 | } | 2978 | } |
@@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page, | |||
3085 | struct inode *inode = page->mapping->host; | 3242 | struct inode *inode = page->mapping->host; |
3086 | 3243 | ||
3087 | if (test_opt(inode->i_sb, NOBH)) | 3244 | if (test_opt(inode->i_sb, NOBH)) |
3088 | return nobh_writepage(page, | 3245 | return nobh_writepage(page, noalloc_get_block_write, wbc); |
3089 | ext4_normal_get_block_write, wbc); | ||
3090 | else | 3246 | else |
3091 | return block_write_full_page(page, | 3247 | return block_write_full_page(page, noalloc_get_block_write, |
3092 | ext4_normal_get_block_write, | 3248 | wbc); |
3093 | wbc); | ||
3094 | } | 3249 | } |
3095 | 3250 | ||
3096 | static int ext4_normal_writepage(struct page *page, | 3251 | static int ext4_normal_writepage(struct page *page, |
@@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
3142 | int err; | 3297 | int err; |
3143 | 3298 | ||
3144 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 3299 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
3145 | ext4_normal_get_block_write); | 3300 | noalloc_get_block_write); |
3146 | if (ret != 0) | 3301 | if (ret != 0) |
3147 | goto out_unlock; | 3302 | goto out_unlock; |
3148 | 3303 | ||
@@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page, | |||
3227 | * really know unless we go poke around in the buffer_heads. | 3382 | * really know unless we go poke around in the buffer_heads. |
3228 | * But block_write_full_page will do the right thing. | 3383 | * But block_write_full_page will do the right thing. |
3229 | */ | 3384 | */ |
3230 | return block_write_full_page(page, | 3385 | return block_write_full_page(page, noalloc_get_block_write, |
3231 | ext4_normal_get_block_write, | 3386 | wbc); |
3232 | wbc); | ||
3233 | } | 3387 | } |
3234 | no_write: | 3388 | no_write: |
3235 | redirty_page_for_writepage(wbc, page); | 3389 | redirty_page_for_writepage(wbc, page); |
@@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode) | |||
3973 | if (!ext4_can_truncate(inode)) | 4127 | if (!ext4_can_truncate(inode)) |
3974 | return; | 4128 | return; |
3975 | 4129 | ||
3976 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4130 | if (ei->i_disksize && inode->i_size == 0 && |
4131 | !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
3977 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 4132 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; |
3978 | 4133 | ||
3979 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4134 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
@@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
4715 | return ext4_force_commit(inode->i_sb); | 4870 | return ext4_force_commit(inode->i_sb); |
4716 | } | 4871 | } |
4717 | 4872 | ||
4718 | int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh) | ||
4719 | { | ||
4720 | int err = 0; | ||
4721 | |||
4722 | mark_buffer_dirty(bh); | ||
4723 | if (inode && inode_needs_sync(inode)) { | ||
4724 | sync_dirty_buffer(bh); | ||
4725 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | ||
4726 | ext4_error(inode->i_sb, __func__, | ||
4727 | "IO error syncing inode, " | ||
4728 | "inode=%lu, block=%llu", | ||
4729 | inode->i_ino, | ||
4730 | (unsigned long long)bh->b_blocknr); | ||
4731 | err = -EIO; | ||
4732 | } | ||
4733 | } | ||
4734 | return err; | ||
4735 | } | ||
4736 | |||
4737 | /* | 4873 | /* |
4738 | * ext4_setattr() | 4874 | * ext4_setattr() |
4739 | * | 4875 | * |
@@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4930 | */ | 5066 | */ |
4931 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5067 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
4932 | { | 5068 | { |
4933 | int groups, gdpblocks; | 5069 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5070 | int gdpblocks; | ||
4934 | int idxblocks; | 5071 | int idxblocks; |
4935 | int ret = 0; | 5072 | int ret = 0; |
4936 | 5073 | ||
@@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4957 | groups += nrblocks; | 5094 | groups += nrblocks; |
4958 | 5095 | ||
4959 | gdpblocks = groups; | 5096 | gdpblocks = groups; |
4960 | if (groups > EXT4_SB(inode->i_sb)->s_groups_count) | 5097 | if (groups > ngroups) |
4961 | groups = EXT4_SB(inode->i_sb)->s_groups_count; | 5098 | groups = ngroups; |
4962 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | 5099 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) |
4963 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | 5100 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; |
4964 | 5101 | ||
@@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
4998 | * Calculate the journal credits for a chunk of data modification. | 5135 | * Calculate the journal credits for a chunk of data modification. |
4999 | * | 5136 | * |
5000 | * This is called from DIO, fallocate or whoever calling | 5137 | * This is called from DIO, fallocate or whoever calling |
5001 | * ext4_get_blocks_wrap() to map/allocate a chunk of contiguous disk blocks. | 5138 | * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. |
5002 | * | 5139 | * |
5003 | * journal buffers for data blocks are not included here, as DIO | 5140 | * journal buffers for data blocks are not included here, as DIO |
5004 | * and fallocate do not need to journal data buffers. | 5141 | * and fallocate do not need to journal data buffers. |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f871677a7984..ed8482e22c0e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr) | |||
372 | ext4_set_bit(bit, addr); | 372 | ext4_set_bit(bit, addr); |
373 | } | 373 | } |
374 | 374 | ||
375 | static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) | ||
376 | { | ||
377 | addr = mb_correct_addr_and_bit(&bit, addr); | ||
378 | ext4_set_bit_atomic(lock, bit, addr); | ||
379 | } | ||
380 | |||
381 | static inline void mb_clear_bit(int bit, void *addr) | 375 | static inline void mb_clear_bit(int bit, void *addr) |
382 | { | 376 | { |
383 | addr = mb_correct_addr_and_bit(&bit, addr); | 377 | addr = mb_correct_addr_and_bit(&bit, addr); |
384 | ext4_clear_bit(bit, addr); | 378 | ext4_clear_bit(bit, addr); |
385 | } | 379 | } |
386 | 380 | ||
387 | static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) | ||
388 | { | ||
389 | addr = mb_correct_addr_and_bit(&bit, addr); | ||
390 | ext4_clear_bit_atomic(lock, bit, addr); | ||
391 | } | ||
392 | |||
393 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) | 381 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) |
394 | { | 382 | { |
395 | int fix = 0, ret, tmpmax; | 383 | int fix = 0, ret, tmpmax; |
@@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
448 | 436 | ||
449 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) | 437 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) |
450 | return; | 438 | return; |
451 | BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); | 439 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); |
452 | for (i = 0; i < count; i++) { | 440 | for (i = 0; i < count; i++) { |
453 | if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { | 441 | if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { |
454 | ext4_fsblk_t blocknr; | 442 | ext4_fsblk_t blocknr; |
@@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count) | |||
472 | 460 | ||
473 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) | 461 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) |
474 | return; | 462 | return; |
475 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); | 463 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
476 | for (i = 0; i < count; i++) { | 464 | for (i = 0; i < count; i++) { |
477 | BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); | 465 | BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); |
478 | mb_set_bit(first + i, e4b->bd_info->bb_bitmap); | 466 | mb_set_bit(first + i, e4b->bd_info->bb_bitmap); |
@@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, | |||
739 | 727 | ||
740 | static int ext4_mb_init_cache(struct page *page, char *incore) | 728 | static int ext4_mb_init_cache(struct page *page, char *incore) |
741 | { | 729 | { |
730 | ext4_group_t ngroups; | ||
742 | int blocksize; | 731 | int blocksize; |
743 | int blocks_per_page; | 732 | int blocks_per_page; |
744 | int groups_per_page; | 733 | int groups_per_page; |
@@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
757 | 746 | ||
758 | inode = page->mapping->host; | 747 | inode = page->mapping->host; |
759 | sb = inode->i_sb; | 748 | sb = inode->i_sb; |
749 | ngroups = ext4_get_groups_count(sb); | ||
760 | blocksize = 1 << inode->i_blkbits; | 750 | blocksize = 1 << inode->i_blkbits; |
761 | blocks_per_page = PAGE_CACHE_SIZE / blocksize; | 751 | blocks_per_page = PAGE_CACHE_SIZE / blocksize; |
762 | 752 | ||
@@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
780 | for (i = 0; i < groups_per_page; i++) { | 770 | for (i = 0; i < groups_per_page; i++) { |
781 | struct ext4_group_desc *desc; | 771 | struct ext4_group_desc *desc; |
782 | 772 | ||
783 | if (first_group + i >= EXT4_SB(sb)->s_groups_count) | 773 | if (first_group + i >= ngroups) |
784 | break; | 774 | break; |
785 | 775 | ||
786 | err = -EIO; | 776 | err = -EIO; |
@@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
801 | unlock_buffer(bh[i]); | 791 | unlock_buffer(bh[i]); |
802 | continue; | 792 | continue; |
803 | } | 793 | } |
804 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 794 | ext4_lock_group(sb, first_group + i); |
805 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 795 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
806 | ext4_init_block_bitmap(sb, bh[i], | 796 | ext4_init_block_bitmap(sb, bh[i], |
807 | first_group + i, desc); | 797 | first_group + i, desc); |
808 | set_bitmap_uptodate(bh[i]); | 798 | set_bitmap_uptodate(bh[i]); |
809 | set_buffer_uptodate(bh[i]); | 799 | set_buffer_uptodate(bh[i]); |
810 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 800 | ext4_unlock_group(sb, first_group + i); |
811 | unlock_buffer(bh[i]); | 801 | unlock_buffer(bh[i]); |
812 | continue; | 802 | continue; |
813 | } | 803 | } |
814 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 804 | ext4_unlock_group(sb, first_group + i); |
815 | if (buffer_uptodate(bh[i])) { | 805 | if (buffer_uptodate(bh[i])) { |
816 | /* | 806 | /* |
817 | * if not uninit if bh is uptodate, | 807 | * if not uninit if bh is uptodate, |
@@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
852 | struct ext4_group_info *grinfo; | 842 | struct ext4_group_info *grinfo; |
853 | 843 | ||
854 | group = (first_block + i) >> 1; | 844 | group = (first_block + i) >> 1; |
855 | if (group >= EXT4_SB(sb)->s_groups_count) | 845 | if (group >= ngroups) |
856 | break; | 846 | break; |
857 | 847 | ||
858 | /* | 848 | /* |
@@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) | |||
1078 | return 0; | 1068 | return 0; |
1079 | } | 1069 | } |
1080 | 1070 | ||
1081 | static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) | 1071 | static void mb_clear_bits(void *bm, int cur, int len) |
1082 | { | 1072 | { |
1083 | __u32 *addr; | 1073 | __u32 *addr; |
1084 | 1074 | ||
@@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
1091 | cur += 32; | 1081 | cur += 32; |
1092 | continue; | 1082 | continue; |
1093 | } | 1083 | } |
1094 | if (lock) | 1084 | mb_clear_bit(cur, bm); |
1095 | mb_clear_bit_atomic(lock, cur, bm); | ||
1096 | else | ||
1097 | mb_clear_bit(cur, bm); | ||
1098 | cur++; | 1085 | cur++; |
1099 | } | 1086 | } |
1100 | } | 1087 | } |
1101 | 1088 | ||
1102 | static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | 1089 | static void mb_set_bits(void *bm, int cur, int len) |
1103 | { | 1090 | { |
1104 | __u32 *addr; | 1091 | __u32 *addr; |
1105 | 1092 | ||
@@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
1112 | cur += 32; | 1099 | cur += 32; |
1113 | continue; | 1100 | continue; |
1114 | } | 1101 | } |
1115 | if (lock) | 1102 | mb_set_bit(cur, bm); |
1116 | mb_set_bit_atomic(lock, cur, bm); | ||
1117 | else | ||
1118 | mb_set_bit(cur, bm); | ||
1119 | cur++; | 1103 | cur++; |
1120 | } | 1104 | } |
1121 | } | 1105 | } |
@@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1131 | struct super_block *sb = e4b->bd_sb; | 1115 | struct super_block *sb = e4b->bd_sb; |
1132 | 1116 | ||
1133 | BUG_ON(first + count > (sb->s_blocksize << 3)); | 1117 | BUG_ON(first + count > (sb->s_blocksize << 3)); |
1134 | BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); | 1118 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); |
1135 | mb_check_buddy(e4b); | 1119 | mb_check_buddy(e4b); |
1136 | mb_free_blocks_double(inode, e4b, first, count); | 1120 | mb_free_blocks_double(inode, e4b, first, count); |
1137 | 1121 | ||
@@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
1212 | int ord; | 1196 | int ord; |
1213 | void *buddy; | 1197 | void *buddy; |
1214 | 1198 | ||
1215 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); | 1199 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
1216 | BUG_ON(ex == NULL); | 1200 | BUG_ON(ex == NULL); |
1217 | 1201 | ||
1218 | buddy = mb_find_buddy(e4b, order, &max); | 1202 | buddy = mb_find_buddy(e4b, order, &max); |
@@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1276 | 1260 | ||
1277 | BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); | 1261 | BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); |
1278 | BUG_ON(e4b->bd_group != ex->fe_group); | 1262 | BUG_ON(e4b->bd_group != ex->fe_group); |
1279 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); | 1263 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
1280 | mb_check_buddy(e4b); | 1264 | mb_check_buddy(e4b); |
1281 | mb_mark_used_double(e4b, start, len); | 1265 | mb_mark_used_double(e4b, start, len); |
1282 | 1266 | ||
@@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1330 | e4b->bd_info->bb_counters[ord]++; | 1314 | e4b->bd_info->bb_counters[ord]++; |
1331 | } | 1315 | } |
1332 | 1316 | ||
1333 | mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), | 1317 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); |
1334 | EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | ||
1335 | mb_check_buddy(e4b); | 1318 | mb_check_buddy(e4b); |
1336 | 1319 | ||
1337 | return ret; | 1320 | return ret; |
@@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1726 | unsigned free, fragments; | 1709 | unsigned free, fragments; |
1727 | unsigned i, bits; | 1710 | unsigned i, bits; |
1728 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); | 1711 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); |
1729 | struct ext4_group_desc *desc; | ||
1730 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | 1712 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
1731 | 1713 | ||
1732 | BUG_ON(cr < 0 || cr >= 4); | 1714 | BUG_ON(cr < 0 || cr >= 4); |
@@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1742 | switch (cr) { | 1724 | switch (cr) { |
1743 | case 0: | 1725 | case 0: |
1744 | BUG_ON(ac->ac_2order == 0); | 1726 | BUG_ON(ac->ac_2order == 0); |
1745 | /* If this group is uninitialized, skip it initially */ | ||
1746 | desc = ext4_get_group_desc(ac->ac_sb, group, NULL); | ||
1747 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
1748 | return 0; | ||
1749 | 1727 | ||
1750 | /* Avoid using the first bg of a flexgroup for data files */ | 1728 | /* Avoid using the first bg of a flexgroup for data files */ |
1751 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && | 1729 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && |
@@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | |||
1788 | int block, pnum; | 1766 | int block, pnum; |
1789 | int blocks_per_page; | 1767 | int blocks_per_page; |
1790 | int groups_per_page; | 1768 | int groups_per_page; |
1769 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1791 | ext4_group_t first_group; | 1770 | ext4_group_t first_group; |
1792 | struct ext4_group_info *grp; | 1771 | struct ext4_group_info *grp; |
1793 | 1772 | ||
@@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | |||
1807 | /* read all groups the page covers into the cache */ | 1786 | /* read all groups the page covers into the cache */ |
1808 | for (i = 0; i < groups_per_page; i++) { | 1787 | for (i = 0; i < groups_per_page; i++) { |
1809 | 1788 | ||
1810 | if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) | 1789 | if ((first_group + i) >= ngroups) |
1811 | break; | 1790 | break; |
1812 | grp = ext4_get_group_info(sb, first_group + i); | 1791 | grp = ext4_get_group_info(sb, first_group + i); |
1813 | /* take all groups write allocation | 1792 | /* take all groups write allocation |
@@ -1945,8 +1924,7 @@ err: | |||
1945 | static noinline_for_stack int | 1924 | static noinline_for_stack int |
1946 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1925 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1947 | { | 1926 | { |
1948 | ext4_group_t group; | 1927 | ext4_group_t ngroups, group, i; |
1949 | ext4_group_t i; | ||
1950 | int cr; | 1928 | int cr; |
1951 | int err = 0; | 1929 | int err = 0; |
1952 | int bsbits; | 1930 | int bsbits; |
@@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1957 | 1935 | ||
1958 | sb = ac->ac_sb; | 1936 | sb = ac->ac_sb; |
1959 | sbi = EXT4_SB(sb); | 1937 | sbi = EXT4_SB(sb); |
1938 | ngroups = ext4_get_groups_count(sb); | ||
1960 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 1939 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
1961 | 1940 | ||
1962 | /* first, try the goal */ | 1941 | /* first, try the goal */ |
@@ -2017,11 +1996,11 @@ repeat: | |||
2017 | */ | 1996 | */ |
2018 | group = ac->ac_g_ex.fe_group; | 1997 | group = ac->ac_g_ex.fe_group; |
2019 | 1998 | ||
2020 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { | 1999 | for (i = 0; i < ngroups; group++, i++) { |
2021 | struct ext4_group_info *grp; | 2000 | struct ext4_group_info *grp; |
2022 | struct ext4_group_desc *desc; | 2001 | struct ext4_group_desc *desc; |
2023 | 2002 | ||
2024 | if (group == EXT4_SB(sb)->s_groups_count) | 2003 | if (group == ngroups) |
2025 | group = 0; | 2004 | group = 0; |
2026 | 2005 | ||
2027 | /* quick check to skip empty groups */ | 2006 | /* quick check to skip empty groups */ |
@@ -2064,9 +2043,7 @@ repeat: | |||
2064 | 2043 | ||
2065 | ac->ac_groups_scanned++; | 2044 | ac->ac_groups_scanned++; |
2066 | desc = ext4_get_group_desc(sb, group, NULL); | 2045 | desc = ext4_get_group_desc(sb, group, NULL); |
2067 | if (cr == 0 || (desc->bg_flags & | 2046 | if (cr == 0) |
2068 | cpu_to_le16(EXT4_BG_BLOCK_UNINIT) && | ||
2069 | ac->ac_2order != 0)) | ||
2070 | ext4_mb_simple_scan_group(ac, &e4b); | 2047 | ext4_mb_simple_scan_group(ac, &e4b); |
2071 | else if (cr == 1 && | 2048 | else if (cr == 1 && |
2072 | ac->ac_g_ex.fe_len == sbi->s_stripe) | 2049 | ac->ac_g_ex.fe_len == sbi->s_stripe) |
@@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = { | |||
2315 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | 2292 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) |
2316 | { | 2293 | { |
2317 | struct super_block *sb = seq->private; | 2294 | struct super_block *sb = seq->private; |
2318 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2319 | ext4_group_t group; | 2295 | ext4_group_t group; |
2320 | 2296 | ||
2321 | if (*pos < 0 || *pos >= sbi->s_groups_count) | 2297 | if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) |
2322 | return NULL; | 2298 | return NULL; |
2323 | |||
2324 | group = *pos + 1; | 2299 | group = *pos + 1; |
2325 | return (void *) ((unsigned long) group); | 2300 | return (void *) ((unsigned long) group); |
2326 | } | 2301 | } |
@@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | |||
2328 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) | 2303 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) |
2329 | { | 2304 | { |
2330 | struct super_block *sb = seq->private; | 2305 | struct super_block *sb = seq->private; |
2331 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2332 | ext4_group_t group; | 2306 | ext4_group_t group; |
2333 | 2307 | ||
2334 | ++*pos; | 2308 | ++*pos; |
2335 | if (*pos < 0 || *pos >= sbi->s_groups_count) | 2309 | if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) |
2336 | return NULL; | 2310 | return NULL; |
2337 | group = *pos + 1; | 2311 | group = *pos + 1; |
2338 | return (void *) ((unsigned long) group); | 2312 | return (void *) ((unsigned long) group); |
@@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb) | |||
2420 | 2394 | ||
2421 | if (sbi->s_proc != NULL) { | 2395 | if (sbi->s_proc != NULL) { |
2422 | remove_proc_entry("mb_groups", sbi->s_proc); | 2396 | remove_proc_entry("mb_groups", sbi->s_proc); |
2423 | remove_proc_entry("mb_history", sbi->s_proc); | 2397 | if (sbi->s_mb_history_max) |
2398 | remove_proc_entry("mb_history", sbi->s_proc); | ||
2424 | } | 2399 | } |
2425 | kfree(sbi->s_mb_history); | 2400 | kfree(sbi->s_mb_history); |
2426 | } | 2401 | } |
@@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
2431 | int i; | 2406 | int i; |
2432 | 2407 | ||
2433 | if (sbi->s_proc != NULL) { | 2408 | if (sbi->s_proc != NULL) { |
2434 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, | 2409 | if (sbi->s_mb_history_max) |
2435 | &ext4_mb_seq_history_fops, sb); | 2410 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, |
2411 | &ext4_mb_seq_history_fops, sb); | ||
2436 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | 2412 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2437 | &ext4_mb_seq_groups_fops, sb); | 2413 | &ext4_mb_seq_groups_fops, sb); |
2438 | } | 2414 | } |
2439 | 2415 | ||
2440 | sbi->s_mb_history_max = 1000; | ||
2441 | sbi->s_mb_history_cur = 0; | 2416 | sbi->s_mb_history_cur = 0; |
2442 | spin_lock_init(&sbi->s_mb_history_lock); | 2417 | spin_lock_init(&sbi->s_mb_history_lock); |
2443 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); | 2418 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); |
2444 | sbi->s_mb_history = kzalloc(i, GFP_KERNEL); | 2419 | sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL; |
2445 | /* if we can't allocate history, then we simple won't use it */ | 2420 | /* if we can't allocate history, then we simple won't use it */ |
2446 | } | 2421 | } |
2447 | 2422 | ||
@@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) | |||
2451 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | 2426 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
2452 | struct ext4_mb_history h; | 2427 | struct ext4_mb_history h; |
2453 | 2428 | ||
2454 | if (unlikely(sbi->s_mb_history == NULL)) | 2429 | if (sbi->s_mb_history == NULL) |
2455 | return; | 2430 | return; |
2456 | 2431 | ||
2457 | if (!(ac->ac_op & sbi->s_mb_history_filter)) | 2432 | if (!(ac->ac_op & sbi->s_mb_history_filter)) |
@@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | |||
2587 | 2562 | ||
2588 | static int ext4_mb_init_backend(struct super_block *sb) | 2563 | static int ext4_mb_init_backend(struct super_block *sb) |
2589 | { | 2564 | { |
2565 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
2590 | ext4_group_t i; | 2566 | ext4_group_t i; |
2591 | int metalen; | 2567 | int metalen; |
2592 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2568 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2598 | struct ext4_group_desc *desc; | 2574 | struct ext4_group_desc *desc; |
2599 | 2575 | ||
2600 | /* This is the number of blocks used by GDT */ | 2576 | /* This is the number of blocks used by GDT */ |
2601 | num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - | 2577 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
2602 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); | 2578 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); |
2603 | 2579 | ||
2604 | /* | 2580 | /* |
@@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2644 | for (i = 0; i < num_meta_group_infos; i++) { | 2620 | for (i = 0; i < num_meta_group_infos; i++) { |
2645 | if ((i + 1) == num_meta_group_infos) | 2621 | if ((i + 1) == num_meta_group_infos) |
2646 | metalen = sizeof(*meta_group_info) * | 2622 | metalen = sizeof(*meta_group_info) * |
2647 | (sbi->s_groups_count - | 2623 | (ngroups - |
2648 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); | 2624 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); |
2649 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | 2625 | meta_group_info = kmalloc(metalen, GFP_KERNEL); |
2650 | if (meta_group_info == NULL) { | 2626 | if (meta_group_info == NULL) { |
@@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2655 | sbi->s_group_info[i] = meta_group_info; | 2631 | sbi->s_group_info[i] = meta_group_info; |
2656 | } | 2632 | } |
2657 | 2633 | ||
2658 | for (i = 0; i < sbi->s_groups_count; i++) { | 2634 | for (i = 0; i < ngroups; i++) { |
2659 | desc = ext4_get_group_desc(sb, i, NULL); | 2635 | desc = ext4_get_group_desc(sb, i, NULL); |
2660 | if (desc == NULL) { | 2636 | if (desc == NULL) { |
2661 | printk(KERN_ERR | 2637 | printk(KERN_ERR |
@@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2761 | return 0; | 2737 | return 0; |
2762 | } | 2738 | } |
2763 | 2739 | ||
2764 | /* need to called with ext4 group lock (ext4_lock_group) */ | 2740 | /* need to called with the ext4 group lock held */ |
2765 | static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | 2741 | static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) |
2766 | { | 2742 | { |
2767 | struct ext4_prealloc_space *pa; | 2743 | struct ext4_prealloc_space *pa; |
@@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2781 | 2757 | ||
2782 | int ext4_mb_release(struct super_block *sb) | 2758 | int ext4_mb_release(struct super_block *sb) |
2783 | { | 2759 | { |
2760 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
2784 | ext4_group_t i; | 2761 | ext4_group_t i; |
2785 | int num_meta_group_infos; | 2762 | int num_meta_group_infos; |
2786 | struct ext4_group_info *grinfo; | 2763 | struct ext4_group_info *grinfo; |
2787 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2764 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2788 | 2765 | ||
2789 | if (sbi->s_group_info) { | 2766 | if (sbi->s_group_info) { |
2790 | for (i = 0; i < sbi->s_groups_count; i++) { | 2767 | for (i = 0; i < ngroups; i++) { |
2791 | grinfo = ext4_get_group_info(sb, i); | 2768 | grinfo = ext4_get_group_info(sb, i); |
2792 | #ifdef DOUBLE_CHECK | 2769 | #ifdef DOUBLE_CHECK |
2793 | kfree(grinfo->bb_bitmap); | 2770 | kfree(grinfo->bb_bitmap); |
@@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2797 | ext4_unlock_group(sb, i); | 2774 | ext4_unlock_group(sb, i); |
2798 | kfree(grinfo); | 2775 | kfree(grinfo); |
2799 | } | 2776 | } |
2800 | num_meta_group_infos = (sbi->s_groups_count + | 2777 | num_meta_group_infos = (ngroups + |
2801 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2778 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
2802 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2779 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2803 | for (i = 0; i < num_meta_group_infos; i++) | 2780 | for (i = 0; i < num_meta_group_infos; i++) |
@@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2984 | + le32_to_cpu(es->s_first_data_block); | 2961 | + le32_to_cpu(es->s_first_data_block); |
2985 | 2962 | ||
2986 | len = ac->ac_b_ex.fe_len; | 2963 | len = ac->ac_b_ex.fe_len; |
2987 | if (in_range(ext4_block_bitmap(sb, gdp), block, len) || | 2964 | if (!ext4_data_block_valid(sbi, block, len)) { |
2988 | in_range(ext4_inode_bitmap(sb, gdp), block, len) || | ||
2989 | in_range(block, ext4_inode_table(sb, gdp), | ||
2990 | EXT4_SB(sb)->s_itb_per_group) || | ||
2991 | in_range(block + len - 1, ext4_inode_table(sb, gdp), | ||
2992 | EXT4_SB(sb)->s_itb_per_group)) { | ||
2993 | ext4_error(sb, __func__, | 2965 | ext4_error(sb, __func__, |
2994 | "Allocating block %llu in system zone of %d group\n", | 2966 | "Allocating blocks %llu-%llu which overlap " |
2995 | block, ac->ac_b_ex.fe_group); | 2967 | "fs metadata\n", block, block+len); |
2996 | /* File system mounted not to panic on error | 2968 | /* File system mounted not to panic on error |
2997 | * Fix the bitmap and repeat the block allocation | 2969 | * Fix the bitmap and repeat the block allocation |
2998 | * We leak some of the blocks here. | 2970 | * We leak some of the blocks here. |
2999 | */ | 2971 | */ |
3000 | mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), | 2972 | ext4_lock_group(sb, ac->ac_b_ex.fe_group); |
3001 | bitmap_bh->b_data, ac->ac_b_ex.fe_start, | 2973 | mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, |
3002 | ac->ac_b_ex.fe_len); | 2974 | ac->ac_b_ex.fe_len); |
2975 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | ||
3003 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 2976 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
3004 | if (!err) | 2977 | if (!err) |
3005 | err = -EAGAIN; | 2978 | err = -EAGAIN; |
3006 | goto out_err; | 2979 | goto out_err; |
3007 | } | 2980 | } |
2981 | |||
2982 | ext4_lock_group(sb, ac->ac_b_ex.fe_group); | ||
3008 | #ifdef AGGRESSIVE_CHECK | 2983 | #ifdef AGGRESSIVE_CHECK |
3009 | { | 2984 | { |
3010 | int i; | 2985 | int i; |
@@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
3014 | } | 2989 | } |
3015 | } | 2990 | } |
3016 | #endif | 2991 | #endif |
3017 | spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2992 | mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len); |
3018 | mb_set_bits(NULL, bitmap_bh->b_data, | ||
3019 | ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); | ||
3020 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 2993 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
3021 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 2994 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
3022 | ext4_free_blks_set(sb, gdp, | 2995 | ext4_free_blks_set(sb, gdp, |
@@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
3026 | len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; | 2999 | len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; |
3027 | ext4_free_blks_set(sb, gdp, len); | 3000 | ext4_free_blks_set(sb, gdp, len); |
3028 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 3001 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
3029 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 3002 | |
3003 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | ||
3030 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | 3004 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); |
3031 | /* | 3005 | /* |
3032 | * Now reduce the dirty block count also. Should not go negative | 3006 | * Now reduce the dirty block count also. Should not go negative |
@@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3459 | * the function goes through all block freed in the group | 3433 | * the function goes through all block freed in the group |
3460 | * but not yet committed and marks them used in in-core bitmap. | 3434 | * but not yet committed and marks them used in in-core bitmap. |
3461 | * buddy must be generated from this bitmap | 3435 | * buddy must be generated from this bitmap |
3462 | * Need to be called with ext4 group lock (ext4_lock_group) | 3436 | * Need to be called with the ext4 group lock held |
3463 | */ | 3437 | */ |
3464 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 3438 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
3465 | ext4_group_t group) | 3439 | ext4_group_t group) |
@@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3473 | 3447 | ||
3474 | while (n) { | 3448 | while (n) { |
3475 | entry = rb_entry(n, struct ext4_free_data, node); | 3449 | entry = rb_entry(n, struct ext4_free_data, node); |
3476 | mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), | 3450 | mb_set_bits(bitmap, entry->start_blk, entry->count); |
3477 | bitmap, entry->start_blk, | ||
3478 | entry->count); | ||
3479 | n = rb_next(n); | 3451 | n = rb_next(n); |
3480 | } | 3452 | } |
3481 | return; | 3453 | return; |
@@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3484 | /* | 3456 | /* |
3485 | * the function goes through all preallocation in this group and marks them | 3457 | * the function goes through all preallocation in this group and marks them |
3486 | * used in in-core bitmap. buddy must be generated from this bitmap | 3458 | * used in in-core bitmap. buddy must be generated from this bitmap |
3487 | * Need to be called with ext4 group lock (ext4_lock_group) | 3459 | * Need to be called with ext4 group lock held |
3488 | */ | 3460 | */ |
3489 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 3461 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
3490 | ext4_group_t group) | 3462 | ext4_group_t group) |
@@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3516 | if (unlikely(len == 0)) | 3488 | if (unlikely(len == 0)) |
3517 | continue; | 3489 | continue; |
3518 | BUG_ON(groupnr != group); | 3490 | BUG_ON(groupnr != group); |
3519 | mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), | 3491 | mb_set_bits(bitmap, start, len); |
3520 | bitmap, start, len); | ||
3521 | preallocated += len; | 3492 | preallocated += len; |
3522 | count++; | 3493 | count++; |
3523 | } | 3494 | } |
@@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, | |||
4121 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 4092 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
4122 | { | 4093 | { |
4123 | struct super_block *sb = ac->ac_sb; | 4094 | struct super_block *sb = ac->ac_sb; |
4124 | ext4_group_t i; | 4095 | ext4_group_t ngroups, i; |
4125 | 4096 | ||
4126 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 4097 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
4127 | " Allocation context details:\n"); | 4098 | " Allocation context details:\n"); |
@@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
4145 | printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, | 4116 | printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, |
4146 | ac->ac_found); | 4117 | ac->ac_found); |
4147 | printk(KERN_ERR "EXT4-fs: groups: \n"); | 4118 | printk(KERN_ERR "EXT4-fs: groups: \n"); |
4148 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 4119 | ngroups = ext4_get_groups_count(sb); |
4120 | for (i = 0; i < ngroups; i++) { | ||
4149 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); | 4121 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
4150 | struct ext4_prealloc_space *pa; | 4122 | struct ext4_prealloc_space *pa; |
4151 | ext4_grpblk_t start; | 4123 | ext4_grpblk_t start; |
@@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4469 | 4441 | ||
4470 | static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | 4442 | static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) |
4471 | { | 4443 | { |
4472 | ext4_group_t i; | 4444 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
4473 | int ret; | 4445 | int ret; |
4474 | int freed = 0; | 4446 | int freed = 0; |
4475 | 4447 | ||
4476 | trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", | 4448 | trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", |
4477 | sb->s_id, needed); | 4449 | sb->s_id, needed); |
4478 | for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { | 4450 | for (i = 0; i < ngroups && needed > 0; i++) { |
4479 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); | 4451 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); |
4480 | freed += ret; | 4452 | freed += ret; |
4481 | needed -= ret; | 4453 | needed -= ret; |
@@ -4859,29 +4831,25 @@ do_more: | |||
4859 | new_entry->group = block_group; | 4831 | new_entry->group = block_group; |
4860 | new_entry->count = count; | 4832 | new_entry->count = count; |
4861 | new_entry->t_tid = handle->h_transaction->t_tid; | 4833 | new_entry->t_tid = handle->h_transaction->t_tid; |
4834 | |||
4862 | ext4_lock_group(sb, block_group); | 4835 | ext4_lock_group(sb, block_group); |
4863 | mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | 4836 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4864 | bit, count); | ||
4865 | ext4_mb_free_metadata(handle, &e4b, new_entry); | 4837 | ext4_mb_free_metadata(handle, &e4b, new_entry); |
4866 | ext4_unlock_group(sb, block_group); | ||
4867 | } else { | 4838 | } else { |
4868 | ext4_lock_group(sb, block_group); | ||
4869 | /* need to update group_info->bb_free and bitmap | 4839 | /* need to update group_info->bb_free and bitmap |
4870 | * with group lock held. generate_buddy look at | 4840 | * with group lock held. generate_buddy look at |
4871 | * them with group lock_held | 4841 | * them with group lock_held |
4872 | */ | 4842 | */ |
4873 | mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | 4843 | ext4_lock_group(sb, block_group); |
4874 | bit, count); | 4844 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4875 | mb_free_blocks(inode, &e4b, bit, count); | 4845 | mb_free_blocks(inode, &e4b, bit, count); |
4876 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4846 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
4877 | ext4_unlock_group(sb, block_group); | ||
4878 | } | 4847 | } |
4879 | 4848 | ||
4880 | spin_lock(sb_bgl_lock(sbi, block_group)); | ||
4881 | ret = ext4_free_blks_count(sb, gdp) + count; | 4849 | ret = ext4_free_blks_count(sb, gdp) + count; |
4882 | ext4_free_blks_set(sb, gdp, ret); | 4850 | ext4_free_blks_set(sb, gdp, ret); |
4883 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | 4851 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); |
4884 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 4852 | ext4_unlock_group(sb, block_group); |
4885 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 4853 | percpu_counter_add(&sbi->s_freeblocks_counter, count); |
4886 | 4854 | ||
4887 | if (sbi->s_log_groups_per_flex) { | 4855 | if (sbi->s_log_groups_per_flex) { |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index dd9e6cd5f6cf..75e34f69215b 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include "ext4_jbd2.h" | 24 | #include "ext4_jbd2.h" |
25 | #include "ext4.h" | 25 | #include "ext4.h" |
26 | #include "group.h" | ||
27 | 26 | ||
28 | /* | 27 | /* |
29 | * with AGGRESSIVE_CHECK allocator runs consistency checks over | 28 | * with AGGRESSIVE_CHECK allocator runs consistency checks over |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 22098e1cd085..07eb6649e4fa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include "ext4.h" | 37 | #include "ext4.h" |
38 | #include "ext4_jbd2.h" | 38 | #include "ext4_jbd2.h" |
39 | 39 | ||
40 | #include "namei.h" | ||
41 | #include "xattr.h" | 40 | #include "xattr.h" |
42 | #include "acl.h" | 41 | #include "acl.h" |
43 | 42 | ||
@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, | |||
750 | ext4fs_dirhash(de->name, de->name_len, &h); | 749 | ext4fs_dirhash(de->name, de->name_len, &h); |
751 | map_tail--; | 750 | map_tail--; |
752 | map_tail->hash = h.hash; | 751 | map_tail->hash = h.hash; |
753 | map_tail->offs = (u16) ((char *) de - base); | 752 | map_tail->offs = ((char *) de - base)>>2; |
754 | map_tail->size = le16_to_cpu(de->rec_len); | 753 | map_tail->size = le16_to_cpu(de->rec_len); |
755 | count++; | 754 | count++; |
756 | cond_resched(); | 755 | cond_resched(); |
@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, | |||
1148 | unsigned rec_len = 0; | 1147 | unsigned rec_len = 0; |
1149 | 1148 | ||
1150 | while (count--) { | 1149 | while (count--) { |
1151 | struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); | 1150 | struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) |
1151 | (from + (map->offs<<2)); | ||
1152 | rec_len = EXT4_DIR_REC_LEN(de->name_len); | 1152 | rec_len = EXT4_DIR_REC_LEN(de->name_len); |
1153 | memcpy (to, de, rec_len); | 1153 | memcpy (to, de, rec_len); |
1154 | ((struct ext4_dir_entry_2 *) to)->rec_len = | 1154 | ((struct ext4_dir_entry_2 *) to)->rec_len = |
@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
1997 | if (!ext4_handle_valid(handle)) | 1997 | if (!ext4_handle_valid(handle)) |
1998 | return 0; | 1998 | return 0; |
1999 | 1999 | ||
2000 | lock_super(sb); | 2000 | mutex_lock(&EXT4_SB(sb)->s_orphan_lock); |
2001 | if (!list_empty(&EXT4_I(inode)->i_orphan)) | 2001 | if (!list_empty(&EXT4_I(inode)->i_orphan)) |
2002 | goto out_unlock; | 2002 | goto out_unlock; |
2003 | 2003 | ||
@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2006 | 2006 | ||
2007 | /* @@@ FIXME: Observation from aviro: | 2007 | /* @@@ FIXME: Observation from aviro: |
2008 | * I think I can trigger J_ASSERT in ext4_orphan_add(). We block | 2008 | * I think I can trigger J_ASSERT in ext4_orphan_add(). We block |
2009 | * here (on lock_super()), so race with ext4_link() which might bump | 2009 | * here (on s_orphan_lock), so race with ext4_link() which might bump |
2010 | * ->i_nlink. For, say it, character device. Not a regular file, | 2010 | * ->i_nlink. For, say it, character device. Not a regular file, |
2011 | * not a directory, not a symlink and ->i_nlink > 0. | 2011 | * not a directory, not a symlink and ->i_nlink > 0. |
2012 | * | ||
2013 | * tytso, 4/25/2009: I'm not sure how that could happen; | ||
2014 | * shouldn't the fs core protect us from these sort of | ||
2015 | * unlink()/link() races? | ||
2012 | */ | 2016 | */ |
2013 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 2017 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
2014 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); | 2018 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); |
@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2045 | jbd_debug(4, "orphan inode %lu will point to %d\n", | 2049 | jbd_debug(4, "orphan inode %lu will point to %d\n", |
2046 | inode->i_ino, NEXT_ORPHAN(inode)); | 2050 | inode->i_ino, NEXT_ORPHAN(inode)); |
2047 | out_unlock: | 2051 | out_unlock: |
2048 | unlock_super(sb); | 2052 | mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); |
2049 | ext4_std_error(inode->i_sb, err); | 2053 | ext4_std_error(inode->i_sb, err); |
2050 | return err; | 2054 | return err; |
2051 | } | 2055 | } |
@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2066 | if (!ext4_handle_valid(handle)) | 2070 | if (!ext4_handle_valid(handle)) |
2067 | return 0; | 2071 | return 0; |
2068 | 2072 | ||
2069 | lock_super(inode->i_sb); | 2073 | mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); |
2070 | if (list_empty(&ei->i_orphan)) { | 2074 | if (list_empty(&ei->i_orphan)) |
2071 | unlock_super(inode->i_sb); | 2075 | goto out; |
2072 | return 0; | ||
2073 | } | ||
2074 | 2076 | ||
2075 | ino_next = NEXT_ORPHAN(inode); | 2077 | ino_next = NEXT_ORPHAN(inode); |
2076 | prev = ei->i_orphan.prev; | 2078 | prev = ei->i_orphan.prev; |
@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2120 | out_err: | 2122 | out_err: |
2121 | ext4_std_error(inode->i_sb, err); | 2123 | ext4_std_error(inode->i_sb, err); |
2122 | out: | 2124 | out: |
2123 | unlock_super(inode->i_sb); | 2125 | mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock); |
2124 | return err; | 2126 | return err; |
2125 | 2127 | ||
2126 | out_brelse: | 2128 | out_brelse: |
@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2533 | .removexattr = generic_removexattr, | 2535 | .removexattr = generic_removexattr, |
2534 | #endif | 2536 | #endif |
2535 | .permission = ext4_permission, | 2537 | .permission = ext4_permission, |
2538 | .fiemap = ext4_fiemap, | ||
2536 | }; | 2539 | }; |
2537 | 2540 | ||
2538 | const struct inode_operations ext4_special_inode_operations = { | 2541 | const struct inode_operations ext4_special_inode_operations = { |
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h deleted file mode 100644 index 5e4dfff36a00..000000000000 --- a/fs/ext4/namei.h +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | /* linux/fs/ext4/namei.h | ||
2 | * | ||
3 | * Copyright (C) 2005 Simtec Electronics | ||
4 | * Ben Dooks <ben@simtec.co.uk> | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | extern struct dentry *ext4_get_parent(struct dentry *child); | ||
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 546c7dd869e1..27eb289eea37 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | 16 | ||
17 | #include "ext4_jbd2.h" | 17 | #include "ext4_jbd2.h" |
18 | #include "group.h" | ||
19 | 18 | ||
20 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) | 19 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) |
21 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) | 20 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) |
@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
193 | if (IS_ERR(handle)) | 192 | if (IS_ERR(handle)) |
194 | return PTR_ERR(handle); | 193 | return PTR_ERR(handle); |
195 | 194 | ||
196 | lock_super(sb); | 195 | mutex_lock(&sbi->s_resize_lock); |
197 | if (input->group != sbi->s_groups_count) { | 196 | if (input->group != sbi->s_groups_count) { |
198 | err = -EBUSY; | 197 | err = -EBUSY; |
199 | goto exit_journal; | 198 | goto exit_journal; |
@@ -302,7 +301,7 @@ exit_bh: | |||
302 | brelse(bh); | 301 | brelse(bh); |
303 | 302 | ||
304 | exit_journal: | 303 | exit_journal: |
305 | unlock_super(sb); | 304 | mutex_unlock(&sbi->s_resize_lock); |
306 | if ((err2 = ext4_journal_stop(handle)) && !err) | 305 | if ((err2 = ext4_journal_stop(handle)) && !err) |
307 | err = err2; | 306 | err = err2; |
308 | 307 | ||
@@ -643,11 +642,12 @@ exit_free: | |||
643 | * important part is that the new block and inode counts are in the backup | 642 | * important part is that the new block and inode counts are in the backup |
644 | * superblocks, and the location of the new group metadata in the GDT backups. | 643 | * superblocks, and the location of the new group metadata in the GDT backups. |
645 | * | 644 | * |
646 | * We do not need lock_super() for this, because these blocks are not | 645 | * We do not need take the s_resize_lock for this, because these |
647 | * otherwise touched by the filesystem code when it is mounted. We don't | 646 | * blocks are not otherwise touched by the filesystem code when it is |
648 | * need to worry about last changing from sbi->s_groups_count, because the | 647 | * mounted. We don't need to worry about last changing from |
649 | * worst that can happen is that we do not copy the full number of backups | 648 | * sbi->s_groups_count, because the worst that can happen is that we |
650 | * at this time. The resize which changed s_groups_count will backup again. | 649 | * do not copy the full number of backups at this time. The resize |
650 | * which changed s_groups_count will backup again. | ||
651 | */ | 651 | */ |
652 | static void update_backups(struct super_block *sb, | 652 | static void update_backups(struct super_block *sb, |
653 | int blk_off, char *data, int size) | 653 | int blk_off, char *data, int size) |
@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
809 | goto exit_put; | 809 | goto exit_put; |
810 | } | 810 | } |
811 | 811 | ||
812 | lock_super(sb); | 812 | mutex_lock(&sbi->s_resize_lock); |
813 | if (input->group != sbi->s_groups_count) { | 813 | if (input->group != sbi->s_groups_count) { |
814 | ext4_warning(sb, __func__, | 814 | ext4_warning(sb, __func__, |
815 | "multiple resizers run on filesystem!"); | 815 | "multiple resizers run on filesystem!"); |
@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
840 | /* | 840 | /* |
841 | * OK, now we've set up the new group. Time to make it active. | 841 | * OK, now we've set up the new group. Time to make it active. |
842 | * | 842 | * |
843 | * Current kernels don't lock all allocations via lock_super(), | 843 | * We do not lock all allocations via s_resize_lock |
844 | * so we have to be safe wrt. concurrent accesses the group | 844 | * so we have to be safe wrt. concurrent accesses the group |
845 | * data. So we need to be careful to set all of the relevant | 845 | * data. So we need to be careful to set all of the relevant |
846 | * group descriptor data etc. *before* we enable the group. | 846 | * group descriptor data etc. *before* we enable the group. |
@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
900 | * | 900 | * |
901 | * The precise rules we use are: | 901 | * The precise rules we use are: |
902 | * | 902 | * |
903 | * * Writers of s_groups_count *must* hold lock_super | 903 | * * Writers of s_groups_count *must* hold s_resize_lock |
904 | * AND | 904 | * AND |
905 | * * Writers must perform a smp_wmb() after updating all dependent | 905 | * * Writers must perform a smp_wmb() after updating all dependent |
906 | * data and before modifying the groups count | 906 | * data and before modifying the groups count |
907 | * | 907 | * |
908 | * * Readers must hold lock_super() over the access | 908 | * * Readers must hold s_resize_lock over the access |
909 | * OR | 909 | * OR |
910 | * * Readers must perform an smp_rmb() after reading the groups count | 910 | * * Readers must perform an smp_rmb() after reading the groups count |
911 | * and before reading any dependent data. | 911 | * and before reading any dependent data. |
@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
948 | sb->s_dirt = 1; | 948 | sb->s_dirt = 1; |
949 | 949 | ||
950 | exit_journal: | 950 | exit_journal: |
951 | unlock_super(sb); | 951 | mutex_unlock(&sbi->s_resize_lock); |
952 | if ((err2 = ext4_journal_stop(handle)) && !err) | 952 | if ((err2 = ext4_journal_stop(handle)) && !err) |
953 | err = err2; | 953 | err = err2; |
954 | if (!err) { | 954 | if (!err) { |
@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
986 | 986 | ||
987 | /* We don't need to worry about locking wrt other resizers just | 987 | /* We don't need to worry about locking wrt other resizers just |
988 | * yet: we're going to revalidate es->s_blocks_count after | 988 | * yet: we're going to revalidate es->s_blocks_count after |
989 | * taking lock_super() below. */ | 989 | * taking the s_resize_lock below. */ |
990 | o_blocks_count = ext4_blocks_count(es); | 990 | o_blocks_count = ext4_blocks_count(es); |
991 | o_groups_count = EXT4_SB(sb)->s_groups_count; | 991 | o_groups_count = EXT4_SB(sb)->s_groups_count; |
992 | 992 | ||
@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1056 | goto exit_put; | 1056 | goto exit_put; |
1057 | } | 1057 | } |
1058 | 1058 | ||
1059 | lock_super(sb); | 1059 | mutex_lock(&EXT4_SB(sb)->s_resize_lock); |
1060 | if (o_blocks_count != ext4_blocks_count(es)) { | 1060 | if (o_blocks_count != ext4_blocks_count(es)) { |
1061 | ext4_warning(sb, __func__, | 1061 | ext4_warning(sb, __func__, |
1062 | "multiple resizers run on filesystem!"); | 1062 | "multiple resizers run on filesystem!"); |
1063 | unlock_super(sb); | 1063 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1064 | ext4_journal_stop(handle); | 1064 | ext4_journal_stop(handle); |
1065 | err = -EBUSY; | 1065 | err = -EBUSY; |
1066 | goto exit_put; | 1066 | goto exit_put; |
@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1070 | EXT4_SB(sb)->s_sbh))) { | 1070 | EXT4_SB(sb)->s_sbh))) { |
1071 | ext4_warning(sb, __func__, | 1071 | ext4_warning(sb, __func__, |
1072 | "error %d on journal write access", err); | 1072 | "error %d on journal write access", err); |
1073 | unlock_super(sb); | 1073 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1074 | ext4_journal_stop(handle); | 1074 | ext4_journal_stop(handle); |
1075 | goto exit_put; | 1075 | goto exit_put; |
1076 | } | 1076 | } |
1077 | ext4_blocks_count_set(es, o_blocks_count + add); | 1077 | ext4_blocks_count_set(es, o_blocks_count + add); |
1078 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | 1078 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); |
1079 | sb->s_dirt = 1; | 1079 | sb->s_dirt = 1; |
1080 | unlock_super(sb); | 1080 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1081 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1081 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1082 | o_blocks_count + add); | 1082 | o_blocks_count + add); |
1083 | /* We add the blocks to the bitmap and set the group need init bit */ | 1083 | /* We add the blocks to the bitmap and set the group need init bit */ |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2958f4e6f222..012c4251397e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/string.h> | 20 | #include <linux/string.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
23 | #include <linux/vmalloc.h> | ||
23 | #include <linux/jbd2.h> | 24 | #include <linux/jbd2.h> |
24 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
25 | #include <linux/init.h> | 26 | #include <linux/init.h> |
@@ -45,16 +46,20 @@ | |||
45 | #include "ext4_jbd2.h" | 46 | #include "ext4_jbd2.h" |
46 | #include "xattr.h" | 47 | #include "xattr.h" |
47 | #include "acl.h" | 48 | #include "acl.h" |
48 | #include "namei.h" | 49 | |
49 | #include "group.h" | 50 | static int default_mb_history_length = 1000; |
51 | |||
52 | module_param_named(default_mb_history_length, default_mb_history_length, | ||
53 | int, 0644); | ||
54 | MODULE_PARM_DESC(default_mb_history_length, | ||
55 | "Default number of entries saved for mb_history"); | ||
50 | 56 | ||
51 | struct proc_dir_entry *ext4_proc_root; | 57 | struct proc_dir_entry *ext4_proc_root; |
52 | static struct kset *ext4_kset; | 58 | static struct kset *ext4_kset; |
53 | 59 | ||
54 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 60 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
55 | unsigned long journal_devnum); | 61 | unsigned long journal_devnum); |
56 | static int ext4_commit_super(struct super_block *sb, | 62 | static int ext4_commit_super(struct super_block *sb, int sync); |
57 | struct ext4_super_block *es, int sync); | ||
58 | static void ext4_mark_recovery_complete(struct super_block *sb, | 63 | static void ext4_mark_recovery_complete(struct super_block *sb, |
59 | struct ext4_super_block *es); | 64 | struct ext4_super_block *es); |
60 | static void ext4_clear_journal_err(struct super_block *sb, | 65 | static void ext4_clear_journal_err(struct super_block *sb, |
@@ -74,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, | |||
74 | { | 79 | { |
75 | return le32_to_cpu(bg->bg_block_bitmap_lo) | | 80 | return le32_to_cpu(bg->bg_block_bitmap_lo) | |
76 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 81 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
77 | (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); | 82 | (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); |
78 | } | 83 | } |
79 | 84 | ||
80 | ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, | 85 | ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, |
@@ -82,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, | |||
82 | { | 87 | { |
83 | return le32_to_cpu(bg->bg_inode_bitmap_lo) | | 88 | return le32_to_cpu(bg->bg_inode_bitmap_lo) | |
84 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 89 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
85 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); | 90 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); |
86 | } | 91 | } |
87 | 92 | ||
88 | ext4_fsblk_t ext4_inode_table(struct super_block *sb, | 93 | ext4_fsblk_t ext4_inode_table(struct super_block *sb, |
@@ -90,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, | |||
90 | { | 95 | { |
91 | return le32_to_cpu(bg->bg_inode_table_lo) | | 96 | return le32_to_cpu(bg->bg_inode_table_lo) | |
92 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 97 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
93 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); | 98 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); |
94 | } | 99 | } |
95 | 100 | ||
96 | __u32 ext4_free_blks_count(struct super_block *sb, | 101 | __u32 ext4_free_blks_count(struct super_block *sb, |
@@ -98,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb, | |||
98 | { | 103 | { |
99 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | | 104 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | |
100 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 105 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
101 | (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); | 106 | (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); |
102 | } | 107 | } |
103 | 108 | ||
104 | __u32 ext4_free_inodes_count(struct super_block *sb, | 109 | __u32 ext4_free_inodes_count(struct super_block *sb, |
@@ -106,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb, | |||
106 | { | 111 | { |
107 | return le16_to_cpu(bg->bg_free_inodes_count_lo) | | 112 | return le16_to_cpu(bg->bg_free_inodes_count_lo) | |
108 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 113 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
109 | (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); | 114 | (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); |
110 | } | 115 | } |
111 | 116 | ||
112 | __u32 ext4_used_dirs_count(struct super_block *sb, | 117 | __u32 ext4_used_dirs_count(struct super_block *sb, |
@@ -114,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb, | |||
114 | { | 119 | { |
115 | return le16_to_cpu(bg->bg_used_dirs_count_lo) | | 120 | return le16_to_cpu(bg->bg_used_dirs_count_lo) | |
116 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 121 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
117 | (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); | 122 | (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); |
118 | } | 123 | } |
119 | 124 | ||
120 | __u32 ext4_itable_unused_count(struct super_block *sb, | 125 | __u32 ext4_itable_unused_count(struct super_block *sb, |
@@ -122,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb, | |||
122 | { | 127 | { |
123 | return le16_to_cpu(bg->bg_itable_unused_lo) | | 128 | return le16_to_cpu(bg->bg_itable_unused_lo) | |
124 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 129 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
125 | (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); | 130 | (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); |
126 | } | 131 | } |
127 | 132 | ||
128 | void ext4_block_bitmap_set(struct super_block *sb, | 133 | void ext4_block_bitmap_set(struct super_block *sb, |
@@ -202,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
202 | journal = EXT4_SB(sb)->s_journal; | 207 | journal = EXT4_SB(sb)->s_journal; |
203 | if (journal) { | 208 | if (journal) { |
204 | if (is_journal_aborted(journal)) { | 209 | if (is_journal_aborted(journal)) { |
205 | ext4_abort(sb, __func__, | 210 | ext4_abort(sb, __func__, "Detected aborted journal"); |
206 | "Detected aborted journal"); | ||
207 | return ERR_PTR(-EROFS); | 211 | return ERR_PTR(-EROFS); |
208 | } | 212 | } |
209 | return jbd2_journal_start(journal, nblocks); | 213 | return jbd2_journal_start(journal, nblocks); |
@@ -302,10 +306,10 @@ static void ext4_handle_error(struct super_block *sb) | |||
302 | jbd2_journal_abort(journal, -EIO); | 306 | jbd2_journal_abort(journal, -EIO); |
303 | } | 307 | } |
304 | if (test_opt(sb, ERRORS_RO)) { | 308 | if (test_opt(sb, ERRORS_RO)) { |
305 | printk(KERN_CRIT "Remounting filesystem read-only\n"); | 309 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
306 | sb->s_flags |= MS_RDONLY; | 310 | sb->s_flags |= MS_RDONLY; |
307 | } | 311 | } |
308 | ext4_commit_super(sb, es, 1); | 312 | ext4_commit_super(sb, 1); |
309 | if (test_opt(sb, ERRORS_PANIC)) | 313 | if (test_opt(sb, ERRORS_PANIC)) |
310 | panic("EXT4-fs (device %s): panic forced after error\n", | 314 | panic("EXT4-fs (device %s): panic forced after error\n", |
311 | sb->s_id); | 315 | sb->s_id); |
@@ -395,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function, | |||
395 | { | 399 | { |
396 | va_list args; | 400 | va_list args; |
397 | 401 | ||
398 | printk(KERN_CRIT "ext4_abort called.\n"); | ||
399 | |||
400 | va_start(args, fmt); | 402 | va_start(args, fmt); |
401 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | 403 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); |
402 | vprintk(fmt, args); | 404 | vprintk(fmt, args); |
@@ -409,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function, | |||
409 | if (sb->s_flags & MS_RDONLY) | 411 | if (sb->s_flags & MS_RDONLY) |
410 | return; | 412 | return; |
411 | 413 | ||
412 | printk(KERN_CRIT "Remounting filesystem read-only\n"); | 414 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
413 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 415 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
414 | sb->s_flags |= MS_RDONLY; | 416 | sb->s_flags |= MS_RDONLY; |
415 | EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; | 417 | EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; |
@@ -417,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function, | |||
417 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | 419 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); |
418 | } | 420 | } |
419 | 421 | ||
422 | void ext4_msg (struct super_block * sb, const char *prefix, | ||
423 | const char *fmt, ...) | ||
424 | { | ||
425 | va_list args; | ||
426 | |||
427 | va_start(args, fmt); | ||
428 | printk("%sEXT4-fs (%s): ", prefix, sb->s_id); | ||
429 | vprintk(fmt, args); | ||
430 | printk("\n"); | ||
431 | va_end(args); | ||
432 | } | ||
433 | |||
420 | void ext4_warning(struct super_block *sb, const char *function, | 434 | void ext4_warning(struct super_block *sb, const char *function, |
421 | const char *fmt, ...) | 435 | const char *fmt, ...) |
422 | { | 436 | { |
@@ -431,7 +445,7 @@ void ext4_warning(struct super_block *sb, const char *function, | |||
431 | } | 445 | } |
432 | 446 | ||
433 | void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, | 447 | void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, |
434 | const char *function, const char *fmt, ...) | 448 | const char *function, const char *fmt, ...) |
435 | __releases(bitlock) | 449 | __releases(bitlock) |
436 | __acquires(bitlock) | 450 | __acquires(bitlock) |
437 | { | 451 | { |
@@ -447,7 +461,7 @@ __acquires(bitlock) | |||
447 | if (test_opt(sb, ERRORS_CONT)) { | 461 | if (test_opt(sb, ERRORS_CONT)) { |
448 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 462 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
449 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 463 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
450 | ext4_commit_super(sb, es, 0); | 464 | ext4_commit_super(sb, 0); |
451 | return; | 465 | return; |
452 | } | 466 | } |
453 | ext4_unlock_group(sb, grp); | 467 | ext4_unlock_group(sb, grp); |
@@ -467,7 +481,6 @@ __acquires(bitlock) | |||
467 | return; | 481 | return; |
468 | } | 482 | } |
469 | 483 | ||
470 | |||
471 | void ext4_update_dynamic_rev(struct super_block *sb) | 484 | void ext4_update_dynamic_rev(struct super_block *sb) |
472 | { | 485 | { |
473 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 486 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
@@ -496,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) | |||
496 | /* | 509 | /* |
497 | * Open the external journal device | 510 | * Open the external journal device |
498 | */ | 511 | */ |
499 | static struct block_device *ext4_blkdev_get(dev_t dev) | 512 | static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) |
500 | { | 513 | { |
501 | struct block_device *bdev; | 514 | struct block_device *bdev; |
502 | char b[BDEVNAME_SIZE]; | 515 | char b[BDEVNAME_SIZE]; |
@@ -507,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev) | |||
507 | return bdev; | 520 | return bdev; |
508 | 521 | ||
509 | fail: | 522 | fail: |
510 | printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", | 523 | ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", |
511 | __bdevname(dev, b), PTR_ERR(bdev)); | 524 | __bdevname(dev, b), PTR_ERR(bdev)); |
512 | return NULL; | 525 | return NULL; |
513 | } | 526 | } |
@@ -543,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) | |||
543 | { | 556 | { |
544 | struct list_head *l; | 557 | struct list_head *l; |
545 | 558 | ||
546 | printk(KERN_ERR "sb orphan head is %d\n", | 559 | ext4_msg(sb, KERN_ERR, "sb orphan head is %d", |
547 | le32_to_cpu(sbi->s_es->s_last_orphan)); | 560 | le32_to_cpu(sbi->s_es->s_last_orphan)); |
548 | 561 | ||
549 | printk(KERN_ERR "sb_info orphan list:\n"); | 562 | printk(KERN_ERR "sb_info orphan list:\n"); |
550 | list_for_each(l, &sbi->s_orphan) { | 563 | list_for_each(l, &sbi->s_orphan) { |
@@ -563,6 +576,12 @@ static void ext4_put_super(struct super_block *sb) | |||
563 | struct ext4_super_block *es = sbi->s_es; | 576 | struct ext4_super_block *es = sbi->s_es; |
564 | int i, err; | 577 | int i, err; |
565 | 578 | ||
579 | lock_super(sb); | ||
580 | lock_kernel(); | ||
581 | if (sb->s_dirt) | ||
582 | ext4_commit_super(sb, 1); | ||
583 | |||
584 | ext4_release_system_zone(sb); | ||
566 | ext4_mb_release(sb); | 585 | ext4_mb_release(sb); |
567 | ext4_ext_release(sb); | 586 | ext4_ext_release(sb); |
568 | ext4_xattr_put_super(sb); | 587 | ext4_xattr_put_super(sb); |
@@ -576,7 +595,7 @@ static void ext4_put_super(struct super_block *sb) | |||
576 | if (!(sb->s_flags & MS_RDONLY)) { | 595 | if (!(sb->s_flags & MS_RDONLY)) { |
577 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 596 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
578 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 597 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
579 | ext4_commit_super(sb, es, 1); | 598 | ext4_commit_super(sb, 1); |
580 | } | 599 | } |
581 | if (sbi->s_proc) { | 600 | if (sbi->s_proc) { |
582 | remove_proc_entry(sb->s_id, ext4_proc_root); | 601 | remove_proc_entry(sb->s_id, ext4_proc_root); |
@@ -586,7 +605,10 @@ static void ext4_put_super(struct super_block *sb) | |||
586 | for (i = 0; i < sbi->s_gdb_count; i++) | 605 | for (i = 0; i < sbi->s_gdb_count; i++) |
587 | brelse(sbi->s_group_desc[i]); | 606 | brelse(sbi->s_group_desc[i]); |
588 | kfree(sbi->s_group_desc); | 607 | kfree(sbi->s_group_desc); |
589 | kfree(sbi->s_flex_groups); | 608 | if (is_vmalloc_addr(sbi->s_flex_groups)) |
609 | vfree(sbi->s_flex_groups); | ||
610 | else | ||
611 | kfree(sbi->s_flex_groups); | ||
590 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 612 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
591 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 613 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
592 | percpu_counter_destroy(&sbi->s_dirs_counter); | 614 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -625,11 +647,8 @@ static void ext4_put_super(struct super_block *sb) | |||
625 | unlock_super(sb); | 647 | unlock_super(sb); |
626 | kobject_put(&sbi->s_kobj); | 648 | kobject_put(&sbi->s_kobj); |
627 | wait_for_completion(&sbi->s_kobj_unregister); | 649 | wait_for_completion(&sbi->s_kobj_unregister); |
628 | lock_super(sb); | ||
629 | lock_kernel(); | ||
630 | kfree(sbi->s_blockgroup_lock); | 650 | kfree(sbi->s_blockgroup_lock); |
631 | kfree(sbi); | 651 | kfree(sbi); |
632 | return; | ||
633 | } | 652 | } |
634 | 653 | ||
635 | static struct kmem_cache *ext4_inode_cachep; | 654 | static struct kmem_cache *ext4_inode_cachep; |
@@ -644,6 +663,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
644 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); | 663 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); |
645 | if (!ei) | 664 | if (!ei) |
646 | return NULL; | 665 | return NULL; |
666 | |||
647 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 667 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
648 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 668 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
649 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 669 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
@@ -664,14 +684,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
664 | ei->i_allocated_meta_blocks = 0; | 684 | ei->i_allocated_meta_blocks = 0; |
665 | ei->i_delalloc_reserved_flag = 0; | 685 | ei->i_delalloc_reserved_flag = 0; |
666 | spin_lock_init(&(ei->i_block_reservation_lock)); | 686 | spin_lock_init(&(ei->i_block_reservation_lock)); |
687 | |||
667 | return &ei->vfs_inode; | 688 | return &ei->vfs_inode; |
668 | } | 689 | } |
669 | 690 | ||
670 | static void ext4_destroy_inode(struct inode *inode) | 691 | static void ext4_destroy_inode(struct inode *inode) |
671 | { | 692 | { |
672 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 693 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
673 | printk("EXT4 Inode %p: orphan list check failed!\n", | 694 | ext4_msg(inode->i_sb, KERN_ERR, |
674 | EXT4_I(inode)); | 695 | "Inode %lu (%p): orphan list check failed!", |
696 | inode->i_ino, EXT4_I(inode)); | ||
675 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, | 697 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, |
676 | EXT4_I(inode), sizeof(struct ext4_inode_info), | 698 | EXT4_I(inode), sizeof(struct ext4_inode_info), |
677 | true); | 699 | true); |
@@ -870,12 +892,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
870 | seq_puts(seq, ",noauto_da_alloc"); | 892 | seq_puts(seq, ",noauto_da_alloc"); |
871 | 893 | ||
872 | ext4_show_quota_options(seq, sb); | 894 | ext4_show_quota_options(seq, sb); |
895 | |||
873 | return 0; | 896 | return 0; |
874 | } | 897 | } |
875 | 898 | ||
876 | |||
877 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, | 899 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, |
878 | u64 ino, u32 generation) | 900 | u64 ino, u32 generation) |
879 | { | 901 | { |
880 | struct inode *inode; | 902 | struct inode *inode; |
881 | 903 | ||
@@ -904,14 +926,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, | |||
904 | } | 926 | } |
905 | 927 | ||
906 | static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, | 928 | static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, |
907 | int fh_len, int fh_type) | 929 | int fh_len, int fh_type) |
908 | { | 930 | { |
909 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | 931 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
910 | ext4_nfs_get_inode); | 932 | ext4_nfs_get_inode); |
911 | } | 933 | } |
912 | 934 | ||
913 | static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | 935 | static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, |
914 | int fh_len, int fh_type) | 936 | int fh_len, int fh_type) |
915 | { | 937 | { |
916 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | 938 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, |
917 | ext4_nfs_get_inode); | 939 | ext4_nfs_get_inode); |
@@ -923,7 +945,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
923 | * which would prevent try_to_free_buffers() from freeing them, we must use | 945 | * which would prevent try_to_free_buffers() from freeing them, we must use |
924 | * jbd2 layer's try_to_free_buffers() function to release them. | 946 | * jbd2 layer's try_to_free_buffers() function to release them. |
925 | */ | 947 | */ |
926 | static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) | 948 | static int bdev_try_to_free_page(struct super_block *sb, struct page *page, |
949 | gfp_t wait) | ||
927 | { | 950 | { |
928 | journal_t *journal = EXT4_SB(sb)->s_journal; | 951 | journal_t *journal = EXT4_SB(sb)->s_journal; |
929 | 952 | ||
@@ -992,7 +1015,6 @@ static const struct super_operations ext4_sops = { | |||
992 | .dirty_inode = ext4_dirty_inode, | 1015 | .dirty_inode = ext4_dirty_inode, |
993 | .delete_inode = ext4_delete_inode, | 1016 | .delete_inode = ext4_delete_inode, |
994 | .put_super = ext4_put_super, | 1017 | .put_super = ext4_put_super, |
995 | .write_super = ext4_write_super, | ||
996 | .sync_fs = ext4_sync_fs, | 1018 | .sync_fs = ext4_sync_fs, |
997 | .freeze_fs = ext4_freeze, | 1019 | .freeze_fs = ext4_freeze, |
998 | .unfreeze_fs = ext4_unfreeze, | 1020 | .unfreeze_fs = ext4_unfreeze, |
@@ -1007,6 +1029,25 @@ static const struct super_operations ext4_sops = { | |||
1007 | .bdev_try_to_free_page = bdev_try_to_free_page, | 1029 | .bdev_try_to_free_page = bdev_try_to_free_page, |
1008 | }; | 1030 | }; |
1009 | 1031 | ||
1032 | static const struct super_operations ext4_nojournal_sops = { | ||
1033 | .alloc_inode = ext4_alloc_inode, | ||
1034 | .destroy_inode = ext4_destroy_inode, | ||
1035 | .write_inode = ext4_write_inode, | ||
1036 | .dirty_inode = ext4_dirty_inode, | ||
1037 | .delete_inode = ext4_delete_inode, | ||
1038 | .write_super = ext4_write_super, | ||
1039 | .put_super = ext4_put_super, | ||
1040 | .statfs = ext4_statfs, | ||
1041 | .remount_fs = ext4_remount, | ||
1042 | .clear_inode = ext4_clear_inode, | ||
1043 | .show_options = ext4_show_options, | ||
1044 | #ifdef CONFIG_QUOTA | ||
1045 | .quota_read = ext4_quota_read, | ||
1046 | .quota_write = ext4_quota_write, | ||
1047 | #endif | ||
1048 | .bdev_try_to_free_page = bdev_try_to_free_page, | ||
1049 | }; | ||
1050 | |||
1010 | static const struct export_operations ext4_export_ops = { | 1051 | static const struct export_operations ext4_export_ops = { |
1011 | .fh_to_dentry = ext4_fh_to_dentry, | 1052 | .fh_to_dentry = ext4_fh_to_dentry, |
1012 | .fh_to_parent = ext4_fh_to_parent, | 1053 | .fh_to_parent = ext4_fh_to_parent, |
@@ -1023,12 +1064,13 @@ enum { | |||
1023 | Opt_journal_update, Opt_journal_dev, | 1064 | Opt_journal_update, Opt_journal_dev, |
1024 | Opt_journal_checksum, Opt_journal_async_commit, | 1065 | Opt_journal_checksum, Opt_journal_async_commit, |
1025 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1066 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1026 | Opt_data_err_abort, Opt_data_err_ignore, | 1067 | Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, |
1027 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1068 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1028 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 1069 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
1029 | Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, | 1070 | Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, |
1030 | Opt_usrquota, Opt_grpquota, Opt_i_version, | 1071 | Opt_usrquota, Opt_grpquota, Opt_i_version, |
1031 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 1072 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
1073 | Opt_block_validity, Opt_noblock_validity, | ||
1032 | Opt_inode_readahead_blks, Opt_journal_ioprio | 1074 | Opt_inode_readahead_blks, Opt_journal_ioprio |
1033 | }; | 1075 | }; |
1034 | 1076 | ||
@@ -1069,6 +1111,7 @@ static const match_table_t tokens = { | |||
1069 | {Opt_data_writeback, "data=writeback"}, | 1111 | {Opt_data_writeback, "data=writeback"}, |
1070 | {Opt_data_err_abort, "data_err=abort"}, | 1112 | {Opt_data_err_abort, "data_err=abort"}, |
1071 | {Opt_data_err_ignore, "data_err=ignore"}, | 1113 | {Opt_data_err_ignore, "data_err=ignore"}, |
1114 | {Opt_mb_history_length, "mb_history_length=%u"}, | ||
1072 | {Opt_offusrjquota, "usrjquota="}, | 1115 | {Opt_offusrjquota, "usrjquota="}, |
1073 | {Opt_usrjquota, "usrjquota=%s"}, | 1116 | {Opt_usrjquota, "usrjquota=%s"}, |
1074 | {Opt_offgrpjquota, "grpjquota="}, | 1117 | {Opt_offgrpjquota, "grpjquota="}, |
@@ -1087,6 +1130,8 @@ static const match_table_t tokens = { | |||
1087 | {Opt_resize, "resize"}, | 1130 | {Opt_resize, "resize"}, |
1088 | {Opt_delalloc, "delalloc"}, | 1131 | {Opt_delalloc, "delalloc"}, |
1089 | {Opt_nodelalloc, "nodelalloc"}, | 1132 | {Opt_nodelalloc, "nodelalloc"}, |
1133 | {Opt_block_validity, "block_validity"}, | ||
1134 | {Opt_noblock_validity, "noblock_validity"}, | ||
1090 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | 1135 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, |
1091 | {Opt_journal_ioprio, "journal_ioprio=%u"}, | 1136 | {Opt_journal_ioprio, "journal_ioprio=%u"}, |
1092 | {Opt_auto_da_alloc, "auto_da_alloc=%u"}, | 1137 | {Opt_auto_da_alloc, "auto_da_alloc=%u"}, |
@@ -1102,8 +1147,9 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
1102 | 1147 | ||
1103 | if (!options || strncmp(options, "sb=", 3) != 0) | 1148 | if (!options || strncmp(options, "sb=", 3) != 0) |
1104 | return 1; /* Default location */ | 1149 | return 1; /* Default location */ |
1150 | |||
1105 | options += 3; | 1151 | options += 3; |
1106 | /*todo: use simple_strtoll with >32bit ext4 */ | 1152 | /* TODO: use simple_strtoll with >32bit ext4 */ |
1107 | sb_block = simple_strtoul(options, &options, 0); | 1153 | sb_block = simple_strtoul(options, &options, 0); |
1108 | if (*options && *options != ',') { | 1154 | if (*options && *options != ',') { |
1109 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", | 1155 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", |
@@ -1113,6 +1159,7 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
1113 | if (*options == ',') | 1159 | if (*options == ',') |
1114 | options++; | 1160 | options++; |
1115 | *data = (void *) options; | 1161 | *data = (void *) options; |
1162 | |||
1116 | return sb_block; | 1163 | return sb_block; |
1117 | } | 1164 | } |
1118 | 1165 | ||
@@ -1206,8 +1253,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1206 | #else | 1253 | #else |
1207 | case Opt_user_xattr: | 1254 | case Opt_user_xattr: |
1208 | case Opt_nouser_xattr: | 1255 | case Opt_nouser_xattr: |
1209 | printk(KERN_ERR "EXT4 (no)user_xattr options " | 1256 | ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); |
1210 | "not supported\n"); | ||
1211 | break; | 1257 | break; |
1212 | #endif | 1258 | #endif |
1213 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1259 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
@@ -1220,8 +1266,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1220 | #else | 1266 | #else |
1221 | case Opt_acl: | 1267 | case Opt_acl: |
1222 | case Opt_noacl: | 1268 | case Opt_noacl: |
1223 | printk(KERN_ERR "EXT4 (no)acl options " | 1269 | ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); |
1224 | "not supported\n"); | ||
1225 | break; | 1270 | break; |
1226 | #endif | 1271 | #endif |
1227 | case Opt_journal_update: | 1272 | case Opt_journal_update: |
@@ -1231,16 +1276,16 @@ static int parse_options(char *options, struct super_block *sb, | |||
1231 | user to specify an existing inode to be the | 1276 | user to specify an existing inode to be the |
1232 | journal file. */ | 1277 | journal file. */ |
1233 | if (is_remount) { | 1278 | if (is_remount) { |
1234 | printk(KERN_ERR "EXT4-fs: cannot specify " | 1279 | ext4_msg(sb, KERN_ERR, |
1235 | "journal on remount\n"); | 1280 | "Cannot specify journal on remount"); |
1236 | return 0; | 1281 | return 0; |
1237 | } | 1282 | } |
1238 | set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); | 1283 | set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); |
1239 | break; | 1284 | break; |
1240 | case Opt_journal_dev: | 1285 | case Opt_journal_dev: |
1241 | if (is_remount) { | 1286 | if (is_remount) { |
1242 | printk(KERN_ERR "EXT4-fs: cannot specify " | 1287 | ext4_msg(sb, KERN_ERR, |
1243 | "journal on remount\n"); | 1288 | "Cannot specify journal on remount"); |
1244 | return 0; | 1289 | return 0; |
1245 | } | 1290 | } |
1246 | if (match_int(&args[0], &option)) | 1291 | if (match_int(&args[0], &option)) |
@@ -1294,9 +1339,8 @@ static int parse_options(char *options, struct super_block *sb, | |||
1294 | if (is_remount) { | 1339 | if (is_remount) { |
1295 | if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) | 1340 | if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) |
1296 | != data_opt) { | 1341 | != data_opt) { |
1297 | printk(KERN_ERR | 1342 | ext4_msg(sb, KERN_ERR, |
1298 | "EXT4-fs: cannot change data " | 1343 | "Cannot change data mode on remount"); |
1299 | "mode on remount\n"); | ||
1300 | return 0; | 1344 | return 0; |
1301 | } | 1345 | } |
1302 | } else { | 1346 | } else { |
@@ -1310,6 +1354,13 @@ static int parse_options(char *options, struct super_block *sb, | |||
1310 | case Opt_data_err_ignore: | 1354 | case Opt_data_err_ignore: |
1311 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | 1355 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); |
1312 | break; | 1356 | break; |
1357 | case Opt_mb_history_length: | ||
1358 | if (match_int(&args[0], &option)) | ||
1359 | return 0; | ||
1360 | if (option < 0) | ||
1361 | return 0; | ||
1362 | sbi->s_mb_history_max = option; | ||
1363 | break; | ||
1313 | #ifdef CONFIG_QUOTA | 1364 | #ifdef CONFIG_QUOTA |
1314 | case Opt_usrjquota: | 1365 | case Opt_usrjquota: |
1315 | qtype = USRQUOTA; | 1366 | qtype = USRQUOTA; |
@@ -1319,31 +1370,31 @@ static int parse_options(char *options, struct super_block *sb, | |||
1319 | set_qf_name: | 1370 | set_qf_name: |
1320 | if (sb_any_quota_loaded(sb) && | 1371 | if (sb_any_quota_loaded(sb) && |
1321 | !sbi->s_qf_names[qtype]) { | 1372 | !sbi->s_qf_names[qtype]) { |
1322 | printk(KERN_ERR | 1373 | ext4_msg(sb, KERN_ERR, |
1323 | "EXT4-fs: Cannot change journaled " | 1374 | "Cannot change journaled " |
1324 | "quota options when quota turned on.\n"); | 1375 | "quota options when quota turned on"); |
1325 | return 0; | 1376 | return 0; |
1326 | } | 1377 | } |
1327 | qname = match_strdup(&args[0]); | 1378 | qname = match_strdup(&args[0]); |
1328 | if (!qname) { | 1379 | if (!qname) { |
1329 | printk(KERN_ERR | 1380 | ext4_msg(sb, KERN_ERR, |
1330 | "EXT4-fs: not enough memory for " | 1381 | "Not enough memory for " |
1331 | "storing quotafile name.\n"); | 1382 | "storing quotafile name"); |
1332 | return 0; | 1383 | return 0; |
1333 | } | 1384 | } |
1334 | if (sbi->s_qf_names[qtype] && | 1385 | if (sbi->s_qf_names[qtype] && |
1335 | strcmp(sbi->s_qf_names[qtype], qname)) { | 1386 | strcmp(sbi->s_qf_names[qtype], qname)) { |
1336 | printk(KERN_ERR | 1387 | ext4_msg(sb, KERN_ERR, |
1337 | "EXT4-fs: %s quota file already " | 1388 | "%s quota file already " |
1338 | "specified.\n", QTYPE2NAME(qtype)); | 1389 | "specified", QTYPE2NAME(qtype)); |
1339 | kfree(qname); | 1390 | kfree(qname); |
1340 | return 0; | 1391 | return 0; |
1341 | } | 1392 | } |
1342 | sbi->s_qf_names[qtype] = qname; | 1393 | sbi->s_qf_names[qtype] = qname; |
1343 | if (strchr(sbi->s_qf_names[qtype], '/')) { | 1394 | if (strchr(sbi->s_qf_names[qtype], '/')) { |
1344 | printk(KERN_ERR | 1395 | ext4_msg(sb, KERN_ERR, |
1345 | "EXT4-fs: quotafile must be on " | 1396 | "quotafile must be on " |
1346 | "filesystem root.\n"); | 1397 | "filesystem root"); |
1347 | kfree(sbi->s_qf_names[qtype]); | 1398 | kfree(sbi->s_qf_names[qtype]); |
1348 | sbi->s_qf_names[qtype] = NULL; | 1399 | sbi->s_qf_names[qtype] = NULL; |
1349 | return 0; | 1400 | return 0; |
@@ -1358,9 +1409,9 @@ set_qf_name: | |||
1358 | clear_qf_name: | 1409 | clear_qf_name: |
1359 | if (sb_any_quota_loaded(sb) && | 1410 | if (sb_any_quota_loaded(sb) && |
1360 | sbi->s_qf_names[qtype]) { | 1411 | sbi->s_qf_names[qtype]) { |
1361 | printk(KERN_ERR "EXT4-fs: Cannot change " | 1412 | ext4_msg(sb, KERN_ERR, "Cannot change " |
1362 | "journaled quota options when " | 1413 | "journaled quota options when " |
1363 | "quota turned on.\n"); | 1414 | "quota turned on"); |
1364 | return 0; | 1415 | return 0; |
1365 | } | 1416 | } |
1366 | /* | 1417 | /* |
@@ -1377,9 +1428,9 @@ clear_qf_name: | |||
1377 | set_qf_format: | 1428 | set_qf_format: |
1378 | if (sb_any_quota_loaded(sb) && | 1429 | if (sb_any_quota_loaded(sb) && |
1379 | sbi->s_jquota_fmt != qfmt) { | 1430 | sbi->s_jquota_fmt != qfmt) { |
1380 | printk(KERN_ERR "EXT4-fs: Cannot change " | 1431 | ext4_msg(sb, KERN_ERR, "Cannot change " |
1381 | "journaled quota options when " | 1432 | "journaled quota options when " |
1382 | "quota turned on.\n"); | 1433 | "quota turned on"); |
1383 | return 0; | 1434 | return 0; |
1384 | } | 1435 | } |
1385 | sbi->s_jquota_fmt = qfmt; | 1436 | sbi->s_jquota_fmt = qfmt; |
@@ -1395,8 +1446,8 @@ set_qf_format: | |||
1395 | break; | 1446 | break; |
1396 | case Opt_noquota: | 1447 | case Opt_noquota: |
1397 | if (sb_any_quota_loaded(sb)) { | 1448 | if (sb_any_quota_loaded(sb)) { |
1398 | printk(KERN_ERR "EXT4-fs: Cannot change quota " | 1449 | ext4_msg(sb, KERN_ERR, "Cannot change quota " |
1399 | "options when quota turned on.\n"); | 1450 | "options when quota turned on"); |
1400 | return 0; | 1451 | return 0; |
1401 | } | 1452 | } |
1402 | clear_opt(sbi->s_mount_opt, QUOTA); | 1453 | clear_opt(sbi->s_mount_opt, QUOTA); |
@@ -1407,8 +1458,8 @@ set_qf_format: | |||
1407 | case Opt_quota: | 1458 | case Opt_quota: |
1408 | case Opt_usrquota: | 1459 | case Opt_usrquota: |
1409 | case Opt_grpquota: | 1460 | case Opt_grpquota: |
1410 | printk(KERN_ERR | 1461 | ext4_msg(sb, KERN_ERR, |
1411 | "EXT4-fs: quota options not supported.\n"); | 1462 | "quota options not supported"); |
1412 | break; | 1463 | break; |
1413 | case Opt_usrjquota: | 1464 | case Opt_usrjquota: |
1414 | case Opt_grpjquota: | 1465 | case Opt_grpjquota: |
@@ -1416,9 +1467,8 @@ set_qf_format: | |||
1416 | case Opt_offgrpjquota: | 1467 | case Opt_offgrpjquota: |
1417 | case Opt_jqfmt_vfsold: | 1468 | case Opt_jqfmt_vfsold: |
1418 | case Opt_jqfmt_vfsv0: | 1469 | case Opt_jqfmt_vfsv0: |
1419 | printk(KERN_ERR | 1470 | ext4_msg(sb, KERN_ERR, |
1420 | "EXT4-fs: journaled quota options not " | 1471 | "journaled quota options not supported"); |
1421 | "supported.\n"); | ||
1422 | break; | 1472 | break; |
1423 | case Opt_noquota: | 1473 | case Opt_noquota: |
1424 | break; | 1474 | break; |
@@ -1443,8 +1493,9 @@ set_qf_format: | |||
1443 | break; | 1493 | break; |
1444 | case Opt_resize: | 1494 | case Opt_resize: |
1445 | if (!is_remount) { | 1495 | if (!is_remount) { |
1446 | printk("EXT4-fs: resize option only available " | 1496 | ext4_msg(sb, KERN_ERR, |
1447 | "for remount\n"); | 1497 | "resize option only available " |
1498 | "for remount"); | ||
1448 | return 0; | 1499 | return 0; |
1449 | } | 1500 | } |
1450 | if (match_int(&args[0], &option) != 0) | 1501 | if (match_int(&args[0], &option) != 0) |
@@ -1474,14 +1525,21 @@ set_qf_format: | |||
1474 | case Opt_delalloc: | 1525 | case Opt_delalloc: |
1475 | set_opt(sbi->s_mount_opt, DELALLOC); | 1526 | set_opt(sbi->s_mount_opt, DELALLOC); |
1476 | break; | 1527 | break; |
1528 | case Opt_block_validity: | ||
1529 | set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | ||
1530 | break; | ||
1531 | case Opt_noblock_validity: | ||
1532 | clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | ||
1533 | break; | ||
1477 | case Opt_inode_readahead_blks: | 1534 | case Opt_inode_readahead_blks: |
1478 | if (match_int(&args[0], &option)) | 1535 | if (match_int(&args[0], &option)) |
1479 | return 0; | 1536 | return 0; |
1480 | if (option < 0 || option > (1 << 30)) | 1537 | if (option < 0 || option > (1 << 30)) |
1481 | return 0; | 1538 | return 0; |
1482 | if (option & (option - 1)) { | 1539 | if (!is_power_of_2(option)) { |
1483 | printk(KERN_ERR "EXT4-fs: inode_readahead_blks" | 1540 | ext4_msg(sb, KERN_ERR, |
1484 | " must be a power of 2\n"); | 1541 | "EXT4-fs: inode_readahead_blks" |
1542 | " must be a power of 2"); | ||
1485 | return 0; | 1543 | return 0; |
1486 | } | 1544 | } |
1487 | sbi->s_inode_readahead_blks = option; | 1545 | sbi->s_inode_readahead_blks = option; |
@@ -1508,9 +1566,9 @@ set_qf_format: | |||
1508 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); | 1566 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); |
1509 | break; | 1567 | break; |
1510 | default: | 1568 | default: |
1511 | printk(KERN_ERR | 1569 | ext4_msg(sb, KERN_ERR, |
1512 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1570 | "Unrecognized mount option \"%s\" " |
1513 | "or missing value\n", p); | 1571 | "or missing value", p); |
1514 | return 0; | 1572 | return 0; |
1515 | } | 1573 | } |
1516 | } | 1574 | } |
@@ -1528,21 +1586,21 @@ set_qf_format: | |||
1528 | (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || | 1586 | (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || |
1529 | (sbi->s_qf_names[GRPQUOTA] && | 1587 | (sbi->s_qf_names[GRPQUOTA] && |
1530 | (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { | 1588 | (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { |
1531 | printk(KERN_ERR "EXT4-fs: old and new quota " | 1589 | ext4_msg(sb, KERN_ERR, "old and new quota " |
1532 | "format mixing.\n"); | 1590 | "format mixing"); |
1533 | return 0; | 1591 | return 0; |
1534 | } | 1592 | } |
1535 | 1593 | ||
1536 | if (!sbi->s_jquota_fmt) { | 1594 | if (!sbi->s_jquota_fmt) { |
1537 | printk(KERN_ERR "EXT4-fs: journaled quota format " | 1595 | ext4_msg(sb, KERN_ERR, "journaled quota format " |
1538 | "not specified.\n"); | 1596 | "not specified"); |
1539 | return 0; | 1597 | return 0; |
1540 | } | 1598 | } |
1541 | } else { | 1599 | } else { |
1542 | if (sbi->s_jquota_fmt) { | 1600 | if (sbi->s_jquota_fmt) { |
1543 | printk(KERN_ERR "EXT4-fs: journaled quota format " | 1601 | ext4_msg(sb, KERN_ERR, "journaled quota format " |
1544 | "specified with no journaling " | 1602 | "specified with no journaling " |
1545 | "enabled.\n"); | 1603 | "enabled"); |
1546 | return 0; | 1604 | return 0; |
1547 | } | 1605 | } |
1548 | } | 1606 | } |
@@ -1557,32 +1615,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1557 | int res = 0; | 1615 | int res = 0; |
1558 | 1616 | ||
1559 | if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { | 1617 | if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { |
1560 | printk(KERN_ERR "EXT4-fs warning: revision level too high, " | 1618 | ext4_msg(sb, KERN_ERR, "revision level too high, " |
1561 | "forcing read-only mode\n"); | 1619 | "forcing read-only mode"); |
1562 | res = MS_RDONLY; | 1620 | res = MS_RDONLY; |
1563 | } | 1621 | } |
1564 | if (read_only) | 1622 | if (read_only) |
1565 | return res; | 1623 | return res; |
1566 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) | 1624 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) |
1567 | printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " | 1625 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " |
1568 | "running e2fsck is recommended\n"); | 1626 | "running e2fsck is recommended"); |
1569 | else if ((sbi->s_mount_state & EXT4_ERROR_FS)) | 1627 | else if ((sbi->s_mount_state & EXT4_ERROR_FS)) |
1570 | printk(KERN_WARNING | 1628 | ext4_msg(sb, KERN_WARNING, |
1571 | "EXT4-fs warning: mounting fs with errors, " | 1629 | "warning: mounting fs with errors, " |
1572 | "running e2fsck is recommended\n"); | 1630 | "running e2fsck is recommended"); |
1573 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && | 1631 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && |
1574 | le16_to_cpu(es->s_mnt_count) >= | 1632 | le16_to_cpu(es->s_mnt_count) >= |
1575 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) | 1633 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) |
1576 | printk(KERN_WARNING | 1634 | ext4_msg(sb, KERN_WARNING, |
1577 | "EXT4-fs warning: maximal mount count reached, " | 1635 | "warning: maximal mount count reached, " |
1578 | "running e2fsck is recommended\n"); | 1636 | "running e2fsck is recommended"); |
1579 | else if (le32_to_cpu(es->s_checkinterval) && | 1637 | else if (le32_to_cpu(es->s_checkinterval) && |
1580 | (le32_to_cpu(es->s_lastcheck) + | 1638 | (le32_to_cpu(es->s_lastcheck) + |
1581 | le32_to_cpu(es->s_checkinterval) <= get_seconds())) | 1639 | le32_to_cpu(es->s_checkinterval) <= get_seconds())) |
1582 | printk(KERN_WARNING | 1640 | ext4_msg(sb, KERN_WARNING, |
1583 | "EXT4-fs warning: checktime reached, " | 1641 | "warning: checktime reached, " |
1584 | "running e2fsck is recommended\n"); | 1642 | "running e2fsck is recommended"); |
1585 | if (!sbi->s_journal) | 1643 | if (!sbi->s_journal) |
1586 | es->s_state &= cpu_to_le16(~EXT4_VALID_FS); | 1644 | es->s_state &= cpu_to_le16(~EXT4_VALID_FS); |
1587 | if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) | 1645 | if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) |
1588 | es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); | 1646 | es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); |
@@ -1592,7 +1650,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1592 | if (sbi->s_journal) | 1650 | if (sbi->s_journal) |
1593 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 1651 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
1594 | 1652 | ||
1595 | ext4_commit_super(sb, es, 1); | 1653 | ext4_commit_super(sb, 1); |
1596 | if (test_opt(sb, DEBUG)) | 1654 | if (test_opt(sb, DEBUG)) |
1597 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " | 1655 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " |
1598 | "bpg=%lu, ipg=%lu, mo=%04lx]\n", | 1656 | "bpg=%lu, ipg=%lu, mo=%04lx]\n", |
@@ -1603,11 +1661,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1603 | sbi->s_mount_opt); | 1661 | sbi->s_mount_opt); |
1604 | 1662 | ||
1605 | if (EXT4_SB(sb)->s_journal) { | 1663 | if (EXT4_SB(sb)->s_journal) { |
1606 | printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", | 1664 | ext4_msg(sb, KERN_INFO, "%s journal on %s", |
1607 | sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : | 1665 | EXT4_SB(sb)->s_journal->j_inode ? "internal" : |
1608 | "external", EXT4_SB(sb)->s_journal->j_devname); | 1666 | "external", EXT4_SB(sb)->s_journal->j_devname); |
1609 | } else { | 1667 | } else { |
1610 | printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); | 1668 | ext4_msg(sb, KERN_INFO, "no journal"); |
1611 | } | 1669 | } |
1612 | return res; | 1670 | return res; |
1613 | } | 1671 | } |
@@ -1616,10 +1674,10 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1616 | { | 1674 | { |
1617 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1675 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1618 | struct ext4_group_desc *gdp = NULL; | 1676 | struct ext4_group_desc *gdp = NULL; |
1619 | struct buffer_head *bh; | ||
1620 | ext4_group_t flex_group_count; | 1677 | ext4_group_t flex_group_count; |
1621 | ext4_group_t flex_group; | 1678 | ext4_group_t flex_group; |
1622 | int groups_per_flex = 0; | 1679 | int groups_per_flex = 0; |
1680 | size_t size; | ||
1623 | int i; | 1681 | int i; |
1624 | 1682 | ||
1625 | if (!sbi->s_es->s_log_groups_per_flex) { | 1683 | if (!sbi->s_es->s_log_groups_per_flex) { |
@@ -1634,16 +1692,21 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1634 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + | 1692 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
1635 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << | 1693 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << |
1636 | EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; | 1694 | EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; |
1637 | sbi->s_flex_groups = kzalloc(flex_group_count * | 1695 | size = flex_group_count * sizeof(struct flex_groups); |
1638 | sizeof(struct flex_groups), GFP_KERNEL); | 1696 | sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); |
1697 | if (sbi->s_flex_groups == NULL) { | ||
1698 | sbi->s_flex_groups = vmalloc(size); | ||
1699 | if (sbi->s_flex_groups) | ||
1700 | memset(sbi->s_flex_groups, 0, size); | ||
1701 | } | ||
1639 | if (sbi->s_flex_groups == NULL) { | 1702 | if (sbi->s_flex_groups == NULL) { |
1640 | printk(KERN_ERR "EXT4-fs: not enough memory for " | 1703 | ext4_msg(sb, KERN_ERR, "not enough memory for " |
1641 | "%u flex groups\n", flex_group_count); | 1704 | "%u flex groups", flex_group_count); |
1642 | goto failed; | 1705 | goto failed; |
1643 | } | 1706 | } |
1644 | 1707 | ||
1645 | for (i = 0; i < sbi->s_groups_count; i++) { | 1708 | for (i = 0; i < sbi->s_groups_count; i++) { |
1646 | gdp = ext4_get_group_desc(sb, i, &bh); | 1709 | gdp = ext4_get_group_desc(sb, i, NULL); |
1647 | 1710 | ||
1648 | flex_group = ext4_flex_group(sbi, i); | 1711 | flex_group = ext4_flex_group(sbi, i); |
1649 | atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, | 1712 | atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, |
@@ -1724,44 +1787,44 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1724 | 1787 | ||
1725 | block_bitmap = ext4_block_bitmap(sb, gdp); | 1788 | block_bitmap = ext4_block_bitmap(sb, gdp); |
1726 | if (block_bitmap < first_block || block_bitmap > last_block) { | 1789 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1727 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1790 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
1728 | "Block bitmap for group %u not in group " | 1791 | "Block bitmap for group %u not in group " |
1729 | "(block %llu)!\n", i, block_bitmap); | 1792 | "(block %llu)!", i, block_bitmap); |
1730 | return 0; | 1793 | return 0; |
1731 | } | 1794 | } |
1732 | inode_bitmap = ext4_inode_bitmap(sb, gdp); | 1795 | inode_bitmap = ext4_inode_bitmap(sb, gdp); |
1733 | if (inode_bitmap < first_block || inode_bitmap > last_block) { | 1796 | if (inode_bitmap < first_block || inode_bitmap > last_block) { |
1734 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1797 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
1735 | "Inode bitmap for group %u not in group " | 1798 | "Inode bitmap for group %u not in group " |
1736 | "(block %llu)!\n", i, inode_bitmap); | 1799 | "(block %llu)!", i, inode_bitmap); |
1737 | return 0; | 1800 | return 0; |
1738 | } | 1801 | } |
1739 | inode_table = ext4_inode_table(sb, gdp); | 1802 | inode_table = ext4_inode_table(sb, gdp); |
1740 | if (inode_table < first_block || | 1803 | if (inode_table < first_block || |
1741 | inode_table + sbi->s_itb_per_group - 1 > last_block) { | 1804 | inode_table + sbi->s_itb_per_group - 1 > last_block) { |
1742 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1805 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
1743 | "Inode table for group %u not in group " | 1806 | "Inode table for group %u not in group " |
1744 | "(block %llu)!\n", i, inode_table); | 1807 | "(block %llu)!", i, inode_table); |
1745 | return 0; | 1808 | return 0; |
1746 | } | 1809 | } |
1747 | spin_lock(sb_bgl_lock(sbi, i)); | 1810 | ext4_lock_group(sb, i); |
1748 | if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { | 1811 | if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { |
1749 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1812 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
1750 | "Checksum for group %u failed (%u!=%u)\n", | 1813 | "Checksum for group %u failed (%u!=%u)", |
1751 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 1814 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
1752 | gdp)), le16_to_cpu(gdp->bg_checksum)); | 1815 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
1753 | if (!(sb->s_flags & MS_RDONLY)) { | 1816 | if (!(sb->s_flags & MS_RDONLY)) { |
1754 | spin_unlock(sb_bgl_lock(sbi, i)); | 1817 | ext4_unlock_group(sb, i); |
1755 | return 0; | 1818 | return 0; |
1756 | } | 1819 | } |
1757 | } | 1820 | } |
1758 | spin_unlock(sb_bgl_lock(sbi, i)); | 1821 | ext4_unlock_group(sb, i); |
1759 | if (!flexbg_flag) | 1822 | if (!flexbg_flag) |
1760 | first_block += EXT4_BLOCKS_PER_GROUP(sb); | 1823 | first_block += EXT4_BLOCKS_PER_GROUP(sb); |
1761 | } | 1824 | } |
1762 | 1825 | ||
1763 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); | 1826 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); |
1764 | sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); | 1827 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); |
1765 | return 1; | 1828 | return 1; |
1766 | } | 1829 | } |
1767 | 1830 | ||
@@ -1796,8 +1859,8 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1796 | } | 1859 | } |
1797 | 1860 | ||
1798 | if (bdev_read_only(sb->s_bdev)) { | 1861 | if (bdev_read_only(sb->s_bdev)) { |
1799 | printk(KERN_ERR "EXT4-fs: write access " | 1862 | ext4_msg(sb, KERN_ERR, "write access " |
1800 | "unavailable, skipping orphan cleanup.\n"); | 1863 | "unavailable, skipping orphan cleanup"); |
1801 | return; | 1864 | return; |
1802 | } | 1865 | } |
1803 | 1866 | ||
@@ -1811,8 +1874,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1811 | } | 1874 | } |
1812 | 1875 | ||
1813 | if (s_flags & MS_RDONLY) { | 1876 | if (s_flags & MS_RDONLY) { |
1814 | printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", | 1877 | ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); |
1815 | sb->s_id); | ||
1816 | sb->s_flags &= ~MS_RDONLY; | 1878 | sb->s_flags &= ~MS_RDONLY; |
1817 | } | 1879 | } |
1818 | #ifdef CONFIG_QUOTA | 1880 | #ifdef CONFIG_QUOTA |
@@ -1823,9 +1885,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1823 | if (EXT4_SB(sb)->s_qf_names[i]) { | 1885 | if (EXT4_SB(sb)->s_qf_names[i]) { |
1824 | int ret = ext4_quota_on_mount(sb, i); | 1886 | int ret = ext4_quota_on_mount(sb, i); |
1825 | if (ret < 0) | 1887 | if (ret < 0) |
1826 | printk(KERN_ERR | 1888 | ext4_msg(sb, KERN_ERR, |
1827 | "EXT4-fs: Cannot turn on journaled " | 1889 | "Cannot turn on journaled " |
1828 | "quota: error %d\n", ret); | 1890 | "quota: error %d", ret); |
1829 | } | 1891 | } |
1830 | } | 1892 | } |
1831 | #endif | 1893 | #endif |
@@ -1842,16 +1904,16 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1842 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 1904 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
1843 | vfs_dq_init(inode); | 1905 | vfs_dq_init(inode); |
1844 | if (inode->i_nlink) { | 1906 | if (inode->i_nlink) { |
1845 | printk(KERN_DEBUG | 1907 | ext4_msg(sb, KERN_DEBUG, |
1846 | "%s: truncating inode %lu to %lld bytes\n", | 1908 | "%s: truncating inode %lu to %lld bytes", |
1847 | __func__, inode->i_ino, inode->i_size); | 1909 | __func__, inode->i_ino, inode->i_size); |
1848 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", | 1910 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
1849 | inode->i_ino, inode->i_size); | 1911 | inode->i_ino, inode->i_size); |
1850 | ext4_truncate(inode); | 1912 | ext4_truncate(inode); |
1851 | nr_truncates++; | 1913 | nr_truncates++; |
1852 | } else { | 1914 | } else { |
1853 | printk(KERN_DEBUG | 1915 | ext4_msg(sb, KERN_DEBUG, |
1854 | "%s: deleting unreferenced inode %lu\n", | 1916 | "%s: deleting unreferenced inode %lu", |
1855 | __func__, inode->i_ino); | 1917 | __func__, inode->i_ino); |
1856 | jbd_debug(2, "deleting unreferenced inode %lu\n", | 1918 | jbd_debug(2, "deleting unreferenced inode %lu\n", |
1857 | inode->i_ino); | 1919 | inode->i_ino); |
@@ -1863,11 +1925,11 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1863 | #define PLURAL(x) (x), ((x) == 1) ? "" : "s" | 1925 | #define PLURAL(x) (x), ((x) == 1) ? "" : "s" |
1864 | 1926 | ||
1865 | if (nr_orphans) | 1927 | if (nr_orphans) |
1866 | printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", | 1928 | ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", |
1867 | sb->s_id, PLURAL(nr_orphans)); | 1929 | PLURAL(nr_orphans)); |
1868 | if (nr_truncates) | 1930 | if (nr_truncates) |
1869 | printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", | 1931 | ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", |
1870 | sb->s_id, PLURAL(nr_truncates)); | 1932 | PLURAL(nr_truncates)); |
1871 | #ifdef CONFIG_QUOTA | 1933 | #ifdef CONFIG_QUOTA |
1872 | /* Turn quotas off */ | 1934 | /* Turn quotas off */ |
1873 | for (i = 0; i < MAXQUOTAS; i++) { | 1935 | for (i = 0; i < MAXQUOTAS; i++) { |
@@ -1877,6 +1939,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1877 | #endif | 1939 | #endif |
1878 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ | 1940 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ |
1879 | } | 1941 | } |
1942 | |||
1880 | /* | 1943 | /* |
1881 | * Maximal extent format file size. | 1944 | * Maximal extent format file size. |
1882 | * Resulting logical blkno at s_maxbytes must fit in our on-disk | 1945 | * Resulting logical blkno at s_maxbytes must fit in our on-disk |
@@ -1927,19 +1990,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) | |||
1927 | loff_t res = EXT4_NDIR_BLOCKS; | 1990 | loff_t res = EXT4_NDIR_BLOCKS; |
1928 | int meta_blocks; | 1991 | int meta_blocks; |
1929 | loff_t upper_limit; | 1992 | loff_t upper_limit; |
1930 | /* This is calculated to be the largest file size for a | 1993 | /* This is calculated to be the largest file size for a dense, block |
1931 | * dense, bitmapped file such that the total number of | 1994 | * mapped file such that the file's total number of 512-byte sectors, |
1932 | * sectors in the file, including data and all indirect blocks, | 1995 | * including data and all indirect blocks, does not exceed (2^48 - 1). |
1933 | * does not exceed 2^48 -1 | 1996 | * |
1934 | * __u32 i_blocks_lo and _u16 i_blocks_high representing the | 1997 | * __u32 i_blocks_lo and _u16 i_blocks_high represent the total |
1935 | * total number of 512 bytes blocks of the file | 1998 | * number of 512-byte sectors of the file. |
1936 | */ | 1999 | */ |
1937 | 2000 | ||
1938 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { | 2001 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
1939 | /* | 2002 | /* |
1940 | * !has_huge_files or CONFIG_LBD is not enabled | 2003 | * !has_huge_files or CONFIG_LBD not enabled implies that |
1941 | * implies the inode i_block represent total blocks in | 2004 | * the inode i_block field represents total file blocks in |
1942 | * 512 bytes 32 == size of vfs inode i_blocks * 8 | 2005 | * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 |
1943 | */ | 2006 | */ |
1944 | upper_limit = (1LL << 32) - 1; | 2007 | upper_limit = (1LL << 32) - 1; |
1945 | 2008 | ||
@@ -1981,7 +2044,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) | |||
1981 | } | 2044 | } |
1982 | 2045 | ||
1983 | static ext4_fsblk_t descriptor_loc(struct super_block *sb, | 2046 | static ext4_fsblk_t descriptor_loc(struct super_block *sb, |
1984 | ext4_fsblk_t logical_sb_block, int nr) | 2047 | ext4_fsblk_t logical_sb_block, int nr) |
1985 | { | 2048 | { |
1986 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2049 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1987 | ext4_group_t bg, first_meta_bg; | 2050 | ext4_group_t bg, first_meta_bg; |
@@ -1995,6 +2058,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, | |||
1995 | bg = sbi->s_desc_per_block * nr; | 2058 | bg = sbi->s_desc_per_block * nr; |
1996 | if (ext4_bg_has_super(sb, bg)) | 2059 | if (ext4_bg_has_super(sb, bg)) |
1997 | has_super = 1; | 2060 | has_super = 1; |
2061 | |||
1998 | return (has_super + ext4_group_first_block_no(sb, bg)); | 2062 | return (has_super + ext4_group_first_block_no(sb, bg)); |
1999 | } | 2063 | } |
2000 | 2064 | ||
@@ -2091,8 +2155,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2091 | if (parse_strtoul(buf, 0x40000000, &t)) | 2155 | if (parse_strtoul(buf, 0x40000000, &t)) |
2092 | return -EINVAL; | 2156 | return -EINVAL; |
2093 | 2157 | ||
2094 | /* inode_readahead_blks must be a power of 2 */ | 2158 | if (!is_power_of_2(t)) |
2095 | if (t & (t-1)) | ||
2096 | return -EINVAL; | 2159 | return -EINVAL; |
2097 | 2160 | ||
2098 | sbi->s_inode_readahead_blks = t; | 2161 | sbi->s_inode_readahead_blks = t; |
@@ -2100,7 +2163,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2100 | } | 2163 | } |
2101 | 2164 | ||
2102 | static ssize_t sbi_ui_show(struct ext4_attr *a, | 2165 | static ssize_t sbi_ui_show(struct ext4_attr *a, |
2103 | struct ext4_sb_info *sbi, char *buf) | 2166 | struct ext4_sb_info *sbi, char *buf) |
2104 | { | 2167 | { |
2105 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2168 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); |
2106 | 2169 | ||
@@ -2205,7 +2268,6 @@ static struct kobj_type ext4_ktype = { | |||
2205 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2268 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2206 | __releases(kernel_lock) | 2269 | __releases(kernel_lock) |
2207 | __acquires(kernel_lock) | 2270 | __acquires(kernel_lock) |
2208 | |||
2209 | { | 2271 | { |
2210 | struct buffer_head *bh; | 2272 | struct buffer_head *bh; |
2211 | struct ext4_super_block *es = NULL; | 2273 | struct ext4_super_block *es = NULL; |
@@ -2256,7 +2318,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2256 | 2318 | ||
2257 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 2319 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
2258 | if (!blocksize) { | 2320 | if (!blocksize) { |
2259 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); | 2321 | ext4_msg(sb, KERN_ERR, "unable to set blocksize"); |
2260 | goto out_fail; | 2322 | goto out_fail; |
2261 | } | 2323 | } |
2262 | 2324 | ||
@@ -2272,7 +2334,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2272 | } | 2334 | } |
2273 | 2335 | ||
2274 | if (!(bh = sb_bread(sb, logical_sb_block))) { | 2336 | if (!(bh = sb_bread(sb, logical_sb_block))) { |
2275 | printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); | 2337 | ext4_msg(sb, KERN_ERR, "unable to read superblock"); |
2276 | goto out_fail; | 2338 | goto out_fail; |
2277 | } | 2339 | } |
2278 | /* | 2340 | /* |
@@ -2321,6 +2383,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2321 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; | 2383 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; |
2322 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; | 2384 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; |
2323 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; | 2385 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; |
2386 | sbi->s_mb_history_max = default_mb_history_length; | ||
2324 | 2387 | ||
2325 | set_opt(sbi->s_mount_opt, BARRIER); | 2388 | set_opt(sbi->s_mount_opt, BARRIER); |
2326 | 2389 | ||
@@ -2330,7 +2393,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2330 | */ | 2393 | */ |
2331 | set_opt(sbi->s_mount_opt, DELALLOC); | 2394 | set_opt(sbi->s_mount_opt, DELALLOC); |
2332 | 2395 | ||
2333 | |||
2334 | if (!parse_options((char *) data, sb, &journal_devnum, | 2396 | if (!parse_options((char *) data, sb, &journal_devnum, |
2335 | &journal_ioprio, NULL, 0)) | 2397 | &journal_ioprio, NULL, 0)) |
2336 | goto failed_mount; | 2398 | goto failed_mount; |
@@ -2342,9 +2404,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2342 | (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || | 2404 | (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || |
2343 | EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || | 2405 | EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || |
2344 | EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) | 2406 | EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) |
2345 | printk(KERN_WARNING | 2407 | ext4_msg(sb, KERN_WARNING, |
2346 | "EXT4-fs warning: feature flags set on rev 0 fs, " | 2408 | "feature flags set on rev 0 fs, " |
2347 | "running e2fsck is recommended\n"); | 2409 | "running e2fsck is recommended"); |
2348 | 2410 | ||
2349 | /* | 2411 | /* |
2350 | * Check feature flags regardless of the revision level, since we | 2412 | * Check feature flags regardless of the revision level, since we |
@@ -2353,16 +2415,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2353 | */ | 2415 | */ |
2354 | features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); | 2416 | features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); |
2355 | if (features) { | 2417 | if (features) { |
2356 | printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " | 2418 | ext4_msg(sb, KERN_ERR, |
2357 | "unsupported optional features (%x).\n", sb->s_id, | 2419 | "Couldn't mount because of " |
2420 | "unsupported optional features (%x)", | ||
2358 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | 2421 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & |
2359 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | 2422 | ~EXT4_FEATURE_INCOMPAT_SUPP)); |
2360 | goto failed_mount; | 2423 | goto failed_mount; |
2361 | } | 2424 | } |
2362 | features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); | 2425 | features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); |
2363 | if (!(sb->s_flags & MS_RDONLY) && features) { | 2426 | if (!(sb->s_flags & MS_RDONLY) && features) { |
2364 | printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " | 2427 | ext4_msg(sb, KERN_ERR, |
2365 | "unsupported optional features (%x).\n", sb->s_id, | 2428 | "Couldn't mount RDWR because of " |
2429 | "unsupported optional features (%x)", | ||
2366 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | 2430 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & |
2367 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | 2431 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); |
2368 | goto failed_mount; | 2432 | goto failed_mount; |
@@ -2376,9 +2440,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2376 | */ | 2440 | */ |
2377 | if (sizeof(root->i_blocks) < sizeof(u64) && | 2441 | if (sizeof(root->i_blocks) < sizeof(u64) && |
2378 | !(sb->s_flags & MS_RDONLY)) { | 2442 | !(sb->s_flags & MS_RDONLY)) { |
2379 | printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " | 2443 | ext4_msg(sb, KERN_ERR, "Filesystem with huge " |
2380 | "files cannot be mounted read-write " | 2444 | "files cannot be mounted read-write " |
2381 | "without CONFIG_LBD.\n", sb->s_id); | 2445 | "without CONFIG_LBD"); |
2382 | goto failed_mount; | 2446 | goto failed_mount; |
2383 | } | 2447 | } |
2384 | } | 2448 | } |
@@ -2386,17 +2450,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2386 | 2450 | ||
2387 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 2451 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
2388 | blocksize > EXT4_MAX_BLOCK_SIZE) { | 2452 | blocksize > EXT4_MAX_BLOCK_SIZE) { |
2389 | printk(KERN_ERR | 2453 | ext4_msg(sb, KERN_ERR, |
2390 | "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", | 2454 | "Unsupported filesystem blocksize %d", blocksize); |
2391 | blocksize, sb->s_id); | ||
2392 | goto failed_mount; | 2455 | goto failed_mount; |
2393 | } | 2456 | } |
2394 | 2457 | ||
2395 | if (sb->s_blocksize != blocksize) { | 2458 | if (sb->s_blocksize != blocksize) { |
2396 | |||
2397 | /* Validate the filesystem blocksize */ | 2459 | /* Validate the filesystem blocksize */ |
2398 | if (!sb_set_blocksize(sb, blocksize)) { | 2460 | if (!sb_set_blocksize(sb, blocksize)) { |
2399 | printk(KERN_ERR "EXT4-fs: bad block size %d.\n", | 2461 | ext4_msg(sb, KERN_ERR, "bad block size %d", |
2400 | blocksize); | 2462 | blocksize); |
2401 | goto failed_mount; | 2463 | goto failed_mount; |
2402 | } | 2464 | } |
@@ -2406,15 +2468,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2406 | offset = do_div(logical_sb_block, blocksize); | 2468 | offset = do_div(logical_sb_block, blocksize); |
2407 | bh = sb_bread(sb, logical_sb_block); | 2469 | bh = sb_bread(sb, logical_sb_block); |
2408 | if (!bh) { | 2470 | if (!bh) { |
2409 | printk(KERN_ERR | 2471 | ext4_msg(sb, KERN_ERR, |
2410 | "EXT4-fs: Can't read superblock on 2nd try.\n"); | 2472 | "Can't read superblock on 2nd try"); |
2411 | goto failed_mount; | 2473 | goto failed_mount; |
2412 | } | 2474 | } |
2413 | es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); | 2475 | es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); |
2414 | sbi->s_es = es; | 2476 | sbi->s_es = es; |
2415 | if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { | 2477 | if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { |
2416 | printk(KERN_ERR | 2478 | ext4_msg(sb, KERN_ERR, |
2417 | "EXT4-fs: Magic mismatch, very weird !\n"); | 2479 | "Magic mismatch, very weird!"); |
2418 | goto failed_mount; | 2480 | goto failed_mount; |
2419 | } | 2481 | } |
2420 | } | 2482 | } |
@@ -2432,30 +2494,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2432 | if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || | 2494 | if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || |
2433 | (!is_power_of_2(sbi->s_inode_size)) || | 2495 | (!is_power_of_2(sbi->s_inode_size)) || |
2434 | (sbi->s_inode_size > blocksize)) { | 2496 | (sbi->s_inode_size > blocksize)) { |
2435 | printk(KERN_ERR | 2497 | ext4_msg(sb, KERN_ERR, |
2436 | "EXT4-fs: unsupported inode size: %d\n", | 2498 | "unsupported inode size: %d", |
2437 | sbi->s_inode_size); | 2499 | sbi->s_inode_size); |
2438 | goto failed_mount; | 2500 | goto failed_mount; |
2439 | } | 2501 | } |
2440 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) | 2502 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) |
2441 | sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); | 2503 | sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); |
2442 | } | 2504 | } |
2505 | |||
2443 | sbi->s_desc_size = le16_to_cpu(es->s_desc_size); | 2506 | sbi->s_desc_size = le16_to_cpu(es->s_desc_size); |
2444 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { | 2507 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { |
2445 | if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || | 2508 | if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || |
2446 | sbi->s_desc_size > EXT4_MAX_DESC_SIZE || | 2509 | sbi->s_desc_size > EXT4_MAX_DESC_SIZE || |
2447 | !is_power_of_2(sbi->s_desc_size)) { | 2510 | !is_power_of_2(sbi->s_desc_size)) { |
2448 | printk(KERN_ERR | 2511 | ext4_msg(sb, KERN_ERR, |
2449 | "EXT4-fs: unsupported descriptor size %lu\n", | 2512 | "unsupported descriptor size %lu", |
2450 | sbi->s_desc_size); | 2513 | sbi->s_desc_size); |
2451 | goto failed_mount; | 2514 | goto failed_mount; |
2452 | } | 2515 | } |
2453 | } else | 2516 | } else |
2454 | sbi->s_desc_size = EXT4_MIN_DESC_SIZE; | 2517 | sbi->s_desc_size = EXT4_MIN_DESC_SIZE; |
2518 | |||
2455 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); | 2519 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); |
2456 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); | 2520 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); |
2457 | if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) | 2521 | if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) |
2458 | goto cantfind_ext4; | 2522 | goto cantfind_ext4; |
2523 | |||
2459 | sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); | 2524 | sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); |
2460 | if (sbi->s_inodes_per_block == 0) | 2525 | if (sbi->s_inodes_per_block == 0) |
2461 | goto cantfind_ext4; | 2526 | goto cantfind_ext4; |
@@ -2466,6 +2531,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2466 | sbi->s_mount_state = le16_to_cpu(es->s_state); | 2531 | sbi->s_mount_state = le16_to_cpu(es->s_state); |
2467 | sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); | 2532 | sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); |
2468 | sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); | 2533 | sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); |
2534 | |||
2469 | for (i = 0; i < 4; i++) | 2535 | for (i = 0; i < 4; i++) |
2470 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); | 2536 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); |
2471 | sbi->s_def_hash_version = es->s_def_hash_version; | 2537 | sbi->s_def_hash_version = es->s_def_hash_version; |
@@ -2483,25 +2549,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2483 | } | 2549 | } |
2484 | 2550 | ||
2485 | if (sbi->s_blocks_per_group > blocksize * 8) { | 2551 | if (sbi->s_blocks_per_group > blocksize * 8) { |
2486 | printk(KERN_ERR | 2552 | ext4_msg(sb, KERN_ERR, |
2487 | "EXT4-fs: #blocks per group too big: %lu\n", | 2553 | "#blocks per group too big: %lu", |
2488 | sbi->s_blocks_per_group); | 2554 | sbi->s_blocks_per_group); |
2489 | goto failed_mount; | 2555 | goto failed_mount; |
2490 | } | 2556 | } |
2491 | if (sbi->s_inodes_per_group > blocksize * 8) { | 2557 | if (sbi->s_inodes_per_group > blocksize * 8) { |
2492 | printk(KERN_ERR | 2558 | ext4_msg(sb, KERN_ERR, |
2493 | "EXT4-fs: #inodes per group too big: %lu\n", | 2559 | "#inodes per group too big: %lu", |
2494 | sbi->s_inodes_per_group); | 2560 | sbi->s_inodes_per_group); |
2495 | goto failed_mount; | 2561 | goto failed_mount; |
2496 | } | 2562 | } |
2497 | 2563 | ||
2498 | if (ext4_blocks_count(es) > | 2564 | if (ext4_blocks_count(es) > |
2499 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 2565 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { |
2500 | printk(KERN_ERR "EXT4-fs: filesystem on %s:" | 2566 | ext4_msg(sb, KERN_ERR, "filesystem" |
2501 | " too large to mount safely\n", sb->s_id); | 2567 | " too large to mount safely"); |
2502 | if (sizeof(sector_t) < 8) | 2568 | if (sizeof(sector_t) < 8) |
2503 | printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " | 2569 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); |
2504 | "enabled\n"); | ||
2505 | goto failed_mount; | 2570 | goto failed_mount; |
2506 | } | 2571 | } |
2507 | 2572 | ||
@@ -2511,21 +2576,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2511 | /* check blocks count against device size */ | 2576 | /* check blocks count against device size */ |
2512 | blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; | 2577 | blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; |
2513 | if (blocks_count && ext4_blocks_count(es) > blocks_count) { | 2578 | if (blocks_count && ext4_blocks_count(es) > blocks_count) { |
2514 | printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " | 2579 | ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " |
2515 | "exceeds size of device (%llu blocks)\n", | 2580 | "exceeds size of device (%llu blocks)", |
2516 | ext4_blocks_count(es), blocks_count); | 2581 | ext4_blocks_count(es), blocks_count); |
2517 | goto failed_mount; | 2582 | goto failed_mount; |
2518 | } | 2583 | } |
2519 | 2584 | ||
2520 | /* | 2585 | /* |
2521 | * It makes no sense for the first data block to be beyond the end | 2586 | * It makes no sense for the first data block to be beyond the end |
2522 | * of the filesystem. | 2587 | * of the filesystem. |
2523 | */ | 2588 | */ |
2524 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { | 2589 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { |
2525 | printk(KERN_WARNING "EXT4-fs: bad geometry: first data" | 2590 | ext4_msg(sb, KERN_WARNING, "bad geometry: first data" |
2526 | "block %u is beyond end of filesystem (%llu)\n", | 2591 | "block %u is beyond end of filesystem (%llu)", |
2527 | le32_to_cpu(es->s_first_data_block), | 2592 | le32_to_cpu(es->s_first_data_block), |
2528 | ext4_blocks_count(es)); | 2593 | ext4_blocks_count(es)); |
2529 | goto failed_mount; | 2594 | goto failed_mount; |
2530 | } | 2595 | } |
2531 | blocks_count = (ext4_blocks_count(es) - | 2596 | blocks_count = (ext4_blocks_count(es) - |
@@ -2533,9 +2598,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2533 | EXT4_BLOCKS_PER_GROUP(sb) - 1); | 2598 | EXT4_BLOCKS_PER_GROUP(sb) - 1); |
2534 | do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); | 2599 | do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); |
2535 | if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { | 2600 | if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { |
2536 | printk(KERN_WARNING "EXT4-fs: groups count too large: %u " | 2601 | ext4_msg(sb, KERN_WARNING, "groups count too large: %u " |
2537 | "(block count %llu, first data block %u, " | 2602 | "(block count %llu, first data block %u, " |
2538 | "blocks per group %lu)\n", sbi->s_groups_count, | 2603 | "blocks per group %lu)", sbi->s_groups_count, |
2539 | ext4_blocks_count(es), | 2604 | ext4_blocks_count(es), |
2540 | le32_to_cpu(es->s_first_data_block), | 2605 | le32_to_cpu(es->s_first_data_block), |
2541 | EXT4_BLOCKS_PER_GROUP(sb)); | 2606 | EXT4_BLOCKS_PER_GROUP(sb)); |
@@ -2547,7 +2612,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2547 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), | 2612 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), |
2548 | GFP_KERNEL); | 2613 | GFP_KERNEL); |
2549 | if (sbi->s_group_desc == NULL) { | 2614 | if (sbi->s_group_desc == NULL) { |
2550 | printk(KERN_ERR "EXT4-fs: not enough memory\n"); | 2615 | ext4_msg(sb, KERN_ERR, "not enough memory"); |
2551 | goto failed_mount; | 2616 | goto failed_mount; |
2552 | } | 2617 | } |
2553 | 2618 | ||
@@ -2562,21 +2627,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2562 | block = descriptor_loc(sb, logical_sb_block, i); | 2627 | block = descriptor_loc(sb, logical_sb_block, i); |
2563 | sbi->s_group_desc[i] = sb_bread(sb, block); | 2628 | sbi->s_group_desc[i] = sb_bread(sb, block); |
2564 | if (!sbi->s_group_desc[i]) { | 2629 | if (!sbi->s_group_desc[i]) { |
2565 | printk(KERN_ERR "EXT4-fs: " | 2630 | ext4_msg(sb, KERN_ERR, |
2566 | "can't read group descriptor %d\n", i); | 2631 | "can't read group descriptor %d", i); |
2567 | db_count = i; | 2632 | db_count = i; |
2568 | goto failed_mount2; | 2633 | goto failed_mount2; |
2569 | } | 2634 | } |
2570 | } | 2635 | } |
2571 | if (!ext4_check_descriptors(sb)) { | 2636 | if (!ext4_check_descriptors(sb)) { |
2572 | printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); | 2637 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); |
2573 | goto failed_mount2; | 2638 | goto failed_mount2; |
2574 | } | 2639 | } |
2575 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 2640 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
2576 | if (!ext4_fill_flex_info(sb)) { | 2641 | if (!ext4_fill_flex_info(sb)) { |
2577 | printk(KERN_ERR | 2642 | ext4_msg(sb, KERN_ERR, |
2578 | "EXT4-fs: unable to initialize " | 2643 | "unable to initialize " |
2579 | "flex_bg meta info!\n"); | 2644 | "flex_bg meta info!"); |
2580 | goto failed_mount2; | 2645 | goto failed_mount2; |
2581 | } | 2646 | } |
2582 | 2647 | ||
@@ -2598,7 +2663,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2598 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | 2663 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); |
2599 | } | 2664 | } |
2600 | if (err) { | 2665 | if (err) { |
2601 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); | 2666 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
2602 | goto failed_mount3; | 2667 | goto failed_mount3; |
2603 | } | 2668 | } |
2604 | 2669 | ||
@@ -2607,7 +2672,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2607 | /* | 2672 | /* |
2608 | * set up enough so that it can read an inode | 2673 | * set up enough so that it can read an inode |
2609 | */ | 2674 | */ |
2610 | sb->s_op = &ext4_sops; | 2675 | if (!test_opt(sb, NOLOAD) && |
2676 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) | ||
2677 | sb->s_op = &ext4_sops; | ||
2678 | else | ||
2679 | sb->s_op = &ext4_nojournal_sops; | ||
2611 | sb->s_export_op = &ext4_export_ops; | 2680 | sb->s_export_op = &ext4_export_ops; |
2612 | sb->s_xattr = ext4_xattr_handlers; | 2681 | sb->s_xattr = ext4_xattr_handlers; |
2613 | #ifdef CONFIG_QUOTA | 2682 | #ifdef CONFIG_QUOTA |
@@ -2615,6 +2684,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2615 | sb->dq_op = &ext4_quota_operations; | 2684 | sb->dq_op = &ext4_quota_operations; |
2616 | #endif | 2685 | #endif |
2617 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ | 2686 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ |
2687 | mutex_init(&sbi->s_orphan_lock); | ||
2688 | mutex_init(&sbi->s_resize_lock); | ||
2618 | 2689 | ||
2619 | sb->s_root = NULL; | 2690 | sb->s_root = NULL; |
2620 | 2691 | ||
@@ -2632,13 +2703,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2632 | goto failed_mount3; | 2703 | goto failed_mount3; |
2633 | if (!(sb->s_flags & MS_RDONLY) && | 2704 | if (!(sb->s_flags & MS_RDONLY) && |
2634 | EXT4_SB(sb)->s_journal->j_failed_commit) { | 2705 | EXT4_SB(sb)->s_journal->j_failed_commit) { |
2635 | printk(KERN_CRIT "EXT4-fs error (device %s): " | 2706 | ext4_msg(sb, KERN_CRIT, "error: " |
2636 | "ext4_fill_super: Journal transaction " | 2707 | "ext4_fill_super: Journal transaction " |
2637 | "%u is corrupt\n", sb->s_id, | 2708 | "%u is corrupt", |
2638 | EXT4_SB(sb)->s_journal->j_failed_commit); | 2709 | EXT4_SB(sb)->s_journal->j_failed_commit); |
2639 | if (test_opt(sb, ERRORS_RO)) { | 2710 | if (test_opt(sb, ERRORS_RO)) { |
2640 | printk(KERN_CRIT | 2711 | ext4_msg(sb, KERN_CRIT, |
2641 | "Mounting filesystem read-only\n"); | 2712 | "Mounting filesystem read-only"); |
2642 | sb->s_flags |= MS_RDONLY; | 2713 | sb->s_flags |= MS_RDONLY; |
2643 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 2714 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
2644 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 2715 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
@@ -2646,14 +2717,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2646 | if (test_opt(sb, ERRORS_PANIC)) { | 2717 | if (test_opt(sb, ERRORS_PANIC)) { |
2647 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 2718 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
2648 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 2719 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
2649 | ext4_commit_super(sb, es, 1); | 2720 | ext4_commit_super(sb, 1); |
2650 | goto failed_mount4; | 2721 | goto failed_mount4; |
2651 | } | 2722 | } |
2652 | } | 2723 | } |
2653 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && | 2724 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && |
2654 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 2725 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
2655 | printk(KERN_ERR "EXT4-fs: required journal recovery " | 2726 | ext4_msg(sb, KERN_ERR, "required journal recovery " |
2656 | "suppressed and not mounted read-only\n"); | 2727 | "suppressed and not mounted read-only"); |
2657 | goto failed_mount4; | 2728 | goto failed_mount4; |
2658 | } else { | 2729 | } else { |
2659 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); | 2730 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); |
@@ -2666,7 +2737,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2666 | if (ext4_blocks_count(es) > 0xffffffffULL && | 2737 | if (ext4_blocks_count(es) > 0xffffffffULL && |
2667 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, | 2738 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, |
2668 | JBD2_FEATURE_INCOMPAT_64BIT)) { | 2739 | JBD2_FEATURE_INCOMPAT_64BIT)) { |
2669 | printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); | 2740 | ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); |
2670 | goto failed_mount4; | 2741 | goto failed_mount4; |
2671 | } | 2742 | } |
2672 | 2743 | ||
@@ -2704,8 +2775,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2704 | case EXT4_MOUNT_WRITEBACK_DATA: | 2775 | case EXT4_MOUNT_WRITEBACK_DATA: |
2705 | if (!jbd2_journal_check_available_features | 2776 | if (!jbd2_journal_check_available_features |
2706 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { | 2777 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { |
2707 | printk(KERN_ERR "EXT4-fs: Journal does not support " | 2778 | ext4_msg(sb, KERN_ERR, "Journal does not support " |
2708 | "requested data journaling mode\n"); | 2779 | "requested data journaling mode"); |
2709 | goto failed_mount4; | 2780 | goto failed_mount4; |
2710 | } | 2781 | } |
2711 | default: | 2782 | default: |
@@ -2717,8 +2788,8 @@ no_journal: | |||
2717 | 2788 | ||
2718 | if (test_opt(sb, NOBH)) { | 2789 | if (test_opt(sb, NOBH)) { |
2719 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { | 2790 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { |
2720 | printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " | 2791 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " |
2721 | "its supported only with writeback mode\n"); | 2792 | "its supported only with writeback mode"); |
2722 | clear_opt(sbi->s_mount_opt, NOBH); | 2793 | clear_opt(sbi->s_mount_opt, NOBH); |
2723 | } | 2794 | } |
2724 | } | 2795 | } |
@@ -2729,18 +2800,18 @@ no_journal: | |||
2729 | 2800 | ||
2730 | root = ext4_iget(sb, EXT4_ROOT_INO); | 2801 | root = ext4_iget(sb, EXT4_ROOT_INO); |
2731 | if (IS_ERR(root)) { | 2802 | if (IS_ERR(root)) { |
2732 | printk(KERN_ERR "EXT4-fs: get root inode failed\n"); | 2803 | ext4_msg(sb, KERN_ERR, "get root inode failed"); |
2733 | ret = PTR_ERR(root); | 2804 | ret = PTR_ERR(root); |
2734 | goto failed_mount4; | 2805 | goto failed_mount4; |
2735 | } | 2806 | } |
2736 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 2807 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
2737 | iput(root); | 2808 | iput(root); |
2738 | printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); | 2809 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); |
2739 | goto failed_mount4; | 2810 | goto failed_mount4; |
2740 | } | 2811 | } |
2741 | sb->s_root = d_alloc_root(root); | 2812 | sb->s_root = d_alloc_root(root); |
2742 | if (!sb->s_root) { | 2813 | if (!sb->s_root) { |
2743 | printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); | 2814 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); |
2744 | iput(root); | 2815 | iput(root); |
2745 | ret = -ENOMEM; | 2816 | ret = -ENOMEM; |
2746 | goto failed_mount4; | 2817 | goto failed_mount4; |
@@ -2769,22 +2840,29 @@ no_journal: | |||
2769 | sbi->s_inode_size) { | 2840 | sbi->s_inode_size) { |
2770 | sbi->s_want_extra_isize = sizeof(struct ext4_inode) - | 2841 | sbi->s_want_extra_isize = sizeof(struct ext4_inode) - |
2771 | EXT4_GOOD_OLD_INODE_SIZE; | 2842 | EXT4_GOOD_OLD_INODE_SIZE; |
2772 | printk(KERN_INFO "EXT4-fs: required extra inode space not" | 2843 | ext4_msg(sb, KERN_INFO, "required extra inode space not" |
2773 | "available.\n"); | 2844 | "available"); |
2774 | } | 2845 | } |
2775 | 2846 | ||
2776 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 2847 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
2777 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | 2848 | ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " |
2778 | "requested data journaling mode\n"); | 2849 | "requested data journaling mode"); |
2779 | clear_opt(sbi->s_mount_opt, DELALLOC); | 2850 | clear_opt(sbi->s_mount_opt, DELALLOC); |
2780 | } else if (test_opt(sb, DELALLOC)) | 2851 | } else if (test_opt(sb, DELALLOC)) |
2781 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | 2852 | ext4_msg(sb, KERN_INFO, "delayed allocation enabled"); |
2853 | |||
2854 | err = ext4_setup_system_zone(sb); | ||
2855 | if (err) { | ||
2856 | ext4_msg(sb, KERN_ERR, "failed to initialize system " | ||
2857 | "zone (%d)\n", err); | ||
2858 | goto failed_mount4; | ||
2859 | } | ||
2782 | 2860 | ||
2783 | ext4_ext_init(sb); | 2861 | ext4_ext_init(sb); |
2784 | err = ext4_mb_init(sb, needs_recovery); | 2862 | err = ext4_mb_init(sb, needs_recovery); |
2785 | if (err) { | 2863 | if (err) { |
2786 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | 2864 | ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", |
2787 | err); | 2865 | err); |
2788 | goto failed_mount4; | 2866 | goto failed_mount4; |
2789 | } | 2867 | } |
2790 | 2868 | ||
@@ -2798,19 +2876,11 @@ no_journal: | |||
2798 | goto failed_mount4; | 2876 | goto failed_mount4; |
2799 | }; | 2877 | }; |
2800 | 2878 | ||
2801 | /* | ||
2802 | * akpm: core read_super() calls in here with the superblock locked. | ||
2803 | * That deadlocks, because orphan cleanup needs to lock the superblock | ||
2804 | * in numerous places. Here we just pop the lock - it's relatively | ||
2805 | * harmless, because we are now ready to accept write_super() requests, | ||
2806 | * and aviro says that's the only reason for hanging onto the | ||
2807 | * superblock lock. | ||
2808 | */ | ||
2809 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; | 2879 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; |
2810 | ext4_orphan_cleanup(sb, es); | 2880 | ext4_orphan_cleanup(sb, es); |
2811 | EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; | 2881 | EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; |
2812 | if (needs_recovery) { | 2882 | if (needs_recovery) { |
2813 | printk(KERN_INFO "EXT4-fs: recovery complete.\n"); | 2883 | ext4_msg(sb, KERN_INFO, "recovery complete"); |
2814 | ext4_mark_recovery_complete(sb, es); | 2884 | ext4_mark_recovery_complete(sb, es); |
2815 | } | 2885 | } |
2816 | if (EXT4_SB(sb)->s_journal) { | 2886 | if (EXT4_SB(sb)->s_journal) { |
@@ -2823,25 +2893,30 @@ no_journal: | |||
2823 | } else | 2893 | } else |
2824 | descr = "out journal"; | 2894 | descr = "out journal"; |
2825 | 2895 | ||
2826 | printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", | 2896 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); |
2827 | sb->s_id, descr); | ||
2828 | 2897 | ||
2829 | lock_kernel(); | 2898 | lock_kernel(); |
2830 | return 0; | 2899 | return 0; |
2831 | 2900 | ||
2832 | cantfind_ext4: | 2901 | cantfind_ext4: |
2833 | if (!silent) | 2902 | if (!silent) |
2834 | printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", | 2903 | ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); |
2835 | sb->s_id); | ||
2836 | goto failed_mount; | 2904 | goto failed_mount; |
2837 | 2905 | ||
2838 | failed_mount4: | 2906 | failed_mount4: |
2839 | printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); | 2907 | ext4_msg(sb, KERN_ERR, "mount failed"); |
2908 | ext4_release_system_zone(sb); | ||
2840 | if (sbi->s_journal) { | 2909 | if (sbi->s_journal) { |
2841 | jbd2_journal_destroy(sbi->s_journal); | 2910 | jbd2_journal_destroy(sbi->s_journal); |
2842 | sbi->s_journal = NULL; | 2911 | sbi->s_journal = NULL; |
2843 | } | 2912 | } |
2844 | failed_mount3: | 2913 | failed_mount3: |
2914 | if (sbi->s_flex_groups) { | ||
2915 | if (is_vmalloc_addr(sbi->s_flex_groups)) | ||
2916 | vfree(sbi->s_flex_groups); | ||
2917 | else | ||
2918 | kfree(sbi->s_flex_groups); | ||
2919 | } | ||
2845 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2920 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
2846 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2921 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
2847 | percpu_counter_destroy(&sbi->s_dirs_counter); | 2922 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -2862,6 +2937,7 @@ failed_mount: | |||
2862 | brelse(bh); | 2937 | brelse(bh); |
2863 | out_fail: | 2938 | out_fail: |
2864 | sb->s_fs_info = NULL; | 2939 | sb->s_fs_info = NULL; |
2940 | kfree(sbi->s_blockgroup_lock); | ||
2865 | kfree(sbi); | 2941 | kfree(sbi); |
2866 | lock_kernel(); | 2942 | lock_kernel(); |
2867 | return ret; | 2943 | return ret; |
@@ -2906,27 +2982,27 @@ static journal_t *ext4_get_journal(struct super_block *sb, | |||
2906 | 2982 | ||
2907 | journal_inode = ext4_iget(sb, journal_inum); | 2983 | journal_inode = ext4_iget(sb, journal_inum); |
2908 | if (IS_ERR(journal_inode)) { | 2984 | if (IS_ERR(journal_inode)) { |
2909 | printk(KERN_ERR "EXT4-fs: no journal found.\n"); | 2985 | ext4_msg(sb, KERN_ERR, "no journal found"); |
2910 | return NULL; | 2986 | return NULL; |
2911 | } | 2987 | } |
2912 | if (!journal_inode->i_nlink) { | 2988 | if (!journal_inode->i_nlink) { |
2913 | make_bad_inode(journal_inode); | 2989 | make_bad_inode(journal_inode); |
2914 | iput(journal_inode); | 2990 | iput(journal_inode); |
2915 | printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); | 2991 | ext4_msg(sb, KERN_ERR, "journal inode is deleted"); |
2916 | return NULL; | 2992 | return NULL; |
2917 | } | 2993 | } |
2918 | 2994 | ||
2919 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", | 2995 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", |
2920 | journal_inode, journal_inode->i_size); | 2996 | journal_inode, journal_inode->i_size); |
2921 | if (!S_ISREG(journal_inode->i_mode)) { | 2997 | if (!S_ISREG(journal_inode->i_mode)) { |
2922 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); | 2998 | ext4_msg(sb, KERN_ERR, "invalid journal inode"); |
2923 | iput(journal_inode); | 2999 | iput(journal_inode); |
2924 | return NULL; | 3000 | return NULL; |
2925 | } | 3001 | } |
2926 | 3002 | ||
2927 | journal = jbd2_journal_init_inode(journal_inode); | 3003 | journal = jbd2_journal_init_inode(journal_inode); |
2928 | if (!journal) { | 3004 | if (!journal) { |
2929 | printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); | 3005 | ext4_msg(sb, KERN_ERR, "Could not load journal inode"); |
2930 | iput(journal_inode); | 3006 | iput(journal_inode); |
2931 | return NULL; | 3007 | return NULL; |
2932 | } | 3008 | } |
@@ -2950,22 +3026,22 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
2950 | 3026 | ||
2951 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); | 3027 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); |
2952 | 3028 | ||
2953 | bdev = ext4_blkdev_get(j_dev); | 3029 | bdev = ext4_blkdev_get(j_dev, sb); |
2954 | if (bdev == NULL) | 3030 | if (bdev == NULL) |
2955 | return NULL; | 3031 | return NULL; |
2956 | 3032 | ||
2957 | if (bd_claim(bdev, sb)) { | 3033 | if (bd_claim(bdev, sb)) { |
2958 | printk(KERN_ERR | 3034 | ext4_msg(sb, KERN_ERR, |
2959 | "EXT4-fs: failed to claim external journal device.\n"); | 3035 | "failed to claim external journal device"); |
2960 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | 3036 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); |
2961 | return NULL; | 3037 | return NULL; |
2962 | } | 3038 | } |
2963 | 3039 | ||
2964 | blocksize = sb->s_blocksize; | 3040 | blocksize = sb->s_blocksize; |
2965 | hblock = bdev_hardsect_size(bdev); | 3041 | hblock = bdev_logical_block_size(bdev); |
2966 | if (blocksize < hblock) { | 3042 | if (blocksize < hblock) { |
2967 | printk(KERN_ERR | 3043 | ext4_msg(sb, KERN_ERR, |
2968 | "EXT4-fs: blocksize too small for journal device.\n"); | 3044 | "blocksize too small for journal device"); |
2969 | goto out_bdev; | 3045 | goto out_bdev; |
2970 | } | 3046 | } |
2971 | 3047 | ||
@@ -2973,8 +3049,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
2973 | offset = EXT4_MIN_BLOCK_SIZE % blocksize; | 3049 | offset = EXT4_MIN_BLOCK_SIZE % blocksize; |
2974 | set_blocksize(bdev, blocksize); | 3050 | set_blocksize(bdev, blocksize); |
2975 | if (!(bh = __bread(bdev, sb_block, blocksize))) { | 3051 | if (!(bh = __bread(bdev, sb_block, blocksize))) { |
2976 | printk(KERN_ERR "EXT4-fs: couldn't read superblock of " | 3052 | ext4_msg(sb, KERN_ERR, "couldn't read superblock of " |
2977 | "external journal\n"); | 3053 | "external journal"); |
2978 | goto out_bdev; | 3054 | goto out_bdev; |
2979 | } | 3055 | } |
2980 | 3056 | ||
@@ -2982,14 +3058,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
2982 | if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || | 3058 | if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || |
2983 | !(le32_to_cpu(es->s_feature_incompat) & | 3059 | !(le32_to_cpu(es->s_feature_incompat) & |
2984 | EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { | 3060 | EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { |
2985 | printk(KERN_ERR "EXT4-fs: external journal has " | 3061 | ext4_msg(sb, KERN_ERR, "external journal has " |
2986 | "bad superblock\n"); | 3062 | "bad superblock"); |
2987 | brelse(bh); | 3063 | brelse(bh); |
2988 | goto out_bdev; | 3064 | goto out_bdev; |
2989 | } | 3065 | } |
2990 | 3066 | ||
2991 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { | 3067 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { |
2992 | printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); | 3068 | ext4_msg(sb, KERN_ERR, "journal UUID does not match"); |
2993 | brelse(bh); | 3069 | brelse(bh); |
2994 | goto out_bdev; | 3070 | goto out_bdev; |
2995 | } | 3071 | } |
@@ -3001,25 +3077,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
3001 | journal = jbd2_journal_init_dev(bdev, sb->s_bdev, | 3077 | journal = jbd2_journal_init_dev(bdev, sb->s_bdev, |
3002 | start, len, blocksize); | 3078 | start, len, blocksize); |
3003 | if (!journal) { | 3079 | if (!journal) { |
3004 | printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); | 3080 | ext4_msg(sb, KERN_ERR, "failed to create device journal"); |
3005 | goto out_bdev; | 3081 | goto out_bdev; |
3006 | } | 3082 | } |
3007 | journal->j_private = sb; | 3083 | journal->j_private = sb; |
3008 | ll_rw_block(READ, 1, &journal->j_sb_buffer); | 3084 | ll_rw_block(READ, 1, &journal->j_sb_buffer); |
3009 | wait_on_buffer(journal->j_sb_buffer); | 3085 | wait_on_buffer(journal->j_sb_buffer); |
3010 | if (!buffer_uptodate(journal->j_sb_buffer)) { | 3086 | if (!buffer_uptodate(journal->j_sb_buffer)) { |
3011 | printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); | 3087 | ext4_msg(sb, KERN_ERR, "I/O error on journal device"); |
3012 | goto out_journal; | 3088 | goto out_journal; |
3013 | } | 3089 | } |
3014 | if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { | 3090 | if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { |
3015 | printk(KERN_ERR "EXT4-fs: External journal has more than one " | 3091 | ext4_msg(sb, KERN_ERR, "External journal has more than one " |
3016 | "user (unsupported) - %d\n", | 3092 | "user (unsupported) - %d", |
3017 | be32_to_cpu(journal->j_superblock->s_nr_users)); | 3093 | be32_to_cpu(journal->j_superblock->s_nr_users)); |
3018 | goto out_journal; | 3094 | goto out_journal; |
3019 | } | 3095 | } |
3020 | EXT4_SB(sb)->journal_bdev = bdev; | 3096 | EXT4_SB(sb)->journal_bdev = bdev; |
3021 | ext4_init_journal_params(sb, journal); | 3097 | ext4_init_journal_params(sb, journal); |
3022 | return journal; | 3098 | return journal; |
3099 | |||
3023 | out_journal: | 3100 | out_journal: |
3024 | jbd2_journal_destroy(journal); | 3101 | jbd2_journal_destroy(journal); |
3025 | out_bdev: | 3102 | out_bdev: |
@@ -3041,8 +3118,8 @@ static int ext4_load_journal(struct super_block *sb, | |||
3041 | 3118 | ||
3042 | if (journal_devnum && | 3119 | if (journal_devnum && |
3043 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 3120 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
3044 | printk(KERN_INFO "EXT4-fs: external journal device major/minor " | 3121 | ext4_msg(sb, KERN_INFO, "external journal device major/minor " |
3045 | "numbers have changed\n"); | 3122 | "numbers have changed"); |
3046 | journal_dev = new_decode_dev(journal_devnum); | 3123 | journal_dev = new_decode_dev(journal_devnum); |
3047 | } else | 3124 | } else |
3048 | journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); | 3125 | journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); |
@@ -3054,24 +3131,23 @@ static int ext4_load_journal(struct super_block *sb, | |||
3054 | * crash? For recovery, we need to check in advance whether we | 3131 | * crash? For recovery, we need to check in advance whether we |
3055 | * can get read-write access to the device. | 3132 | * can get read-write access to the device. |
3056 | */ | 3133 | */ |
3057 | |||
3058 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 3134 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
3059 | if (sb->s_flags & MS_RDONLY) { | 3135 | if (sb->s_flags & MS_RDONLY) { |
3060 | printk(KERN_INFO "EXT4-fs: INFO: recovery " | 3136 | ext4_msg(sb, KERN_INFO, "INFO: recovery " |
3061 | "required on readonly filesystem.\n"); | 3137 | "required on readonly filesystem"); |
3062 | if (really_read_only) { | 3138 | if (really_read_only) { |
3063 | printk(KERN_ERR "EXT4-fs: write access " | 3139 | ext4_msg(sb, KERN_ERR, "write access " |
3064 | "unavailable, cannot proceed.\n"); | 3140 | "unavailable, cannot proceed"); |
3065 | return -EROFS; | 3141 | return -EROFS; |
3066 | } | 3142 | } |
3067 | printk(KERN_INFO "EXT4-fs: write access will " | 3143 | ext4_msg(sb, KERN_INFO, "write access will " |
3068 | "be enabled during recovery.\n"); | 3144 | "be enabled during recovery"); |
3069 | } | 3145 | } |
3070 | } | 3146 | } |
3071 | 3147 | ||
3072 | if (journal_inum && journal_dev) { | 3148 | if (journal_inum && journal_dev) { |
3073 | printk(KERN_ERR "EXT4-fs: filesystem has both journal " | 3149 | ext4_msg(sb, KERN_ERR, "filesystem has both journal " |
3074 | "and inode journals!\n"); | 3150 | "and inode journals!"); |
3075 | return -EINVAL; | 3151 | return -EINVAL; |
3076 | } | 3152 | } |
3077 | 3153 | ||
@@ -3084,14 +3160,14 @@ static int ext4_load_journal(struct super_block *sb, | |||
3084 | } | 3160 | } |
3085 | 3161 | ||
3086 | if (journal->j_flags & JBD2_BARRIER) | 3162 | if (journal->j_flags & JBD2_BARRIER) |
3087 | printk(KERN_INFO "EXT4-fs: barriers enabled\n"); | 3163 | ext4_msg(sb, KERN_INFO, "barriers enabled"); |
3088 | else | 3164 | else |
3089 | printk(KERN_INFO "EXT4-fs: barriers disabled\n"); | 3165 | ext4_msg(sb, KERN_INFO, "barriers disabled"); |
3090 | 3166 | ||
3091 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | 3167 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { |
3092 | err = jbd2_journal_update_format(journal); | 3168 | err = jbd2_journal_update_format(journal); |
3093 | if (err) { | 3169 | if (err) { |
3094 | printk(KERN_ERR "EXT4-fs: error updating journal.\n"); | 3170 | ext4_msg(sb, KERN_ERR, "error updating journal"); |
3095 | jbd2_journal_destroy(journal); | 3171 | jbd2_journal_destroy(journal); |
3096 | return err; | 3172 | return err; |
3097 | } | 3173 | } |
@@ -3103,7 +3179,7 @@ static int ext4_load_journal(struct super_block *sb, | |||
3103 | err = jbd2_journal_load(journal); | 3179 | err = jbd2_journal_load(journal); |
3104 | 3180 | ||
3105 | if (err) { | 3181 | if (err) { |
3106 | printk(KERN_ERR "EXT4-fs: error loading journal.\n"); | 3182 | ext4_msg(sb, KERN_ERR, "error loading journal"); |
3107 | jbd2_journal_destroy(journal); | 3183 | jbd2_journal_destroy(journal); |
3108 | return err; | 3184 | return err; |
3109 | } | 3185 | } |
@@ -3114,18 +3190,17 @@ static int ext4_load_journal(struct super_block *sb, | |||
3114 | if (journal_devnum && | 3190 | if (journal_devnum && |
3115 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 3191 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
3116 | es->s_journal_dev = cpu_to_le32(journal_devnum); | 3192 | es->s_journal_dev = cpu_to_le32(journal_devnum); |
3117 | sb->s_dirt = 1; | ||
3118 | 3193 | ||
3119 | /* Make sure we flush the recovery flag to disk. */ | 3194 | /* Make sure we flush the recovery flag to disk. */ |
3120 | ext4_commit_super(sb, es, 1); | 3195 | ext4_commit_super(sb, 1); |
3121 | } | 3196 | } |
3122 | 3197 | ||
3123 | return 0; | 3198 | return 0; |
3124 | } | 3199 | } |
3125 | 3200 | ||
3126 | static int ext4_commit_super(struct super_block *sb, | 3201 | static int ext4_commit_super(struct super_block *sb, int sync) |
3127 | struct ext4_super_block *es, int sync) | ||
3128 | { | 3202 | { |
3203 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
3129 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; | 3204 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; |
3130 | int error = 0; | 3205 | int error = 0; |
3131 | 3206 | ||
@@ -3140,8 +3215,8 @@ static int ext4_commit_super(struct super_block *sb, | |||
3140 | * be remapped. Nothing we can do but to retry the | 3215 | * be remapped. Nothing we can do but to retry the |
3141 | * write and hope for the best. | 3216 | * write and hope for the best. |
3142 | */ | 3217 | */ |
3143 | printk(KERN_ERR "EXT4-fs: previous I/O error to " | 3218 | ext4_msg(sb, KERN_ERR, "previous I/O error to " |
3144 | "superblock detected for %s.\n", sb->s_id); | 3219 | "superblock detected"); |
3145 | clear_buffer_write_io_error(sbh); | 3220 | clear_buffer_write_io_error(sbh); |
3146 | set_buffer_uptodate(sbh); | 3221 | set_buffer_uptodate(sbh); |
3147 | } | 3222 | } |
@@ -3154,7 +3229,7 @@ static int ext4_commit_super(struct super_block *sb, | |||
3154 | &EXT4_SB(sb)->s_freeblocks_counter)); | 3229 | &EXT4_SB(sb)->s_freeblocks_counter)); |
3155 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( | 3230 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( |
3156 | &EXT4_SB(sb)->s_freeinodes_counter)); | 3231 | &EXT4_SB(sb)->s_freeinodes_counter)); |
3157 | 3232 | sb->s_dirt = 0; | |
3158 | BUFFER_TRACE(sbh, "marking dirty"); | 3233 | BUFFER_TRACE(sbh, "marking dirty"); |
3159 | mark_buffer_dirty(sbh); | 3234 | mark_buffer_dirty(sbh); |
3160 | if (sync) { | 3235 | if (sync) { |
@@ -3164,8 +3239,8 @@ static int ext4_commit_super(struct super_block *sb, | |||
3164 | 3239 | ||
3165 | error = buffer_write_io_error(sbh); | 3240 | error = buffer_write_io_error(sbh); |
3166 | if (error) { | 3241 | if (error) { |
3167 | printk(KERN_ERR "EXT4-fs: I/O error while writing " | 3242 | ext4_msg(sb, KERN_ERR, "I/O error while writing " |
3168 | "superblock for %s.\n", sb->s_id); | 3243 | "superblock"); |
3169 | clear_buffer_write_io_error(sbh); | 3244 | clear_buffer_write_io_error(sbh); |
3170 | set_buffer_uptodate(sbh); | 3245 | set_buffer_uptodate(sbh); |
3171 | } | 3246 | } |
@@ -3173,7 +3248,6 @@ static int ext4_commit_super(struct super_block *sb, | |||
3173 | return error; | 3248 | return error; |
3174 | } | 3249 | } |
3175 | 3250 | ||
3176 | |||
3177 | /* | 3251 | /* |
3178 | * Have we just finished recovery? If so, and if we are mounting (or | 3252 | * Have we just finished recovery? If so, and if we are mounting (or |
3179 | * remounting) the filesystem readonly, then we will end up with a | 3253 | * remounting) the filesystem readonly, then we will end up with a |
@@ -3192,14 +3266,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
3192 | if (jbd2_journal_flush(journal) < 0) | 3266 | if (jbd2_journal_flush(journal) < 0) |
3193 | goto out; | 3267 | goto out; |
3194 | 3268 | ||
3195 | lock_super(sb); | ||
3196 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 3269 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
3197 | sb->s_flags & MS_RDONLY) { | 3270 | sb->s_flags & MS_RDONLY) { |
3198 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3271 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
3199 | sb->s_dirt = 0; | 3272 | ext4_commit_super(sb, 1); |
3200 | ext4_commit_super(sb, es, 1); | ||
3201 | } | 3273 | } |
3202 | unlock_super(sb); | ||
3203 | 3274 | ||
3204 | out: | 3275 | out: |
3205 | jbd2_journal_unlock_updates(journal); | 3276 | jbd2_journal_unlock_updates(journal); |
@@ -3238,7 +3309,7 @@ static void ext4_clear_journal_err(struct super_block *sb, | |||
3238 | 3309 | ||
3239 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 3310 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
3240 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 3311 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
3241 | ext4_commit_super(sb, es, 1); | 3312 | ext4_commit_super(sb, 1); |
3242 | 3313 | ||
3243 | jbd2_journal_clear_err(journal); | 3314 | jbd2_journal_clear_err(journal); |
3244 | } | 3315 | } |
@@ -3257,29 +3328,17 @@ int ext4_force_commit(struct super_block *sb) | |||
3257 | return 0; | 3328 | return 0; |
3258 | 3329 | ||
3259 | journal = EXT4_SB(sb)->s_journal; | 3330 | journal = EXT4_SB(sb)->s_journal; |
3260 | if (journal) { | 3331 | if (journal) |
3261 | sb->s_dirt = 0; | ||
3262 | ret = ext4_journal_force_commit(journal); | 3332 | ret = ext4_journal_force_commit(journal); |
3263 | } | ||
3264 | 3333 | ||
3265 | return ret; | 3334 | return ret; |
3266 | } | 3335 | } |
3267 | 3336 | ||
3268 | /* | ||
3269 | * Ext4 always journals updates to the superblock itself, so we don't | ||
3270 | * have to propagate any other updates to the superblock on disk at this | ||
3271 | * point. (We can probably nuke this function altogether, and remove | ||
3272 | * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) | ||
3273 | */ | ||
3274 | static void ext4_write_super(struct super_block *sb) | 3337 | static void ext4_write_super(struct super_block *sb) |
3275 | { | 3338 | { |
3276 | if (EXT4_SB(sb)->s_journal) { | 3339 | lock_super(sb); |
3277 | if (mutex_trylock(&sb->s_lock) != 0) | 3340 | ext4_commit_super(sb, 1); |
3278 | BUG(); | 3341 | unlock_super(sb); |
3279 | sb->s_dirt = 0; | ||
3280 | } else { | ||
3281 | ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); | ||
3282 | } | ||
3283 | } | 3342 | } |
3284 | 3343 | ||
3285 | static int ext4_sync_fs(struct super_block *sb, int wait) | 3344 | static int ext4_sync_fs(struct super_block *sb, int wait) |
@@ -3288,16 +3347,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
3288 | tid_t target; | 3347 | tid_t target; |
3289 | 3348 | ||
3290 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | 3349 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); |
3291 | sb->s_dirt = 0; | 3350 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { |
3292 | if (EXT4_SB(sb)->s_journal) { | 3351 | if (wait) |
3293 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, | 3352 | jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); |
3294 | &target)) { | ||
3295 | if (wait) | ||
3296 | jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, | ||
3297 | target); | ||
3298 | } | ||
3299 | } else { | ||
3300 | ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); | ||
3301 | } | 3353 | } |
3302 | return ret; | 3354 | return ret; |
3303 | } | 3355 | } |
@@ -3310,34 +3362,32 @@ static int ext4_freeze(struct super_block *sb) | |||
3310 | { | 3362 | { |
3311 | int error = 0; | 3363 | int error = 0; |
3312 | journal_t *journal; | 3364 | journal_t *journal; |
3313 | sb->s_dirt = 0; | ||
3314 | 3365 | ||
3315 | if (!(sb->s_flags & MS_RDONLY)) { | 3366 | if (sb->s_flags & MS_RDONLY) |
3316 | journal = EXT4_SB(sb)->s_journal; | 3367 | return 0; |
3317 | 3368 | ||
3318 | if (journal) { | 3369 | journal = EXT4_SB(sb)->s_journal; |
3319 | /* Now we set up the journal barrier. */ | ||
3320 | jbd2_journal_lock_updates(journal); | ||
3321 | 3370 | ||
3322 | /* | 3371 | /* Now we set up the journal barrier. */ |
3323 | * We don't want to clear needs_recovery flag when we | 3372 | jbd2_journal_lock_updates(journal); |
3324 | * failed to flush the journal. | ||
3325 | */ | ||
3326 | error = jbd2_journal_flush(journal); | ||
3327 | if (error < 0) | ||
3328 | goto out; | ||
3329 | } | ||
3330 | 3373 | ||
3331 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 3374 | /* |
3332 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3375 | * Don't clear the needs_recovery flag if we failed to flush |
3333 | error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); | 3376 | * the journal. |
3334 | if (error) | 3377 | */ |
3335 | goto out; | 3378 | error = jbd2_journal_flush(journal); |
3379 | if (error < 0) { | ||
3380 | out: | ||
3381 | jbd2_journal_unlock_updates(journal); | ||
3382 | return error; | ||
3336 | } | 3383 | } |
3384 | |||
3385 | /* Journal blocked and flushed, clear needs_recovery flag. */ | ||
3386 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | ||
3387 | error = ext4_commit_super(sb, 1); | ||
3388 | if (error) | ||
3389 | goto out; | ||
3337 | return 0; | 3390 | return 0; |
3338 | out: | ||
3339 | jbd2_journal_unlock_updates(journal); | ||
3340 | return error; | ||
3341 | } | 3391 | } |
3342 | 3392 | ||
3343 | /* | 3393 | /* |
@@ -3346,14 +3396,15 @@ out: | |||
3346 | */ | 3396 | */ |
3347 | static int ext4_unfreeze(struct super_block *sb) | 3397 | static int ext4_unfreeze(struct super_block *sb) |
3348 | { | 3398 | { |
3349 | if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { | 3399 | if (sb->s_flags & MS_RDONLY) |
3350 | lock_super(sb); | 3400 | return 0; |
3351 | /* Reser the needs_recovery flag before the fs is unlocked. */ | 3401 | |
3352 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3402 | lock_super(sb); |
3353 | ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); | 3403 | /* Reset the needs_recovery flag before the fs is unlocked. */ |
3354 | unlock_super(sb); | 3404 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
3355 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 3405 | ext4_commit_super(sb, 1); |
3356 | } | 3406 | unlock_super(sb); |
3407 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | ||
3357 | return 0; | 3408 | return 0; |
3358 | } | 3409 | } |
3359 | 3410 | ||
@@ -3371,7 +3422,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3371 | int i; | 3422 | int i; |
3372 | #endif | 3423 | #endif |
3373 | 3424 | ||
3425 | lock_kernel(); | ||
3426 | |||
3374 | /* Store the original options */ | 3427 | /* Store the original options */ |
3428 | lock_super(sb); | ||
3375 | old_sb_flags = sb->s_flags; | 3429 | old_sb_flags = sb->s_flags; |
3376 | old_opts.s_mount_opt = sbi->s_mount_opt; | 3430 | old_opts.s_mount_opt = sbi->s_mount_opt; |
3377 | old_opts.s_resuid = sbi->s_resuid; | 3431 | old_opts.s_resuid = sbi->s_resuid; |
@@ -3432,22 +3486,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3432 | (sbi->s_mount_state & EXT4_VALID_FS)) | 3486 | (sbi->s_mount_state & EXT4_VALID_FS)) |
3433 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 3487 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
3434 | 3488 | ||
3435 | /* | 3489 | if (sbi->s_journal) |
3436 | * We have to unlock super so that we can wait for | ||
3437 | * transactions. | ||
3438 | */ | ||
3439 | if (sbi->s_journal) { | ||
3440 | unlock_super(sb); | ||
3441 | ext4_mark_recovery_complete(sb, es); | 3490 | ext4_mark_recovery_complete(sb, es); |
3442 | lock_super(sb); | ||
3443 | } | ||
3444 | } else { | 3491 | } else { |
3445 | int ret; | 3492 | int ret; |
3446 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3493 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3447 | ~EXT4_FEATURE_RO_COMPAT_SUPP))) { | 3494 | ~EXT4_FEATURE_RO_COMPAT_SUPP))) { |
3448 | printk(KERN_WARNING "EXT4-fs: %s: couldn't " | 3495 | ext4_msg(sb, KERN_WARNING, "couldn't " |
3449 | "remount RDWR because of unsupported " | 3496 | "remount RDWR because of unsupported " |
3450 | "optional features (%x).\n", sb->s_id, | 3497 | "optional features (%x)", |
3451 | (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & | 3498 | (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & |
3452 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | 3499 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); |
3453 | err = -EROFS; | 3500 | err = -EROFS; |
@@ -3456,17 +3503,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3456 | 3503 | ||
3457 | /* | 3504 | /* |
3458 | * Make sure the group descriptor checksums | 3505 | * Make sure the group descriptor checksums |
3459 | * are sane. If they aren't, refuse to | 3506 | * are sane. If they aren't, refuse to remount r/w. |
3460 | * remount r/w. | ||
3461 | */ | 3507 | */ |
3462 | for (g = 0; g < sbi->s_groups_count; g++) { | 3508 | for (g = 0; g < sbi->s_groups_count; g++) { |
3463 | struct ext4_group_desc *gdp = | 3509 | struct ext4_group_desc *gdp = |
3464 | ext4_get_group_desc(sb, g, NULL); | 3510 | ext4_get_group_desc(sb, g, NULL); |
3465 | 3511 | ||
3466 | if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { | 3512 | if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { |
3467 | printk(KERN_ERR | 3513 | ext4_msg(sb, KERN_ERR, |
3468 | "EXT4-fs: ext4_remount: " | 3514 | "ext4_remount: Checksum for group %u failed (%u!=%u)", |
3469 | "Checksum for group %u failed (%u!=%u)\n", | ||
3470 | g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), | 3515 | g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), |
3471 | le16_to_cpu(gdp->bg_checksum)); | 3516 | le16_to_cpu(gdp->bg_checksum)); |
3472 | err = -EINVAL; | 3517 | err = -EINVAL; |
@@ -3480,11 +3525,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3480 | * require a full umount/remount for now. | 3525 | * require a full umount/remount for now. |
3481 | */ | 3526 | */ |
3482 | if (es->s_last_orphan) { | 3527 | if (es->s_last_orphan) { |
3483 | printk(KERN_WARNING "EXT4-fs: %s: couldn't " | 3528 | ext4_msg(sb, KERN_WARNING, "Couldn't " |
3484 | "remount RDWR because of unprocessed " | 3529 | "remount RDWR because of unprocessed " |
3485 | "orphan inode list. Please " | 3530 | "orphan inode list. Please " |
3486 | "umount/remount instead.\n", | 3531 | "umount/remount instead"); |
3487 | sb->s_id); | ||
3488 | err = -EINVAL; | 3532 | err = -EINVAL; |
3489 | goto restore_opts; | 3533 | goto restore_opts; |
3490 | } | 3534 | } |
@@ -3504,8 +3548,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3504 | sb->s_flags &= ~MS_RDONLY; | 3548 | sb->s_flags &= ~MS_RDONLY; |
3505 | } | 3549 | } |
3506 | } | 3550 | } |
3551 | ext4_setup_system_zone(sb); | ||
3507 | if (sbi->s_journal == NULL) | 3552 | if (sbi->s_journal == NULL) |
3508 | ext4_commit_super(sb, es, 1); | 3553 | ext4_commit_super(sb, 1); |
3509 | 3554 | ||
3510 | #ifdef CONFIG_QUOTA | 3555 | #ifdef CONFIG_QUOTA |
3511 | /* Release old quota file names */ | 3556 | /* Release old quota file names */ |
@@ -3514,7 +3559,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3514 | old_opts.s_qf_names[i] != sbi->s_qf_names[i]) | 3559 | old_opts.s_qf_names[i] != sbi->s_qf_names[i]) |
3515 | kfree(old_opts.s_qf_names[i]); | 3560 | kfree(old_opts.s_qf_names[i]); |
3516 | #endif | 3561 | #endif |
3562 | unlock_super(sb); | ||
3563 | unlock_kernel(); | ||
3517 | return 0; | 3564 | return 0; |
3565 | |||
3518 | restore_opts: | 3566 | restore_opts: |
3519 | sb->s_flags = old_sb_flags; | 3567 | sb->s_flags = old_sb_flags; |
3520 | sbi->s_mount_opt = old_opts.s_mount_opt; | 3568 | sbi->s_mount_opt = old_opts.s_mount_opt; |
@@ -3532,6 +3580,8 @@ restore_opts: | |||
3532 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 3580 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
3533 | } | 3581 | } |
3534 | #endif | 3582 | #endif |
3583 | unlock_super(sb); | ||
3584 | unlock_kernel(); | ||
3535 | return err; | 3585 | return err; |
3536 | } | 3586 | } |
3537 | 3587 | ||
@@ -3545,9 +3595,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3545 | if (test_opt(sb, MINIX_DF)) { | 3595 | if (test_opt(sb, MINIX_DF)) { |
3546 | sbi->s_overhead_last = 0; | 3596 | sbi->s_overhead_last = 0; |
3547 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { | 3597 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { |
3548 | ext4_group_t ngroups = sbi->s_groups_count, i; | 3598 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
3549 | ext4_fsblk_t overhead = 0; | 3599 | ext4_fsblk_t overhead = 0; |
3550 | smp_rmb(); | ||
3551 | 3600 | ||
3552 | /* | 3601 | /* |
3553 | * Compute the overhead (FS structures). This is constant | 3602 | * Compute the overhead (FS structures). This is constant |
@@ -3599,11 +3648,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3599 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 3648 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
3600 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; | 3649 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; |
3601 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; | 3650 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; |
3651 | |||
3602 | return 0; | 3652 | return 0; |
3603 | } | 3653 | } |
3604 | 3654 | ||
3605 | /* Helper function for writing quotas on sync - we need to start transaction before quota file | 3655 | /* Helper function for writing quotas on sync - we need to start transaction |
3606 | * is locked for write. Otherwise the are possible deadlocks: | 3656 | * before quota file is locked for write. Otherwise the are possible deadlocks: |
3607 | * Process 1 Process 2 | 3657 | * Process 1 Process 2 |
3608 | * ext4_create() quota_sync() | 3658 | * ext4_create() quota_sync() |
3609 | * jbd2_journal_start() write_dquot() | 3659 | * jbd2_journal_start() write_dquot() |
@@ -3627,7 +3677,7 @@ static int ext4_write_dquot(struct dquot *dquot) | |||
3627 | 3677 | ||
3628 | inode = dquot_to_inode(dquot); | 3678 | inode = dquot_to_inode(dquot); |
3629 | handle = ext4_journal_start(inode, | 3679 | handle = ext4_journal_start(inode, |
3630 | EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); | 3680 | EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); |
3631 | if (IS_ERR(handle)) | 3681 | if (IS_ERR(handle)) |
3632 | return PTR_ERR(handle); | 3682 | return PTR_ERR(handle); |
3633 | ret = dquot_commit(dquot); | 3683 | ret = dquot_commit(dquot); |
@@ -3643,7 +3693,7 @@ static int ext4_acquire_dquot(struct dquot *dquot) | |||
3643 | handle_t *handle; | 3693 | handle_t *handle; |
3644 | 3694 | ||
3645 | handle = ext4_journal_start(dquot_to_inode(dquot), | 3695 | handle = ext4_journal_start(dquot_to_inode(dquot), |
3646 | EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); | 3696 | EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); |
3647 | if (IS_ERR(handle)) | 3697 | if (IS_ERR(handle)) |
3648 | return PTR_ERR(handle); | 3698 | return PTR_ERR(handle); |
3649 | ret = dquot_acquire(dquot); | 3699 | ret = dquot_acquire(dquot); |
@@ -3659,7 +3709,7 @@ static int ext4_release_dquot(struct dquot *dquot) | |||
3659 | handle_t *handle; | 3709 | handle_t *handle; |
3660 | 3710 | ||
3661 | handle = ext4_journal_start(dquot_to_inode(dquot), | 3711 | handle = ext4_journal_start(dquot_to_inode(dquot), |
3662 | EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); | 3712 | EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); |
3663 | if (IS_ERR(handle)) { | 3713 | if (IS_ERR(handle)) { |
3664 | /* Release dquot anyway to avoid endless cycle in dqput() */ | 3714 | /* Release dquot anyway to avoid endless cycle in dqput() */ |
3665 | dquot_release(dquot); | 3715 | dquot_release(dquot); |
@@ -3707,7 +3757,7 @@ static int ext4_write_info(struct super_block *sb, int type) | |||
3707 | static int ext4_quota_on_mount(struct super_block *sb, int type) | 3757 | static int ext4_quota_on_mount(struct super_block *sb, int type) |
3708 | { | 3758 | { |
3709 | return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], | 3759 | return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], |
3710 | EXT4_SB(sb)->s_jquota_fmt, type); | 3760 | EXT4_SB(sb)->s_jquota_fmt, type); |
3711 | } | 3761 | } |
3712 | 3762 | ||
3713 | /* | 3763 | /* |
@@ -3738,9 +3788,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3738 | if (EXT4_SB(sb)->s_qf_names[type]) { | 3788 | if (EXT4_SB(sb)->s_qf_names[type]) { |
3739 | /* Quotafile not in fs root? */ | 3789 | /* Quotafile not in fs root? */ |
3740 | if (path.dentry->d_parent != sb->s_root) | 3790 | if (path.dentry->d_parent != sb->s_root) |
3741 | printk(KERN_WARNING | 3791 | ext4_msg(sb, KERN_WARNING, |
3742 | "EXT4-fs: Quota file not on filesystem root. " | 3792 | "Quota file not on filesystem root. " |
3743 | "Journaled quota will not work.\n"); | 3793 | "Journaled quota will not work"); |
3744 | } | 3794 | } |
3745 | 3795 | ||
3746 | /* | 3796 | /* |
@@ -3823,8 +3873,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
3823 | handle_t *handle = journal_current_handle(); | 3873 | handle_t *handle = journal_current_handle(); |
3824 | 3874 | ||
3825 | if (EXT4_SB(sb)->s_journal && !handle) { | 3875 | if (EXT4_SB(sb)->s_journal && !handle) { |
3826 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" | 3876 | ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" |
3827 | " cancelled because transaction is not started.\n", | 3877 | " cancelled because transaction is not started", |
3828 | (unsigned long long)off, (unsigned long long)len); | 3878 | (unsigned long long)off, (unsigned long long)len); |
3829 | return -EIO; | 3879 | return -EIO; |
3830 | } | 3880 | } |
@@ -3878,10 +3928,10 @@ out: | |||
3878 | 3928 | ||
3879 | #endif | 3929 | #endif |
3880 | 3930 | ||
3881 | static int ext4_get_sb(struct file_system_type *fs_type, | 3931 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, |
3882 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 3932 | const char *dev_name, void *data, struct vfsmount *mnt) |
3883 | { | 3933 | { |
3884 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3934 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); |
3885 | } | 3935 | } |
3886 | 3936 | ||
3887 | static struct file_system_type ext4_fs_type = { | 3937 | static struct file_system_type ext4_fs_type = { |
@@ -3893,14 +3943,14 @@ static struct file_system_type ext4_fs_type = { | |||
3893 | }; | 3943 | }; |
3894 | 3944 | ||
3895 | #ifdef CONFIG_EXT4DEV_COMPAT | 3945 | #ifdef CONFIG_EXT4DEV_COMPAT |
3896 | static int ext4dev_get_sb(struct file_system_type *fs_type, | 3946 | static int ext4dev_get_sb(struct file_system_type *fs_type, int flags, |
3897 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 3947 | const char *dev_name, void *data,struct vfsmount *mnt) |
3898 | { | 3948 | { |
3899 | printk(KERN_WARNING "EXT4-fs: Update your userspace programs " | 3949 | printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs " |
3900 | "to mount using ext4\n"); | 3950 | "to mount using ext4\n", dev_name); |
3901 | printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " | 3951 | printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility " |
3902 | "will go away by 2.6.31\n"); | 3952 | "will go away by 2.6.31\n", dev_name); |
3903 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3953 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); |
3904 | } | 3954 | } |
3905 | 3955 | ||
3906 | static struct file_system_type ext4dev_fs_type = { | 3956 | static struct file_system_type ext4dev_fs_type = { |
@@ -3917,13 +3967,16 @@ static int __init init_ext4_fs(void) | |||
3917 | { | 3967 | { |
3918 | int err; | 3968 | int err; |
3919 | 3969 | ||
3970 | err = init_ext4_system_zone(); | ||
3971 | if (err) | ||
3972 | return err; | ||
3920 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 3973 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
3921 | if (!ext4_kset) | 3974 | if (!ext4_kset) |
3922 | return -ENOMEM; | 3975 | goto out4; |
3923 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 3976 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
3924 | err = init_ext4_mballoc(); | 3977 | err = init_ext4_mballoc(); |
3925 | if (err) | 3978 | if (err) |
3926 | return err; | 3979 | goto out3; |
3927 | 3980 | ||
3928 | err = init_ext4_xattr(); | 3981 | err = init_ext4_xattr(); |
3929 | if (err) | 3982 | if (err) |
@@ -3948,6 +4001,11 @@ out1: | |||
3948 | exit_ext4_xattr(); | 4001 | exit_ext4_xattr(); |
3949 | out2: | 4002 | out2: |
3950 | exit_ext4_mballoc(); | 4003 | exit_ext4_mballoc(); |
4004 | out3: | ||
4005 | remove_proc_entry("fs/ext4", NULL); | ||
4006 | kset_unregister(ext4_kset); | ||
4007 | out4: | ||
4008 | exit_ext4_system_zone(); | ||
3951 | return err; | 4009 | return err; |
3952 | } | 4010 | } |
3953 | 4011 | ||
@@ -3962,6 +4020,7 @@ static void __exit exit_ext4_fs(void) | |||
3962 | exit_ext4_mballoc(); | 4020 | exit_ext4_mballoc(); |
3963 | remove_proc_entry("fs/ext4", NULL); | 4021 | remove_proc_entry("fs/ext4", NULL); |
3964 | kset_unregister(ext4_kset); | 4022 | kset_unregister(ext4_kset); |
4023 | exit_ext4_system_zone(); | ||
3965 | } | 4024 | } |
3966 | 4025 | ||
3967 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 4026 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |